
250
2부 빌딩 블록
for i in range(len(text) - len(term) + 1):
j = i+len(term)
end_of_word = j == len(text) or not text[j].isalpha()
begin_of_word = i == 0 or not text[i-1].isalpha()
if begin_of_word and \
end_of_word and \
term == text[i:i+len(term)]:
yield (term, i, j)
# Time the matcher over corpora of increasing size and print a few sample
# term counts for each run.
timing = []  # one (document count, elapsed milliseconds) tuple per corpus
for texts in [raw_10, raw_50, raw_100]:
    start = time()  # presumably time.time(), i.e. seconds -- scaled to ms below
    references = get_match_counts(texts, simple_match, en_stopwords)
    # Scale to milliseconds BEFORE truncating: int() applied to the raw
    # difference in seconds would floor every sub-second run to 0 ms.
    elapsed_ms = int((time() - start) * 1000)
    timing.append((len(texts), elapsed_ms))
    print('the', sum(references['the'].values()))
    print('about', sum(references['about'].values()))
    print('wouldn\'t', sum(references['wouldn\'t'].values()))
    print('{} documents in {} ms'.format(*timing[-1]))