Skip to content

Commit b81fbf6

Browse files
author
shmelevik
committed
add ranking by frequency
1 parent 86ecf2f commit b81fbf6

File tree

3 files changed

+29
-6
lines changed

3 files changed

+29
-6
lines changed

.coverage

0 Bytes
Binary file not shown.

search_engine/search_engine.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@ def search(docs: list, word: str) -> list:
66
"""
77
def preprocess(text):
88
return re.sub(r'[^\w\s]', '', text).lower()
9-
9+
1010
result = []
1111
word = preprocess(word)
12+
1213
for doc in docs:
1314
cleaned_text = preprocess(doc['text'])
14-
if word in cleaned_text.split():
15-
result.append(doc['id'])
16-
return result
15+
word_count = cleaned_text.split().count(word)
16+
17+
if word_count:
18+
result.append((doc['id'], word_count))
19+
20+
result.sort(key=lambda x: x[1], reverse=True)
21+
return [id for id, _ in result]

tests/test_search_engine.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,30 @@ def test_search_empty_word(self, word):
6868
assert search(docs, word) == [], f'Expected [], but got {search(docs, word)}'
6969

7070

71-
def test_with_punctuation(self):
71+
def test_search_word_with_punctuation(self):
7272
"""
7373
Test that the search word can contain punctuation
7474
"""
7575
doc1 = {'id': 'doc1', 'text': "I can't shoot straight unless I've had a pint!"}
7676
docs = [doc1]
7777

7878
assert search(docs, 'pint') == ['doc1']
79-
assert search(docs, 'pint!') == ['doc1']
79+
assert search(docs, 'pint!') == ['doc1']
80+
81+
def test_search_ranging(self):
82+
"""
83+
Test that the search function returns results sorted by the frequency
84+
of the search word's occurrence in the document text.
85+
"""
86+
doc1 = "I can't shoot straight unless I've had a pint!"
87+
doc2 = "Don't shoot shoot shoot that thing at me."
88+
doc3 = "I'm your shooter."
89+
90+
docs = [
91+
{'id': 'doc1', 'text': doc1},
92+
{'id': 'doc2', 'text': doc2},
93+
{'id': 'doc3', 'text': doc3},
94+
]
95+
96+
result = search(docs, 'shoot')
97+
assert result == ['doc2', 'doc1']

0 commit comments

Comments
 (0)