Skip to content

Commit 86ecf2f

Browse files
author
shmelevik
committed
add search word processing
1 parent 2b0b10d commit 86ecf2f

File tree

4 files changed

+17
-2
lines changed

4 files changed

+17
-2
lines changed

.coverage

52 KB
Binary file not shown.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ readme = "README.md"
66
requires-python = ">=3.11"
77
dependencies = [
88
"pytest>=8.3.4",
9+
"pytest-cov>=6.0.0",
910
"ruff>=0.9.6",
1011
]

search_engine/search_engine.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@ def search(docs: list, word: str) -> list:
44
"""
55
Searches for a word in the documents and returns their IDs.
66
"""
7+
def preprocess(text):
8+
return re.sub(r'[^\w\s]', '', text).lower()
9+
710
result = []
8-
word = word.lower()
11+
word = preprocess(word)
912
for doc in docs:
10-
cleaned_text = re.sub(r'[^\w\s]', '', doc['text'].lower())
13+
cleaned_text = preprocess(doc['text'])
1114
if word in cleaned_text.split():
1215
result.append(doc['id'])
1316
return result

tests/basic_test.py renamed to tests/test_search_engine.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,14 @@ def test_search_empty_word(self, word):
6666
Test that the search function returns an empty list for empty or whitespace-only queries.
6767
"""
6868
assert search(docs, word) == [], f'Expected [], but got {search(docs, word)}'
69+
70+
71+
def test_with_punctuation(self):
72+
"""
73+
Test that the search word can contain punctuation
74+
"""
75+
doc1 = {'id': 'doc1', 'text': "I can't shoot straight unless I've had a pint!"}
76+
docs = [doc1]
77+
78+
assert search(docs, 'pint') == ['doc1']
79+
assert search(docs, 'pint!') == ['doc1']

0 commit comments

Comments
 (0)