File tree Expand file tree Collapse file tree 2 files changed +41
-7
lines changed Expand file tree Collapse file tree 2 files changed +41
-7
lines changed Original file line number Diff line number Diff line change 11import re
2+ from collections import defaultdict
23
3- def search (docs : list , word : str ) -> list :
4+
5+ def search (docs : list , words : str ) -> list :
46 """
5- Searches for a word in the documents and returns their IDs.
7+ Searches for words in the documents and returns their IDs,
8+ ranked by the total count of word occurrences in the document text.
69 """
710 def preprocess (text ):
811 return re .sub (r'[^\w\s]' , '' , text ).lower ()
912
1013 result = []
11- word = preprocess (word )
14+ words = preprocess (words ).split ()
15+
1216
1317 for doc in docs :
1418 cleaned_text = preprocess (doc ['text' ])
15- word_count = cleaned_text .split ().count (word )
19+ word_count = 0
20+
21+ for word in words :
22+ word_count += cleaned_text .split ().count (word )
1623
1724 if word_count :
18- result .append ((doc ['id' ], word_count ))
25+ result .append ({
26+ 'id' : doc ['id' ],
27+ 'word_count' : word_count
28+ })
29+
30+ result .sort (key = lambda x : x ['word_count' ], reverse = True )
1931
20- result .sort (key = lambda x : x [1 ], reverse = True )
21- return [id for id , _ in result ]
32+ return [item ['id' ] for item in result ]
Original file line number Diff line number Diff line change @@ -94,4 +94,27 @@ def test_search_ranging(self):
9494 ]
9595
9696 result = search (docs , 'shoot' )
97+ assert result == ['doc2' , 'doc1' ]
98+
99+ def test_search_multiple_words (self ):
100+ """
101+ Test that the search function returns documents containing any of the words
102+ in the search query, and sorts them based on the frequency of the words' occurrence.
103+
104+ The test ensures the following:
105+ - The search can handle multiple words (e.g., 'shoot at me').
106+ - Documents are returned in the order of their relevance (based on word frequency).
107+ - Words in the search query are treated independently (i.e., not as a single phrase).
108+ """
109+ doc1 = "I can't shoot straight unless I've had a pint!"
110+ doc2 = "Don't shoot shoot shoot that thing at me."
111+ doc3 = "I'm your shooter."
112+
113+ docs = [
114+ {'id' : 'doc1' , 'text' : doc1 },
115+ {'id' : 'doc2' , 'text' : doc2 },
116+ {'id' : 'doc3' , 'text' : doc3 },
117+ ]
118+
119+ result = search (docs , 'shoot at me' )
97120 assert result == ['doc2' , 'doc1' ]
You can’t perform that action at this time.
0 commit comments