Skip to content

Commit f9b67ff

Browse files
author
shmelevik
committed
tfidf - 1st attempt
1 parent ad51492 commit f9b67ff

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

search_engine/search_engine.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,38 @@ def get_inverted_index(docs: list) -> dict:
5151
result[word] = search(docs, word)
5252

5353
return result
54+
55+
56+
def get_tf(doc: dict, all_terms: dict = None):
    """Accumulate raw term counts from one document into ``all_terms``.

    The document's ``'text'`` value is passed through ``preprocess`` (defined
    elsewhere in this module) and split on whitespace. The number of
    occurrences of each distinct term is then added to
    ``all_terms[term]['quant_in_doc']``.

    Args:
        doc: Mapping with a ``'text'`` entry holding the document text.
        all_terms: Mapping of term -> stats dict, updated in place. A new
            dict is created when omitted; unseen terms (or stats dicts
            without a ``'quant_in_doc'`` counter) start from zero.

    Returns:
        The updated ``all_terms`` mapping.
    """
    from collections import Counter

    if all_terms is None:
        all_terms = {}

    # Count each distinct term once per document. Looping over the raw token
    # list would add a repeated word's count once per occurrence.
    term_counts = Counter(preprocess(doc['text']).split())

    for word, count in term_counts.items():
        stats = all_terms.setdefault(word, {})
        stats['quant_in_doc'] = stats.get('quant_in_doc', 0) + count

    return all_terms
65+
66+
67+
def get_tf_idf(docs):
    """Compute TF-IDF weights for every term of every document.

    For a term ``t`` in document ``d``:

    * ``tf``  = occurrences of ``t`` in ``d`` / number of tokens in ``d``
    * ``idf`` = ``log(N / df)``, where ``N`` is the number of documents and
      ``df`` is the number of documents containing ``t``

    Tokens come from ``preprocess(doc['text']).split()``; ``preprocess`` is
    defined elsewhere in this module.

    Args:
        docs: Iterable of dicts, each with an ``'id'`` and a ``'text'`` entry.

    Returns:
        Dict mapping each document id to a ``{term: tf-idf weight}`` dict.
        An empty ``docs`` yields an empty dict; a document with no tokens
        maps to an empty dict.
    """
    import math
    from collections import Counter

    # Tokenise every document exactly once, keeping the input order.
    counts_per_doc = [
        (doc['id'], Counter(preprocess(doc['text']).split()))
        for doc in docs
    ]
    total_docs = len(counts_per_doc)

    # Document frequency: in how many documents each term appears.
    doc_freq = Counter()
    for _, counts in counts_per_doc:
        doc_freq.update(counts.keys())

    tf_idf = {}
    for doc_id, counts in counts_per_doc:
        doc_length = sum(counts.values())
        # An empty document has no terms, so the division below never runs.
        tf_idf[doc_id] = {
            word: (count / doc_length) * math.log(total_docs / doc_freq[word])
            for word, count in counts.items()
        }

    return tf_idf
76+
77+
78+
# Small demo corpus used to exercise get_tf_idf when the module is run.
doc1 = "I can't shoot straight unless I've had a pint!"
doc2 = "Don't shoot shoot shoot that thing at me."
doc3 = "I'm your shooter."

# Each corpus entry pairs a document id with its raw text.
docs = [
    {'id': doc_id, 'text': doc_text}
    for doc_id, doc_text in (('doc1', doc1), ('doc2', doc2), ('doc3', doc3))
]

print(get_tf_idf(docs))

0 commit comments

Comments
 (0)