File tree Expand file tree Collapse file tree 1 file changed +35
-0
lines changed Expand file tree Collapse file tree 1 file changed +35
-0
lines changed Original file line number Diff line number Diff line change @@ -51,3 +51,38 @@ def get_inverted_index(docs: list) -> dict:
5151 result [word ] = search (docs , word )
5252
5353 return result
54+
55+
def get_tf(doc: dict, all_terms: dict = None):
    """Accumulate the raw term counts of one document into ``all_terms``.

    The document text is normalised with ``preprocess`` and split on
    whitespace. Each distinct term's number of occurrences in this document
    is added once to ``all_terms[term]['quant_in_doc']``.

    Args:
        doc: Mapping with a ``'text'`` entry holding the document text.
        all_terms: Mapping of term -> per-term stats dict. It is updated in
            place; terms not seen before get a fresh entry. When omitted,
            a new dict is created.

    Returns:
        The (possibly newly created) ``all_terms`` mapping.
    """
    from collections import Counter

    # The default is None; subscripting it would raise TypeError.
    if all_terms is None:
        all_terms = {}

    # Count every term in a single pass. Iterating the raw token list and
    # adding the full count per token would add k*k for a term seen k times.
    term_counts = Counter(preprocess(doc['text']).split())

    for word, count in term_counts.items():
        entry = all_terms.setdefault(word, {})
        entry['quant_in_doc'] = entry.get('quant_in_doc', 0) + count

    return all_terms
65+
66+
def get_tf_idf(docs):
    """Compute TF-IDF weights for every term of every document.

    Each document text is normalised with ``preprocess`` and split on
    whitespace. For a term ``t`` in document ``d``:

    * ``tf(t, d)``  = occurrences of ``t`` in ``d`` / number of tokens in ``d``
    * ``idf(t)``    = ``log(N / df(t))`` where ``N`` is the number of
      documents and ``df(t)`` the number of documents containing ``t``
    * weight        = ``tf(t, d) * idf(t)``

    Args:
        docs: Iterable of mappings, each with an ``'id'`` and a ``'text'``
            entry.

    Returns:
        Dict mapping each document id to a dict of ``term -> tf-idf weight``.
        A document with no tokens maps to an empty dict; an empty ``docs``
        yields an empty dict.
    """
    import math
    from collections import Counter

    # Tokenise each document exactly once and keep the per-document counts.
    per_doc = [
        (doc['id'], Counter(preprocess(doc['text']).split()))
        for doc in docs
    ]
    n_docs = len(per_doc)

    # Document frequency: every document contributes at most 1 per term,
    # so feed the Counter the distinct terms (keys), not the counts.
    doc_freq = Counter()
    for _, counts in per_doc:
        doc_freq.update(counts.keys())

    result = {}
    for doc_id, counts in per_doc:
        total_tokens = sum(counts.values())
        # An empty ``counts`` makes the comprehension empty, so the division
        # by ``total_tokens`` is never evaluated for token-less documents.
        result[doc_id] = {
            word: (count / total_tokens) * math.log(n_docs / doc_freq[word])
            for word, count in counts.items()
        }

    return result
76+
77+
# Small demo corpus: three short texts that share forms of the word "shoot".
doc1 = "I can't shoot straight unless I've had a pint!"
doc2 = "Don't shoot shoot shoot that thing at me."
doc3 = "I'm your shooter."

# Pair every text with its identifier in the {'id', 'text'} record format.
docs = [
    {'id': doc_id, 'text': doc_text}
    for doc_id, doc_text in (
        ('doc1', doc1),
        ('doc2', doc2),
        ('doc3', doc3),
    )
]

# Print the TF-IDF result for the demo corpus.
print(get_tf_idf(docs))
You can’t perform that action at this time.
0 commit comments