88import operator
99from simple_NER .settings import STOPLIST
1010
11+
1112def is_number (s ):
1213 try :
1314 float (s ) if '.' in s else int (s )
@@ -43,7 +44,7 @@ def separate_words(text, min_word_return_size):
4344 # leave numbers in phrase, but don't count as words, since they tend to invalidate scores of their phrases
4445 if len (
4546 current_word ) > min_word_return_size and current_word != '' and not is_number (
46- current_word ):
47+ current_word ):
4748 words .append (current_word )
4849 return words
4950
@@ -104,7 +105,7 @@ def calculate_word_scores(phraseList):
104105 for item in word_frequency :
105106 word_score .setdefault (item , 0 )
106107 word_score [item ] = word_degree [item ] / (
107- word_frequency [item ] * 1.0 ) # orig.
108+ word_frequency [item ] * 1.0 ) # orig.
108109 # word_score[item] = word_frequency[item]/(word_degree[item] * 1.0) #exp.
109110 return word_score
110111
@@ -121,7 +122,7 @@ def generate_candidate_keyword_scores(phrase_list, word_score):
121122 return keyword_candidates
122123
123124
124- class Rake ( object ) :
125+ class Rake :
125126 def __init__ (self , stop_words_path = STOPLIST ):
126127 self .stop_words_path = stop_words_path
127128 self .__stop_words_pattern = build_stop_word_regex (stop_words_path )
@@ -179,4 +180,5 @@ def run(self, text):
179180 rake = Rake ("SmartStoplist.txt" )
180181 keywords = rake .run (text )
181182 from pprint import pprint
183+
182184 pprint (keywords )
0 commit comments