@@ -193,6 +193,8 @@ export class LatinDictionary : public Dictionary {
193193 std::vector<fl::str::UniString> ngram;
194194 std::map<WordIdT, fl::str::UniString> id_to_words_map;
195195 WordIdT current_word_id = 1 ;
196+ total_scores_.clear ();
197+ vocab_sizes_.clear ();
196198
197199 while (std::getline (istream, line)) {
198200 fl::str::trim (line);
@@ -236,6 +238,10 @@ export class LatinDictionary : public Dictionary {
236238 }
237239 // Assign word to word ID map
238240 id_to_words_map[current_word_id++] = std::move (word);
241+ // compute frequency scores
242+ auto type = EntryType::word ();
243+ total_scores_[type] += properties->absolute_score ;
244+ vocab_sizes_[type]++;
239245 } else if (section == LatinDictionarySection::NGRAMS) {
240246 fl::str::split (line, FLDIC_SEPARATOR, line_components);
241247 if (line_components.size () < 2 ) {
@@ -251,6 +257,9 @@ export class LatinDictionary : public Dictionary {
251257 auto node = insertNgram (ngram);
252258 auto properties = node->value (dict_id_)->ngramProperties ();
253259 properties->absolute_score = std::stoll (line_components[1 ]);
260+ auto type = EntryType::ngram (line_components.size ());
261+ total_scores_[type] += properties->absolute_score ;
262+ vocab_sizes_[type]++;
254263 } else if (section == LatinDictionarySection::SHORTCUTS) {
255264 fl::str::split (line, FLDIC_SEPARATOR, line_components);
256265 if (line_components.size () < 2 ) {
@@ -262,12 +271,14 @@ export class LatinDictionary : public Dictionary {
262271 auto properties = node->valueOrCreate (dict_id_)->shortcutPropertiesOrCreate ();
263272 properties->absolute_score = 1 ;
264273 properties->shortcut_phrase = line_components[1 ];
274+ auto type = EntryType::shortcut ();
275+ total_scores_[type] += properties->absolute_score ;
276+ vocab_sizes_[type]++;
265277 }
266278 }
267279
268- // TODO: do this directly when reading the words/ngrams and avoid this heavy op
269- // TODO: this is necessary for the performance to be good during gradual loading
270- recalculateAllFrequencyScores ();
280+ // This is already done as we go.
281+ // recalculateAllFrequencyScores();
271282 }
272283
273284 void serializeContent (std::ostream& ostream) override {
0 commit comments