@@ -95,20 +95,19 @@ namespace JSMdict {
9595 return result;
9696 }
9797
98- int levenshteinDistance (const std::string &s1, const std::string &s2) {
99- size_t len1 = s1.size (), len2 = s2.size ();
100- std::vector<std::vector<int >> dp (len1 + 1 , std::vector<int >(len2 + 1 ));
101-
102- for (size_t i = 0 ; i <= len1; ++i) dp[i][0 ] = i;
103- for (size_t j = 0 ; j <= len2; ++j) dp[0 ][j] = j;
104-
105- for (size_t i = 1 ; i <= len1; ++i) {
106- for (size_t j = 1 ; j <= len2; ++j) {
107- dp[i][j] = std::min ({dp[i - 1 ][j] + 1 , dp[i][j - 1 ] + 1 , dp[i - 1 ][j - 1 ] + (s1[i - 1 ] != s2[j - 1 ])});
108- }
109- }
110-
111- return dp[len1][len2];
/// Similarity of two strings, measured as the Jaccard index of their
/// case-folded byte bigrams.
///
/// Each string is reduced to the set of its adjacent byte pairs (each byte
/// lower-cased with std::tolower). The score is
/// |intersection| / |union| of the two sets, scaled to 0..100 and rounded to
/// the nearest integer (halves round up).
///
/// @param a First string.
/// @param b Second string.
/// @return Score in 0..100; 0 when either string has fewer than two bytes.
int bigramJaccard (std::string_view a, std::string_view b) {
    // Fold a single byte. The argument goes through unsigned char first:
    // std::tolower is undefined for negative values other than EOF, and a
    // plain (signed) char is negative for every non-ASCII byte.
    auto fold = [](char c) {
        const int lowered = std::tolower (static_cast<unsigned char>(c));
        return static_cast<unsigned>(static_cast<unsigned char>(lowered));
    };
    // Collect the distinct bigrams of s, each packed as (first << 8 | second).
    auto bigrams = [&fold](std::string_view s) {
        std::unordered_set<unsigned> out;
        for (size_t i = 0; i + 1 < s.size (); ++i)
            out.insert (fold (s[i]) << 8 | fold (s[i + 1]));
        return out;
    };
    const auto A = bigrams (a);
    const auto B = bigrams (b);
    if (A.empty () || B.empty ()) return 0;

    size_t common = 0;
    for (unsigned v: A) common += B.count (v);
    const size_t unionSize = A.size () + B.size () - common;
    // Exact integer round-half-up of 100 * common / unionSize, so the result
    // does not depend on floating-point rounding.
    return static_cast<int>((100 * common + unionSize / 2) / unionSize);
}
113112
114113 int computeScore (const std::string &candidateOriginal, const std::string &queryOriginal) {
@@ -118,28 +117,25 @@ namespace JSMdict {
118117 if (candidate == query) return 100 ;
119118 if (candidate.starts_with (query)) return 80 ;
120119 if (candidate.find (query) != std::string::npos) return 50 ;
121- return std::max ( 0 , 100 - levenshteinDistance ( candidate, query) * 10 );
120+ return bigramJaccard ( candidate, query);
122121 }
123122
124123 std::vector<JSVariant>
125124 sortKeyListByKeywordRelevance (std::vector<mdict::key_list_item *> &items, const std::string &query) {
126125 auto compare = [&query](const mdict::key_list_item *a, const mdict::key_list_item *b) {
127126 int scoreA = computeScore (a->key_word , query);
128127 int scoreB = computeScore (b->key_word , query);
129- return scoreA > scoreB || (scoreA == scoreB && a->record_start < b->record_start );
128+ if (a->key_word .size () != b->key_word .size ()) return a->key_word .size () < b->key_word .size ();
129+ return a->record_start < b->record_start ;
130130 };
131- if (items.size () >= 10 ) {
132- std::partial_sort (items.begin (), items.begin () + 10 , items.end (), compare);
133- items.resize (10 );
134- } else {
135- std::sort (items.begin (), items.begin (), compare);
136- }
131+ size_t k = std::min (size_t (10 ), items.size ());
132+ std::partial_sort (items.begin (), items.begin () + k, items.end (), compare);
133+ items.resize (k);
137134 auto list = std::vector<JSVariant>();
138135 for (auto item: items) list.push_back (JSVariant (item->key_word ));
139136 return list;
140137 }
141138
142-
143139 jsi::Value JSMdict::keyList (jsi::Runtime &runtime, std::string query) {
144140 checkInitialized (runtime);
145141 return Promise::createPromise (
@@ -159,4 +155,3 @@ namespace JSMdict {
159155 });
160156 }
161157}
162-
0 commit comments