Skip to content

Commit 8a1f39f

Browse files
committed
fix: optimistic word sort
1 parent bc719e5 commit 8a1f39f

3 files changed

Lines changed: 21 additions & 26 deletions

File tree

.nvmrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
v20.19.0
1+
v20

cpp/JSMdict.cpp

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -95,20 +95,19 @@ namespace JSMdict {
9595
return result;
9696
}
9797

98-
int levenshteinDistance(const std::string &s1, const std::string &s2) {
99-
size_t len1 = s1.size(), len2 = s2.size();
100-
std::vector<std::vector<int>> dp(len1 + 1, std::vector<int>(len2 + 1));
101-
102-
for (size_t i = 0; i <= len1; ++i) dp[i][0] = i;
103-
for (size_t j = 0; j <= len2; ++j) dp[0][j] = j;
104-
105-
for (size_t i = 1; i <= len1; ++i) {
106-
for (size_t j = 1; j <= len2; ++j) {
107-
dp[i][j] = std::min({dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + (s1[i - 1] != s2[j - 1])});
108-
}
109-
}
110-
111-
return dp[len1][len2];
98+
/**
 * Case-insensitive bigram similarity of two strings, as a percentage.
 *
 * Each string is reduced to the set of its adjacent byte pairs (bigrams), lower-cased
 * with std::tolower. The result is the Jaccard index of the two sets,
 * |A intersect B| / |A union B|, scaled to 0..100 and rounded to the nearest integer.
 *
 * @param a first string
 * @param b second string
 * @return similarity in [0, 100]; 0 when either string has fewer than two bytes
 *         (and therefore no bigrams).
 */
int bigramJaccard(std::string_view a, std::string_view b) {
    // std::tolower has undefined behaviour for negative arguments other than EOF, so
    // every byte is converted through unsigned char first. This matters for non-ASCII
    // (e.g. UTF-8) bytes on platforms where plain char is signed.
    const auto lower = [](char c) {
        return static_cast<unsigned>(std::tolower(static_cast<unsigned char>(c))) & 0xFFu;
    };
    // Packs each pair of neighbouring bytes into a single 16-bit key and stores it in `out`.
    const auto bigrams = [&lower](std::string_view s, auto &out) {
        for (size_t i = 0; i + 1 < s.size(); ++i)
            out.insert(lower(s[i]) << 8 | lower(s[i + 1]));
    };
    std::unordered_set<unsigned> A, B;
    bigrams(a, A);
    bigrams(b, B);
    // No bigrams on one side: nothing to compare, and it avoids dividing by zero below.
    if (A.empty() || B.empty()) return 0;
    size_t comm = 0;
    for (const auto v: A) if (B.count(v)) ++comm;
    // |A union B| = |A| + |B| - |A intersect B|
    return static_cast<int>(std::round(float(comm) / float(A.size() + B.size() - comm) * 100));
}
113112

114113
int computeScore(const std::string &candidateOriginal, const std::string &queryOriginal) {
@@ -118,28 +117,25 @@ namespace JSMdict {
118117
if (candidate == query) return 100;
119118
if (candidate.starts_with(query)) return 80;
120119
if (candidate.find(query) != std::string::npos) return 50;
121-
return std::max(0, 100 - levenshteinDistance(candidate, query) * 10);
120+
return bigramJaccard(candidate, query);
122121
}
123122

124123
std::vector<JSVariant>
125124
sortKeyListByKeywordRelevance(std::vector<mdict::key_list_item *> &items, const std::string &query) {
126125
auto compare = [&query](const mdict::key_list_item *a, const mdict::key_list_item *b) {
127126
int scoreA = computeScore(a->key_word, query);
128127
int scoreB = computeScore(b->key_word, query);
129-
return scoreA > scoreB || (scoreA == scoreB && a->record_start < b->record_start);
128+
if (a->key_word.size() != b->key_word.size()) return a->key_word.size() < b->key_word.size();
129+
return a->record_start < b->record_start;
130130
};
131-
if (items.size() >= 10) {
132-
std::partial_sort(items.begin(), items.begin() + 10, items.end(), compare);
133-
items.resize(10);
134-
} else {
135-
std::sort(items.begin(), items.begin(), compare);
136-
}
131+
size_t k = std::min(size_t(10), items.size());
132+
std::partial_sort(items.begin(), items.begin() + k, items.end(), compare);
133+
items.resize(k);
137134
auto list = std::vector<JSVariant>();
138135
for (auto item: items) list.push_back(JSVariant(item->key_word));
139136
return list;
140137
}
141138

142-
143139
jsi::Value JSMdict::keyList(jsi::Runtime &runtime, std::string query) {
144140
checkInitialized(runtime);
145141
return Promise::createPromise(
@@ -159,4 +155,3 @@ namespace JSMdict {
159155
});
160156
}
161157
}
162-

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "react-native-mdict",
3-
"version": "0.3.1",
3+
"version": "0.3.2",
44
"description": "A mdict(*.mdx/mdd) file parser.",
55
"main": "./lib/module/index.js",
66
"types": "./lib/typescript/src/index.d.ts",

0 commit comments

Comments
 (0)