Skip to content

Commit 2440ba6

Browse files
authored
feat: fuzzy tool search
1 parent 08131c1 commit 2440ba6

13 files changed

Lines changed: 440 additions & 142 deletions

File tree

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package com.kylecorry.trail_sense.shared.text.search
2+
3+
import com.kylecorry.trail_sense.shared.text.TextUtils
4+
import kotlin.math.max
5+
6+
/**
 * A [SearchStrategy] that scores an item against a query with
 * [TextUtils.getQueryMatchPercent].
 *
 * The returned score is the larger of:
 * - the match against the item's keywords, multiplied by [SearchItem.scoreMultiplier] and,
 *   when the parent title matches well, by [parentMatchBoost]
 * - the match against the item's title, replaced by [titleMatchBoost] when the query and
 *   the title fully match each other
 *
 * @param preservedWords words passed to the matcher as preserved words for every item
 * @param additionalStopWords forwarded unchanged to the matcher
 * @param synonyms synonym groups passed to the matcher for every item
 * @param additionalContractions forwarded unchanged to the matcher
 * @param additionalStemWords forwarded unchanged to the matcher
 * @param parentMatchBoost factor applied to the keyword score when the parent title match
 * exceeds [goodMatchThreshold]
 * @param goodMatchThreshold value the parent title match must exceed to trigger the boost
 * @param titleMatchBoost title score used when query and title match 100% in both directions
 */
class EnglishFuzzySearchStrategy(
    private val preservedWords: Set<String> = emptySet(),
    private val additionalStopWords: Set<String> = emptySet(),
    private val synonyms: List<Set<String>> = emptyList(),
    private val additionalContractions: Map<String, List<String>> = emptyMap(),
    private val additionalStemWords: Map<String, String> = emptyMap(),
    private val parentMatchBoost: Float = 1.15f,
    private val goodMatchThreshold: Float = 0.8f,
    private val titleMatchBoost: Float = 1.1f
) : SearchStrategy {

    /**
     * Scores [item] against [query].
     *
     * @param query the text the user searched for
     * @param item the candidate; its keywords, title, parent title and score multiplier are used
     * @return the larger of the (possibly boosted) keyword score and the title score
     */
    override fun getSearchScore(query: String, item: SearchItem): Float {
        // Any keyword with a dash gets a synonym where the dash is replaced by a space
        val dashedKeywordSynonyms = item.keywords
            .filter { it.contains("-") }
            .map { setOf(it, it.replace("-", " ")) }

        // Multi-word / dashed keywords and their generated synonyms are preserved as-is.
        // These merged collections are built once and shared by every match below.
        val allPreservedWords = preservedWords +
            item.keywords.filter { it.contains(" ") || it.contains("-") } +
            dashedKeywordSynonyms.flatten()
        val allSynonyms = synonyms + dashedKeywordSynonyms

        // Single place that forwards the shared matcher configuration
        fun matchPercent(needle: String, haystack: String): Float =
            TextUtils.getQueryMatchPercent(
                needle,
                haystack,
                preservedWords = allPreservedWords,
                additionalStopWords = additionalStopWords,
                synonyms = allSynonyms,
                additionalContractions = additionalContractions,
                additionalStemWords = additionalStemWords
            )

        val keywordMatch = matchPercent(query, item.keywords.joinToString(", "))
        val titleMatch = matchPercent(query, item.title)
        // Title matched against the query, to detect that neither side has extra words
        val inverseTitleMatch = matchPercent(item.title, query)
        val parentMatch = item.parent?.let { parent -> matchPercent(query, parent.title) }

        val scaledKeywordMatch = keywordMatch * item.scoreMultiplier

        // If the parent matches, boost the item match a little
        val itemScore = if (parentMatch != null && parentMatch > goodMatchThreshold) {
            scaledKeywordMatch * parentMatchBoost
        } else {
            scaledKeywordMatch
        }

        // If the title fully matches in both directions (exact 1f comparison), increase it
        val titleScore = if (titleMatch == 1f && inverseTitleMatch == 1f) {
            titleMatchBoost
        } else {
            titleMatch
        }

        return max(itemScore, titleScore)
    }
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package com.kylecorry.trail_sense.shared.text.search
2+
3+
import com.kylecorry.trail_sense.shared.text.LevenshteinDistance
4+
import com.kylecorry.trail_sense.shared.text.nlp.processors.LowercaseProcessor
5+
import com.kylecorry.trail_sense.shared.text.nlp.processors.SequentialProcessor
6+
import com.kylecorry.trail_sense.shared.text.nlp.tokenizers.PostProcessedTokenizer
7+
import com.kylecorry.trail_sense.shared.text.nlp.tokenizers.SimpleWordTokenizer
8+
import kotlin.math.max
9+
10+
/**
 * A [SearchStrategy] that compares lowercased word tokens of the query with the item's
 * keywords, title and parent title, using exact membership first and
 * [LevenshteinDistance.percentSimilarity] otherwise.
 *
 * @param parentMatchBoost factor applied to the keyword score when the parent title match
 * exceeds [goodMatchThreshold]
 * @param goodMatchThreshold value the parent title match must exceed to trigger the boost
 * @param titleMatchBoost title score used when the query and the title match 100% in both
 * directions
 */
class MultilingualFuzzySearchStrategy(
    private val parentMatchBoost: Float = 1.15f,
    private val goodMatchThreshold: Float = 0.8f,
    private val titleMatchBoost: Float = 1.1f
) : SearchStrategy {

    private val tokenizer = PostProcessedTokenizer(
        SimpleWordTokenizer(),
        SequentialProcessor(
            LowercaseProcessor()
        )
    )

    /**
     * Scores [item] against [query].
     *
     * @param query the text the user searched for
     * @param item the candidate; its keywords, title, parent title and score multiplier are used
     * @return the larger of the (possibly boosted) keyword score and the title score; 0 when
     * the query produces no tokens
     */
    override fun getSearchScore(
        query: String,
        item: SearchItem
    ): Float {
        val queryKeywords = tokenizer.tokenize(query).toSet()

        // NOTE(review): keywords are only split on dashes, they are not run through the
        // tokenizer, so they are presumably expected to already be lowercase single words -
        // confirm against the callers.
        val textKeywords = item.keywords.flatMap { it.split("-") }.toSet()
        val headerKeywords = tokenizer.tokenize(item.title).toSet()
        val chapterKeywords = tokenizer.tokenize(item.parent?.title ?: "").toSet()

        // Honor the item's multiplier the same way EnglishFuzzySearchStrategy does
        val keywordMatch = percentMatch(queryKeywords, textKeywords) * item.scoreMultiplier
        val titleMatch = percentMatch(queryKeywords, headerKeywords)
        val inverseTitleMatch = percentMatch(headerKeywords, queryKeywords)
        val parentMatch = percentMatch(queryKeywords, chapterKeywords)

        // If the parent matches, boost the item match a little
        val itemScore = if (parentMatch > goodMatchThreshold) {
            keywordMatch * parentMatchBoost
        } else {
            keywordMatch
        }

        // If the user exactly matched the title, they probably want to see that
        val titleScore = if (titleMatch == 1f && inverseTitleMatch == 1f) {
            titleMatchBoost
        } else {
            titleMatch
        }

        return max(itemScore, titleScore)
    }

    /**
     * Average, over [queryKeywords], of how well each word is covered by [textKeywords]:
     * 1 for a word contained in [textKeywords], otherwise the highest similarity to any of
     * them (0 when [textKeywords] is empty).
     *
     * @return the average score, or 0 when [queryKeywords] is empty (avoids 0 / 0 = NaN)
     */
    private fun percentMatch(queryKeywords: Set<String>, textKeywords: Set<String>): Float {
        if (queryKeywords.isEmpty()) {
            return 0f
        }

        val distanceMetric = LevenshteinDistance()
        var total = 0f

        for (queryWord in queryKeywords) {
            total += if (queryWord in textKeywords) {
                1f
            } else {
                textKeywords.maxOfOrNull { distanceMetric.percentSimilarity(queryWord, it) } ?: 0f
            }
        }

        return total / queryKeywords.size
    }
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package com.kylecorry.trail_sense.shared.text.search
2+
3+
/**
 * A candidate that a search strategy can score against a query.
 *
 * @property id identifier supplied by the creator of the item
 * @property title the item's title
 * @property keywords keywords describing the item; empty by default
 * @property parent the item this one belongs to, or null when it has none
 * @property scoreMultiplier factor a strategy may apply to the item's score (for example,
 * EnglishFuzzySearchStrategy multiplies its keyword match by it); 1 by default
 */
class SearchItem(
    val id: String,
    val title: String,
    val keywords: Set<String> = emptySet(),
    val parent: SearchItem? = null,
    val scoreMultiplier: Float = 1f
)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package com.kylecorry.trail_sense.shared.text.search
2+
3+
/**
 * Scores how well a [SearchItem] matches a search query.
 */
interface SearchStrategy {

    /**
     * Calculates the score of [item] for [query].
     *
     * @param query the text being searched for
     * @param item the candidate to score
     * @return the score of the item for the query
     */
    fun getSearchScore(query: String, item: SearchItem): Float

}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package com.kylecorry.trail_sense.shared.text.search
2+
3+
import com.kylecorry.sol.math.MathExtensions.real
4+
5+
/**
 * A [SearchStrategy] decorator that favors items whose title literally starts with or
 * contains the query (case-insensitive).
 *
 * @param strategy the strategy that produces the base score
 * @param containsBoost factor applied to the base score when the title contains the query
 * @param startsWithBoost factor applied to the base score when the title starts with the query
 * @param containsMinScore lowest score returned when the title contains the query
 * @param startsWithMinScore lowest score returned when the title starts with the query
 */
class TitleScoreBoostSearchStrategy(
    private val strategy: SearchStrategy,
    private val containsBoost: Float = 1.1f,
    private val startsWithBoost: Float = 1.1f,
    private val containsMinScore: Float = 0.5f,
    private val startsWithMinScore: Float = 0.6f
) : SearchStrategy {

    /**
     * Scores [item] with the wrapped strategy, then applies the title boost and minimum.
     *
     * A prefix match takes precedence over a plain contains match. Note that an empty
     * [query] is a prefix of every title, so it always takes the "starts with" branch.
     *
     * @return the boosted base score, but never less than the minimum of the matching branch
     */
    override fun getSearchScore(query: String, item: SearchItem): Float {
        // Non-real base scores are replaced by 0 via real(0f)
        val baseScore = strategy.getSearchScore(query, item).real(0f)
        val heading = item.title

        val (multiplier, floor) = if (heading.startsWith(query, ignoreCase = true)) {
            startsWithBoost to startsWithMinScore
        } else if (heading.contains(query, ignoreCase = true)) {
            containsBoost to containsMinScore
        } else {
            1f to 0f
        }

        return maxOf(baseScore * multiplier, floor)
    }
}

app/src/main/java/com/kylecorry/trail_sense/tools/survival_guide/infrastructure/EnglishSurvivalGuideFuzzySearch.kt

Lines changed: 23 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ package com.kylecorry.trail_sense.tools.survival_guide.infrastructure
22

33
import android.content.Context
44
import com.kylecorry.trail_sense.R
5-
import com.kylecorry.trail_sense.shared.text.TextUtils
6-
import kotlin.math.max
5+
import com.kylecorry.trail_sense.shared.text.search.EnglishFuzzySearchStrategy
6+
import com.kylecorry.trail_sense.shared.text.search.SearchItem
77

88
class EnglishSurvivalGuideFuzzySearch(private val context: Context, loader: GuideLoader) :
99
BaseSurvivalGuideSearch(loader) {
@@ -289,78 +289,34 @@ class EnglishSurvivalGuideFuzzySearch(private val context: Context, loader: Guid
289289
),
290290
)
291291

292+
private val search = EnglishFuzzySearchStrategy(
293+
preservedWords,
294+
additionalStopWords,
295+
synonyms,
296+
additionalContractions,
297+
additionalStemWords
298+
)
299+
292300
override fun getSectionScore(
293301
query: String,
294302
section: GuideSection
295303
): Float {
296-
val sectionKeywords = section.keywords.joinToString(", ")
297-
298-
val additionalPreservedWords =
299-
section.keywords.filter { it.contains(" ") || it.contains("-") }.toMutableSet()
300-
301-
// Any keywords with a dash should have a synonym with a space
302-
val additionalSynonyms = section.keywords
303-
.filter { it.contains("-") }
304-
.map { setOf(it, it.replace("-", " ")) }
305-
306-
// Add the synonyms to the preserved words
307-
additionalPreservedWords.addAll(additionalSynonyms.flatten())
308-
309-
var sectionMatch = TextUtils.getQueryMatchPercent(
310-
query,
311-
sectionKeywords,
312-
preservedWords = preservedWords + additionalPreservedWords,
313-
additionalStopWords = additionalStopWords,
314-
synonyms = synonyms + additionalSynonyms,
315-
additionalContractions = additionalContractions,
316-
additionalStemWords = additionalStemWords
317-
)
318-
319-
var headerMatch = TextUtils.getQueryMatchPercent(
320-
query,
321-
section.title ?: context.getString(R.string.overview),
322-
preservedWords = preservedWords + additionalPreservedWords,
323-
additionalStopWords = additionalStopWords,
324-
synonyms = synonyms + additionalSynonyms,
325-
additionalContractions = additionalContractions,
326-
additionalStemWords = additionalStemWords
327-
)
304+
// Rank the be prepared and overview sections lower
305+
val scoreMultiplier =
306+
if (section.title?.uppercase()?.trim() == "BE PREPARED" || section.title == null) {
307+
0.9f
308+
} else {
309+
1f
310+
}
328311

329-
var inverseHeaderMatch = TextUtils.getQueryMatchPercent(
312+
val item = SearchItem(
313+
"${section.chapter.title} ${section.title}",
330314
section.title ?: context.getString(R.string.overview),
331-
query,
332-
preservedWords = preservedWords + additionalPreservedWords,
333-
additionalStopWords = additionalStopWords,
334-
synonyms = synonyms + additionalSynonyms,
335-
additionalContractions = additionalContractions,
336-
additionalStemWords = additionalStemWords
337-
)
338-
339-
val chapterMatch = TextUtils.getQueryMatchPercent(
340-
query,
341-
section.chapter.title,
342-
preservedWords = preservedWords + additionalPreservedWords,
343-
additionalStopWords = additionalStopWords,
344-
synonyms = synonyms + additionalSynonyms,
345-
additionalContractions = additionalContractions,
346-
additionalStemWords = additionalStemWords
315+
section.keywords,
316+
parent = SearchItem(section.chapter.title, section.chapter.title),
317+
scoreMultiplier = scoreMultiplier
347318
)
348319

349-
// Rank the be prepared and overview sections lower
350-
if (section.title?.uppercase()?.trim() == "BE PREPARED" || section.title == null) {
351-
sectionMatch *= 0.9f
352-
}
353-
354-
if (chapterMatch > 0.8f) {
355-
// If the chapter matches, boost the section match a little
356-
sectionMatch *= 1.15f
357-
}
358-
359-
// If the header has a good match, increase it
360-
if (headerMatch == 1f && inverseHeaderMatch == 1f) {
361-
headerMatch = 1.1f
362-
}
363-
364-
return max(sectionMatch, headerMatch)
320+
return search.getSearchScore(query, item)
365321
}
366322
}

0 commit comments

Comments
 (0)