Skip to content

Commit a68f1f5

Browse files
committed
Handle un-escape
1 parent 07aa17e commit a68f1f5

File tree

3 files changed

+63
-33
lines changed

3 files changed

+63
-33
lines changed

src/packages/override.gleam

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ pub fn is_ignored_package(name: String) -> Bool {
3232
}
3333
}
3434

35-
/// Some words have common misspellings or associated words so we add those to
36-
/// the search to get all appropriate results.
35+
/// Some words have common alternative words so we add those to the search to
36+
/// get all appropriate results.
37+
///
38+
/// Common spelling mistakes and dialect variations are handled in
39+
/// `text_search.normalise` instead.
40+
///
3741
pub fn expand_search_term(term: String) -> List(String) {
3842
case term {
3943
"postgres" | "postgresql" -> ["postgres", "postgresql"]

src/packages/text_search.gleam

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -153,39 +153,53 @@ fn split_and_normalise_words(text: String) -> List(String) {
153153
|> string.replace("'", "")
154154
|> string.split(" ")
155155
|> list.filter(fn(word) { word != "" })
156-
|> list.map(normalise_spelling)
156+
|> normalise([])
157157
}
158158

159-
fn normalise_spelling(word: String) -> String {
160-
case word {
161-
"analyze" -> "analyse"
162-
"authorize" -> "authorise"
163-
"behavior" -> "behaviour"
164-
"categorize" -> "categorise"
165-
"color" -> "colour"
166-
"customization" -> "customisation"
167-
"customize" -> "customise"
168-
"honor" -> "honour"
169-
"initialize" -> "initialise"
170-
"labeled" -> "labelled"
171-
"labor" -> "labour"
172-
"license" -> "licence"
173-
"modeling" -> "modelling"
174-
"normalization" -> "normalisation"
175-
"normalize" -> "normalise"
176-
"optimization" -> "optimisation"
177-
"optimize" -> "optimise"
178-
"organize" -> "organise"
179-
"parameterize" -> "parameterise"
180-
"deserialization" -> "deserialisation"
181-
"deserialize" -> "deserialise"
182-
"serialization" -> "serialisation"
183-
"serialize" -> "serialise"
184-
"standardize" -> "standardise"
185-
"summarize" -> "summarise"
186-
"synchronize" -> "synchronise"
187-
"tokenize" -> "tokenise"
188-
_ -> word
159+
/// Normalise spelling, unifying different English dialects and correcting some
160+
/// spelling mistakes, to get more permissive search results.
161+
///
162+
fn normalise(words: List(String), out: List(String)) -> List(String) {
163+
case words {
164+
[] -> list.reverse(out)
165+
166+
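// The prefix "un" can arrive as a separate word (e.g. from "un-escape"), so merge it back into a single word.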
167+
["un", "escape", ..words] -> normalise(words, ["unescape", ..out])
168+
["un", "escaping", ..words] -> normalise(words, ["unescaping", ..out])
169+
170+
[word, ..words] -> {
171+
let word = case word {
172+
"analyze" -> "analyse"
173+
"authorize" -> "authorise"
174+
"behavior" -> "behaviour"
175+
"categorize" -> "categorise"
176+
"color" -> "colour"
177+
"customization" -> "customisation"
178+
"customize" -> "customise"
179+
"honor" -> "honour"
180+
"initialize" -> "initialise"
181+
"labeled" -> "labelled"
182+
"labor" -> "labour"
183+
"license" -> "licence"
184+
"modeling" -> "modelling"
185+
"normalization" -> "normalisation"
186+
"normalize" -> "normalise"
187+
"optimization" -> "optimisation"
188+
"optimize" -> "optimise"
189+
"organize" -> "organise"
190+
"parameterize" -> "parameterise"
191+
"deserialization" -> "deserialisation"
192+
"deserialize" -> "deserialise"
193+
"serialization" -> "serialisation"
194+
"serialize" -> "serialise"
195+
"standardize" -> "standardise"
196+
"summarize" -> "summarise"
197+
"synchronize" -> "synchronise"
198+
"tokenize" -> "tokenise"
199+
_ -> word
200+
}
201+
normalise(words, [word, ..out])
202+
}
189203
}
190204
}
191205

test/packages/text_search_test.gleam

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,15 @@ pub fn lookup_ist_test() {
236236
let assert Ok(value) = text_search.lookup(index, "geology")
237237
assert value == [Found("geologist", 1)]
238238
}
239+
240+
// Odysseus writes "unescape" as "un-escape"
241+
pub fn un_escape_test() {
242+
let index = text_search.new()
243+
let assert Ok(_) =
244+
text_search.insert(index, "houdini", "🪄 Fast HTML escaping")
245+
let assert Ok(_) = text_search.insert(index, "odysseus", "UN-escaping HTML")
246+
247+
assert text_search.lookup(index, "escape") == Ok([Found("houdini", 1)])
248+
assert text_search.lookup(index, "un-escape") == Ok([Found("odysseus", 1)])
249+
assert text_search.lookup(index, "unescape") == Ok([Found("odysseus", 1)])
250+
}

0 commit comments

Comments
 (0)