@@ -153,39 +153,53 @@ fn split_and_normalise_words(text: String) -> List(String) {
153153 |> string . replace ( "'" , "" )
154154 |> string . split ( " " )
155155 |> list . filter ( fn ( word ) { word != "" } )
156- |> list . map ( normalise_spelling )
156+ |> normalise ( [ ] )
157157}
158158
159- fn normalise_spelling ( word : String ) -> String {
160- case word {
161- "analyze" -> "analyse"
162- "authorize" -> "authorise"
163- "behavior" -> "behaviour"
164- "categorize" -> "categorise"
165- "color" -> "colour"
166- "customization" -> "customisation"
167- "customize" -> "customise"
168- "honor" -> "honour"
169- "initialize" -> "initialise"
170- "labeled" -> "labelled"
171- "labor" -> "labour"
172- "license" -> "licence"
173- "modeling" -> "modelling"
174- "normalization" -> "normalisation"
175- "normalize" -> "normalise"
176- "optimization" -> "optimisation"
177- "optimize" -> "optimise"
178- "organize" -> "organise"
179- "parameterize" -> "parameterise"
180- "deserialization" -> "deserialisation"
181- "deserialize" -> "deserialise"
182- "serialization" -> "serialisation"
183- "serialize" -> "serialise"
184- "standardize" -> "standardise"
185- "summarize" -> "summarise"
186- "synchronize" -> "synchronise"
187- "tokenize" -> "tokenise"
188- _ -> word
159+ /// Normalise spelling, unifying different English dialects and correcting some
160+ /// spelling mistakes, to get more permissive search results.
161+ /// `words` is the input still to process; `out` collects results in reverse order and is reversed once `words` is empty, so the initial call passes `[]` for `out`.
162+ fn normalise ( words : List ( String ) , out : List ( String ) ) -> List ( String ) {
163+ case words {
164+ [ ] -> list . reverse ( out )
165+
166+ // The adjacent word pairs "un" + "escape" and "un" + "escaping" are merged into the single words "unescape" / "unescaping" (presumably so a split-up "un-escape" or "un escape" still matches; confirm against the caller's tokenising).
167+ [ "un" , "escape" , .. words ] -> normalise ( words , [ "unescape" , .. out ] )
168+ [ "un" , "escaping" , .. words ] -> normalise ( words , [ "unescaping" , .. out ] )
169+ // Any other word: replace the listed spellings with their counterpart; words not in the table pass through unchanged.
170+ [ word , .. words ] -> {
171+ let word = case word {
172+ "analyze" -> "analyse"
173+ "authorize" -> "authorise"
174+ "behavior" -> "behaviour"
175+ "categorize" -> "categorise"
176+ "color" -> "colour"
177+ "customization" -> "customisation"
178+ "customize" -> "customise"
179+ "honor" -> "honour"
180+ "initialize" -> "initialise"
181+ "labeled" -> "labelled"
182+ "labor" -> "labour"
183+ "license" -> "licence"
184+ "modeling" -> "modelling"
185+ "normalization" -> "normalisation"
186+ "normalize" -> "normalise"
187+ "optimization" -> "optimisation"
188+ "optimize" -> "optimise"
189+ "organize" -> "organise"
190+ "parameterize" -> "parameterise"
191+ "deserialization" -> "deserialisation"
192+ "deserialize" -> "deserialise"
193+ "serialization" -> "serialisation"
194+ "serialize" -> "serialise"
195+ "standardize" -> "standardise"
196+ "summarize" -> "summarise"
197+ "synchronize" -> "synchronise"
198+ "tokenize" -> "tokenise"
199+ _ -> word
200+ }
201+ normalise ( words , [ word , .. out ] )
202+ }
189203 }
190204}
191205
0 commit comments