Skip to content

Commit 0b8a482

Browse files
match_fuzzy: simplify logic to determine best match
Co-authored-by: Olivier Dormond <odormond@users.noreply.github.com>
1 parent 481c273 commit 0b8a482

File tree

1 file changed

+10
-29
lines changed

1 file changed

+10
-29
lines changed

main.go

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"bufio"
88
"fmt"
99
"io"
10-
"math"
1110
"os"
1211
"regexp"
1312
"sort"
@@ -229,37 +228,19 @@ func match_fuzzy(create, destroy *strset.Set) (map[string]string, map[string]str
229228
downMatches := map[string]string{}
230229

231230
type candidate struct {
232-
Word string
233-
Distance int
231+
word string
232+
distance int
234233
}
235-
distancesD2C := map[string][]candidate{}
234+
reverse := map[string]candidate{}
236235

237236
for _, d := range destroy.List() {
238237
for _, c := range create.List() {
239238
// Here we could also use a custom NGramSizes via
240239
// stringosim.QGramSimilarityOptions
241240
dist := stringosim.QGram([]rune(d), []rune(c))
242-
distancesD2C[d] = append(distancesD2C[d], candidate{c, dist})
243-
}
244-
}
245-
246-
// Sort evaluations from best to worst.
247-
for _, eval := range distancesD2C {
248-
sort.Slice(eval, func(i, j int) bool {
249-
return eval[i].Distance < eval[j].Distance
250-
})
251-
}
252-
253-
// Create a reverse index; this is used to resolve ties.
254-
reverse := map[string]candidate{}
255-
for src, eval := range distancesD2C {
256-
for _, dst := range eval {
257-
curr, ok := reverse[dst.Word]
258-
if !ok {
259-
curr = candidate{"", math.MaxInt32}
260-
}
261-
if dst.Distance < curr.Distance {
262-
reverse[dst.Word] = candidate{src, dst.Distance}
241+
curr, ok := reverse[c]
242+
if !ok || dist < curr.distance {
243+
reverse[c] = candidate{d, dist}
263244
}
264245
}
265246
}
@@ -274,12 +255,12 @@ func match_fuzzy(create, destroy *strset.Set) (map[string]string, map[string]str
274255
fmt.Printf("WARNING fuzzy match enabled. Double-check the following matches:\n")
275256
for _, k := range keys {
276257
v := reverse[k]
277-
fmt.Printf("%2d %-50s -> %s\n", v.Distance, v.Word, k)
278-
upMatches[v.Word] = k
279-
downMatches[k] = v.Word
258+
fmt.Printf("%2d %-50s -> %s\n", v.distance, v.word, k)
259+
upMatches[v.word] = k
260+
downMatches[k] = v.word
280261

281262
// Remove matched elements from the two sets.
282-
destroy.Remove(v.Word)
263+
destroy.Remove(v.word)
283264
create.Remove(k)
284265
}
285266

0 commit comments

Comments
 (0)