Skip to content

Commit 676446c

Browse files
committed
Use LevenshteinRatio to more fairly compare results
- When comparing "Ilium" to "Olympus": score using LevenshteinDistance = 94, score using LevenshteinRatio = 14. Note: LevenshteinRatio should be part of Util, but it errors as not found there.
1 parent b618c00 commit 676446c

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

Contents/Code/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,12 @@ def findDateInTitle(self, title):
262262
return Datetime.ParseDate(result.group(0)).date()
263263
return None
264264

265+
def LevenshteinRatio(self, first, second):
    """Return a length-normalised similarity ratio between two strings.

    The edit distance reported by Util.LevenshteinDistance is divided by
    the length of the longer input and subtracted from 1, so a mismatch
    between short strings is penalised as heavily as one between long
    strings. A distance of 0 yields 1.0; larger distances yield smaller
    values.

    Args:
        first: First string to compare (may be empty or None).
        second: Second string to compare (may be empty or None).

    Returns:
        0.0 when either argument is empty or None, otherwise
        1 - distance / max(len(first), len(second)) as a float.
    """
    # Truthiness covers both "" and None; len(None) would raise TypeError.
    if not first or not second:
        return 0.0
    longest = max(len(first), len(second))
    # float() keeps the division fractional even where int / int would floor.
    return 1 - (Util.LevenshteinDistance(first, second) / float(longest))
270+
265271
def doSearch(self, url, ctx):
266272
html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY)
267273
found = []
@@ -391,14 +397,15 @@ def search(self, results, media, lang, manual):
391397
#self.Log('scorebase1: %s', scorebase1)
392398
#self.Log('scorebase2: %s', scorebase2)
393399

394-
score = INITIAL_SCORE - Util.LevenshteinDistance(scorebase1, scorebase2)
400+
# use LevenshteinRatio as distance smaller for short strings
401+
score = int(round(self.LevenshteinRatio(scorebase1, scorebase2) * 100.0))
395402

396403
if media.artist:
397404
scorebase3 = media.artist
398405
scorebase4 = author
399406
#self.Log('scorebase3: %s', scorebase3)
400407
#self.Log('scorebase4: %s', scorebase4)
401-
score = int(round((score + INITIAL_SCORE - Util.LevenshteinDistance(scorebase3, scorebase4)) /2))
408+
score = int(round((score + self.LevenshteinRatio(scorebase3, scorebase4) * 100.0)) /2)
402409

403410

404411
self.Log('* Title is %s', title)

0 commit comments

Comments
 (0)