Skip to content

Commit d10acb4

Browse files
committed
Scoring against normalised search/result titles
1 parent 676446c commit d10acb4

File tree

1 file changed

+35
-13
lines changed

1 file changed

+35
-13
lines changed

Contents/Code/__init__.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,37 @@ def getDateFromString(self, string):
237237
except:
238238
return None
239239

240+
def strippedAlbumName(self, name):
    """Return ``name`` reduced to its bare title for fuzzy matching.

    Two kinds of decoration are removed:

    * everything from the first ``(`` or ``[`` to the end of the string,
      e.g. a trailing "(Unabridged)";
    * a trailing series suffix of the form ": <series>, <word> <number>",
      e.g. ": Some Series, Book 1".

    If chopping the bracketed text would leave nothing (the name starts
    with a bracket), the name is kept as-is instead of being emptied.

    :param name: album or result title to strip.
    :return: the stripped title, without surrounding whitespace.
    """
    stripped = name.strip()

    # Chop aggressively: drop everything from the first opening bracket on.
    # Re-strip afterwards so the series pattern below, which is anchored
    # with "$", is not defeated by the space left in front of the bracket.
    chopped = re.sub(r"[\(\[].*?$", "", stripped).strip()
    # A name that *starts* with a bracket would be wiped out entirely;
    # keep it unchanged rather than continue with an empty title.
    if chopped:
        stripped = chopped
    Log('agressively chopping bracketed text = %s', stripped)

    # remove series subtitle
    # TODO: use subtitle tag to replace series
    stripped = re.sub(r": .*, \w+ \d+$", "", stripped)
    Log('chopping book series info = %s', stripped)

    return stripped.strip()
257+
258+
def normalizeAlbumName(self, name, stripped=True):
    """Normalize an album/result title so two titles can be compared.

    Diacritics are removed via ``String.StripDiacritics``; when that
    yields an empty string the original ``name`` is used instead.

    :param name: title to normalize.
    :param stripped: when true, also pass the title through
        ``strippedAlbumName``.
    :return: the normalized title with surrounding whitespace removed.
    """
    result = String.StripDiacritics(name)

    # Nothing left after removing diacritics: fall back to the raw name.
    if len(result) == 0:
        result = name
        Log('normalizedName = %s', result)

    if not stripped:
        return result.strip()

    # strip "unabridged", series, subtitle etc.
    return self.strippedAlbumName(result).strip()
270+
240271
def getStringContentFromXPath(self, source, query):
    """Evaluate ``query`` on ``source`` wrapped in XPath ``string()``.

    :param source: object exposing an ``xpath`` method.
    :param query: XPath expression to wrap.
    :return: whatever ``source.xpath`` returns for ``string(<query>)``.
    """
    expression = ''.join(('string(', query, ')'))
    return source.xpath(expression)
242273

@@ -325,16 +356,7 @@ def search(self, results, media, lang, manual):
325356
self.Log('-----------------------------------------------------------------------')
326357

327358
# Normalize the name
328-
normalizedName = String.StripDiacritics(media.album)
329-
if len(normalizedName) == 0:
330-
normalizedName = media.album
331-
Log('normalizedName = %s', normalizedName)
332-
333-
# Chop off "unabridged"
334-
normalizedName = re.sub(r"[\(\[].*?[\)\]]", "", normalizedName)
335-
Log('chopping bracketed text = %s', normalizedName)
336-
normalizedName = normalizedName.strip()
337-
Log('normalizedName stripped = %s', normalizedName)
359+
normalizedName = self.normalizeAlbumName(media.album)
338360

339361
self.Log('***** SEARCHING FOR "%s" - AUDIBLE v.%s *****', normalizedName, VERSION_NO)
340362

@@ -391,9 +413,9 @@ def search(self, results, media, lang, manual):
391413
if date is not None:
392414
year = date.year
393415

394-
# Score the album name
395-
scorebase1 = media.album
396-
scorebase2 = title.encode('utf-8')
416+
# Score the normalized album name against normalized title
417+
scorebase1 = normalizedName
418+
scorebase2 = self.normalizeAlbumName(title.encode('utf-8'))
397419
#self.Log('scorebase1: %s', scorebase1)
398420
#self.Log('scorebase2: %s', scorebase2)
399421

0 commit comments

Comments
 (0)