Skip to content
13 changes: 10 additions & 3 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,11 @@ def get_text(
url = self.format_url(url, params)
self.debug("Fetching HTML from {}", url)
r = self.get(url, **kwargs)
r.encoding = None
"""Trust server's encoding,
but default to UTF-8 if not specified
"""
if not r.encoding:
r.encoding = 'utf-8'
return r.text

def get_json(self, url: str, params: JSONDict | None = None, **kwargs):
Expand Down Expand Up @@ -557,11 +561,14 @@ def search(self, artist: str, title: str) -> Iterable[SearchResult]:
def scrape(cls, html: str) -> str | None:
if m := cls.LYRICS_IN_JSON_RE.search(html):
html_text = cls.remove_backslash(m[0]).replace(r"\n", "\n")
return cls.get_soup(html_text).get_text().strip()
lyrics = cls.get_soup(html_text).get_text().strip()
# Clean up any remaining escaped quotes (may need multiple passes)
Copy link
Contributor

@semohr semohr Dec 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comments should be minimal and focus on the why rather than the how:

#  Strip HTML and normalize quotes that were left because of JSON...
return re.sub(r'\\+"', '"', lyrics)

The comment should explain why the escaped quotes exist, without repeating how the regex works. You already have quite a bit of information on this in your PR description; maybe we can condense it into a single sentence?

while '\\"' in lyrics:
lyrics = lyrics.replace('\\"', '"')
return lyrics

return None


class Tekstowo(SearchBackend):
"""Fetch lyrics from Tekstowo.pl."""

Expand Down
Loading