Skip to content

Commit 067f497

Browse files
committed
Added song lyrics to GUI
1 parent c29a002 commit 067f497

File tree

10 files changed

+219
-14
lines changed

10 files changed

+219
-14
lines changed

Cargo.lock

Lines changed: 19 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ byteorder = "1.3.4" # Used for reading and writing binary structures
1919
crc32fast = "1.2.0" # Used for the CRC-32 checksum in the binary signature
2020
base64 = "0.12.3"
2121
reqwest = { version = "0.10.7", features = ["blocking", "json"] }
22+
html-escape = "0.2.13"
23+
unicode-normalization = "0.1.23"
2224
rodio = "0.13.1" # For reading WAV/MP3/FLAC/OGG files, resampling and playing audio.
2325
clap = "2.33.2" # For argument parsing
2426
cpal = "=0.13.3" # For recording audio

src/core/http_thread.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::core::thread_messages::*;
88

99
use crate::fingerprinting::signature_format::DecodedSignature;
1010
use crate::fingerprinting::communication::{recognize_song_from_signature, obtain_raw_cover_image};
11+
use crate::fingerprinting::lyrics::fetch_genius_lyrics;
1112

1213
fn try_recognize_song(signature: DecodedSignature) -> Result<SongRecognizedMessage, Box<dyn Error>> {
1314
let json_object = recognize_song_from_signature(&signature)?;
@@ -99,6 +100,17 @@ pub fn http_thread(http_rx: mpsc::Receiver<HTTPMessage>, gui_tx: glib::Sender<GU
99100

100101
microphone_tx.send(MicrophoneMessage::ProcessingDone).unwrap();
101102
}
103+
HTTPMessage::FetchLyrics(info) => {
104+
match fetch_genius_lyrics(&info) {
105+
Ok(lyrics) => {
106+
gui_tx.send(GUIMessage::LyricsRecognized(lyrics)).unwrap();
107+
}
108+
Err(_) => {
109+
// Clear lyrics if not found or on error.
110+
gui_tx.send(GUIMessage::LyricsRecognized(String::new())).unwrap();
111+
}
112+
}
113+
}
102114
}
103115
}
104116

src/core/thread_messages.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::fingerprinting::signature_format::DecodedSignature;
33
use crate::gui::preferences::Preferences;
44
#[cfg(feature = "gui")]
55
use crate::utils::csv_song_history::SongHistoryRecord;
6+
use crate::fingerprinting::lyrics::LyricSearchInfo;
67

78
use std::thread;
89

@@ -59,6 +60,7 @@ pub enum GUIMessage {
5960
MicrophoneRecording,
6061
MicrophoneVolumePercent(f32),
6162
SongRecognized(Box<SongRecognizedMessage>),
63+
LyricsRecognized(String),
6264
}
6365

6466
pub enum MicrophoneMessage {
@@ -74,4 +76,5 @@ pub enum ProcessingMessage {
7476

7577
/// Requests received by `http_thread` over its `mpsc::Receiver<HTTPMessage>`.
pub enum HTTPMessage {
7678
// Carries a decoded audio signature; presumably triggers song recognition
// (the handling arm is not visible here — confirm in `http_thread`).
RecognizeSignature(Box<DecodedSignature>),
79+
// Asks for the lyrics of the given artist/song pair; `http_thread` answers
// with `GUIMessage::LyricsRecognized`, sending an empty string on failure.
FetchLyrics(LyricSearchInfo),
7780
}

src/fingerprinting/communication.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@ use reqwest::header::HeaderMap;
33
use std::time::SystemTime;
44
use std::error::Error;
55
use std::time::Duration;
6-
use rand::seq::SliceRandom;
76
use uuid::Uuid;
87

98
use crate::fingerprinting::signature_format::DecodedSignature;
10-
use crate::fingerprinting::user_agent::USER_AGENTS;
9+
use crate::fingerprinting::user_agent;
1110

1211
pub fn recognize_song_from_signature(signature: &DecodedSignature) -> Result<Value, Box<dyn Error>> {
1312

@@ -35,7 +34,7 @@ pub fn recognize_song_from_signature(signature: &DecodedSignature) -> Result<Val
3534

3635
let mut headers = HeaderMap::new();
3736

38-
headers.insert("User-Agent", USER_AGENTS.choose(&mut rand::thread_rng()).unwrap().parse()?);
37+
headers.insert("User-Agent", user_agent::random().parse()?);
3938
headers.insert("Content-Language", "en_US".parse()?);
4039

4140
let client = reqwest::blocking::Client::new();
@@ -62,7 +61,7 @@ pub fn obtain_raw_cover_image(url: &str) -> Result<Vec<u8>, Box<dyn Error>> {
6261

6362
let mut headers = HeaderMap::new();
6463

65-
headers.insert("User-Agent", USER_AGENTS.choose(&mut rand::thread_rng()).unwrap().parse()?);
64+
headers.insert("User-Agent", user_agent::random().parse()?);
6665
headers.insert("Content-Language", "en_US".parse()?);
6766

6867
let client = reqwest::blocking::Client::new();

src/fingerprinting/lyrics.rs

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
use crate::fingerprinting::user_agent;
2+
use regex::Regex;
3+
use reqwest::header::HeaderMap;
4+
use reqwest::StatusCode;
5+
use std::error::Error;
6+
use std::sync::OnceLock;
7+
use std::time::Duration;
8+
use unicode_normalization::UnicodeNormalization;
9+
10+
/// Identifies the song whose lyrics should be looked up.
///
/// Both fields are used verbatim to build the lyrics page URL, so they should
/// hold the names as they are displayed to the user.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LyricSearchInfo {
    /// Name of the performing artist.
    pub artist_name: String,
    /// Title of the song; may include suffixes such as `(feat. X)`.
    pub song_name: String,
}
14+
15+
pub fn fetch_genius_lyrics(info: &LyricSearchInfo) -> Result<String, Box<dyn Error>> {
16+
static RE_PAREN: OnceLock<Regex> = OnceLock::new();
17+
static RE_FEAT: OnceLock<Regex> = OnceLock::new();
18+
static RE_TAG_START: OnceLock<Regex> = OnceLock::new();
19+
static RE_TAG_END: OnceLock<Regex> = OnceLock::new();
20+
21+
let re_paren = RE_PAREN.get_or_init(|| Regex::new(r#"\(.*?\)"#).unwrap());
22+
let re_feat = RE_FEAT.get_or_init(|| Regex::new(r#"\(.*?(?:feat\.|ft\.).*?\)"#).unwrap());
23+
let re_tag_start = RE_TAG_START.get_or_init(|| Regex::new(r#"<.+?>"#).unwrap());
24+
let re_tag_end = RE_TAG_END.get_or_init(|| Regex::new(r#"<.+?/>"#).unwrap());
25+
26+
// Remove parens with feat. or ft. in them e.g. Song Title (feat. XXX).
27+
let song = re_feat.replace_all(&info.song_name, "");
28+
29+
let url = make_url(&format!("{}-{}", info.artist_name, song));
30+
31+
let html = match fetch_lyrics_html(&url)? {
32+
Some(lyrics) => Some(lyrics),
33+
None => {
34+
// Try one more time, this time with all parens removed from the song title.
35+
if song.contains('(') {
36+
let song = re_paren.replace_all(&song, "");
37+
let url = make_url(&format!("{}-{}", info.artist_name, song));
38+
fetch_lyrics_html(&url)?
39+
} else {
40+
None
41+
}
42+
}
43+
}
44+
.ok_or("lyrics not found")?;
45+
46+
// Reduce the amount of text we need to look at to find the lyrics. Lyrics are in between
47+
// the <div id="lyrics-root> and <div class="LyricsFooter"> tags.
48+
let root = &html[html
49+
.find("id=\"lyrics-root\"")
50+
.ok_or("lyrics-root not found")?
51+
..html
52+
.find("class=\"LyricsFooter")
53+
.ok_or("LyricsFooter not found")?];
54+
55+
let mut lyrics = String::new();
56+
57+
for container in root.split("data-lyrics-container=\"true\"").skip(1) {
58+
let container = container.trim().replace("<br/>", "\n");
59+
60+
for line in container.lines() {
61+
// Remove all opening and closing HTML tags.
62+
let replaced = re_tag_start.replace_all(line, "").to_string();
63+
let replaced = re_tag_end.replace_all(&replaced, "").to_string();
64+
// Clean up some remaining garbage.
65+
let replaced = replaced.replace("<div", "");
66+
let replaced = replaced.split("\">").last().unwrap();
67+
68+
// Exclude annotation lines.
69+
if replaced.get(0..1) != Some("[") {
70+
lyrics.push_str(&html_escape::decode_html_entities(&replaced));
71+
lyrics.push('\n');
72+
}
73+
}
74+
}
75+
Ok(lyrics.trim().to_string())
76+
}
77+
78+
fn fetch_lyrics_html(url: &str) -> Result<Option<String>, Box<dyn Error>> {
79+
let mut headers = HeaderMap::new();
80+
headers.insert("User-Agent", user_agent::random().parse()?);
81+
headers.insert("Content-Language", "en_US".parse()?);
82+
83+
let client = reqwest::blocking::Client::new();
84+
let response = client
85+
.get(url)
86+
.timeout(Duration::from_secs(20))
87+
.headers(headers)
88+
.send()?;
89+
90+
if response.status() == StatusCode::NOT_FOUND {
91+
Ok(None)
92+
} else {
93+
Ok(Some(response.text()?))
94+
}
95+
}
96+
97+
fn make_url(query: &str) -> String {
98+
// Convert accents and umlauts etc. to plain ascii as otherwise the lyric lookup fails.
99+
let query = query.nfd().filter(char::is_ascii).collect::<String>();
100+
101+
// Other replacements.
102+
let query = query.replace('&', "and");
103+
104+
let lower = query.to_lowercase();
105+
let mut chars = lower.chars();
106+
let mut mangled = String::new();
107+
let Some(first) = chars.next() else {
108+
return mangled;
109+
};
110+
mangled.extend(first.to_uppercase());
111+
112+
let exclude = [
113+
'\'', '"', '’', '`', '(', ')', '[', ']', '{', '}', '!', '?', ',', '.', '/', '|',
114+
];
115+
let mut skip = false;
116+
for char in chars {
117+
if char.is_whitespace() || char == '-' {
118+
if !skip {
119+
mangled.push('-');
120+
skip = true;
121+
}
122+
} else if !exclude.contains(&char) {
123+
mangled.push(char);
124+
skip = false;
125+
}
126+
}
127+
let last = mangled.pop().unwrap();
128+
if last != '-' {
129+
mangled.push(last);
130+
}
131+
format!("https://genius.com/{mangled}-lyrics")
132+
}

src/fingerprinting/user_agent.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
// From https://github.com/SaswatPadhi/FlashProfileDemo/blob/c1e3f05d09f6443568a606dc0a439d6ebb057ae1/tests/hetero/user_agents.json
44

5+
use rand::prelude::SliceRandom;
6+
57
pub const USER_AGENTS: [&'static str; 100] = [
68
"Dalvik/2.1.0 (Linux; U; Android 5.0.2; VS980 4G Build/LRX22G)",
79
"Dalvik/1.6.0 (Linux; U; Android 4.4.2; SM-T210 Build/KOT49H)",
@@ -104,3 +106,7 @@ pub const USER_AGENTS: [&'static str; 100] = [
104106
"Dalvik/1.6.0 (Linux; U; Android 4.2.2; SM-T217S Build/JDQ39)",
105107
"Dalvik/1.6.0 (Linux; U; Android 4.4.4; SAMSUNG-SM-N900A Build/KTU84P)"
106108
];
109+
110+
pub fn random() -> &'static str {
111+
USER_AGENTS.choose(&mut rand::thread_rng()).unwrap()
112+
}

src/gui/interface.glade

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -527,16 +527,41 @@ Contributors https://github.com/marin-m/SongRec/graphs/contributors</property>
527527
<property name="visible">True</property>
528528
<property name="can-focus">True</property>
529529
<property name="hscrollbar-policy">external</property>
530-
<property name="vscrollbar-policy">external</property>
531530
<property name="min-content-width">100</property>
532531
<property name="min-content-height">100</property>
533532
<child>
534533
<object class="GtkViewport">
535534
<property name="visible">True</property>
536535
<property name="can-focus">False</property>
537536
<child>
538-
<object class="GtkImage" id="recognized_song_cover">
537+
<object class="GtkBox">
538+
<property name="visible">True</property>
539539
<property name="can-focus">False</property>
540+
<property name="orientation">vertical</property>
541+
<child>
542+
<object class="GtkImage" id="recognized_song_cover">
543+
<property name="can-focus">False</property>
544+
</object>
545+
<packing>
546+
<property name="expand">False</property>
547+
<property name="fill">True</property>
548+
<property name="position">0</property>
549+
</packing>
550+
</child>
551+
<child>
552+
<object class="GtkLabel" id="recognized_song_lyrics">
553+
<property name="visible">True</property>
554+
<property name="can-focus">False</property>
555+
<property name="ypad">20</property>
556+
<property name="wrap">True</property>
557+
<property name="selectable">True</property>
558+
</object>
559+
<packing>
560+
<property name="expand">False</property>
561+
<property name="fill">True</property>
562+
<property name="position">1</property>
563+
</packing>
564+
</child>
540565
</object>
541566
</child>
542567
</object>

0 commit comments

Comments
 (0)