Skip to content

Commit 1d0951b

Browse files
Normalize URLs for the Yahoo search engine: no more redirect URLs
1 parent 5da3346 commit 1d0951b

File tree

3 files changed

+33
-3
lines changed

3 files changed

+33
-3
lines changed

Cargo.lock

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ reqwest = { version = "0.12.5", default-features = false, features = [
1414
"http2",
1515
"socks",
1616
] }
17+
urlencoding = "2.1.3"
1718
tokio = { version = "1.43.1", features = [
1819
"rt-multi-thread",
1920
"macros",

src/engines/yahoo.rs

+25-3
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,27 @@ impl Yahoo {
5656
// Ok(final_url)
5757
// }
5858
}
59+
fn parse_yahoo_redirect_url(raw_url: &str) -> String {
60+
// Look for the /RU= marker
61+
if let Some(start_idx) = raw_url.find("/RU=") {
62+
let encoded_start = &raw_url[start_idx + 4..]; // skip "/RU="
63+
let end_markers = ["/RS", "/RK"];
64+
let end_idx = end_markers
65+
.iter()
66+
.filter_map(|marker| encoded_start.find(marker))
67+
.min()
68+
.unwrap_or(encoded_start.len());
69+
70+
let encoded_url = &encoded_start[..end_idx];
71+
72+
match urlencoding::decode(encoded_url) {
73+
Ok(decoded) => decoded.into_owned(),
74+
Err(_) => raw_url.to_string(), // fallback
75+
}
76+
} else {
77+
raw_url.to_string()
78+
}
79+
}
5980

6081
#[async_trait::async_trait]
6182
impl SearchEngine for Yahoo {
@@ -107,11 +128,12 @@ impl SearchEngine for Yahoo {
107128
.unwrap_or("No Title Found")
108129
.trim()
109130
.to_owned();
110-
let cleaned_url = url
131+
let raw_url = url
111132
.value()
112133
.attr("href")
113-
.unwrap_or("No Link Found")
114-
.to_owned();
134+
.unwrap_or("No Link Found");
135+
136+
let cleaned_url = parse_yahoo_redirect_url(raw_url);
115137

116138
let cleaned_description = desc.inner_html().trim().to_owned();
117139
Some(SearchResult::new(

0 commit comments

Comments
 (0)