Skip to content

Commit 28b48e4

Browse files
committed
Fix subtitle fetching
1 parent 724dca0 commit 28b48e4

3 files changed

Lines changed: 134 additions & 74 deletions

File tree

Cargo.lock

Lines changed: 9 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,19 @@ cargo-features = ["trim-paths"]
22

33
[package]
44
name = "YouTubeTLDR"
5-
version = "1.6.0"
5+
version = "1.7.0"
66
edition = "2024"
77
readme = "README.md"
88
license = "MIT"
99

1010
[dependencies]
1111
minreq = { git = "https://github.com/Milkshiift/minreq", features = ["proxy"] }
1212
miniserde = { git = "https://github.com/Milkshiift/miniserde" }
13-
flume = "0.11.1"
13+
flume = "0.12.0"
1414

1515
[build-dependencies]
16-
minifier = "0.3"
17-
flate2 = "1.1"
16+
minifier = "0.3.6"
17+
flate2 = "1.1.9"
1818

1919
[features]
2020
default = ["native-tls"]

src/subtitle.rs

Lines changed: 121 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
use miniserde::{Deserialize, json};
1+
use miniserde::{json, Deserialize};
22
use std::error::Error;
33

44
#[derive(Deserialize)]
@@ -48,43 +48,55 @@ struct CaptionSegment {
4848
utf8: String,
4949
}
5050

51-
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36";
52-
const API_KEY: &str = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
51+
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36";
52+
53+
/// Values scraped from YouTube that the player API request is built from.
///
/// Produced by `fetch_player_config` and consumed by `get_video_data`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlayerConfig {
    /// Sent as `context.client.clientVersion` in the player request body.
    client_version: String,
    /// Sent as `playbackContext.contentPlaybackContext.signatureTimestamp`.
    signature_timestamp: u64,
    /// Appended as the `key` query parameter of the player endpoint URL.
    api_key: String,
}
5358

5459
pub fn get_video_data(video_url: &str, language: &str) -> Result<(String, String), Box<dyn Error>> {
55-
let video_id =
56-
extract_video_id(video_url).ok_or_else(|| format!("Invalid YouTube URL: {video_url}"))?;
60+
let video_id = extract_video_id(video_url)
61+
.ok_or_else(|| format!("Invalid YouTube URL: {video_url}"))?;
5762

58-
get_transcript_and_title(video_id, language)
59-
}
63+
let config = fetch_player_config(video_id)?;
6064

61-
fn get_transcript_and_title(
62-
video_id: &str,
63-
language: &str,
64-
) -> Result<(String, String), Box<dyn Error>> {
6565
let request_body = format!(
6666
r#"{{
6767
"context": {{
6868
"client": {{
6969
"clientName": "WEB",
70-
"clientVersion": "2.20251113.00.00"
70+
"clientVersion": "{client_version}"
7171
}}
7272
}},
73-
"videoId": "{video_id}"
74-
}}"#
73+
"videoId": "{video_id}",
74+
"playbackContext": {{
75+
"contentPlaybackContext": {{
76+
"signatureTimestamp": {sts}
77+
}}
78+
}}
79+
}}"#,
80+
client_version = config.client_version,
81+
sts = config.signature_timestamp,
7582
);
7683

77-
let player_response = minreq::post(format!("https://www.youtube.com/youtubei/v1/player?prettyPrint=false&key={API_KEY}"))
78-
.with_header("User-Agent", USER_AGENT)
79-
.with_header("Referer", "https://www.youtube.com/")
80-
.with_body(request_body)
81-
.send()?;
84+
let api_url = format!(
85+
"https://www.youtube.com/youtubei/v1/player?prettyPrint=false&key={}",
86+
config.api_key
87+
);
88+
89+
let player_response = minreq::post(api_url)
90+
.with_header("User-Agent", USER_AGENT)
91+
.with_header("Referer", "https://www.youtube.com/")
92+
.with_body(request_body)
93+
.send()?;
8294

8395
let player_data: PlayerDataResponse = json::from_slice(player_response.as_bytes())?;
8496

8597
let video_title = player_data
8698
.video_details
87-
.ok_or("Video not found or server IP blocked by YouTube")?
99+
.ok_or("Video details not found")?
88100
.title;
89101

90102
let tracks = player_data
@@ -96,13 +108,86 @@ fn get_transcript_and_title(
96108
let track = select_best_track(&tracks, language)?;
97109

98110
let url = format!("{}&fmt=json3", track.base_url.replace("\\u0026", "&"));
99-
let caption_response: JsonCaptionResponse =
100-
json::from_slice(minreq::get(url).send()?.as_bytes())?;
111+
let caption_response: JsonCaptionResponse = json::from_slice(minreq::get(url).send()?.as_bytes())?;
112+
101113
let transcript = process_json_captions(caption_response.events);
102114

103115
Ok((transcript, video_title))
104116
}
105117

118+
fn fetch_player_config(video_id: &str) -> Result<PlayerConfig, Box<dyn Error>> {
119+
let page_url = format!("https://www.youtube.com/watch?v={video_id}");
120+
let page_response = minreq::get(&page_url)
121+
.with_header("User-Agent", USER_AGENT)
122+
.send()?;
123+
let page_html = page_response.as_str()?;
124+
125+
let js_path = extract_json_string_value(page_html, "jsUrl")
126+
.ok_or("Could not find jsUrl in video page")?;
127+
128+
let client_version = extract_json_string_value(page_html, "clientVersion")
129+
.ok_or("Could not find clientVersion")?
130+
.to_string();
131+
132+
let api_key = extract_json_string_value(page_html, "INNERTUBE_API_KEY")
133+
.ok_or("Could not find INNERTUBE_API_KEY")?
134+
.to_string();
135+
136+
let js_url = if js_path.starts_with("http") {
137+
js_path.to_string()
138+
} else {
139+
format!("https://www.youtube.com{js_path}")
140+
};
141+
142+
let js_response = minreq::get(&js_url)
143+
.with_header("User-Agent", USER_AGENT)
144+
.send()?;
145+
146+
let signature_timestamp = extract_signature_timestamp(js_response.as_str()?)
147+
.ok_or("Could not find signatureTimestamp in JS player")?;
148+
149+
Ok(PlayerConfig {
150+
client_version,
151+
signature_timestamp,
152+
api_key,
153+
})
154+
}
155+
156+
/// Returns the raw text of the first `"key":"value"` string value found in `text`.
///
/// This is a plain substring scan, not a JSON parser: it looks for the literal
/// `"<key>":"` (no whitespace around the colon) and returns everything up to the
/// closing quote. Backslash escapes are honoured when locating that closing quote
/// but are not decoded, so the returned slice can still contain sequences such as
/// `\"` or `\/`.
///
/// Returns `None` when the key is absent or its value is never terminated.
fn extract_json_string_value<'a>(text: &'a str, key: &str) -> Option<&'a str> {
    let needle = format!("\"{key}\":\"");
    let value_start = text.find(&needle)? + needle.len();
    let rest = &text[value_start..];

    // Walk bytes rather than chars: both `\` and `"` are ASCII, so the index of the
    // terminating quote is always a valid char boundary for slicing.
    let mut escaped = false;
    for (idx, byte) in rest.bytes().enumerate() {
        match byte {
            // The byte right after a backslash is part of an escape, never a terminator.
            _ if escaped => escaped = false,
            b'\\' => escaped = true,
            b'"' => return Some(&rest[..idx]),
            _ => {}
        }
    }

    // No closing quote at all. Retrying at a later occurrence of the key cannot help,
    // because any later occurrence would itself contain a quote.
    None
}
168+
169+
/// Finds the signature timestamp constant inside the player script source.
///
/// Every `signatureTimestamp:` occurrence is tried first; `sts:` is only consulted
/// when the long form yields nothing. For each occurrence the run of ASCII digits
/// after the colon (optional whitespace is skipped) is parsed as a `u64`.
///
/// Occurrences that are merely the tail of a longer identifier, such as `costs:`,
/// are ignored so that an unrelated number is not returned.
///
/// Returns `None` when no occurrence is followed by a number that fits in a `u64`.
fn extract_signature_timestamp(js_code: &str) -> Option<u64> {
    // Ordered by preference: the explicit name wins over the short alias.
    const NEEDLES: [&str; 2] = ["signatureTimestamp:", "sts:"];

    NEEDLES.iter().find_map(|needle| {
        js_code.match_indices(needle).find_map(|(pos, _)| {
            // Reject matches glued to a preceding identifier character.
            let inside_identifier = js_code[..pos]
                .chars()
                .next_back()
                .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '$');
            if inside_identifier {
                return None;
            }

            let rest = js_code[pos + needle.len()..].trim_start();
            let digit_len = rest.bytes().take_while(u8::is_ascii_digit).count();
            // An empty or overflowing digit run fails to parse; move on to the next match.
            rest[..digit_len].parse::<u64>().ok()
        })
    })
}
190+
106191
fn extract_video_id(url: &str) -> Option<&str> {
107192
const PATTERNS: &[&str] = &["v=", "/embed/", "/live/", "/v/", "/shorts/", "youtu.be/"];
108193

@@ -115,38 +200,19 @@ fn extract_video_id(url: &str) -> Option<&str> {
115200
None
116201
}
117202

118-
fn select_best_track<'a>(
119-
tracks: &'a [CaptionTrack],
120-
language: &str,
121-
) -> Result<&'a CaptionTrack, Box<dyn Error>> {
122-
// manual > punctuated ASR > plain ASR
123-
let mut best = None;
124-
let mut priority = 999;
125-
126-
for track in tracks {
127-
if track.language_code == language {
128-
let track_priority = if !track.base_url.contains("kind=asr") {
129-
0 // Manual
130-
} else if track.base_url.contains("variant=punctuated") {
131-
1 // Punctuated ASR
132-
} else {
133-
2 // Plain ASR
134-
};
135-
136-
if track_priority < priority {
137-
best = Some(track);
138-
priority = track_priority;
139-
if priority == 0 {
140-
break;
141-
} // Found manual, stop searching
142-
}
143-
}
144-
}
145-
146-
best.ok_or_else(|| {
147-
let available: Vec<_> = tracks.iter().map(|t| &t.language_code).collect();
148-
format!("No captions for '{language}'. Available: {available:?}").into()
149-
})
203+
/// Picks the most desirable caption track whose language code equals `language`.
///
/// Preference order: manually authored captions, then punctuated ASR, then plain ASR.
/// Among equally ranked tracks the earliest one in `tracks` wins.
///
/// # Errors
///
/// Returns an error listing every available language code when no track matches.
fn select_best_track<'a>(tracks: &'a [CaptionTrack], language: &str) -> Result<&'a CaptionTrack, Box<dyn Error>> {
    // Lower rank is better.
    let rank = |track: &CaptionTrack| -> i32 {
        let is_asr = track.base_url.contains("kind=asr");
        let is_punctuated = track.base_url.contains("variant=punctuated");
        match (is_asr, is_punctuated) {
            (false, _) => 0,
            (true, true) => 1,
            (true, false) => 2,
        }
    };

    let mut winner: Option<(&'a CaptionTrack, i32)> = None;
    for candidate in tracks {
        if candidate.language_code != language {
            continue;
        }
        let score = rank(candidate);
        // Strict comparison keeps the first of several equally ranked tracks.
        let improves = match winner {
            Some((_, best_score)) => score < best_score,
            None => true,
        };
        if improves {
            winner = Some((candidate, score));
        }
    }

    match winner {
        Some((track, _)) => Ok(track),
        None => {
            let available: Vec<_> = tracks.iter().map(|t| &t.language_code).collect();
            Err(format!("No captions for '{language}'. Available: {available:?}").into())
        }
    }
}
151217

152218
fn process_json_captions(events: Vec<JsonCaptionEvent>) -> String {
@@ -167,4 +233,4 @@ fn process_json_captions(events: Vec<JsonCaptionEvent>) -> String {
167233
}
168234

169235
result
170-
}
236+
}

0 commit comments

Comments
 (0)