1- use miniserde:: { Deserialize , json } ;
1+ use miniserde:: { json , Deserialize } ;
22use std:: error:: Error ;
33
44#[ derive( Deserialize ) ]
@@ -48,43 +48,55 @@ struct CaptionSegment {
4848 utf8 : String ,
4949}
5050
51- const USER_AGENT : & str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36" ;
52- const API_KEY : & str = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" ;
51+ const USER_AGENT : & str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36" ;
52+
/// Per-session values needed to call the InnerTube `player` endpoint.
///
/// All three values are produced by `fetch_player_config` and consumed when
/// the player request is assembled.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlayerConfig {
    /// Value sent as `context.client.clientVersion` in the player request body.
    client_version: String,
    /// Value sent as `playbackContext.contentPlaybackContext.signatureTimestamp`.
    signature_timestamp: u64,
    /// Value appended as the `key` query parameter of the player endpoint URL.
    api_key: String,
}
5358
5459pub fn get_video_data ( video_url : & str , language : & str ) -> Result < ( String , String ) , Box < dyn Error > > {
55- let video_id =
56- extract_video_id ( video_url ) . ok_or_else ( || format ! ( "Invalid YouTube URL: {video_url}" ) ) ?;
60+ let video_id = extract_video_id ( video_url )
61+ . ok_or_else ( || format ! ( "Invalid YouTube URL: {video_url}" ) ) ?;
5762
58- get_transcript_and_title ( video_id, language)
59- }
63+ let config = fetch_player_config ( video_id) ?;
6064
61- fn get_transcript_and_title (
62- video_id : & str ,
63- language : & str ,
64- ) -> Result < ( String , String ) , Box < dyn Error > > {
6565 let request_body = format ! (
6666 r#"{{
6767 "context": {{
6868 "client": {{
6969 "clientName": "WEB",
70- "clientVersion": "2.20251113.00.00 "
70+ "clientVersion": "{client_version} "
7171 }}
7272 }},
73- "videoId": "{video_id}"
74- }}"#
73+ "videoId": "{video_id}",
74+ "playbackContext": {{
75+ "contentPlaybackContext": {{
76+ "signatureTimestamp": {sts}
77+ }}
78+ }}
79+ }}"# ,
80+ client_version = config. client_version,
81+ sts = config. signature_timestamp,
7582 ) ;
7683
77- let player_response = minreq:: post ( format ! ( "https://www.youtube.com/youtubei/v1/player?prettyPrint=false&key={API_KEY}" ) )
78- . with_header ( "User-Agent" , USER_AGENT )
79- . with_header ( "Referer" , "https://www.youtube.com/" )
80- . with_body ( request_body)
81- . send ( ) ?;
84+ let api_url = format ! (
85+ "https://www.youtube.com/youtubei/v1/player?prettyPrint=false&key={}" ,
86+ config. api_key
87+ ) ;
88+
89+ let player_response = minreq:: post ( api_url)
90+ . with_header ( "User-Agent" , USER_AGENT )
91+ . with_header ( "Referer" , "https://www.youtube.com/" )
92+ . with_body ( request_body)
93+ . send ( ) ?;
8294
8395 let player_data: PlayerDataResponse = json:: from_slice ( player_response. as_bytes ( ) ) ?;
8496
8597 let video_title = player_data
8698 . video_details
87- . ok_or ( "Video not found or server IP blocked by YouTube " ) ?
99+ . ok_or ( "Video details not found" ) ?
88100 . title ;
89101
90102 let tracks = player_data
@@ -96,13 +108,86 @@ fn get_transcript_and_title(
96108 let track = select_best_track ( & tracks, language) ?;
97109
98110 let url = format ! ( "{}&fmt=json3" , track. base_url. replace( "\\ u0026" , "&" ) ) ;
99- let caption_response: JsonCaptionResponse =
100- json :: from_slice ( minreq :: get ( url ) . send ( ) ? . as_bytes ( ) ) ? ;
111+ let caption_response: JsonCaptionResponse = json :: from_slice ( minreq :: get ( url ) . send ( ) ? . as_bytes ( ) ) ? ;
112+
101113 let transcript = process_json_captions ( caption_response. events ) ;
102114
103115 Ok ( ( transcript, video_title) )
104116}
105117
118+ fn fetch_player_config ( video_id : & str ) -> Result < PlayerConfig , Box < dyn Error > > {
119+ let page_url = format ! ( "https://www.youtube.com/watch?v={video_id}" ) ;
120+ let page_response = minreq:: get ( & page_url)
121+ . with_header ( "User-Agent" , USER_AGENT )
122+ . send ( ) ?;
123+ let page_html = page_response. as_str ( ) ?;
124+
125+ let js_path = extract_json_string_value ( page_html, "jsUrl" )
126+ . ok_or ( "Could not find jsUrl in video page" ) ?;
127+
128+ let client_version = extract_json_string_value ( page_html, "clientVersion" )
129+ . ok_or ( "Could not find clientVersion" ) ?
130+ . to_string ( ) ;
131+
132+ let api_key = extract_json_string_value ( page_html, "INNERTUBE_API_KEY" )
133+ . ok_or ( "Could not find INNERTUBE_API_KEY" ) ?
134+ . to_string ( ) ;
135+
136+ let js_url = if js_path. starts_with ( "http" ) {
137+ js_path. to_string ( )
138+ } else {
139+ format ! ( "https://www.youtube.com{js_path}" )
140+ } ;
141+
142+ let js_response = minreq:: get ( & js_url)
143+ . with_header ( "User-Agent" , USER_AGENT )
144+ . send ( ) ?;
145+
146+ let signature_timestamp = extract_signature_timestamp ( js_response. as_str ( ) ?)
147+ . ok_or ( "Could not find signatureTimestamp in JS player" ) ?;
148+
149+ Ok ( PlayerConfig {
150+ client_version,
151+ signature_timestamp,
152+ api_key,
153+ } )
154+ }
155+
/// Returns the raw string value of the first `"key":"value"` pair in `text`.
///
/// This is a lightweight textual scan, not a JSON parser: it looks for the
/// exact byte sequence `"<key>":"` (no whitespace around the colon) and
/// returns everything up to the closing quote. Backslash escapes inside the
/// value are skipped over when looking for that closing quote, so a value
/// containing `\"` is not truncated. The returned slice is *not* unescaped.
///
/// Returns `None` when the key is absent or the value is never terminated.
fn extract_json_string_value<'a>(text: &'a str, key: &str) -> Option<&'a str> {
    let needle = format!("\"{key}\":\"");
    let value_start = text.find(&needle)? + needle.len();
    let rest = &text[value_start..];

    // Scanning bytes is safe: `"` and `\` are ASCII and can never occur in
    // the middle of a multi-byte UTF-8 sequence, so `idx` is a char boundary.
    let mut escaped = false;
    for (idx, byte) in rest.bytes().enumerate() {
        match byte {
            // The previous byte was a backslash: this byte is part of the escape.
            _ if escaped => escaped = false,
            b'\\' => escaped = true,
            b'"' => return Some(&rest[..idx]),
            _ => {}
        }
    }

    // No closing quote after the first match means there is no `"` left in
    // the text at all, so no later occurrence of the needle can exist either.
    None
}
168+
/// Extracts the numeric signature timestamp from player JavaScript source.
///
/// Looks for `signatureTimestamp:<digits>` first; only if no occurrence of
/// that yields a number does it fall back to `sts:<digits>`. Within one
/// needle, occurrences are tried in order and the first one followed by a
/// run of ASCII digits that fits in a `u64` wins.
///
/// A needle is ignored when it is the tail of a longer identifier (for
/// example the `sts:` inside `posts:42`), so unrelated properties are not
/// mistaken for the timestamp.
///
/// Returns `None` when no acceptable occurrence exists.
fn extract_signature_timestamp(js_code: &str) -> Option<u64> {
    const NEEDLES: [&str; 2] = ["signatureTimestamp:", "sts:"];

    // Characters that can continue a JavaScript identifier to the left of a needle.
    let is_identifier_char = |c: char| c.is_alphanumeric() || c == '_' || c == '$';

    NEEDLES.iter().find_map(|needle| {
        js_code.match_indices(needle).find_map(|(pos, _)| {
            if js_code[..pos].ends_with(is_identifier_char) {
                return None;
            }

            let tail = &js_code[pos + needle.len()..];
            let digit_count = tail.bytes().take_while(u8::is_ascii_digit).count();

            // An empty or overflowing digit run fails to parse; keep scanning.
            tail[..digit_count].parse::<u64>().ok()
        })
    })
}
190+
106191fn extract_video_id ( url : & str ) -> Option < & str > {
107192 const PATTERNS : & [ & str ] = & [ "v=" , "/embed/" , "/live/" , "/v/" , "/shorts/" , "youtu.be/" ] ;
108193
@@ -115,38 +200,19 @@ fn extract_video_id(url: &str) -> Option<&str> {
115200 None
116201}
117202
118- fn select_best_track < ' a > (
119- tracks : & ' a [ CaptionTrack ] ,
120- language : & str ,
121- ) -> Result < & ' a CaptionTrack , Box < dyn Error > > {
122- // manual > punctuated ASR > plain ASR
123- let mut best = None ;
124- let mut priority = 999 ;
125-
126- for track in tracks {
127- if track. language_code == language {
128- let track_priority = if !track. base_url . contains ( "kind=asr" ) {
129- 0 // Manual
130- } else if track. base_url . contains ( "variant=punctuated" ) {
131- 1 // Punctuated ASR
132- } else {
133- 2 // Plain ASR
134- } ;
135-
136- if track_priority < priority {
137- best = Some ( track) ;
138- priority = track_priority;
139- if priority == 0 {
140- break ;
141- } // Found manual, stop searching
142- }
143- }
144- }
145-
146- best. ok_or_else ( || {
147- let available: Vec < _ > = tracks. iter ( ) . map ( |t| & t. language_code ) . collect ( ) ;
148- format ! ( "No captions for '{language}'. Available: {available:?}" ) . into ( )
149- } )
203+ fn select_best_track < ' a > ( tracks : & ' a [ CaptionTrack ] , language : & str ) -> Result < & ' a CaptionTrack , Box < dyn Error > > {
204+ tracks
205+ . iter ( )
206+ . filter ( |t| t. language_code == language)
207+ . min_by_key ( |t| {
208+ if !t. base_url . contains ( "kind=asr" ) { 0 }
209+ else if t. base_url . contains ( "variant=punctuated" ) { 1 }
210+ else { 2 }
211+ } )
212+ . ok_or_else ( || {
213+ let available: Vec < _ > = tracks. iter ( ) . map ( |t| & t. language_code ) . collect ( ) ;
214+ format ! ( "No captions for '{language}'. Available: {available:?}" ) . into ( )
215+ } )
150216}
151217
152218fn process_json_captions ( events : Vec < JsonCaptionEvent > ) -> String {
@@ -167,4 +233,4 @@ fn process_json_captions(events: Vec<JsonCaptionEvent>) -> String {
167233 }
168234
169235 result
170- }
236+ }
0 commit comments