-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathtestSubtitles.js
More file actions
61 lines (49 loc) · 1.92 KB
/
testSubtitles.js
File metadata and controls
61 lines (49 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/**
 * Decodes the XML character entities that appear in caption text.
 *
 * Handles `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;` and `&apos;`.
 * `&amp;` is replaced first, so doubly-escaped input such as `&amp;#39;`
 * is fully decoded to `'` within the same call.
 *
 * @param {string} xml - Text that may contain XML entities.
 * @returns {string} The text with those entities replaced by their literal characters.
 */
function decodeXml(xml) {
  return xml
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&(?:#39|apos);/g, "'");
}
/**
 * Downloads the caption track of a YouTube video and returns it as timed entries.
 *
 * The function loads the watch page to extract the `INNERTUBE_API_KEY`, asks the
 * `youtubei/v1/player` endpoint for the video's caption tracks, downloads the
 * track whose `languageCode` equals `language`, and parses every
 * `<text start="…" dur="…">…</text>` element it contains.
 *
 * @param {string} videoId - ID of the video, as used in `watch?v=<videoId>`.
 * @param {string} [language="en"] - `languageCode` of the caption track to fetch.
 * @returns {Promise<Array<{caption: string, startTime: number, endTime: number}>>}
 *   Entries in document order; `endTime` is `start + dur`.
 * @throws {Error} If a request returns a non-2xx status, the API key is not in
 *   the page, the video has no caption tracks, or no track matches `language`.
 */
async function getYoutubeTranscript(videoId, language = "en") {
  const videoUrl = `https://www.youtube.com/watch?v=${encodeURIComponent(videoId)}`;
  const pageResponse = await fetch(videoUrl);
  if (!pageResponse.ok) {
    throw new Error(`Failed to load watch page (HTTP ${pageResponse.status}).`);
  }
  const html = await pageResponse.text();

  const apiKeyMatch = html.match(/"INNERTUBE_API_KEY":"([^"]+)"/);
  if (!apiKeyMatch) throw new Error("INNERTUBE_API_KEY not found.");
  const apiKey = apiKeyMatch[1];

  const playerResponse = await fetch(
    `https://www.youtube.com/youtubei/v1/player?key=${encodeURIComponent(apiKey)}`,
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        context: {
          client: {
            clientName: "WEB",
            clientVersion: "2.20240401.01.00"
          }
        },
        videoId
      })
    }
  );
  if (!playerResponse.ok) {
    throw new Error(`Player request failed (HTTP ${playerResponse.status}).`);
  }
  const playerData = await playerResponse.json();

  const tracks = playerData?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
  if (!tracks) throw new Error("No captions found.");
  const track = tracks.find(t => t.languageCode === language);
  if (!track) throw new Error(`No captions for language: ${language}`);

  // Drop an explicit `fmt` parameter wherever it sits in the query string; the
  // parser below only understands the `<text start=… dur=…>` XML layout
  // (presumably the endpoint's default format — confirm against the API).
  const baseUrl = track.baseUrl.replace(/&fmt=\w+(?=&|$)/, "");
  const captionResponse = await fetch(baseUrl);
  if (!captionResponse.ok) {
    throw new Error(`Failed to download captions (HTTP ${captionResponse.status}).`);
  }
  const xml = await captionResponse.text();

  // The `s` flag lets `.` span line breaks, so captions whose text contains
  // newlines are captured instead of being skipped.
  const textElement = /<text start="([^"]+)" dur="([^"]+)">(.+?)<\/text>/gs;
  return Array.from(xml.matchAll(textElement), ([, startAttr, durAttr, body]) => {
    const start = parseFloat(startAttr);
    const duration = parseFloat(durAttr);
    return {
      caption: decodeXml(body),
      startTime: start,
      endTime: start + duration
    };
  });
}
// Manual smoke run: fetch the "en" transcript of a sample video and print it.
const sampleTranscript = await getYoutubeTranscript("X9BblS3qGaU", "en");
console.log(sampleTranscript);