Skip to content

Commit 276bfb1

Browse files
Add subtitle processing to media download (#164)
Co-authored-by: Harrison Fisher <HarrisonFisher@users.noreply.github.com>
1 parent 682ddb0 commit 276bfb1

2 files changed

Lines changed: 76 additions & 4 deletions

File tree

docs/media/download.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ The `/v1/BETA/media/download` endpoint provides a powerful interface for downloa
7373
"subtitles": {
7474
"download": boolean, // Whether to download subtitles
7575
"languages": ["string"], // Array of subtitle language codes (e.g., "en", "es-419"); if omitted, all available languages are processed
76-
"formats": ["string"] // Array of subtitle formats to download
76+
"format": "string", // Subtitle format to download: one of 'srt', 'vtt', or 'json3'
77+
"cloud_upload": boolean // Whether to upload subtitles to cloud storage (defaults to true)
7778
}
7879
```
7980

@@ -106,6 +107,12 @@ The `/v1/BETA/media/download` endpoint provides a powerful interface for downloa
106107
},
107108
"thumbnails": {
108109
"download": true
110+
},
111+
"subtitles": {
112+
"download": true,
113+
"languages": ["en", "es-419"],
114+
"format": "srt",
115+
"cloud_upload": true
109116
}
110117
}
111118
```
@@ -133,6 +140,12 @@ curl -X POST \
133140
},
134141
"thumbnails": {
135142
"download": true
143+
},
144+
"subtitles": {
145+
"download": true,
146+
"languages": ["en", "es-419"],
147+
"format": "srt",
148+
"cloud_upload": true
136149
}
137150
}'
138151
```

routes/v1/media/download.py

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from services.authentication import authenticate
1111
from services.file_management import download_file
1212
from urllib.parse import quote, urlparse
13+
import requests
1314

1415
v1_media_download_bp = Blueprint('v1_media_download', __name__)
1516
logger = logging.getLogger(__name__)
@@ -57,7 +58,15 @@
5758
"properties": {
5859
"download": {"type": "boolean"},
5960
"languages": {"type": "array", "items": {"type": "string"}},
60-
"formats": {"type": "array", "items": {"type": "string"}}
61+
"format": {
62+
"type": "string",
63+
"enum": [
64+
"srt", # SubRip Subtitle (most common)
65+
"vtt", # Web Video Text Tracks
66+
"json3" # YouTube's JSON format
67+
]
68+
},
69+
"cloud_upload": {"type": "boolean"}
6170
}
6271
},
6372
"download": {
@@ -150,8 +159,8 @@ def download_media(job_id, data):
150159
ydl_opts['writesubtitles'] = subtitle_options.get('download', False)
151160
if subtitle_options.get('languages'):
152161
ydl_opts['subtitleslangs'] = subtitle_options['languages']
153-
if subtitle_options.get('formats'):
154-
ydl_opts['subtitlesformat'] = subtitle_options['formats']
162+
if subtitle_options.get('format'):
163+
ydl_opts['subtitlesformat'] = subtitle_options['format']
155164

156165
# Add download options if specified
157166
if download_options:
@@ -222,6 +231,56 @@ def download_media(job_id, data):
222231
except Exception as e:
223232
logger.error(f"Error processing thumbnail: {str(e)}")
224233
continue
234+
235+
# Process subtitles if available
236+
if 'subtitles' in info and subtitle_options.get('download', False):
237+
logger.info(f"Job {job_id}: Found subtitles in info: {info['subtitles']}")
238+
response["subtitles"] = {} # Changed from array to object
239+
requested_languages = subtitle_options.get('languages', [])
240+
requested_format = subtitle_options.get('format', 'srt')
241+
subtitle_cloud_upload = subtitle_options.get('cloud_upload', True) # Default to True
242+
243+
# If no languages specified, use all available languages
244+
if not requested_languages:
245+
requested_languages = list(info['subtitles'].keys())
246+
logger.info(f"Job {job_id}: No languages specified, using all available: {requested_languages}")
247+
248+
for lang, subtitle_list in info['subtitles'].items():
249+
# Skip if language not in requested list
250+
if lang not in requested_languages:
251+
continue
252+
253+
try:
254+
logger.info(f"Job {job_id}: Processing subtitle for language {lang}")
255+
# Find the requested format
256+
subtitle_data = None
257+
for subtitle in subtitle_list:
258+
if subtitle['ext'] == requested_format:
259+
subtitle_data = subtitle
260+
break
261+
262+
if not subtitle_data:
263+
logger.warning(f"Job {job_id}: Requested format {requested_format} not available for {lang}")
264+
continue
265+
266+
# If cloud upload is requested, download and upload the subtitle
267+
if subtitle_cloud_upload:
268+
try:
269+
subtitle_path = download_file(subtitle_data['url'], temp_dir)
270+
cloud_url = upload_file(subtitle_path)
271+
subtitle_data['url'] = cloud_url
272+
except Exception as e:
273+
logger.warning(f"Job {job_id}: Failed to download subtitle for {lang}: {str(e)}")
274+
continue
275+
276+
# Add subtitle data to response using language code as key
277+
response["subtitles"][lang] = subtitle_data
278+
logger.info(f"Job {job_id}: Successfully processed subtitle for {lang}")
279+
except Exception as e:
280+
logger.error(f"Job {job_id}: Error processing subtitle: {str(e)}")
281+
continue
282+
else:
283+
logger.info(f"Job {job_id}: No subtitles found in info or download not requested")
225284

226285
return response, "/v1/media/download", 200
227286

0 commit comments

Comments
 (0)