Skip to content

Commit 2ec058c

Browse files
subtitles: support srt files and convert to vtt
1 parent 6bd4523 commit 2ec058c

File tree

4 files changed

+84
-13
lines changed

4 files changed

+84
-13
lines changed

cds/modules/deposit/ext.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,18 @@
2626

2727
import re
2828
import mimetypes
29+
import tempfile
30+
import os
31+
from io import BytesIO
2932

3033
from invenio_base.signals import app_loaded
3134
from invenio_db import db
32-
from invenio_files_rest.models import ObjectVersionTag
35+
from invenio_files_rest.models import ObjectVersion, ObjectVersionTag
3336
from invenio_files_rest.signals import file_uploaded
3437
from invenio_files_rest.errors import InvalidKeyError
3538
from invenio_indexer.signals import before_record_index
3639
from invenio_records_files.utils import sorted_files_from_bucket
40+
from srt_to_vtt import srt_to_vtt
3741

3842
from ..invenio_deposit.signals import post_action
3943
from .indexer import cdsdeposit_indexer_receiver
@@ -45,6 +49,59 @@
4549
)
4650

4751

52+
def _create_vtt_from_srt(srt_obj):
    """Create a VTT file from an SRT file.

    The VTT key is the SRT key with its last extension replaced by ``.vtt``.
    If an object with that key already exists in the same bucket, it is
    returned unchanged and no conversion happens.

    Conversion is best-effort: any error raised while converting or storing
    the VTT is logged and ``None`` is returned instead of propagating.

    :param srt_obj: ObjectVersion of the SRT file
    :returns: ObjectVersion of the created (or already existing) VTT file,
        or None when the SRT has no file instance or the conversion failed
    """
    # Local import so this block does not depend on a file-level import.
    import logging

    # Generate VTT filename from SRT filename
    vtt_key = srt_obj.key.rsplit(".", 1)[0] + ".vtt"

    # Check if VTT file already exists; if so, skip the conversion
    existing_vtt = ObjectVersion.get(srt_obj.bucket_id, vtt_key)
    if existing_vtt:
        return existing_vtt

    # Ensure the SRT file has a file instance
    if not srt_obj.file or not srt_obj.file.uri:
        return None

    # NOTE(review): the URI is handed to the converter as a filesystem path;
    # this assumes local file storage -- confirm for remote backends.
    srt_path = srt_obj.file.uri
    vtt_path = None
    try:
        # Reserve a temporary output path; delete=False keeps the file on
        # disk after the handle is closed so the converter can write to it.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".vtt", delete=False, encoding="utf-8"
        ) as vtt_file:
            vtt_path = vtt_file.name

        # Convert using srt-to-vtt library
        srt_to_vtt(srt_path, vtt_path)

        # Read the converted VTT content
        with open(vtt_path, "rb") as f:
            vtt_content = f.read()

        # Create VTT ObjectVersion
        vtt_obj = ObjectVersion.create(
            bucket=srt_obj.bucket,
            key=vtt_key,
            stream=BytesIO(vtt_content),
            size=len(vtt_content),
        )
        _create_tags(vtt_obj)
        return vtt_obj
    except Exception:
        # Keep the best-effort contract (return None) but leave a trace;
        # previously every failure here was swallowed silently.
        logging.getLogger(__name__).exception(
            "Failed to create VTT from SRT file %s", srt_obj.key
        )
        return None
    finally:
        # Clean up temporary VTT file; a missing file is not an error.
        if vtt_path is not None:
            try:
                os.unlink(vtt_path)
            except OSError:
                pass
104+
48105
def _create_tags(obj):
49106
"""Create additional tags for file."""
50107
pattern_subtitle = re.compile(r".*_([a-zA-Z]{2})\.vtt$")
@@ -53,6 +110,7 @@ def _create_tags(obj):
53110
# Get the media_type and content_type(file ext)
54111
file_name = obj.key
55112
mimetypes.add_type("subtitle/vtt", ".vtt")
113+
mimetypes.add_type("text/srt", ".srt")
56114
guessed_type = mimetypes.guess_type(file_name)[0]
57115
if guessed_type is None:
58116
raise InvalidKeyError(description=f"Unsupported File: {file_name}")
@@ -73,6 +131,12 @@ def _create_tags(obj):
73131
# other tags
74132
ObjectVersionTag.create_or_update(obj, "content_type", "vtt")
75133
ObjectVersionTag.create_or_update(obj, "context_type", "subtitle")
134+
elif file_ext == "srt":
135+
# Create VTT version from SRT
136+
try:
137+
_create_vtt_from_srt(obj)
138+
except Exception:
139+
pass
76140
# poster tag
77141
elif pattern_poster.match(file_name):
78142
ObjectVersionTag.create_or_update(obj, "context_type", "poster")

cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ <h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
148148
ngf-model-options="{allowInvalid: false}"
149149
ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)"
150150
ngf-select=""
151-
ngf-pattern="'.vtt'"
152-
ngf-accept="'.vtt'"
151+
ngf-pattern="'.vtt,.srt'"
152+
ngf-accept="'.vtt,.srt'"
153153
ngf-validate-fn="$ctrl.validateSubtitles($file)"
154154
ngf-max-size="500GB"
155155
><i class="fa fa-plus-square"></i></a>
@@ -167,8 +167,8 @@ <h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
167167
ng-if="!$ctrl.cdsDepositCtrl.isPublished()"
168168
ngf-select=""
169169
ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)"
170-
ngf-pattern="'.vtt'"
171-
ngf-accept="'text/vtt'"
170+
ngf-pattern="'.vtt,.srt'"
171+
ngf-accept="'text/vtt,.vtt,.srt'"
172172
ngf-validate-fn="$ctrl.validateSubtitles($file)"
173173
ngf-max-size="500GB"
174174
ngf-multiple="true"
@@ -183,15 +183,15 @@ <h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
183183
ngf-model-options="{allowInvalid: false}"
184184
ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)"
185185
ngf-select=""
186-
ngf-pattern="'.vtt'"
187-
ngf-accept="'.vtt'"
186+
ngf-pattern="'.vtt,.srt'"
187+
ngf-accept="'.vtt,.srt'"
188188
ngf-validate-fn="$ctrl.validateSubtitles($file)"
189-
ngf-max-size="500GB">select</a> <mark>.vtt</mark> files.
189+
ngf-max-size="500GB">select</a> <mark>.vtt</mark> or <mark>.srt</mark> files.
190190
<hr class="my-10" />
191191
<div class="text-muted text-left">
192192
<h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
193193
<ul>
194-
<li>Subtitle filename should have a valid ISO language code. Example: <mark>subtitles_fr.vtt</mark> </li>
194+
<li>Subtitle filename should have a valid ISO language code. Example: <mark>subtitles_fr.vtt</mark> or <mark>subtitles_fr.srt</mark> </li>
195195
</ul>
196196
</div>
197197
</p>

cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,12 @@ function cdsUploaderCtrl(
284284

285285
// Filter out files without a valid MIME type or with zero size
286286
_files = _files.filter((file) => {
287-
if (!file.type || file.type.trim() === "") {
287+
// Allow SRT and VTT files even if they don't have a MIME type
288+
var fileName = file.name.toLowerCase();
289+
var isSubtitleFile =
290+
fileName.endsWith(".vtt") || fileName.endsWith(".srt");
291+
292+
if ((!file.type || file.type.trim() === "") && !isSubtitleFile) {
288293
toaster.pop(
289294
"warning",
290295
"Invalid File Type",
@@ -544,13 +549,14 @@ function cdsUploaderCtrl(
544549
this.validateSubtitles = function (_file) {
  // Check that the filename ends with "<separator><two letters>.vtt" or
  // "<separator><two letters>.srt" and that the two letters are a key of
  // isoLanguages, i.e. jessica_jones-en.vtt
  // The trailing `$` anchors the match to the end of the name, so the
  // language code must sit directly before the final extension
  // (e.g. "a_en.srt.vtt" is rejected).
  var match = _file.name.match(/(?:.+)[_|-]([a-zA-Z]{2})\.(vtt|srt)$/) || [];
  return match.length > 1 && match[1] in isoLanguages;
};
550555

551556
this.validateAdditionalFiles = function (_file) {
552-
// If it's a .vtt file, validate as subtitle
553-
if (_file.name.toLowerCase().endsWith(".vtt")) {
557+
// If it's a .vtt or .srt file, validate as subtitle
558+
var fileName = _file.name.toLowerCase();
559+
if (fileName.endsWith(".vtt") || fileName.endsWith(".srt")) {
554560
return this.validateSubtitles(_file);
555561
}
556562
// Accept other types

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ simplekv==0.14.1
184184
six==1.17.0
185185
soupsieve==2.6
186186
speaklater==1.3
187+
srt-to-vtt==1.0.0
187188
SQLAlchemy==1.4.54
188189
SQLAlchemy-Continuum==1.4.1
189190
SQLAlchemy-Utils==0.38.3

0 commit comments

Comments
 (0)