Skip to content

Commit 283bfd9

Browse files
cfsmp3 and claude authored
feat(mp4): add dvdsub/VobSub support to FFmpeg MP4 demuxer (#2269)
* feat(mp4): add dvdsub/VobSub support to FFmpeg MP4 demuxer

  Wire the existing vobsub_decoder (OCR-based bitmap subtitle decoder) into the FFmpeg MP4 demuxer path. Previously, dvdsub tracks in MP4 containers were documented as unsupported — GPAC could extract them but the FFmpeg path could not.

  Changes:
  - Add DvdSub track type detection (AV_CODEC_ID_DVD_SUBTITLE) in mp4.rs
  - Add C bridge functions (ccx_mp4_vobsub_init/process/free) that call the existing vobsub_decoder module
  - Buffer dvdsub packets to compute end times from next-packet PTS
  - Add --undefined linker flags for bridge symbols

  Tested on sample 1f3e951d516b.mp4 (dvdsub in MP4):
  - GPAC: 5405 bytes output
  - FFmpeg + this patch: 5405 bytes, byte-identical to GPAC

  With this change, the FFmpeg path extracts captions from every sample that GPAC can, plus one additional sample (ad9f9e03240e.m4v) that GPAC cannot handle.

* style: apply clang-format to vobsub bridge

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent e4443a7 commit 283bfd9

4 files changed

Lines changed: 159 additions & 7 deletions

File tree

src/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,9 @@ if (NOT WIN32 AND NOT APPLE)
307307
-Wl,--undefined=ccx_mp4_process_tx3g_packet
308308
-Wl,--undefined=ccx_mp4_flush_tx3g
309309
-Wl,--undefined=ccx_mp4_report_progress
310+
-Wl,--undefined=ccx_mp4_vobsub_init
311+
-Wl,--undefined=ccx_mp4_vobsub_process
312+
-Wl,--undefined=ccx_mp4_vobsub_free
310313
-Wl,--undefined=mprint
311314
-Wl,--undefined=update_decoder_list
312315
-Wl,--undefined=update_encoder_list)

src/lib_ccx/mp4_rust_bridge.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "ccx_encoders_mcc.h"
1616
#include "ccx_mp4.h"
1717
#include "mp4_rust_bridge.h"
18+
#include "vobsub_decoder.h"
1819

1920
/* Walk a length-prefixed AVCC/HVCC sample, invoking do_NAL() per NAL unit.
2021
* AVC and HEVC share the iteration; is_hevc only flips the decoder state and
@@ -280,4 +281,70 @@ void ccx_mp4_report_progress(struct lib_ccx_ctx *ctx, unsigned int cur, unsigned
280281
}
281282
}
282283

284+
/* ── VobSub / DVD subtitle bridge ───────────────────────────────── */
285+
286+
/*
 * Create a VobSub decoder for the MP4 path.
 *
 * Returns the pointer produced by init_vobsub_decoder() as an opaque handle.
 * When vobsub_ocr_available() reports that OCR is missing, a rebuild hint is
 * printed and NULL is returned instead.
 */
void *ccx_mp4_vobsub_init(void)
{
	if (vobsub_ocr_available())
		return init_vobsub_decoder();

	mprint("VOBSUB to text conversion requires OCR support.\n"
	       "Please rebuild CCExtractor with -DWITH_OCR=ON\n");
	return NULL;
}
296+
297+
int ccx_mp4_vobsub_process(void *vob_opaque, struct lib_ccx_ctx *ctx,
298+
unsigned char *data, unsigned int data_length,
299+
long long start_ms, long long end_ms,
300+
struct cc_subtitle *sub)
301+
{
302+
struct vobsub_ctx *vob_ctx = (struct vobsub_ctx *)vob_opaque;
303+
struct lib_cc_decode *dec_ctx = update_decoder_list(ctx);
304+
struct encoder_ctx *enc_ctx = update_encoder_list(ctx);
305+
306+
set_current_pts(dec_ctx->timing, start_ms * MPEG_CLOCK_FREQ / 1000);
307+
set_fts(dec_ctx->timing);
308+
309+
struct cc_subtitle vob_sub;
310+
memset(&vob_sub, 0, sizeof(vob_sub));
311+
312+
int ret = vobsub_decode_spu(vob_ctx, data, (size_t)data_length,
313+
start_ms, end_ms, &vob_sub);
314+
315+
if (ret == 0 && vob_sub.got_output)
316+
{
317+
encode_sub(enc_ctx, &vob_sub);
318+
sub->got_output = 1;
319+
320+
if (vob_sub.data)
321+
{
322+
struct cc_bitmap *rect = (struct cc_bitmap *)vob_sub.data;
323+
for (int j = 0; j < vob_sub.nb_data; j++)
324+
{
325+
if (rect[j].data0)
326+
free(rect[j].data0);
327+
if (rect[j].data1)
328+
free(rect[j].data1);
329+
#ifdef ENABLE_OCR
330+
if (rect[j].ocr_text)
331+
free(rect[j].ocr_text);
332+
#endif
333+
}
334+
free(vob_sub.data);
335+
}
336+
}
337+
338+
return ret;
339+
}
340+
341+
/*
 * Destroy a VobSub decoder handle created by ccx_mp4_vobsub_init().
 *
 * A NULL handle is accepted and ignored. Otherwise the handle is passed to
 * delete_vobsub_decoder() by address; only the local copy of the pointer is
 * visible to that call, so the caller's own pointer is not modified.
 */
void ccx_mp4_vobsub_free(void *vob_opaque)
{
	struct vobsub_ctx *decoder = (struct vobsub_ctx *)vob_opaque;

	if (!decoder)
		return;

	delete_vobsub_decoder(&decoder);
}
349+
283350
#endif /* ENABLE_FFMPEG_MP4 */

src/lib_ccx/mp4_rust_bridge.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,17 @@ extern "C"
9191
*/
9292
void ccx_mp4_report_progress(struct lib_ccx_ctx *ctx, unsigned int cur, unsigned int total);
9393

94+
/*
95+
* VobSub (DVD subtitle) bridge.
96+
* Wraps vobsub_decoder.c for use from the Rust FFmpeg path.
97+
*/
98+
void *ccx_mp4_vobsub_init(void);
99+
int ccx_mp4_vobsub_process(void *vob_ctx, struct lib_ccx_ctx *ctx,
100+
unsigned char *data, unsigned int data_length,
101+
long long start_ms, long long end_ms,
102+
struct cc_subtitle *sub);
103+
void ccx_mp4_vobsub_free(void *vob_ctx);
104+
94105
#ifdef __cplusplus
95106
}
96107
#endif

src/rust/src/demuxer/mp4.rs

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,7 @@
1111
//! - `c708` subtitle (CEA-708 via ccdp)
1212
//! - `tx3g` / `mov_text` timed-text subtitles
1313
//!
14-
//! # Known limitations
15-
//! - **dvdsub / bitmap subtitles in MP4** are not supported. Samples such as
16-
//! `1f3e951d516b.mp4` contain `subp` tracks with DVD-style bitmap subtitles,
17-
//! which neither the GPAC backend nor this FFmpeg backend currently decodes.
18-
//! Extracting these requires rendering bitmaps and running OCR, which is out
19-
//! of scope for the MP4 demuxer itself; track it separately if needed.
14+
//! - `dvdsub` (DVD bitmap subtitles) via OCR through vobsub_decoder
2015
2116
#[cfg(feature = "enable_mp4_ffmpeg")]
2217
use rsmpeg::avformat::AVFormatContextInput;
@@ -67,6 +62,18 @@ extern "C" {
6762

6863
fn ccx_mp4_report_progress(ctx: *mut lib_ccx_ctx, cur: c_uint, total: c_uint);
6964

65+
fn ccx_mp4_vobsub_init() -> *mut std::ffi::c_void;
66+
fn ccx_mp4_vobsub_process(
67+
vob_ctx: *mut std::ffi::c_void,
68+
ctx: *mut lib_ccx_ctx,
69+
data: *mut u8,
70+
data_length: c_uint,
71+
start_ms: i64,
72+
end_ms: i64,
73+
sub: *mut cc_subtitle,
74+
) -> c_int;
75+
fn ccx_mp4_vobsub_free(vob_ctx: *mut std::ffi::c_void);
76+
7077
fn update_decoder_list(ctx: *mut lib_ccx_ctx) -> *mut lib_cc_decode;
7178
fn update_encoder_list(ctx: *mut lib_ccx_ctx) -> *mut encoder_ctx;
7279

@@ -83,6 +90,7 @@ enum TrackType {
8390
Cea608,
8491
Cea708,
8592
Tx3g,
93+
DvdSub,
8694
}
8795

8896
/// Information about a track we want to process
@@ -195,6 +203,8 @@ pub unsafe fn processmp4_rust(ctx: *mut lib_ccx_ctx, path: &CStr, sub: *mut cc_s
195203
Some(TrackType::Cea708)
196204
} else if codec_tag == FOURCC_TX3G || codec_id == ffi::AV_CODEC_ID_MOV_TEXT {
197205
Some(TrackType::Tx3g)
206+
} else if codec_id == ffi::AV_CODEC_ID_DVD_SUBTITLE {
207+
Some(TrackType::DvdSub)
198208
} else {
199209
None
200210
}
@@ -209,6 +219,7 @@ pub unsafe fn processmp4_rust(ctx: *mut lib_ccx_ctx, path: &CStr, sub: *mut cc_s
209219
TrackType::Cea608 => "CEA-608",
210220
TrackType::Cea708 => "CEA-708",
211221
TrackType::Tx3g => "tx3g",
222+
TrackType::DvdSub => "dvdsub",
212223
};
213224
let msg = format!(
214225
"Track {}, type={} timescale={}\n\0",
@@ -238,7 +249,7 @@ pub unsafe fn processmp4_rust(ctx: *mut lib_ccx_ctx, path: &CStr, sub: *mut cc_s
238249
.filter(|t| {
239250
matches!(
240251
t.track_type,
241-
TrackType::Cea608 | TrackType::Cea708 | TrackType::Tx3g
252+
TrackType::Cea608 | TrackType::Cea708 | TrackType::Tx3g | TrackType::DvdSub
242253
)
243254
})
244255
.count();
@@ -279,13 +290,23 @@ pub unsafe fn processmp4_rust(ctx: *mut lib_ccx_ctx, path: &CStr, sub: *mut cc_s
279290
}
280291
}
281292

293+
// Init vobsub decoder if we have dvdsub tracks
294+
let has_dvdsub = tracks.iter().any(|t| t.track_type == TrackType::DvdSub);
295+
let vob_ctx = if has_dvdsub {
296+
ccx_mp4_vobsub_init()
297+
} else {
298+
std::ptr::null_mut()
299+
};
300+
282301
// Read packets and dispatch
283302
let mut mp4_ret: c_int = 0;
284303
let mut pkt: ffi::AVPacket = std::mem::zeroed();
285304
ffi::av_init_packet(&mut pkt);
286305

287306
let mut packet_count: u32 = 0;
288307
let mut has_tx3g = false;
308+
let mut prev_dvdsub_pts: i64 = -1;
309+
let mut prev_dvdsub_data: Vec<u8> = Vec::new();
289310

290311
loop {
291312
let ret = ffi::av_read_frame(fmt_ctx.as_ptr() as *mut _, &mut pkt);
@@ -386,6 +407,37 @@ pub unsafe fn processmp4_rust(ctx: *mut lib_ccx_ctx, path: &CStr, sub: *mut cc_s
386407
}
387408
}
388409
}
410+
TrackType::DvdSub => {
411+
if pkt.size > 0 && !pkt.data.is_null() && !vob_ctx.is_null() {
412+
let stream = *(*fmt_ctx.as_ptr()).streams.add(track.stream_index);
413+
let tb = (*stream).time_base;
414+
let cur_pts_ms = if pts != AV_NOPTS_VALUE && tb.den > 0 {
415+
pts * 1000 * tb.num as i64 / tb.den as i64
416+
} else {
417+
0
418+
};
419+
420+
// Flush previous dvdsub packet now that we know its end time
421+
if !prev_dvdsub_data.is_empty() && prev_dvdsub_pts >= 0 {
422+
let end_ms = cur_pts_ms;
423+
ccx_mp4_vobsub_process(
424+
vob_ctx,
425+
ctx,
426+
prev_dvdsub_data.as_mut_ptr(),
427+
prev_dvdsub_data.len() as c_uint,
428+
prev_dvdsub_pts,
429+
end_ms,
430+
sub,
431+
);
432+
mp4_ret = 1;
433+
}
434+
435+
// Buffer current packet
436+
let data_slice = std::slice::from_raw_parts(pkt.data, pkt.size as usize);
437+
prev_dvdsub_data = data_slice.to_vec();
438+
prev_dvdsub_pts = cur_pts_ms;
439+
}
440+
}
389441
}
390442
}
391443

@@ -402,6 +454,25 @@ pub unsafe fn processmp4_rust(ctx: *mut lib_ccx_ctx, path: &CStr, sub: *mut cc_s
402454
ccx_mp4_flush_tx3g(ctx, sub);
403455
}
404456

457+
// Flush last dvdsub packet (use 5s default duration)
458+
if !prev_dvdsub_data.is_empty() && prev_dvdsub_pts >= 0 && !vob_ctx.is_null() {
459+
let end_ms = prev_dvdsub_pts + 5000;
460+
ccx_mp4_vobsub_process(
461+
vob_ctx,
462+
ctx,
463+
prev_dvdsub_data.as_mut_ptr(),
464+
prev_dvdsub_data.len() as c_uint,
465+
prev_dvdsub_pts,
466+
end_ms,
467+
sub,
468+
);
469+
}
470+
471+
// Free vobsub decoder
472+
if !vob_ctx.is_null() {
473+
ccx_mp4_vobsub_free(vob_ctx);
474+
}
475+
405476
// End-of-stream: encode any caption that finished on the last processed
406477
// sample but hasn't been flushed by slice_header yet. GPAC's mp4.c does
407478
// the equivalent via encode_sub after its per-track loop returns.

0 commit comments

Comments
 (0)