Skip to content

Commit e800984

Browse files
committed
fix(srt): ignore embedded bom markers in subtitle input
Summary:
- Strip BOM characters from normalized input to avoid malformed SRT cue indices.
- Add fixture test coverage for an embedded-BOM SRT sample file.
- Add an edge-case test proving that an embedded BOM does not break cue parsing.
1 parent: 41d9dc9 · commit: e800984

3 files changed

Lines changed: 30 additions & 2 deletions

File tree

Sources/SubtitleKit/Utilities/SubtitleNormalizer.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ enum SubtitleNormalizer {
1515
}
1616

1717
static func stripByteOrderMark(_ text: String) -> String {
18-
guard text.unicodeScalars.first == "\u{FEFF}" else { return text }
19-
return String(text.unicodeScalars.dropFirst())
18+
text.replacingOccurrences(of: "\u{FEFF}", with: "")
2019
}
2120

2221
static func inferLineEnding(_ text: String) -> LineEnding {

Tests/SubtitleKitTests/FixtureBasedTests.swift

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@ import Testing
44

55
@Suite("Fixture Parsing")
66
struct FixtureParsingTests {
7+
@Test("Parses embedded BOM SRT fixture")
8+
func parseEmbeddedBOMSRTFixture() throws(any Error) {
9+
let content = try FixtureSupport.fixtureText(
10+
"embedded-bom-srt-fixture",
11+
ext: "srt"
12+
)
13+
let subtitle = try Subtitle.parse(content, options: .init(format: .srt))
14+
#expect(!subtitle.cues.isEmpty)
15+
}
16+
717
@Test("Parses real fixtures", arguments: FixtureSupport.fixtureFormatNames)
818
func parseFixture(for formatName: String) throws(any Error) {
919
let format = FixtureSupport.format(named: formatName)
@@ -56,6 +66,14 @@ struct EdgeCaseTests {
5666
#expect(output.contains("\r\n"))
5767
}
5868

69+
@Test("Handles embedded BOM inside SRT stream")
70+
func embeddedBOMInSRTStream() throws(any Error) {
71+
let input = "0\n00:00:00,000 --> 00:00:01,000\nLead in\n\n\u{FEFF}1\n00:00:01,500 --> 00:00:03,000\nHello\n"
72+
let subtitle = try Subtitle.parse(input, options: .init(format: .srt))
73+
#expect(subtitle.cues.count == 2)
74+
#expect(subtitle.cues[1].startTime == 1500)
75+
}
76+
5977
@Test("Throws on malformed timestamp")
6078
func malformedTimestampThrows() {
6179
let broken = "1\n00:AB:00,500 --> 00:00:02,000\nHello\n"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
0
2+
00:00:00,000 --> 00:00:01,500
3+
Synthetic fixture header
4+
5+
1
6+
00:00:01,600 --> 00:00:03,000
7+
<i>Embedded BOM cue line.</i>
8+
9+
2
10+
00:00:03,100 --> 00:00:04,500
11+
<i>Follow-up cue line.</i>

0 commit comments

Comments
 (0)