|
1 | 1 | # SPDX-License-Identifier: Apache-2.0 |
2 | | -"""Tests for STT data types, config, formatting, and audio pipeline.""" |
| 2 | +"""Tests for STT data types, config, and audio pipeline.""" |
3 | 3 |
|
4 | 4 | from __future__ import annotations |
5 | 5 |
|
|
19 | 19 | get_whisper_languages, |
20 | 20 | validate_language, |
21 | 21 | ) |
22 | | -from vllm_metal.stt.formatting import format_as_srt, format_as_vtt |
23 | 22 | from vllm_metal.stt.protocol import TranscriptionSegment |
24 | 23 |
|
25 | 24 | # =========================================================================== |
@@ -140,73 +139,6 @@ def test_default_values(self) -> None: |
140 | 139 | assert seg.no_speech_prob == 0.0 |
141 | 140 |
|
142 | 141 |
|
143 | | -# =========================================================================== |
144 | | -# Formatting (SRT / VTT) |
145 | | -# =========================================================================== |
146 | | - |
147 | | - |
148 | | -class TestFormatting: |
149 | | - """Tests for SRT and VTT subtitle formatting.""" |
150 | | - |
151 | | - @pytest.fixture() |
152 | | - def sample_segments(self) -> list[TranscriptionSegment]: |
153 | | - return [ |
154 | | - TranscriptionSegment( |
155 | | - id=0, |
156 | | - seek=0, |
157 | | - start=0.0, |
158 | | - end=2.5, |
159 | | - text=" Hello world.", |
160 | | - tokens=[1, 2, 3], |
161 | | - ), |
162 | | - TranscriptionSegment( |
163 | | - id=1, |
164 | | - seek=250, |
165 | | - start=2.5, |
166 | | - end=5.0, |
167 | | - text=" How are you?", |
168 | | - tokens=[4, 5, 6], |
169 | | - ), |
170 | | - ] |
171 | | - |
172 | | - def test_srt_format(self, sample_segments: list[TranscriptionSegment]) -> None: |
173 | | - srt = format_as_srt(sample_segments) |
174 | | - lines = srt.split("\n") |
175 | | - assert lines[0] == "1" |
176 | | - assert "00:00:00,000 --> 00:00:02,500" in lines[1] |
177 | | - assert "Hello world." in lines[2] |
178 | | - assert lines[4] == "2" |
179 | | - assert "00:00:02,500 --> 00:00:05,000" in lines[5] |
180 | | - |
181 | | - def test_vtt_format(self, sample_segments: list[TranscriptionSegment]) -> None: |
182 | | - vtt = format_as_vtt(sample_segments) |
183 | | - lines = vtt.split("\n") |
184 | | - assert lines[0] == "WEBVTT" |
185 | | - assert lines[1] == "" |
186 | | - assert "00:00:00.000 --> 00:00:02.500" in lines[2] |
187 | | - assert "Hello world." in lines[3] |
188 | | - |
189 | | - def test_srt_empty_segments(self) -> None: |
190 | | - assert format_as_srt([]) == "" |
191 | | - |
192 | | - def test_vtt_empty_segments(self) -> None: |
193 | | - vtt = format_as_vtt([]) |
194 | | - assert vtt.startswith("WEBVTT") |
195 | | - |
196 | | - def test_srt_long_timestamps(self) -> None: |
197 | | - seg = TranscriptionSegment( |
198 | | - id=0, |
199 | | - seek=0, |
200 | | - start=3661.123, |
201 | | - end=3665.456, |
202 | | - text=" One hour in.", |
203 | | - tokens=[1], |
204 | | - ) |
205 | | - srt = format_as_srt([seg]) |
206 | | - assert "01:01:01,123" in srt |
207 | | - assert "01:01:05,456" in srt |
208 | | - |
209 | | - |
210 | 142 | # =========================================================================== |
211 | 143 | # Audio pipeline (log_mel_spectrogram, _stft) |
212 | 144 | # =========================================================================== |
|
0 commit comments