Skip to content

Commit e82989f

Browse files
authored
Merge pull request #14 from EveryVoiceTTS/dev.ej/326
Several improvements to wav2vec2aligner system: early errors, testing, py 3.8 compatibility, etc
2 parents a7beb49 + 0f41adc commit e82989f

File tree

10 files changed

+176
-21
lines changed

10 files changed

+176
-21
lines changed

.coveragerc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[run]
2+
source_pkgs = aligner
3+
omit =
4+
*tmp*
5+
*/run_tests.py
6+
*/tests/*
7+
*/__main__.py
8+
9+
[report]
10+
precision = 2
11+
exclude_lines =
12+
pragma: no cover
13+
if 0:
14+
if __name__ == .__main__.:

.github/workflows/test.yml

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,38 @@ jobs:
1313
run:
1414
shell: bash -l {0}
1515
steps:
16-
- name: Checkout repository
17-
uses: actions/checkout@v4
18-
- name: Set up Python
19-
uses: actions/setup-python@v5
16+
- uses: actions/checkout@v4
17+
18+
- uses: actions/setup-python@v5
2019
with:
2120
python-version: "3.8"
2221
cache: "pip"
22+
23+
- uses: FedericoCarboni/setup-ffmpeg@v2
24+
2325
- name: Install dependencies and package
24-
run: pip install -e . mypy
25-
- name: Minimal test, --help should work
26-
run: ctc-segmenter --help
27-
- name: Code quality test, mypy should pass
26+
run: pip install -e . mypy coverage
27+
28+
- name: Minimal code quality test, mypy should pass
2829
run: mypy aligner
30+
31+
- uses: actions/cache@v4
32+
with:
33+
path: /home/runner/.cache/torch
34+
key: torch-cache
35+
36+
- name: Run unit tests
37+
run: |
38+
coverage run -m unittest discover aligner.tests -v
39+
coverage xml
40+
41+
- name: Upload coverage report to Codecov
42+
uses: codecov/codecov-action@v4
43+
with:
44+
token: ${{ secrets.CODECOV_TOKEN }}
45+
46+
- run: coverage report
47+
2948
- name: Make sure the CLI stays fast
3049
id: cli-load-time
3150
run: |
@@ -44,6 +63,7 @@ jobs:
4463
echo "Please run 'PYTHONPROFILEIMPORTTIME=1 ctc-segmenter -h 2> importtime.txt; tuna importtime.txt' and tuck away expensive imports so that the CLI doesn't load them until it uses them."; \
4564
false; \
4665
fi
66+
4767
- name: Report help speed in PR
4868
if: github.event_name == 'pull_request'
4969
uses: mshick/add-pr-comment@v2

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
*.egg-info
22
__pycache__
33
*.wav
4-
*.TextGrid
4+
*.TextGrid
.coverage

aligner/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .cli import app
2+
3+
# Entry point when the package is executed as a module (python -m <package>):
# hand control to the CLI application object imported from .cli.
app()

aligner/cli.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,14 @@ def align_single(
134134
),
135135
debug: bool = typer.Option(False, help="Print debug statements"),
136136
):
137-
print("loading model...")
137+
# Do fast error checking before loading expensive dependencies
138+
sentence_list = read_text(text_path)
139+
if not sentence_list or not any(sentence_list):
140+
raise typer.BadParameter(
141+
f"TEXT_PATH file '{text_path}' is empty; it should contain sentences to align.",
142+
)
143+
144+
print("loading pytorch...")
138145
import torch
139146
import torchaudio
140147

@@ -155,7 +162,6 @@ def align_single(
155162
audio_path = Path(fn + f"-{sample_rate}-mono" + ext)
156163
torchaudio.save(str(audio_path), wav, sample_rate)
157164
print("processing text")
158-
sentence_list = read_text(text_path)
159165
transducer = create_transducer("".join(sentence_list), labels, debug)
160166
text_hash = TextHash(sentence_list, transducer)
161167
print("performing alignment")
@@ -180,7 +186,3 @@ def align_single(
180186
tg_path = audio_path.with_suffix(".TextGrid")
181187
print(f"writing file to {tg_path}")
182188
tg.to_file(tg_path)
183-
184-
185-
if __name__ == "__main__":
186-
align_single()

aligner/heavy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def compute_alignments(
7979
token_index += 1
8080
frames.append(Frame(token_index, i, score))
8181
prev_hyp = ali
82-
words_to_match = [v | {"key": k} for k, v in transcript_hash.items() if "w" in k]
82+
words_to_match = [{**v, "key": k} for k, v in transcript_hash.items() if "w" in k]
8383
i1, i2 = 0, 0
8484
segments = []
8585
while i1 < len(frames):

aligner/tests/__init__.py

Whitespace-only changes.

aligner/tests/test_cli.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""
2+
Run wav2vec2aligner unit tests.
3+
How to run this test suite:
4+
If you installed wav2vec2aligner:
5+
python -m unittest aligner.tests.test_cli
6+
If you installed everyvoice:
7+
python -m unittest everyvoice.model.aligner.wav2vec2aligner.aligner.tests.test_cli
8+
"""
9+
10+
import os
11+
import subprocess
12+
import tempfile
13+
from pathlib import Path
14+
from unittest import TestCase
15+
16+
from typer.testing import CliRunner
17+
18+
from ..classes import Segment
19+
from ..cli import app, complete_path
20+
21+
22+
class CLITest(TestCase):
    """Exercise the command-line app through typer's CliRunner."""

    def setUp(self) -> None:
        self.runner = CliRunner()

    def test_main_help(self):
        """Top-level help succeeds and lists both sub-commands."""
        for help_flag in "-h", "--help":
            with self.subTest(help=help_flag):
                result = self.runner.invoke(app, [help_flag])
                self.assertEqual(result.exit_code, 0)
                self.assertIn("align", result.stdout)
                self.assertIn("extract", result.stdout)

    def test_sub_help(self):
        """Each sub-command's help succeeds and shows a usage line naming it."""
        for cmd in "align", "extract":
            for help_flag in "-h", "--help":
                with self.subTest(cmd=cmd, help=help_flag):
                    result = self.runner.invoke(app, [cmd, help_flag])
                    self.assertEqual(result.exit_code, 0)
                    self.assertIn("Usage:", result.stdout)
                    self.assertIn(cmd, result.stdout)

    def test_align_empty_file(self):
        """align must fail with an "is empty" message when the text has no content."""
        with self.subTest("empty file"):
            result = self.runner.invoke(app, ["align", os.devnull, os.devnull])
            self.assertNotEqual(result.exit_code, 0)
            self.assertIn("is empty", result.stdout)

        with self.subTest("file with only empty lines"):
            with tempfile.TemporaryDirectory() as tmpdir:
                textfile = os.path.join(tmpdir, "emptylines.txt")
                with open(textfile, "w", encoding="utf8") as f:
                    f.write("\n \n \n")
                result = self.runner.invoke(app, ["align", textfile, os.devnull])
                self.assertNotEqual(result.exit_code, 0)
                self.assertIn("is empty", result.stdout)

    def fetch_ras_test_file(self, filename, outputdir):
        """Download `filename` from the ReadAlongs/Studio test data into `outputdir`.

        The file is written to `outputdir/filename` in binary mode.
        """
        from urllib.request import Request, urlopen

        repo, path = "https://github.com/ReadAlongs/Studio/", "/test/data/"
        request = Request(repo + "raw/refs/heads/main" + path + filename)
        request.add_header("Referer", repo + "blob/main" + path + filename)
        # Use the response as a context manager so the connection is always
        # closed, even if reading or writing fails.
        with urlopen(request) as response, open(
            os.path.join(outputdir, filename), "wb"
        ) as f:
            f.write(response.read())

    def test_align_something(self):
        """Run align and then extract end-to-end on a small downloaded sample."""
        with tempfile.TemporaryDirectory() as tmpdir:
            tmppath = Path(tmpdir)
            self.fetch_ras_test_file("ej-fra.txt", tmpdir)
            txt = tmppath / "ej-fra.txt"
            self.fetch_ras_test_file("ej-fra.m4a", tmpdir)
            m4a = tmppath / "ej-fra.m4a"
            wav = tmppath / "ej-fra.wav"
            # Under most circumstances, align can take a .m4a input file, but
            # not in CI. Since that is not a hard requirement, just convert
            # the input to .wav.
            subprocess.run(["ffmpeg", "-i", m4a, wav], capture_output=True)
            textgrid = tmppath / "ej-fra-16000.TextGrid"
            wav_out = tmppath / "ej-fra-16000.wav"

            with self.subTest("ctc-segmenter align"):
                result = self.runner.invoke(app, ["align", str(txt), str(wav)])
                if result.exit_code != 0:
                    # Dump the directory listing and CLI output to help
                    # diagnose the failure.
                    os.system("ls -la " + tmpdir)
                    print(result.stdout)
                self.assertEqual(result.exit_code, 0)
                self.assertTrue(textgrid.exists())
                self.assertTrue(wav_out.exists())

            with self.subTest("ctc-segmenter extract"):
                result = self.runner.invoke(
                    app, ["extract", str(textgrid), str(wav_out), str(tmppath / "out")]
                )
                if result.exit_code != 0:
                    print(result.stdout)
                self.assertEqual(result.exit_code, 0)
                self.assertTrue((tmppath / "out/metadata.psv").exists())
                with open(txt, encoding="utf8") as txt_f:
                    non_blank_line_count = sum(1 for line in txt_f if line.strip())
                for i in range(non_blank_line_count):
                    # A Path object is always truthy, so the existence check
                    # has to be explicit for this assertion to test anything.
                    self.assertTrue((tmppath / f"out/wavs/segment{i}.wav").exists())
104+
105+
106+
class MiscTests(TestCase):
    """Checks for small helpers that do not need the CLI runner."""

    def test_shell_complete(self):
        """complete_path returns an empty list with no arguments or with three Nones."""
        no_args_result = complete_path()
        self.assertEqual(no_args_result, [])
        three_nones_result = complete_path(None, None, None)
        self.assertEqual(three_nones_result, [])

    def test_segment(self):
        """Segment reports its length via len() and formats itself via repr()."""
        seg = Segment("text", 500, 700, 0.42)
        expected_len = 200
        expected_repr = "text (0.42): [ 500, 700)"
        self.assertEqual(len(seg), expected_len)
        self.assertEqual(repr(seg), expected_repr)

aligner/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def create_transducer(text, labels_dictionary, debug=False):
3232
if char not in allowable_chars and char not in fallback_mapping:
3333
fallback_mapping[char] = ""
3434
for k in fallback_mapping.keys():
35-
if debug:
35+
if debug: # pragma: no cover
3636
print(
3737
f"Found {k} which is not modelled by Wav2Vec2; skipping for alignment"
3838
)
@@ -49,7 +49,7 @@ def create_transducer(text, labels_dictionary, debug=False):
4949

5050

5151
def read_text(text_path):
52-
with open(text_path) as f:
52+
with open(text_path, encoding="utf8") as f:
5353
return [x.strip() for x in f]
5454

5555

requirements.txt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
torch>=2.1.0
2-
torchaudio>=2.1.0
31
g2p>=1.0.20230417
2+
pydub>=0.23.1
43
pympi-ling
5-
typer>=0.9.0
64
rich>=10.11.0
75
shellingham>=1.3.0
6+
soundfile>=0.10.2
7+
torch>=2.1.0
8+
torchaudio>=2.1.0
9+
typer>=0.9.0

0 commit comments

Comments
 (0)