-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinterlinearify.py
More file actions
98 lines (75 loc) · 3.87 KB
/
interlinearify.py
File metadata and controls
98 lines (75 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import argparse
from pathlib import Path
from pydub import AudioSegment
from pydub.effects import normalize
from model import load_model
from utils import align_matches
from timestamp_types import File
import json
# Load MMS languages for validation (if needed)
mms_languages = json.load(open("mms_languages.json"))
# Load the alignment model and dictionary
model, dictionary = load_model()
def validate_language(language):
"""Check if the language is supported for alignment."""
return any(lang["iso"] == language and lang["align"] for lang in mms_languages)
def process_pair(audio_path, text_path, language, separator='lineBreak'):
"""Process a single audio-text pair to get aligned timestamps."""
audio_filename = Path(audio_path).name
text_filename = Path(text_path).name
directory = str(Path(audio_path).parent)
files = [
(audio_filename, audio_path),
(text_filename, text_path)
]
matched_files = [(files[0], files[1])] # Manually pair audio and text
if not validate_language(language):
raise ValueError(f"Unsupported language: {language}")
return align_matches(directory, language, separator, matched_files, model, dictionary)
def combine_audio(audio1_path, timestamps1, audio2_path, timestamps2):
"""Combine audio segments from both pairs into an interlinear audio file."""
audio1 = AudioSegment.from_file(audio1_path)
audio2 = AudioSegment.from_file(audio2_path)
sections1 = sorted(timestamps1[0]['sections'], key=lambda x: int(x['verse_id'].split('.')[-1]))
sections2 = sorted(timestamps2[0]['sections'], key=lambda x: int(x['verse_id'].split('.')[-1]))
if len(sections1) != len(sections2):
raise ValueError("Mismatched number of sections between the two alignments")
combined = AudioSegment.empty()
for sec1, sec2 in zip(sections1, sections2):
# Extract timings and ensure they are within audio bounds
start1, end1 = [int(float(t) * 1000) for t in sec1['timings']]
start2, end2 = [int(float(t) * 1000) for t in sec2['timings']]
end1 = min(end1, len(audio1))
end2 = min(end2, len(audio2))
# Extract and normalize segments
seg1 = normalize(audio1[start1:end1])
seg2 = normalize(audio2[start2:end2])
combined += seg1 + seg2
return combined
def main():
parser = argparse.ArgumentParser(description="Generate interlinear audio from two language pairs.")
parser.add_argument('--audio1', required=True, help="Path to the first audio file")
parser.add_argument('--txt1', required=True, help="Path to the first text file")
parser.add_argument('--audio2', required=True, help="Path to the second audio file")
parser.add_argument('--txt2', required=True, help="Path to the second text file")
parser.add_argument('--language1', required=True, help="Language code for the first pair")
parser.add_argument('--language2', required=True, help="Language code for the second pair")
parser.add_argument('--output', default='interlinear.mp3', help="Output filename (default: interlinear.mp3)")
args = parser.parse_args()
try:
# Process each pair
print("Processing first language pair...")
timestamps1 = process_pair(args.audio1, args.txt1, args.language1)
print("Processing second language pair...")
timestamps2 = process_pair(args.audio2, args.txt2, args.language2)
print("Combining audio segments...")
final_audio = combine_audio(args.audio1, timestamps1, args.audio2, timestamps2)
# Export the result
output_path = args.output
final_audio.export(output_path, format="mp3")
print(f"✅ Interlinear audio saved to {output_path}")
except Exception as e:
print(f"❌ Error: {str(e)}")
exit(1)
if __name__ == "__main__":
main()