-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
282 lines (250 loc) · 8.38 KB
/
main.py
File metadata and controls
282 lines (250 loc) · 8.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#!/usr/bin/env python3
# main.py
"""
whispr - CLI Entry Point
Description:
Command-line interface for the modular transcription system.
Supports local files, YouTube URLs, and generic HTTP URLs.
Multiple backends: whisper.cpp, faster-whisper, OpenAI API.
Created By : Franck FERMAN
Version : 2.0.0
Usage examples:
python main.py --config config.json
python main.py --url https://youtube.com/watch?v=... --backend whisper_cpp --chunks 4 --language fr
python main.py --file video.mp4 --backend faster_whisper --workers 2
python main.py --file audio.wav --backend openai
python main.py --dry-run --url https://... --backend whisper_cpp
"""
import argparse
import sys
from transcriber.config import TranscriptionConfig
from transcriber.logger import setup_logging
from transcriber.managers.transcription import TranscriptionManager
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the whispr transcription CLI.

    Returns:
        argparse.ArgumentParser: a parser exposing input selection
        (--file / --url, mutually exclusive), backend choice, processing,
        output, retry and logging options.
    """
    parser = argparse.ArgumentParser(
        prog="main.py",
        description="Modular video/audio transcription system with multi-backend support.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
examples:
Load full config from file:
python main.py --config config.json
Transcribe a YouTube video with whisper.cpp, 4 workers, French:
python main.py --url https://youtube.com/watch?v=... --backend whisper_cpp --workers 4 --language fr
Transcribe a local video with faster-whisper, 2 parallel workers:
python main.py --file video.mp4 --backend faster_whisper --workers 2
Transcribe a local audio file via OpenAI API:
python main.py --file audio.wav --backend openai --openai-key sk-...
Dry run (no output written):
python main.py --dry-run --file audio.mp3 --backend whisper_cpp
""",
    )

    # --file and --url are alternative input sources; argparse enforces
    # that at most one of them is given.
    source = parser.add_mutually_exclusive_group()
    source.add_argument(
        "--file", "-f",
        metavar="PATH",
        help="Path to a local audio/video file.",
    )
    source.add_argument(
        "--url", "-u",
        metavar="URL",
        help="URL to a YouTube video or direct audio/video URL.",
    )

    backend_choices = ["whisper_cpp", "faster_whisper", "openai"]

    # Remaining options are declared as (flags, kwargs) specs and
    # registered in one pass, grouped in the same order as the help output.
    specs = [
        # -- Config file --
        (("--config", "-c"),
         dict(metavar="FILE",
              help="Path to a JSON configuration file. CLI arguments override config file values.")),
        # -- Backend --
        (("--backend", "-b"),
         dict(choices=backend_choices, metavar="BACKEND",
              help="Transcription backend: whisper_cpp | faster_whisper | openai (default: whisper_cpp).")),
        (("--fallback-backend",),
         dict(choices=backend_choices, metavar="BACKEND",
              help="Fallback backend if the primary backend fails all retries.")),
        # -- Backend-specific --
        (("--whisper-binary",),
         dict(metavar="PATH",
              help="Path to the whisper.cpp binary (default: 'whisper').")),
        (("--whisper-model",),
         dict(metavar="PATH",
              help="Path to the GGML model file for whisper.cpp.")),
        (("--fw-model",),
         dict(metavar="NAME",
              help="faster-whisper model size (tiny/base/small/medium/large-v2, default: base).")),
        (("--fw-device",),
         dict(choices=["cpu", "cuda"], metavar="DEVICE",
              help="faster-whisper inference device (cpu or cuda, default: cpu).")),
        (("--openai-key",),
         dict(metavar="KEY",
              help="OpenAI API key (can also be set via OPENAI_API_KEY env var).")),
        (("--openai-model",),
         dict(metavar="MODEL",
              help="OpenAI model name (default: whisper-1).")),
        # -- Processing --
        (("--language", "-l"),
         dict(metavar="LANG",
              help="ISO 639-1 language code (e.g. 'fr', 'en'). Auto-detect if omitted.")),
        (("--chunk-duration",),
         dict(type=int, metavar="SECONDS",
              help="Duration of each audio chunk in seconds (default: 600).")),
        (("--workers", "-w"),
         dict(type=int, metavar="N",
              help="Number of parallel transcription workers (default: 2).")),
        (("--temp-dir",),
         dict(metavar="DIR",
              help="Directory for temporary files (default: OS temp dir).")),
        # -- Output --
        (("--format", "-F"),
         dict(dest="output_format", metavar="FMT",
              help=(
                  "Output format(s), comma-separated: txt,json,srt,vtt "
                  "(default: txt). Example: --format txt,srt"
              ))),
        (("--output-dir", "-o"),
         dict(metavar="DIR",
              help="Directory where output files are written (default: current directory).")),
        (("--output-prefix",),
         dict(metavar="PREFIX",
              help="Base filename prefix for output files (default: transcript).")),
        # -- Retry --
        (("--max-retries",),
         dict(type=int, metavar="N",
              help="Maximum retry attempts per chunk on backend failure (default: 3).")),
        # -- Misc --
        (("--dry-run",),
         dict(action="store_true",
              help="Print what would be done without performing transcription or writing files.")),
        (("--debug",),
         dict(action="store_true",
              help="Enable DEBUG-level logging.")),
        (("--log-file",),
         dict(metavar="FILE",
              help="Write logs to this file. Use 'auto' for a timestamped filename.")),
    ]
    for flags, kwargs in specs:
        parser.add_argument(*flags, **kwargs)

    return parser
# String-valued CLI attributes mapped to their TranscriptionConfig keys.
# Empty strings count as "not provided" (same truthiness test as before).
_STRING_OVERRIDES = [
    ("file", "input_file"),
    ("url", "input_url"),
    ("backend", "backend"),
    ("fallback_backend", "fallback_backend"),
    ("language", "language"),
    ("temp_dir", "temp_dir"),
    ("output_dir", "output_dir"),
    ("output_prefix", "output_prefix"),
    ("log_file", "log_file"),
    # Backend-specific
    ("whisper_binary", "whisper_cpp_binary"),
    ("whisper_model", "whisper_cpp_model"),
    ("fw_model", "faster_whisper_model"),
    ("fw_device", "faster_whisper_device"),
    ("openai_key", "openai_api_key"),
    ("openai_model", "openai_model"),
]

# Integer-valued CLI attributes. Presence is tested with `is not None` so an
# explicit 0 is forwarded to config validation instead of silently dropped.
# (The original used truthiness for chunk_duration/workers but `is not None`
# for max_retries — `--workers 0` was ignored while `--max-retries 0` was
# honored; this makes all three consistent.)
_INT_OVERRIDES = [
    ("chunk_duration", "chunk_duration_seconds"),
    ("workers", "workers"),
    ("max_retries", "max_retries"),
]


def _cli_overrides(args: argparse.Namespace) -> dict:
    """Collect explicitly-supplied CLI values into a config-override dict.

    Only values the user actually provided are included, so config-file
    settings survive for everything left at its argparse default.
    """
    overrides: dict = {}
    for attr, key in _STRING_OVERRIDES:
        value = getattr(args, attr)
        if value:
            overrides[key] = value
    for attr, key in _INT_OVERRIDES:
        value = getattr(args, attr)
        if value is not None:
            overrides[key] = value
    # store_true flags: propagate only True so an absent flag never clobbers
    # a True value coming from the config file.
    for flag in ("dry_run", "debug"):
        if getattr(args, flag):
            overrides[flag] = True
    # --format is a comma-separated list; blank entries are discarded.
    if args.output_format:
        formats = [f.strip() for f in args.output_format.split(",") if f.strip()]
        if formats:
            overrides["output_formats"] = formats
    return overrides


def main() -> int:
    """CLI entry point.

    Returns:
        int: process exit code — 0 on success, 2 for configuration/input
        errors (ValueError, FileNotFoundError), 1 for runtime failures,
        130 when interrupted by Ctrl-C.
    """
    parser = build_parser()
    args = parser.parse_args()

    # Configure logging before any other output so early errors are captured.
    setup_logging(
        debug=args.debug,
        log_file=args.log_file,
    )

    # Base config comes from the JSON file when given, defaults otherwise;
    # explicit CLI arguments always win over config-file values.
    if args.config:
        config = TranscriptionConfig.from_json_file(args.config)
    else:
        config = TranscriptionConfig()
    config.apply_overrides(_cli_overrides(args))

    try:
        manager = TranscriptionManager(config)
        manager.run()
        return 0
    except (ValueError, FileNotFoundError) as exc:
        print(f"[ERROR] {exc}", file=sys.stderr)
        return 2
    except RuntimeError as exc:
        print(f"[ERROR] {exc}", file=sys.stderr)
        return 1
    except KeyboardInterrupt:
        # KeyboardInterrupt is not an Exception subclass, so it must be
        # caught explicitly; 130 is the conventional SIGINT exit code.
        print("\n[INFO] Interrupted by user.", file=sys.stderr)
        return 130


if __name__ == "__main__":
    sys.exit(main())