-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranscribe.py
More file actions
110 lines (90 loc) · 3.8 KB
/
transcribe.py
File metadata and controls
110 lines (90 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import io
import os

import whisper
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS  # Add this line
from mutagen.id3 import ID3, SYLT, Encoding, ID3NoHeaderError
# Load the "tiny" Whisper model once at import time so each request does
# not pay the model-load cost. If loading fails the app still starts;
# routes that need the model respond 503 instead.
try:
    model = whisper.load_model("tiny")
    print("Tiny Whisper model loaded successfully.")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None

app = Flask(__name__)
CORS(app)  # Allows all origins

# Define the upload folder. exist_ok=True avoids the check-then-create
# race the original `if not os.path.exists(...)` guard had when several
# workers start simultaneously, and is a no-op if the dir already exists.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """
    Handle an audio upload and return Whisper segments with timestamps.

    Expects a multipart form field named 'file'.

    Returns:
        200 with {"segments": [...]} on success,
        400 when no file is supplied,
        500 when transcription raises,
        503 when the Whisper model failed to load at startup.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part in the request"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if file and model:
        # basename() strips any directory components a malicious client
        # could embed in the filename (e.g. "../../etc/passwd"), so the
        # saved file cannot escape the upload folder.
        safe_name = os.path.basename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        file.save(filepath)
        try:
            result = model.transcribe(filepath, language="en")
            transcribed_segments = result["segments"]
            print(transcribed_segments)
            return jsonify({"segments": transcribed_segments}), 200
        except Exception as e:
            return jsonify({"error": f"Transcription failed: {str(e)}"}), 500
        finally:
            # Single cleanup path for both success and failure (the
            # original duplicated os.remove in the try and except bodies).
            if os.path.exists(filepath):
                os.remove(filepath)
    else:
        return jsonify({"error": "Whisper model not loaded or file missing"}), 503
@app.route('/sylt_transcribe', methods=['POST'])
def sylt_transcribe_audio():
    """
    Handle an audio upload, embed the transcription as a SYLT
    (synchronized lyrics) ID3 tag, and return the tagged MP3.

    Expects a multipart form field named 'file'.

    Returns:
        200 with the tagged file as an attachment on success,
        400 when no file is supplied,
        500 when transcription or tagging raises,
        503 when the Whisper model failed to load at startup.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part in the request"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if file and model:
        # Path-traversal guard: discard any client-supplied directories.
        safe_name = os.path.basename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        file.save(filepath)
        try:
            # Step 1: Transcribe the audio.
            print(f"Transcribing file: {safe_name}")
            result = model.transcribe(filepath, language="en")
            transcribed_segments = result["segments"]

            # Step 2: Embed the SYLT tag with mutagen.
            try:
                audio_file = ID3(filepath)
            except ID3NoHeaderError:
                audio_file = ID3()
            # format=2 -> timestamps are absolute milliseconds; type=1 -> lyrics.
            # mutagen's SYLT frame carries its lyrics in `text` as a list of
            # (string, timestamp) tuples — it has no add_string() method, so
            # the original per-segment add_string() call raised AttributeError
            # and every request fell into the 500 handler.
            sylt_frame = SYLT(
                encoding=Encoding.UTF8, format=2, type=1,
                desc='lyrics', lang='eng',
                text=[(seg['text'], int(seg['start'] * 1000))
                      for seg in transcribed_segments],
            )
            audio_file.add(sylt_frame)
            audio_file.save(filepath)

            # Step 3: Serve the tagged file from memory. The original passed
            # the path to send_file and then deleted it in `finally`, racing
            # the response stream; reading it into a BytesIO first lets the
            # on-disk copy be removed immediately and safely.
            with open(filepath, 'rb') as fh:
                payload = io.BytesIO(fh.read())
            return send_file(payload, as_attachment=True,
                             download_name=file.filename, mimetype='audio/mpeg')
        except Exception as e:
            return jsonify({"error": f"Processing failed: {str(e)}"}), 500
        finally:
            # Clean up the temporary upload in every outcome.
            if os.path.exists(filepath):
                os.remove(filepath)
    else:
        return jsonify({"error": "Whisper model not loaded or file missing"}), 503
if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug debugger/reloader and
    # must not be used in production; host='0.0.0.0' binds every interface,
    # exposing the service to the local network on port 5001.
    app.run(debug=True, host='0.0.0.0', port=5001)