-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranscribe.py
More file actions
110 lines (90 loc) · 3.8 KB
/
transcribe.py
File metadata and controls
110 lines (90 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import io
import os

import whisper
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS  # Add this line
from mutagen.id3 import ID3, SYLT, Encoding, ID3NoHeaderError
# Load the "tiny" Whisper model once at import time so each request does
# not pay the model-load cost. If loading fails the app still starts;
# routes that need the model respond 503 instead.
try:
    model = whisper.load_model("tiny")
    print("Tiny Whisper model loaded successfully.")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None

app = Flask(__name__)
CORS(app)  # Allows all origins

# Define the upload folder. exist_ok=True avoids the check-then-create
# race the original `if not os.path.exists(...)` guard had when several
# workers start simultaneously, and is a no-op if the dir already exists.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """
    Handle an audio upload and return Whisper segments with timestamps.

    Expects a multipart form field named 'file'.

    Returns:
        200 with {"segments": [...]} on success,
        400 when no file is supplied,
        500 when transcription raises,
        503 when the Whisper model failed to load at startup.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part in the request"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if file and model:
        # basename() strips any directory components a malicious client
        # could embed in the filename (e.g. "../../etc/passwd"), so the
        # saved file cannot escape the upload folder.
        safe_name = os.path.basename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        file.save(filepath)
        try:
            result = model.transcribe(filepath, language="en")
            transcribed_segments = result["segments"]
            print(transcribed_segments)
            return jsonify({"segments": transcribed_segments}), 200
        except Exception as e:
            return jsonify({"error": f"Transcription failed: {str(e)}"}), 500
        finally:
            # Single cleanup path for both success and failure (the
            # original duplicated os.remove in the try and except bodies).
            if os.path.exists(filepath):
                os.remove(filepath)
    else:
        return jsonify({"error": "Whisper model not loaded or file missing"}), 503
@app.route('/sylt_transcribe', methods=['POST'])
def sylt_transcribe_audio():
    """
    Handle an audio upload, embed the transcription as a SYLT
    (synchronized lyrics) ID3 tag, and return the tagged MP3.

    Expects a multipart form field named 'file'.

    Returns:
        200 with the tagged file as an attachment on success,
        400 when no file is supplied,
        500 when transcription or tagging raises,
        503 when the Whisper model failed to load at startup.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part in the request"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if file and model:
        # Path-traversal guard: discard any client-supplied directories.
        safe_name = os.path.basename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        file.save(filepath)
        try:
            # Step 1: Transcribe the audio.
            print(f"Transcribing file: {safe_name}")
            result = model.transcribe(filepath, language="en")
            transcribed_segments = result["segments"]

            # Step 2: Embed the SYLT tag with mutagen.
            try:
                audio_file = ID3(filepath)
            except ID3NoHeaderError:
                audio_file = ID3()
            # format=2 -> timestamps are absolute milliseconds; type=1 -> lyrics.
            # mutagen's SYLT frame carries its lyrics in `text` as a list of
            # (string, timestamp) tuples — it has no add_string() method, so
            # the original per-segment add_string() call raised AttributeError
            # and every request fell into the 500 handler.
            sylt_frame = SYLT(
                encoding=Encoding.UTF8, format=2, type=1,
                desc='lyrics', lang='eng',
                text=[(seg['text'], int(seg['start'] * 1000))
                      for seg in transcribed_segments],
            )
            audio_file.add(sylt_frame)
            audio_file.save(filepath)

            # Step 3: Serve the tagged file from memory. The original passed
            # the path to send_file and then deleted it in `finally`, racing
            # the response stream; reading it into a BytesIO first lets the
            # on-disk copy be removed immediately and safely.
            with open(filepath, 'rb') as fh:
                payload = io.BytesIO(fh.read())
            return send_file(payload, as_attachment=True,
                             download_name=file.filename, mimetype='audio/mpeg')
        except Exception as e:
            return jsonify({"error": f"Processing failed: {str(e)}"}), 500
        finally:
            # Clean up the temporary upload in every outcome.
            if os.path.exists(filepath):
                os.remove(filepath)
    else:
        return jsonify({"error": "Whisper model not loaded or file missing"}), 503
if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug debugger/reloader and
    # must not be used in production; host='0.0.0.0' binds every interface,
    # exposing the service to the local network on port 5001.
    app.run(debug=True, host='0.0.0.0', port=5001)