Skip to content

Commit 5a64350

Browse files
⬆️ Bump FastAPI to fix starlette DoS vulnerabilities
Allow starlette >=0.49.1, which patches two vulnerabilities:
- High: DoS via Range header in FileResponse
- Medium: DoS via multipart form parsing
1 parent b428ad2 commit 5a64350

File tree

3 files changed

+758
-140
lines changed

3 files changed

+758
-140
lines changed

electro/toolkit/whisper_client.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""Whisper client utility for audio transcription."""
2+
3+
import logging
4+
import tempfile
5+
from pathlib import Path
6+
from typing import Optional
7+
8+
from fastapi import HTTPException, UploadFile
9+
from openai import AsyncOpenAI
10+
11+
from settings import settings
12+
13+
from electro.toolkit.loguru_logging import logger
14+
15+
16+
class WhisperTranscriptionError(Exception):
    """Error type used to signal that a Whisper transcription failed.

    Adds no behaviour of its own beyond ``Exception``; it exists so callers
    can catch transcription failures separately from other errors.
    """
20+
21+
22+
async def validate_audio_file(file: UploadFile) -> None:
    """
    Check an uploaded audio file against the configured limits.

    The checks run in order: a filename must be present, its extension
    (lower-cased, without the leading dot) must be listed in
    ``settings.SUPPORTED_AUDIO_FORMATS``, and the reported size must not
    exceed ``settings.MAX_AUDIO_FILE_SIZE``.

    Args:
        file: The uploaded file to validate

    Raises:
        HTTPException: 400 when the filename is missing or the extension is
            not supported; 413 when the reported size is over the limit
    """
    upload_name = file.filename
    if not upload_name:
        raise HTTPException(status_code=400, detail="No file uploaded")

    extension = Path(upload_name).suffix.lower().lstrip('.')
    allowed_formats = settings.SUPPORTED_AUDIO_FORMATS
    if extension not in allowed_formats:
        supported_list = ', '.join(allowed_formats)
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file format. Supported formats: {supported_list}"
        )

    # A falsy size (None or 0) skips the limit check entirely.
    reported_size = file.size
    if not reported_size:
        return

    size_limit = settings.MAX_AUDIO_FILE_SIZE
    if reported_size > size_limit:
        limit_in_mb = size_limit // (1024 * 1024)
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum size: {limit_in_mb}MB"
        )
47+
48+
49+
async def transcribe_audio(
    file: UploadFile,
    language: Optional[str] = None,
    response_format: str = "json",
    temperature: float = 0.0
) -> dict:
    """
    Transcribe audio file using OpenAI Whisper.

    The upload is validated, spooled to a temporary file on disk, sent to the
    transcription endpoint, and the temporary file is removed afterwards
    whether or not the request succeeded.

    Args:
        file: Audio file to transcribe
        language: Optional language code (e.g., 'en', 'es', 'fr')
        response_format: Response format ('json', 'text', 'srt', 'verbose_json', 'vtt')
        temperature: Sampling temperature between 0 and 1

    Returns:
        Dictionary containing transcription result. It always has "text" and
        "filename"; "json" and "verbose_json" add "language" and "duration",
        and "verbose_json" additionally adds "segments".

    Raises:
        HTTPException: If file validation fails
        WhisperTranscriptionError: If transcription fails
    """
    await validate_audio_file(file)

    client = AsyncOpenAI(
        base_url=settings.OPENAI_API_BASE_URL,
        api_key=settings.OPENAI_API_KEY
    )

    # Path.suffix already carries the leading dot (".mp3"), so it is used as-is;
    # prefixing another "." would produce names ending in "..mp3".
    # The handle is closed right away and only the path is kept: re-opening or
    # unlinking a NamedTemporaryFile while it is still open is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename).suffix) as temp_file:
        temp_path = Path(temp_file.name)

    try:
        # Write uploaded file to temporary file
        temp_path.write_bytes(await file.read())

        # Transcribe using OpenAI Whisper
        with temp_path.open('rb') as audio_file:
            transcription_params = {
                "file": audio_file,
                "model": settings.OPENAI_WHISPER_MODEL,
                "response_format": response_format,
                "temperature": temperature
            }

            if language:
                transcription_params["language"] = language

            logger.info(f"Starting transcription for file: {file.filename}")
            transcript = await client.audio.transcriptions.create(**transcription_params)
            logger.info(f"Transcription completed for file: {file.filename}")

        # Handle different response formats
        if response_format in ("json", "verbose_json"):
            result = {
                "text": transcript.text,
                "language": getattr(transcript, 'language', None),
                "duration": getattr(transcript, 'duration', None),
            }
            if response_format == "verbose_json":
                result["segments"] = getattr(transcript, 'segments', [])
            result["filename"] = file.filename
            return result

        # Remaining formats: the response is stringified as-is.
        return {
            "text": str(transcript),
            "filename": file.filename
        }

    except Exception as e:
        logger.error(f"Transcription failed for file {file.filename}: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise WhisperTranscriptionError(f"Transcription failed: {e}") from e

    finally:
        # Clean up temporary file; a failure here is logged, never raised.
        try:
            temp_path.unlink(missing_ok=True)
        except OSError as cleanup_error:
            logger.warning(f"Failed to cleanup temporary file: {cleanup_error}")

0 commit comments

Comments
 (0)