11import asyncio
22import base64
3+ import fcntl
34import json
45import os
6+ import re
57import struct
8+ import tempfile
69import time
710from io import BytesIO
811from pathlib import Path
@@ -83,59 +86,97 @@ def _load_supported_speakers(self) -> set[str]:
8386 return set ()
8487
8588 def _load_uploaded_speakers (self ) -> dict [str , dict ]:
86- """Load uploaded speakers from metadata file."""
89+ """Load uploaded speakers from metadata file (with shared file lock) ."""
8790 if not self .metadata_file .exists ():
8891 return {}
8992
9093 try :
9194 with open (self .metadata_file , 'r' ) as f :
92- metadata = json .load (f )
95+ fcntl .flock (f , fcntl .LOCK_SH )
96+ try :
97+ metadata = json .load (f )
98+ finally :
99+ fcntl .flock (f , fcntl .LOCK_UN )
93100 return metadata .get ("uploaded_speakers" , {})
94101 except Exception as e :
95102 logger .warning (f"Could not load uploaded speakers metadata: { e } " )
96103 return {}
97104
98105 def _save_uploaded_speakers (self ) -> None :
99- """Save uploaded speakers to metadata file."""
106+ """Save uploaded speakers to metadata file (atomic write with lock) ."""
100107 try :
101108 metadata = {"uploaded_speakers" : self .uploaded_speakers }
102- with open (self .metadata_file , 'w' ) as f :
103- json .dump (metadata , f , indent = 2 )
109+ # Write to a temp file then atomically rename to avoid corruption
110+ fd , tmp_path = tempfile .mkstemp (
111+ dir = self .uploaded_speakers_dir , suffix = ".tmp"
112+ )
113+ try :
114+ with os .fdopen (fd , 'w' ) as f :
115+ fcntl .flock (f , fcntl .LOCK_EX )
116+ try :
117+ json .dump (metadata , f , indent = 2 )
118+ finally :
119+ fcntl .flock (f , fcntl .LOCK_UN )
120+ os .replace (tmp_path , self .metadata_file )
121+ except BaseException :
122+ # Clean up temp file on any failure
123+ try :
124+ os .unlink (tmp_path )
125+ except OSError :
126+ pass
127+ raise
104128 except Exception as e :
105129 logger .error (f"Could not save uploaded speakers metadata: { e } " )
106130
107- def _get_uploaded_audio_data (self , voice_name : str ) -> str | None :
108- """Get base64 encoded audio data for uploaded voice."""
131+ def _get_uploaded_audio_data (self , voice_name : str ) -> str :
132+ """Get base64 encoded audio data for uploaded voice.
133+
134+ Raises:
135+ ValueError: If the voice is not an uploaded voice or the audio
136+ file is missing / unreadable.
137+ """
109138 voice_name_lower = voice_name .lower ()
110139 if voice_name_lower not in self .uploaded_speakers :
111- return None
140+ raise ValueError ( f"Voice ' { voice_name } ' is not an uploaded voice" )
112141
113142 speaker_info = self .uploaded_speakers [voice_name_lower ]
114143 file_path = Path (speaker_info ["file_path" ])
115144
116145 if not file_path .exists ():
117- logger .warning (f"Audio file not found for voice { voice_name } : { file_path } " )
118- return None
146+ raise ValueError (
147+ f"Audio file for uploaded voice '{ voice_name } ' is missing "
148+ f"(expected at { file_path } ). Re-upload the voice."
149+ )
119150
120151 try :
121- # Read audio file
122152 with open (file_path , 'rb' ) as f :
123153 audio_bytes = f .read ()
124154
125- # Encode to base64
126155 audio_b64 = base64 .b64encode (audio_bytes ).decode ('utf-8' )
127-
128- # Get MIME type from file extension
129156 mime_type = speaker_info .get ("mime_type" , "audio/wav" )
130-
131- # Return as data URL
132157 return f"data:{ mime_type } ;base64,{ audio_b64 } "
133158 except Exception as e :
134- logger .error (f"Could not read audio file for voice { voice_name } : { e } " )
135- return None
159+ raise ValueError (
160+ f"Could not read audio file for voice '{ voice_name } ': { e } "
161+ ) from e
162+
163+ @staticmethod
164+ def _sanitize_name (value : str ) -> str :
165+ """Sanitize a user-provided name to prevent path traversal.
166+
167+ Only allows alphanumeric characters, underscores, and hyphens.
168+ """
169+ sanitized = re .sub (r"[^a-zA-Z0-9_-]" , "_" , value )
170+ if not sanitized :
171+ raise ValueError (f"Name contains no valid characters: { value !r} " )
172+ return sanitized
136173
137174 async def upload_voice (self , audio_file : UploadFile , consent : str , name : str ) -> dict :
138175 """Upload a new voice sample."""
176+ # Sanitize inputs to prevent path traversal
177+ safe_name = self ._sanitize_name (name )
178+ safe_consent = self ._sanitize_name (consent )
179+
139180 # Validate file size (max 10MB)
140181 MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
141182 audio_file .file .seek (0 , 2 ) # Seek to end
@@ -148,7 +189,6 @@ async def upload_voice(self, audio_file: UploadFile, consent: str, name: str) ->
148189 # Detect MIME type from filename if content_type is generic
149190 mime_type = audio_file .content_type
150191 if mime_type == "application/octet-stream" :
151- # Simple MIME type detection based on file extension
152192 filename_lower = audio_file .filename .lower ()
153193 if filename_lower .endswith (".wav" ):
154194 mime_type = "audio/wav"
@@ -183,12 +223,18 @@ async def upload_voice(self, audio_file: UploadFile, consent: str, name: str) ->
183223 if voice_name_lower in self .uploaded_speakers :
184224 raise ValueError (f"Voice '{ name } ' already exists" )
185225
186- # Generate filename
226+ # Generate filename using sanitized inputs
187227 timestamp = int (time .time ())
188- file_ext = audio_file .filename .split ('.' )[- 1 ] if '.' in audio_file .filename else "wav"
189- filename = f"{ name } _{ consent } _{ timestamp } .{ file_ext } "
228+ raw_filename = audio_file .filename or ""
229+ file_ext = Path (raw_filename ).suffix .lstrip ("." ) or "wav"
230+ filename = f"{ safe_name } _{ safe_consent } _{ timestamp } .{ file_ext } "
190231 file_path = self .uploaded_speakers_dir / filename
191232
233+ # Validate resolved path stays within upload directory
234+ resolved = file_path .resolve ()
235+ if not str (resolved ).startswith (str (self .uploaded_speakers_dir .resolve ())):
236+ raise ValueError ("Invalid file path — possible path traversal" )
237+
192238 # Save audio file
193239 try :
194240 with open (file_path , 'wb' ) as f :
@@ -216,10 +262,10 @@ async def upload_voice(self, audio_file: UploadFile, consent: str, name: str) ->
216262
217263 logger .info (f"Uploaded new voice '{ name } ' with consent ID '{ consent } '" )
218264
265+ # Return response without internal file_path (information disclosure)
219266 return {
220267 "name" : name ,
221268 "consent" : consent ,
222- "file_path" : str (file_path ),
223269 "created_at" : timestamp ,
224270 "mime_type" : mime_type ,
225271 "file_size" : file_size
@@ -257,12 +303,19 @@ def _validate_tts_request(self, request: OpenAICreateSpeechRequest) -> str | Non
257303 return f"Invalid speaker '{ request .voice } '. Supported: { ', ' .join (sorted (self .supported_speakers ))} "
258304
259305 # Validate Base task requirements
260- if task_type == "Base" and request .voice is None :
261- if request .ref_audio is None :
306+ # ref_audio is required unless voice refers to an uploaded speaker
307+ # (uploaded voices auto-populate ref_audio from stored audio)
308+ if task_type == "Base" :
309+ is_uploaded_voice = (
310+ request .voice is not None
311+ and request .voice .lower () in self .uploaded_speakers
312+ )
313+ if request .ref_audio is None and not is_uploaded_voice :
262314 return "Base task requires 'ref_audio' for voice cloning"
263- # Validate ref_audio format
264- if not (request .ref_audio .startswith (("http://" , "https://" )) or request .ref_audio .startswith ("data:" )):
265- return "ref_audio must be a URL (http/https) or base64 data URL (data:...)"
315+ if request .ref_audio is not None :
316+ # Validate ref_audio format when explicitly provided
317+ if not (request .ref_audio .startswith (("http://" , "https://" )) or request .ref_audio .startswith ("data:" )):
318+ return "ref_audio must be a URL (http/https) or base64 data URL (data:...)"
266319
267320 # Validate cross-parameter dependencies
268321 if task_type != "Base" :
@@ -319,13 +372,13 @@ def _build_tts_params(self, request: OpenAICreateSpeechRequest) -> dict[str, Any
319372 if request .voice is not None :
320373 params ["speaker" ] = [request .voice ]
321374
322- # If voice is an uploaded speaker and no ref_audio provided, auto-set it
375+ # If voice is an uploaded speaker and no ref_audio provided, auto-set it.
376+ # _get_uploaded_audio_data raises ValueError if the file is missing.
323377 if request .voice .lower () in self .uploaded_speakers and request .ref_audio is None :
324378 audio_data = self ._get_uploaded_audio_data (request .voice )
325- if audio_data :
326- params ["ref_audio" ] = [audio_data ]
327- params ["x_vector_only_mode" ] = [True ]
328- logger .info (f"Auto-set ref_audio for uploaded voice: { request .voice } " )
379+ params ["ref_audio" ] = [audio_data ]
380+ params ["x_vector_only_mode" ] = [True ]
381+ logger .info (f"Auto-set ref_audio for uploaded voice: { request .voice } " )
329382 elif params ["task_type" ][0 ] == "CustomVoice" :
330383 params ["speaker" ] = ["Vivian" ] # Default for CustomVoice
331384
0 commit comments