 import librosa
 import torch
 from pydub import AudioSegment
+import soundfile as sf
 from audio_separator.separator.uvr_lib_v5 import spec_utils
 
 
@@ -217,9 +218,28 @@ def prepare_mix(self, mix):
 
     def write_audio(self, stem_path: str, stem_source):
         """
-        Writes the separated audio source to a file.
+        Writes the separated audio source to a file using pydub or soundfile.
+        Pydub supports a much wider range of audio formats and produces better encoded lossy files for some formats.
+        Soundfile is used for very large files (longer than 1 hour), as pydub has memory issues with large files:
+        https://github.com/jiaaro/pydub/issues/135
         """
-        self.logger.debug(f"Entering write_audio with stem_path: {stem_path}")
+        # Get the duration of the input audio file
+        duration_seconds = librosa.get_duration(filename=self.audio_file_path)
+        duration_hours = duration_seconds / 3600
+        self.logger.info(f"Audio duration is {duration_hours:.2f} hours ({duration_seconds:.2f} seconds).")
+
+        if duration_hours >= 1:
+            self.logger.warning("Using soundfile for writing.")
+            self.write_audio_soundfile(stem_path, stem_source)
+        else:
+            self.logger.info("Using pydub for writing.")
+            self.write_audio_pydub(stem_path, stem_source)
+
+    def write_audio_pydub(self, stem_path: str, stem_source):
+        """
+        Writes the separated audio source to a file using pydub (ffmpeg).
+        """
+        self.logger.debug(f"Entering write_audio_pydub with stem_path: {stem_path}")
 
         stem_source = spec_utils.normalize(wave=stem_source, max_peak=self.normalization_threshold)
 
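The one-hour cutoff above only needs the file's duration, not its decoded samples. As a minimal standalone sketch (not part of this diff; the pick_writer helper, its threshold argument, and the input path are made up for illustration), the same check can be made from header metadata with soundfile's sf.info, which avoids a full decode for most formats:

    import soundfile as sf

    def pick_writer(audio_path: str, max_pydub_hours: float = 1.0) -> str:
        # sf.info inspects the file metadata rather than decoding the audio
        duration_hours = sf.info(audio_path).duration / 3600
        # Mirror the dispatch in write_audio: soundfile for long files, pydub otherwise
        return "soundfile" if duration_hours >= max_pydub_hours else "pydub"

    # e.g. pick_writer("input.wav") would return "pydub" for anything under an hour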
@@ -275,6 +295,41 @@ def write_audio(self, stem_path: str, stem_source):
         except (IOError, ValueError) as e:
             self.logger.error(f"Error exporting audio file: {e}")
 
+    def write_audio_soundfile(self, stem_path: str, stem_source):
+        """
+        Writes the separated audio source to a file using the soundfile library.
+        """
+        self.logger.debug(f"Entering write_audio_soundfile with stem_path: {stem_path}")
+
+        # Correctly interleave stereo channels if needed
+        if stem_source.shape[1] == 2:
+            # If the audio is already interleaved, ensure it is in the correct order:
+            # check whether the array is Fortran-contiguous (column-major)
+            if stem_source.flags["F_CONTIGUOUS"]:
+                # Convert to C-contiguous (row-major)
+                stem_source = np.ascontiguousarray(stem_source)
+            # Otherwise, perform the interleaving manually
+            else:
+                stereo_interleaved = np.empty((2 * stem_source.shape[0],), dtype=np.int16)
+                # Left channel
+                stereo_interleaved[0::2] = stem_source[:, 0]
+                # Right channel
+                stereo_interleaved[1::2] = stem_source[:, 1]
+                stem_source = stereo_interleaved
+
+        self.logger.debug(f"Interleaved audio data shape: {stem_source.shape}")
+
+        # Write the audio using soundfile (used for formats other than M4A)
+        try:
+            # No explicit subtype is passed, so soundfile uses the default sample width for the output format
+            sf.write(stem_path, stem_source, self.sample_rate)
+            self.logger.debug(f"Exported audio file successfully to {stem_path}")
+        except Exception as e:
+            self.logger.error(f"Error exporting audio file: {e}")
+
     def clear_gpu_cache(self):
         """
         This method clears the GPU cache to free up memory.
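For context on the interleaving branch in write_audio_soundfile: a C-contiguous (frames, channels) array already stores its samples as L, R, L, R, ... in memory, which is why the F_CONTIGUOUS case only needs np.ascontiguousarray, while the else branch builds the same ordering explicitly as a flat array. A minimal sketch with a made-up three-frame signal (independent of this diff), showing the two layouts agree:

    import numpy as np

    # Tiny stereo signal: three frames, columns are the left and right channels
    frames = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.int16)

    # Manual interleaving, as in the else branch of write_audio_soundfile
    interleaved = np.empty((2 * frames.shape[0],), dtype=np.int16)
    interleaved[0::2] = frames[:, 0]  # left samples at even indices
    interleaved[1::2] = frames[:, 1]  # right samples at odd indices

    # Flattening a C-contiguous copy in row-major order yields the same sample order
    assert np.array_equal(interleaved, np.ascontiguousarray(frames).reshape(-1))
    print(interleaved)  # [1 2 3 4 5 6]

If an explicit sample width is ever needed, sf.write also accepts a subtype argument (for example subtype="PCM_16"); as written, the call relies on the format's default.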