Skip to content

Commit b5b72bb

Browse files
committed
Fixed output for large files by writing with soundfile if duration longer than 1 hour
1 parent 4cd059e commit b5b72bb

File tree

2 files changed

+58
-3
lines changed

2 files changed

+58
-3
lines changed

audio_separator/separator/common_separator.py

+57-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import librosa
88
import torch
99
from pydub import AudioSegment
10+
import soundfile as sf
1011
from audio_separator.separator.uvr_lib_v5 import spec_utils
1112

1213

@@ -217,9 +218,28 @@ def prepare_mix(self, mix):
217218

218219
def write_audio(self, stem_path: str, stem_source):
    """
    Writes the separated audio source to a file, picking the writer from the length of the input file.

    pydub is the default writer: it supports a much wider range of audio formats and produces better
    encoded lossy files for some formats.
    soundfile is used instead when the input is one hour or longer, as pydub has memory issues with
    large files: https://github.com/jiaaro/pydub/issues/135

    stem_path is the output file path and stem_source is the separated audio data; both are handed
    unchanged to whichever writer is selected.
    """
    # The decision is based on the duration of the original input file, not on the stem itself
    input_seconds = librosa.get_duration(filename=self.audio_file_path)
    input_hours = input_seconds / 3600
    self.logger.info(f"Audio duration is {input_hours:.2f} hours ({input_seconds:.2f} seconds).")

    is_long_input = input_hours >= 1

    # Common case first: anything under an hour goes through pydub
    if not is_long_input:
        self.logger.info("Using pydub for writing.")
        self.write_audio_pydub(stem_path, stem_source)
        return

    self.logger.warning("Using soundfile for writing.")
    self.write_audio_soundfile(stem_path, stem_source)
237+
238+
def write_audio_pydub(self, stem_path: str, stem_source):
239+
"""
240+
Writes the separated audio source to a file using pydub (ffmpeg)
241+
"""
242+
self.logger.debug(f"Entering write_audio_pydub with stem_path: {stem_path}")
223243

224244
stem_source = spec_utils.normalize(wave=stem_source, max_peak=self.normalization_threshold)
225245

@@ -275,6 +295,41 @@ def write_audio(self, stem_path: str, stem_source):
275295
except (IOError, ValueError) as e:
276296
self.logger.error(f"Error exporting audio file: {e}")
277297

298+
def write_audio_soundfile(self, stem_path: str, stem_source):
    """
    Writes the separated audio source to a file using the soundfile library.

    Args:
        stem_path: Path of the audio file to create; soundfile infers the container format from it.
        stem_source: Audio samples, either 1-D (mono) or 2-D shaped (frames, channels).

    Errors raised while writing are logged and swallowed (best-effort export), so this method
    always returns None.
    """
    self.logger.debug(f"Entering write_audio_soundfile with stem_path: {stem_path}")

    # soundfile expects a (frames, channels) array (or a 1-D array for mono) and interleaves the
    # channels itself when encoding. The samples must therefore NOT be flattened by hand: a flat
    # 1-D buffer would be written as a mono file with twice the number of frames, and forcing the
    # buffer to int16 would truncate floating point samples in [-1, 1] to zero.
    # All that is needed is a C contiguous (row-major) layout, e.g. when the array is the
    # transposed view of a (channels, frames) buffer. This is a no-op for arrays that are
    # already C contiguous, and also works for mono input that has no second axis.
    stem_source = np.ascontiguousarray(stem_source)

    self.logger.debug(f"Interleaved audio data shape: {stem_source.shape}")

    # NOTE(review): no peak normalization is applied on this code path - confirm whether it should
    # match the normalization done before exporting with pydub.

    # No subtype is passed, so soundfile picks the default subtype for the output format.
    try:
        sf.write(stem_path, stem_source, self.sample_rate)
        self.logger.debug(f"Exported audio file successfully to {stem_path}")
    except Exception as e:
        # Deliberately broad: a failed export is reported through the logger rather than raised.
        self.logger.error(f"Error exporting audio file: {e}")
332+
278333
def clear_gpu_cache(self):
279334
"""
280335
This method clears the GPU cache to free up memory.

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "audio-separator"
7-
version = "0.19.3"
7+
version = "0.19.4"
88
description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
99
authors = ["Andrew Beveridge <[email protected]>"]
1010
license = "MIT"

0 commit comments

Comments
 (0)