Skip to content

Commit 0f585ab

Browse files
ntamotsu and beveradb authored
Minor refactoring of separator parameters (#132)
- Organize default values
- Clarify handling of `batch_size` for RoFormer models (ref: #118)

Co-authored-by: Andrew Beveridge ☄️ <[email protected]>
1 parent dae33d0 commit 0f585ab

File tree

3 files changed

+17
-4
lines changed

3 files changed

+17
-4
lines changed

README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -317,9 +317,10 @@ output_file_paths_6 = separator.separate('audio3.wav')
317317
- sample_rate: (Optional) Set the sample rate of the output audio. Default: 44100
318318
- use_soundfile: (Optional) Use soundfile for output writing, can solve OOM issues, especially on longer audio.
319319
- use_autocast: (Optional) Flag to use PyTorch autocast for faster inference. Do not use for CPU inference. Default: False
320-
- mdx_params: (Optional) MDX Architecture Specific Attributes & Defaults. Default: {"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1}
320+
- mdx_params: (Optional) MDX Architecture Specific Attributes & Defaults. Default: {"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False}
321321
- vr_params: (Optional) VR Architecture Specific Attributes & Defaults. Default: {"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False}
322-
- demucs_params: (Optional) VR Architecture Specific Attributes & Defaults. {"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True}
322+
- demucs_params: (Optional) Demucs Architecture Specific Attributes & Defaults. Default: {"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True}
323+
- mdxc_params: (Optional) MDXC Architecture Specific Attributes & Defaults. Default: {"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0}
323324
324325
## Requirements 📋
325326

audio_separator/separator/architectures/mdxc_separator.py

+2
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,8 @@ def demix(self, mix: np.ndarray) -> dict:
211211
mix, sample_rate = spec_utils.change_pitch_semitones(mix, self.sample_rate, semitone_shift=-self.pitch_shift)
212212

213213
if self.is_roformer:
214+
# Note: `batch_size` is currently not used for RoFormer models, because batching yields negligible performance improvements.
215+
214216
mix = torch.tensor(mix, dtype=torch.float32)
215217

216218
if self.override_model_segment_size:

audio_separator/separator/separator.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,17 @@ class Separator:
6363
high_end_process: False
6464
6565
Demucs Architecture Specific Attributes & Defaults:
66-
model_path: The path to the Demucs model file.
66+
segment_size: "Default"
67+
shifts: 2
68+
overlap: 0.25
69+
segments_enabled: True
70+
71+
MDXC Architecture Specific Attributes & Defaults:
72+
segment_size: 256
73+
override_model_segment_size: False
74+
batch_size: 1
75+
overlap: 8
76+
pitch_shift: 0
6777
"""
6878

6979
def __init__(
@@ -84,7 +94,7 @@ def __init__(
8494
mdx_params={"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": False},
8595
vr_params={"batch_size": 1, "window_size": 512, "aggression": 5, "enable_tta": False, "enable_post_process": False, "post_process_threshold": 0.2, "high_end_process": False},
8696
demucs_params={"segment_size": "Default", "shifts": 2, "overlap": 0.25, "segments_enabled": True},
87-
mdxc_params={"segment_size": 256, "batch_size": 1, "overlap": 8},
97+
mdxc_params={"segment_size": 256, "override_model_segment_size": False, "batch_size": 1, "overlap": 8, "pitch_shift": 0},
8898
):
8999
self.logger = logging.getLogger(__name__)
90100
self.logger.setLevel(log_level)

0 commit comments

Comments
 (0)