Skip to content

Commit 3c23878

Browse files
committed
Removed confusing and inconsistent parameters primary_stem_output_path / secondary_stem_output_path, improved logic to clear stem output paths between separation runs to fix issue with overwritten output files
1 parent eba10ac commit 3c23878

File tree

8 files changed

+61
-79
lines changed

8 files changed

+61
-79
lines changed

README.md

+2-5
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,9 @@ separator = Separator()
212212
separator.load_model()
213213
214214
# Perform the separation on specific audio files without reloading the model
215-
primary_stem_output_path, secondary_stem_output_path = separator.separate('audio1.wav')
215+
output_files = separator.separate('audio1.wav')
216216
217-
print(f'Primary stem saved at {primary_stem_output_path}')
218-
print(f'Secondary stem saved at {secondary_stem_output_path}')
217+
print(f"Separation complete! Output file(s): {' '.join(output_files)}")
219218
```
220219
221220
#### Batch processing, or processing with multiple models
@@ -253,8 +252,6 @@ output_file_paths_6 = separator.separate('audio3.wav')
253252
- log_formatter: (Optional) The log format. Default: None, which falls back to '%(asctime)s - %(levelname)s - %(module)s - %(message)s'
254253
- model_file_dir: (Optional) Directory to cache model files in. Default: /tmp/audio-separator-models/
255254
- output_dir: (Optional) Directory where the separated files will be saved. If not specified, uses the current directory.
256-
- primary_stem_output_path: (Optional) The path for saving the primary stem. Default: None
257-
- secondary_stem_output_path: (Optional) The path for saving the secondary stem. Default: None
258255
- output_format: (Optional) Format to encode output files, any common format (WAV, MP3, FLAC, M4A, etc.). Default: WAV
259256
- normalization_threshold: (Optional) The threshold for audio normalization. Default: 0.9
260257
- output_single_stem: (Optional) Output only a single stem, either 'instrumental' or 'vocals'. Default: None

audio_separator/separator/architectures/mdx_separator.py

+9-14
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,6 @@ def __init__(self, common_config, arch_config):
102102
self.secondary_source = None
103103
self.audio_file_path = None
104104
self.audio_file_base = None
105-
self.secondary_source_map = None
106-
self.primary_source_map = None
107105

108106
def load_model(self):
109107
"""
@@ -138,14 +136,11 @@ def separate(self, audio_file_path):
138136
Returns:
139137
list: A list of paths to the output files generated by the separation process.
140138
"""
141-
self.primary_source = None
142-
self.secondary_source = None
143-
144139
self.audio_file_path = audio_file_path
145140
self.audio_file_base = os.path.splitext(os.path.basename(audio_file_path))[0]
146141

147142
# Prepare the mix for processing
148-
self.logger.debug("Preparing mix...")
143+
self.logger.debug(f"Preparing mix for input audio file {self.audio_file_path}...")
149144
mix = self.prepare_mix(self.audio_file_path)
150145

151146
self.logger.debug("Normalizing mix before demixing...")
@@ -180,20 +175,20 @@ def separate(self, audio_file_path):
180175

181176
# Save and process the secondary stem if needed
182177
if not self.output_single_stem or self.output_single_stem.lower() == self.secondary_stem_name.lower():
183-
self.logger.info(f"Saving {self.secondary_stem_name} stem...")
184-
if not self.secondary_stem_output_path:
185-
self.secondary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.secondary_stem_name})_{self.model_name}.{self.output_format.lower()}")
186-
self.secondary_source_map = self.final_process(self.secondary_stem_output_path, self.secondary_source, self.secondary_stem_name)
178+
self.secondary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.secondary_stem_name})_{self.model_name}.{self.output_format.lower()}")
179+
180+
self.logger.info(f"Saving {self.secondary_stem_name} stem to {self.secondary_stem_output_path}...")
181+
self.final_process(self.secondary_stem_output_path, self.secondary_source, self.secondary_stem_name)
187182
output_files.append(self.secondary_stem_output_path)
188183

189184
# Save and process the primary stem if needed
190185
if not self.output_single_stem or self.output_single_stem.lower() == self.primary_stem_name.lower():
191-
self.logger.info(f"Saving {self.primary_stem_name} stem...")
192-
if not self.primary_stem_output_path:
193-
self.primary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}")
186+
self.primary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}")
194187
if not isinstance(self.primary_source, np.ndarray):
195188
self.primary_source = source.T
196-
self.primary_source_map = self.final_process(self.primary_stem_output_path, self.primary_source, self.primary_stem_name)
189+
190+
self.logger.info(f"Saving {self.primary_stem_name} stem to {self.primary_stem_output_path}...")
191+
self.final_process(self.primary_stem_output_path, self.primary_source, self.primary_stem_name)
197192
output_files.append(self.primary_stem_output_path)
198193

199194
# Not yet implemented from UVR features:

audio_separator/separator/architectures/mdxc_separator.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,6 @@ def __init__(self, common_config, arch_config):
5353
self.secondary_source = None
5454
self.audio_file_path = None
5555
self.audio_file_base = None
56-
self.primary_source_map = None
57-
self.secondary_source_map = None
5856

5957
self.logger.info("MDXC Separator initialisation complete")
6058

@@ -94,7 +92,7 @@ def separate(self, audio_file_path):
9492
self.audio_file_path = audio_file_path
9593
self.audio_file_base = os.path.splitext(os.path.basename(audio_file_path))[0]
9694

97-
self.logger.debug("Preparing mix...")
95+
self.logger.debug(f"Preparing mix for input audio file {self.audio_file_path}...")
9896
mix = self.prepare_mix(self.audio_file_path)
9997

10098
self.logger.debug("Normalizing mix before demixing...")
@@ -115,19 +113,20 @@ def separate(self, audio_file_path):
115113
self.secondary_source = spec_utils.normalize(wave=source[self.secondary_stem_name], max_peak=self.normalization_threshold).T
116114

117115
if not self.output_single_stem or self.output_single_stem.lower() == self.secondary_stem_name.lower():
118-
self.logger.info(f"Saving {self.secondary_stem_name} stem...")
119-
if not self.secondary_stem_output_path:
120-
self.secondary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.secondary_stem_name})_{self.model_name}.{self.output_format.lower()}")
121-
self.secondary_source_map = self.final_process(self.secondary_stem_output_path, self.secondary_source, self.secondary_stem_name)
116+
self.secondary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.secondary_stem_name})_{self.model_name}.{self.output_format.lower()}")
117+
118+
self.logger.info(f"Saving {self.secondary_stem_name} stem to {self.secondary_stem_output_path}...")
119+
self.final_process(self.secondary_stem_output_path, self.secondary_source, self.secondary_stem_name)
122120
output_files.append(self.secondary_stem_output_path)
123121

124122
if not self.output_single_stem or self.output_single_stem.lower() == self.primary_stem_name.lower():
125-
self.logger.info(f"Saving {self.primary_stem_name} stem...")
126-
if not self.primary_stem_output_path:
127-
self.primary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}")
123+
self.primary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}")
124+
128125
if not isinstance(self.primary_source, np.ndarray):
129126
self.primary_source = source.T
130-
self.primary_source_map = self.final_process(self.primary_stem_output_path, self.primary_source, self.primary_stem_name)
127+
128+
self.logger.info(f"Saving {self.primary_stem_name} stem to {self.primary_stem_output_path}...")
129+
self.final_process(self.primary_stem_output_path, self.primary_source, self.primary_stem_name)
131130
output_files.append(self.primary_stem_output_path)
132131
return output_files
133132

audio_separator/separator/architectures/vr_separator.py

+10-13
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def separate(self, audio_file_path):
127127
self.audio_file_path = audio_file_path
128128
self.audio_file_base = os.path.splitext(os.path.basename(audio_file_path))[0]
129129

130-
self.logger.debug("Starting inference...")
130+
self.logger.debug(f"Starting separation for input audio file {self.audio_file_path}...")
131131

132132
nn_arch_sizes = [31191, 33966, 56817, 123821, 123812, 129605, 218409, 537238, 537227] # default
133133
vr_5_1_models = [56817, 218409]
@@ -167,27 +167,22 @@ def separate(self, audio_file_path):
167167

168168
# Save and process the primary stem if needed
169169
if not self.output_single_stem or self.output_single_stem.lower() == self.primary_stem_name.lower():
170-
self.logger.info(f"Saving {self.primary_stem_name} stem...")
171-
if not self.primary_stem_output_path:
172-
self.primary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}")
173-
170+
self.logger.debug(f"Processing primary stem: {self.primary_stem_name}")
174171
if not isinstance(self.primary_source, np.ndarray):
175172
self.primary_source = self.spec_to_wav(y_spec).T
176173
self.logger.debug("Converting primary source spectrogram to waveform.")
177174
if not self.model_samplerate == 44100:
178175
self.primary_source = librosa.resample(self.primary_source.T, orig_sr=self.model_samplerate, target_sr=44100).T
179176
self.logger.debug("Resampling primary source to 44100Hz.")
180177

181-
self.primary_source_map = self.final_process(self.primary_stem_output_path, self.primary_source, self.primary_stem_name)
182-
self.logger.debug("Primary stem processed.")
178+
self.primary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}")
179+
180+
self.logger.info(f"Saving {self.primary_stem_name} stem to {self.primary_stem_output_path}...")
181+
self.final_process(self.primary_stem_output_path, self.primary_source, self.primary_stem_name)
183182
output_files.append(self.primary_stem_output_path)
184183

185184
# Save and process the secondary stem if needed
186185
if not self.output_single_stem or self.output_single_stem.lower() == self.secondary_stem_name.lower():
187-
self.logger.info(f"Saving {self.secondary_stem_name} stem...")
188-
if not self.secondary_stem_output_path:
189-
self.secondary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.secondary_stem_name})_{self.model_name}.{self.output_format.lower()}")
190-
191186
self.logger.debug(f"Processing secondary stem: {self.secondary_stem_name}")
192187
if not isinstance(self.secondary_source, np.ndarray):
193188
self.secondary_source = self.spec_to_wav(v_spec).T
@@ -196,8 +191,10 @@ def separate(self, audio_file_path):
196191
self.secondary_source = librosa.resample(self.secondary_source.T, orig_sr=self.model_samplerate, target_sr=44100).T
197192
self.logger.debug("Resampling secondary source to 44100Hz.")
198193

199-
self.secondary_source_map = self.final_process(self.secondary_stem_output_path, self.secondary_source, self.secondary_stem_name)
200-
self.logger.debug("Secondary stem processed.")
194+
self.secondary_stem_output_path = os.path.join(f"{self.audio_file_base}_({self.secondary_stem_name})_{self.model_name}.{self.output_format.lower()}")
195+
196+
self.logger.info(f"Saving {self.secondary_stem_name} stem to {self.secondary_stem_output_path}...")
197+
self.final_process(self.secondary_stem_output_path, self.secondary_source, self.secondary_stem_name)
201198
output_files.append(self.secondary_stem_output_path)
202199

203200
# Not yet implemented from UVR features:

audio_separator/separator/common_separator.py

+25-7
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,6 @@ def __init__(self, config):
6565
self.model_path = config.get("model_path")
6666
self.model_data = config.get("model_data")
6767

68-
# Optional custom output paths for the primary and secondary stems
69-
# If left as None, the arch-specific class decides the output filename, e.g. something like:
70-
# f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}"
71-
self.primary_stem_output_path = config.get("primary_stem_output_path")
72-
self.secondary_stem_output_path = config.get("secondary_stem_output_path")
73-
7468
# Output directory and format
7569
self.output_dir = config.get("output_dir")
7670
self.output_format = config.get("output_format")
@@ -90,7 +84,6 @@ def __init__(self, config):
9084
self.bv_model_rebalance = self.model_data.get("is_bv_model_rebalanced", 0)
9185

9286
self.logger.debug(f"Common params: model_name={self.model_name}, model_path={self.model_path}")
93-
self.logger.debug(f"Common params: primary_stem_output_path={self.primary_stem_output_path}, secondary_stem_output_path={self.secondary_stem_output_path}")
9487
self.logger.debug(f"Common params: output_dir={self.output_dir}, output_format={self.output_format}")
9588
self.logger.debug(f"Common params: normalization_threshold={self.normalization_threshold}")
9689
self.logger.debug(f"Common params: enable_denoise={self.enable_denoise}, output_single_stem={self.output_single_stem}")
@@ -99,6 +92,16 @@ def __init__(self, config):
9992
self.logger.debug(f"Common params: primary_stem_name={self.primary_stem_name}, secondary_stem_name={self.secondary_stem_name}")
10093
self.logger.debug(f"Common params: is_karaoke={self.is_karaoke}, is_bv_model={self.is_bv_model}, bv_model_rebalance={self.bv_model_rebalance}")
10194

95+
# File-specific variables which need to be cleared between processing different audio inputs
96+
self.audio_file_path = None
97+
self.audio_file_base = None
98+
99+
self.primary_source = None
100+
self.secondary_source = None
101+
102+
self.primary_stem_output_path = None
103+
self.secondary_stem_output_path = None
104+
102105
self.cached_sources_map = {}
103106

104107
def separate(self, audio_file_path):
@@ -266,3 +269,18 @@ def clear_gpu_cache(self):
266269
if self.torch_device == torch.device("cuda"):
267270
self.logger.debug("Clearing CUDA cache...")
268271
torch.cuda.empty_cache()
272+
273+
def clear_file_specific_paths(self):
274+
"""
275+
Resets the per-file state (input audio path and base name, primary/secondary sources, and stem output paths) so that nothing is carried over when a different audio input is processed.
276+
"""
277+
self.logger.info("Clearing input audio file paths, sources and stems...")
278+
279+
self.audio_file_path = None
280+
self.audio_file_base = None
281+
282+
self.primary_source = None
283+
self.secondary_source = None
284+
285+
self.primary_stem_output_path = None
286+
self.secondary_stem_output_path = None

audio_separator/separator/separator.py

+2-27
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ class Separator:
3333
log_formatter (logging.Formatter): The logging formatter.
3434
model_file_dir (str): The directory where model files are stored.
3535
output_dir (str): The directory where output files will be saved.
36-
primary_stem_output_path (str): The path for saving the primary stem.
37-
secondary_stem_output_path (str): The path for saving the secondary stem.
3836
output_format (str): The format of the output audio file.
3937
normalization_threshold (float): The threshold for audio normalization.
4038
output_single_stem (str): Option to output a single stem.
@@ -63,12 +61,10 @@ class Separator:
6361

6462
def __init__(
6563
self,
66-
log_level=logging.DEBUG,
64+
log_level=logging.INFO,
6765
log_formatter=None,
6866
model_file_dir="/tmp/audio-separator-models/",
6967
output_dir=None,
70-
primary_stem_output_path=None,
71-
secondary_stem_output_path=None,
7268
output_format="WAV",
7369
normalization_threshold=0.9,
7470
output_single_stem=None,
@@ -105,12 +101,6 @@ def __init__(
105101
self.model_file_dir = model_file_dir
106102
self.output_dir = output_dir
107103

108-
# Allow the user to specify the output paths for the primary and secondary stems
109-
# If left as None, the arch-specific class decides the output filename, typically e.g. something like:
110-
# f"{self.audio_file_base}_({self.primary_stem_name})_{self.model_name}.{self.output_format.lower()}"
111-
self.primary_stem_output_path = primary_stem_output_path
112-
self.secondary_stem_output_path = secondary_stem_output_path
113-
114104
# Create the model directory if it does not exist
115105
os.makedirs(self.model_file_dir, exist_ok=True)
116106

@@ -141,10 +131,6 @@ def __init__(
141131

142132
self.onnx_execution_provider = None
143133
self.model_instance = None
144-
self.audio_file_path = None
145-
self.audio_file_base = None
146-
self.primary_source = None
147-
self.secondary_source = None
148134

149135
self.setup_accelerated_inferencing_device()
150136

@@ -598,8 +584,6 @@ def load_model(self, model_filename="UVR-MDX-NET-Inst_HQ_3.onnx"):
598584
"model_name": model_name,
599585
"model_path": model_path,
600586
"model_data": model_data,
601-
"primary_stem_output_path": self.primary_stem_output_path,
602-
"secondary_stem_output_path": self.secondary_stem_output_path,
603587
"output_format": self.output_format,
604588
"output_dir": self.output_dir,
605589
"normalization_threshold": self.normalization_threshold,
@@ -652,17 +636,8 @@ def separate(self, audio_file_path):
652636
# Clear GPU cache to free up memory
653637
self.model_instance.clear_gpu_cache()
654638

655-
# Unset the audio file to prevent accidental re-separation of the same file
656-
self.logger.debug("Clearing audio file...")
657-
self.audio_file_path = None
658-
self.audio_file_base = None
659-
660639
# Unset file-specific separation params to prevent accidentally re-using the wrong source files or output paths
661-
self.logger.debug("Clearing sources and stems...")
662-
self.primary_source = None
663-
self.secondary_source = None
664-
self.primary_stem_output_path = None
665-
self.secondary_stem_output_path = None
640+
self.model_instance.clear_file_specific_paths()
666641

667642
# Log the completion of the separation process
668643
self.logger.debug("Separation process completed.")

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "audio-separator"
7-
version = "0.16.0"
7+
version = "0.16.1"
88
description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
99
authors = ["Andrew Beveridge <[email protected]>"]
1010
license = "MIT"

tests/TODO.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@
66
- Test processing file with multiple different models outputs separate expected files
77
- Test each of the architecture-specific parameters works as expected in both CLI and class mode
88
- Generate oscillogram and spectrogram of model output for a short test file for each major supported model and compare to expected output to ensure separation is actually separating stems
9-
- Add a few different test files with different properties, e.g. background noise, stems present, or genre of music and ensure separation works as expected for each
9+
- Add a few different test files with different properties, e.g. background noise, stems present, or genre of music and ensure separation works as expected for each
10+
- Test that processing more than one distinct input file in sequence outputs separate files (not overwriting the first output with the second)

0 commit comments

Comments
 (0)