Added download only function, fixed bug in VR arch

beveradb · beveradb · commit 8b0d9b02b72b · 2024-07-20T19:15:59.000-04:00
diff --git a/audio_separator/separator/architectures/vr_separator.py b/audio_separator/separator/architectures/vr_separator.py
@@ -181,7 +181,7 @@ def separate(self, audio_file_path):
         if self.output_single_stem and (self.output_single_stem.lower() != self.primary_stem_name.lower() and self.output_single_stem.lower() != self.secondary_stem_name.lower()):
-            # If so, reset output_single_stem to None to save both stems
-            self.output_single_stem = None
-            self.logger.warning(f"The output_single_stem setting '{self.output_single_stem}' does not match any of the output files: '{self.primary_stem_name}' and '{self.secondary_stem_name}'. For this model '{self.model_name}' with architecture '{self.arch_name}', the output_single_stem setting will be ignored and all output files will be saved.")
+            # If so, warn first (the message must show the rejected value), then reset output_single_stem to None to save both stems
+            self.logger.warning(f"The output_single_stem setting '{self.output_single_stem}' does not match any of the output files: '{self.primary_stem_name}' and '{self.secondary_stem_name}'. For this model '{self.model_name}', the output_single_stem setting will be ignored and all output files will be saved.")
+            self.output_single_stem = None
 
         # Save and process the primary stem if needed
         if not self.output_single_stem or self.output_single_stem.lower() == self.primary_stem_name.lower():
diff --git a/audio_separator/separator/separator.py b/audio_separator/separator/separator.py
@@ -718,3 +718,23 @@ def separate(self, audio_file_path):
         self.logger.info(f'Separation duration: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - separate_start_time)))}')
 
         return output_files
+
+    def download_model_and_data(self, model_filename):
+        """
+        Fetch the files for the given model and read its model data, without running a separation.
+        """
+        self.logger.info(f"Downloading model {model_filename}...")
+
+        # Only the type, friendly name, path and optional YAML config are used from the download result.
+        _, model_type, model_friendly_name, model_path, yaml_config_filename = self.download_model_files(model_filename)
+
+        # A model whose own path is a YAML file is used as its own config.
+        config_path = model_path if model_path.lower().endswith(".yaml") else yaml_config_filename
+
+        if config_path is None:
+            model_data = self.load_model_data_using_hash(model_path)
+        else:
+            model_data = self.load_model_data_from_yaml(config_path)
+
+        item_count = len(model_data)
+        self.logger.info(f"Model downloaded, type: {model_type}, friendly name: {model_friendly_name}, model_path: {model_path}, model_data: {item_count} items")
diff --git a/audio_separator/utils/cli.py b/audio_separator/utils/cli.py
@@ -38,12 +38,14 @@ def main():
     output_format_help = "output format for separated files, any common format (default: %(default)s). Example: --output_format=MP3"
     output_dir_help = "directory to write output files (default: <current dir>). Example: --output_dir=/app/separated"
     model_file_dir_help = "model files directory (default: %(default)s). Example: --model_file_dir=/app/models"
+    download_model_only_help = "Download a single model file only, without performing separation."
 
     io_params = parser.add_argument_group("Separation I/O Params")
-    io_params.add_argument("-m", "--model_filename", default="model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt", help=model_filename_help)
+    io_params.add_argument("-m", "--model_filename", default="model_bs_roformer_ep_317_sdr_12.9755.yaml", help=model_filename_help)
     io_params.add_argument("--output_format", default="FLAC", help=output_format_help)
     io_params.add_argument("--output_dir", default=None, help=output_dir_help)
     io_params.add_argument("--model_file_dir", default="/tmp/audio-separator-models/", help=model_file_dir_help)
+    io_params.add_argument("--download_model_only", action="store_true", help=download_model_only_help)
 
     invert_spect_help = "invert secondary stem using spectogram (default: %(default)s). Example: --invert_spect"
     normalization_help = "max peak amplitude to normalize input and output audio to (default: %(default)s). Example: --normalization=0.7"
@@ -130,12 +132,20 @@ def main():
         print(json.dumps(separator.list_supported_model_files(), indent=4, sort_keys=True))
         sys.exit(0)
 
+    # Download-only mode does not read args.audio_file, so it is handled before the audio_file guard below
+    if args.download_model_only:
+        logger.info(f"Separator version {package_version} downloading model {args.model_filename} to directory {args.model_file_dir}")
+        separator = Separator(log_formatter=log_formatter, log_level=log_level, model_file_dir=args.model_file_dir)
+        separator.download_model_and_data(args.model_filename)
+        logger.info(f"Model {args.model_filename} downloaded successfully.")
+        sys.exit(0)
+
     if not hasattr(args, "audio_file"):
         parser.print_help()
         sys.exit(1)
 
     logger.info(f"Separator version {package_version} beginning with input file: {args.audio_file}")
 
     separator = Separator(
         log_formatter=log_formatter,
         log_level=log_level,
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "audio-separator"
-version = "0.17.6"
+version = "0.18.0"
 description = "Easy to use audio stem separation, using various models from UVR trained primarily by @Anjok07"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"