Skip to content

Commit 47a3171

Browse files
authored
Add output_bitrate argument. (#104)
* Add output_bitrate argument. * Add install extra dependencies to readme. * Add output_bitrate argument to cli.
1 parent 7e79526 commit 47a3171

File tree

4 files changed

+23
-3
lines changed

4 files changed

+23
-3
lines changed

README.md

+9
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,15 @@ Once you're inside the conda env, run the following command to install the proje
302302
poetry install
303303
```
304304
305+
Install the extra dependencies, depending on whether you're running with a GPU or a CPU.
306+
```sh
307+
poetry install --extras "cpu"
308+
```
309+
or
310+
```sh
311+
poetry install --extras "gpu"
312+
```
313+
305314
### Running the Command-Line Interface Locally
306315
307316
You can run the CLI command directly within the virtual environment. For example:

audio_separator/separator/common_separator.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def __init__(self, config):
6868
# Output directory and format
6969
self.output_dir = config.get("output_dir")
7070
self.output_format = config.get("output_format")
71+
self.output_bitrate = config.get("output_bitrate")
7172

7273
# Functional options which are applicable to all architectures and the user may tweak to affect the output
7374
self.normalization_threshold = config.get("normalization_threshold")
@@ -250,9 +251,12 @@ def write_audio(self, stem_path: str, stem_source):
250251
elif file_format == "mka":
251252
file_format = "matroska"
252253

254+
# Set the bitrate to 320k for mp3 files if output_bitrate is not specified
255+
bitrate = "320k" if file_format == "mp3" and self.output_bitrate is None else self.output_bitrate
256+
253257
# Export using the determined format
254258
try:
255-
audio_segment.export(stem_path, format=file_format)
259+
audio_segment.export(stem_path, format=file_format, bitrate=bitrate)
256260
self.logger.debug(f"Exported audio file successfully to {stem_path}")
257261
except (IOError, ValueError) as e:
258262
self.logger.error(f"Error exporting audio file: {e}")

audio_separator/separator/separator.py

+4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class Separator:
3636
model_file_dir (str): The directory where model files are stored.
3737
output_dir (str): The directory where output files will be saved.
3838
output_format (str): The format of the output audio file.
39+
output_bitrate (str): The bitrate of the output audio file, as an ffmpeg-compatible string (e.g. "320k"). If None, MP3 output defaults to "320k".
3940
normalization_threshold (float): The threshold for audio normalization.
4041
output_single_stem (str): Option to output a single stem.
4142
invert_using_spec (bool): Flag to invert using spectrogram.
@@ -68,6 +69,7 @@ def __init__(
6869
model_file_dir="/tmp/audio-separator-models/",
6970
output_dir=None,
7071
output_format="WAV",
72+
output_bitrate=None,
7173
normalization_threshold=0.9,
7274
output_single_stem=None,
7375
invert_using_spec=False,
@@ -113,6 +115,7 @@ def __init__(
113115
os.makedirs(self.output_dir, exist_ok=True)
114116

115117
self.output_format = output_format
118+
self.output_bitrate = output_bitrate
116119

117120
if self.output_format is None:
118121
self.output_format = "WAV"
@@ -652,6 +655,7 @@ def load_model(self, model_filename="model_mel_band_roformer_ep_3005_sdr_11.4360
652655
"model_path": model_path,
653656
"model_data": model_data,
654657
"output_format": self.output_format,
658+
"output_bitrate": self.output_bitrate,
655659
"output_dir": self.output_dir,
656660
"normalization_threshold": self.normalization_threshold,
657661
"output_single_stem": self.output_single_stem,

audio_separator/utils/cli.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,15 @@ def main():
3636

3737
model_filename_help = "model to use for separation (default: %(default)s). Example: -m 2_HP-UVR.pth"
3838
output_format_help = "output format for separated files, any common format (default: %(default)s). Example: --output_format=MP3"
39+
output_bitrate_help = "output bitrate for separated files, any ffmpeg-compatible bitrate (default: %(default)s). Example: --output_bitrate=320k"
3940
output_dir_help = "directory to write output files (default: <current dir>). Example: --output_dir=/app/separated"
4041
model_file_dir_help = "model files directory (default: %(default)s). Example: --model_file_dir=/app/models"
4142
download_model_only_help = "Download a single model file only, without performing separation."
4243

4344
io_params = parser.add_argument_group("Separation I/O Params")
4445
io_params.add_argument("-m", "--model_filename", default="model_bs_roformer_ep_317_sdr_12.9755.yaml", help=model_filename_help)
4546
io_params.add_argument("--output_format", default="FLAC", help=output_format_help)
47+
io_params.add_argument("--output_bitrate", default=None, help=output_bitrate_help)
4648
io_params.add_argument("--output_dir", default=None, help=output_dir_help)
4749
io_params.add_argument("--model_file_dir", default="/tmp/audio-separator-models/", help=model_file_dir_help)
4850
io_params.add_argument("--download_model_only", action="store_true", help=download_model_only_help)
@@ -142,15 +144,16 @@ def main():
142144
if not hasattr(args, "audio_file"):
143145
parser.print_help()
144146
sys.exit(1)
145-
147+
146148
logger.info(f"Separator version {package_version} beginning with input file: {args.audio_file}")
147-
149+
148150
separator = Separator(
149151
log_formatter=log_formatter,
150152
log_level=log_level,
151153
model_file_dir=args.model_file_dir,
152154
output_dir=args.output_dir,
153155
output_format=args.output_format,
156+
output_bitrate=args.output_bitrate,
154157
normalization_threshold=args.normalization,
155158
output_single_stem=args.single_stem,
156159
invert_using_spec=args.invert_spect,

0 commit comments

Comments
 (0)