From 89ffdaddccc89d03f05e258cfbc35401b12c44a7 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 26 Jan 2026 16:23:38 +0800 Subject: [PATCH 01/11] Create README.md Signed-off-by: zhou zhuoxin --- .../offline_inference/text_to_audio/README.md | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 examples/offline_inference/text_to_audio/README.md diff --git a/examples/offline_inference/text_to_audio/README.md b/examples/offline_inference/text_to_audio/README.md new file mode 100644 index 0000000000..45bc20ee7e --- /dev/null +++ b/examples/offline_inference/text_to_audio/README.md @@ -0,0 +1,38 @@ +# Text-To-Audio + +The `stabilityai/stable-audio-open-1.0` pipeline generates audio from text prompts. + +## Prerequisites + +If you use a gated model (e.g., `stabilityai/stable-audio-open-1.0`), ensure you have access: + +1. **Accept Model License**: Visit the model page on Hugging Face (e.g., [stabilityai/stable-audio-open-1.0](https://huggingface.co/stabilityai/stable-audio-open-1.0)) and accept the user agreement. +2. **Authenticate**: Log in to Hugging Face locally to access the gated model. + ```bash + huggingface-cli login + ``` + +## Local CLI Usage + +```bash +python text_to_audio.py \ + --model stabilityai/stable-audio-open-1.0 \ + --prompt "The sound of a hammer hitting a wooden surface" \ + --negative_prompt "Low quality" \ + --seed 42 \ + --guidance_scale 7.0 \ + --audio_length 10.0 \ + --num_inference_steps 100 \ + --output stable_audio_output.wav +``` + +Key arguments: + +- `--prompt`: text description (string). +- `--negative_prompt`: negative prompt for classifier-free guidance. +- `--seed`: integer seed for deterministic generation. +- `--guidance_scale`: classifier-free guidance scale. +- `--audio_length`: audio duration in seconds. +- `--num_inference_steps`: diffusion sampling steps (more steps = higher quality, slower). +- `--output`: path to save the generated WAV file. 
+ From 0274127c375c86999a32bb63e430971518759571 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 26 Jan 2026 16:44:54 +0800 Subject: [PATCH 02/11] Update README.md Signed-off-by: zhou zhuoxin --- examples/offline_inference/text_to_audio/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/offline_inference/text_to_audio/README.md b/examples/offline_inference/text_to_audio/README.md index 45bc20ee7e..8ec1eafe52 100644 --- a/examples/offline_inference/text_to_audio/README.md +++ b/examples/offline_inference/text_to_audio/README.md @@ -35,4 +35,3 @@ Key arguments: - `--audio_length`: audio duration in seconds. - `--num_inference_steps`: diffusion sampling steps (more steps = higher quality, slower). - `--output`: path to save the generated WAV file. - From 942b9bae1c13d75676f8ef3b123f26bc790814e8 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 26 Jan 2026 16:45:29 +0800 Subject: [PATCH 03/11] Fix formatting in README for output option Signed-off-by: zhou zhuoxin From 813dd18188045d3a219d8d73029046418a82b106 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:09:21 +0800 Subject: [PATCH 04/11] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 97357dc3b3..b8e27b1222 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -42,6 +42,9 @@ resolve_model_config_path, ) from vllm_omni.inputs.data import OmniDiffusionSamplingParams, OmniPromptType, OmniSamplingParams +from vllm_omni.model_executor.model_loader.weight_utils import ( + download_weights_from_hf_specific, +) from vllm_omni.outputs import OmniRequestOutput logger = init_logger(__name__) @@ -74,8 +77,23 @@ def omni_snapshot_download(model_id) -> str: from modelscope.hub.snapshot_download import snapshot_download return snapshot_download(model_id) - else: - return 
_dummy_snapshot_download(model_id) + + # If it's already a local path, just return it + if os.path.exists(model_id): + return model_id + + # For other cases (Hugging Face), perform a real download to ensure all + # necessary files (including *.pt for audio/diffusion) are available locally + # before stage workers are spawned. This prevents initialization timeouts. + return download_weights_from_hf_specific( + model_id, + None, + allow_patterns=[ + "*.json", "*.bin", "*.safetensors", "*.pt", "*.txt", "*.model", + "*.yaml" + ], + require_all=True, + ) class OmniBase: From da08cf94a77317f81a11b626bc373de911460424 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:12:38 +0800 Subject: [PATCH 05/11] Update weight_utils.py Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index 7432ad9a2a..35fee67616 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -20,6 +20,7 @@ def download_weights_from_hf_specific( allow_patterns: list[str], revision: str | None = None, ignore_patterns: str | list[str] | None = None, + require_all: bool = False, ) -> str: """Download model weights from Hugging Face Hub. Users can specify the allow_patterns to download only the necessary weights. @@ -35,6 +36,8 @@ def download_weights_from_hf_specific( ignore_patterns (Optional[Union[str, list[str]]]): The patterns to filter out the weight files. Files matched by any of the patterns will be ignored. + require_all (bool): If True, will download all patterns instead of + returning after the first one that contains files. Returns: str: The path to the downloaded model weights. 
@@ -59,8 +62,8 @@ def download_weights_from_hf_specific( **download_kwargs, ) # If we have downloaded weights for this allow_pattern, - # we don't need to check the rest. - if any(Path(hf_folder).glob(allow_pattern)): + # we don't need to check the rest,unless require_all is set. + if not require_all and any(Path(hf_folder).glob(allow_pattern)): break time_taken = time.perf_counter() - start_time if time_taken > 0.5: From 9f98029e72545d0109ee8a819c6d1b5ba3e62315 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:36:57 +0800 Subject: [PATCH 06/11] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index b8e27b1222..6c61a7bda6 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -71,6 +71,10 @@ def _dummy_snapshot_download(model_id): def omni_snapshot_download(model_id) -> str: + # If it's already a local path, just return it + if os.path.exists(model_id): + return model_id + # TODO: this is just a workaround for quickly use modelscope, we should support # modelscope in weight loading feature instead of using `snapshot_download` if os.environ.get("VLLM_USE_MODELSCOPE", False): @@ -78,19 +82,15 @@ def omni_snapshot_download(model_id) -> str: return snapshot_download(model_id) - # If it's already a local path, just return it - if os.path.exists(model_id): - return model_id - # For other cases (Hugging Face), perform a real download to ensure all # necessary files (including *.pt for audio/diffusion) are available locally # before stage workers are spawned. This prevents initialization timeouts. 
return download_weights_from_hf_specific( - model_id, - None, + model_name_or_path=model_id, + cache_dir=None, allow_patterns=[ - "*.json", "*.bin", "*.safetensors", "*.pt", "*.txt", "*.model", - "*.yaml" + "**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", + "**/*.txt", "**/*.model", "**/*.yaml" ], require_all=True, ) From e1edaa816817bcfb6dde311106d933b8b0f5d196 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:39:14 +0800 Subject: [PATCH 07/11] Update weight_utils.py Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index 35fee67616..b38bbc974a 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -36,8 +36,7 @@ def download_weights_from_hf_specific( ignore_patterns (Optional[Union[str, list[str]]]): The patterns to filter out the weight files. Files matched by any of the patterns will be ignored. - require_all (bool): If True, will download all patterns instead of - returning after the first one that contains files. + require_all (bool): If True, will download all patterns. Returns: str: The path to the downloaded model weights. 
From fadbfe224f889578e715f49d9986bd1c69a6407e Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 12:37:14 +0800 Subject: [PATCH 08/11] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 6c61a7bda6..c96158ccf5 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -74,24 +74,19 @@ def omni_snapshot_download(model_id) -> str: # If it's already a local path, just return it if os.path.exists(model_id): return model_id - # TODO: this is just a workaround for quickly use modelscope, we should support # modelscope in weight loading feature instead of using `snapshot_download` if os.environ.get("VLLM_USE_MODELSCOPE", False): from modelscope.hub.snapshot_download import snapshot_download return snapshot_download(model_id) - # For other cases (Hugging Face), perform a real download to ensure all # necessary files (including *.pt for audio/diffusion) are available locally # before stage workers are spawned. This prevents initialization timeouts. 
return download_weights_from_hf_specific( model_name_or_path=model_id, cache_dir=None, - allow_patterns=[ - "**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", - "**/*.txt", "**/*.model", "**/*.yaml" - ], + allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt","**/*.txt", "**/*.model", "**/*.yaml"], require_all=True, ) From f9b147e87ac06d137c9b3ac5f452d36a7505c136 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 12:40:24 +0800 Subject: [PATCH 09/11] Fix formatting of allow_patterns list in omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index c96158ccf5..eec6537a2a 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -86,7 +86,7 @@ def omni_snapshot_download(model_id) -> str: return download_weights_from_hf_specific( model_name_or_path=model_id, cache_dir=None, - allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt","**/*.txt", "**/*.model", "**/*.yaml"], + allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", "**/*.txt", "**/*.model", "**/*.yaml"], require_all=True, ) From 4589643a559aa6fb461dfeb9566a6843c6d9fe68 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 14:26:35 +0800 Subject: [PATCH 10/11] Update vllm_omni/model_executor/model_loader/weight_utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index b38bbc974a..7347ee20a1 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -61,7 +61,7 @@ def download_weights_from_hf_specific( 
**download_kwargs, ) # If we have downloaded weights for this allow_pattern, - # we don't need to check the rest,unless require_all is set. + # we don't need to check the rest, unless require_all is set. if not require_all and any(Path(hf_folder).glob(allow_pattern)): break time_taken = time.perf_counter() - start_time From b4ac9800583ae1e9f148497d97a29ba46044cc4c Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 14:26:59 +0800 Subject: [PATCH 11/11] Update vllm_omni/model_executor/model_loader/weight_utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index 7347ee20a1..d147269d66 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -36,7 +36,9 @@ def download_weights_from_hf_specific( ignore_patterns (Optional[Union[str, list[str]]]): The patterns to filter out the weight files. Files matched by any of the patterns will be ignored. - require_all (bool): If True, will download all patterns. + require_all (bool): If True, will iterate through and download files + matching all patterns in allow_patterns. If False, will stop after + the first pattern that matches any files. Returns: str: The path to the downloaded model weights.