Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions vllm_omni/entrypoints/omni.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
resolve_model_config_path,
)
from vllm_omni.inputs.data import OmniDiffusionSamplingParams, OmniPromptType, OmniSamplingParams
from vllm_omni.model_executor.model_loader.weight_utils import (
download_weights_from_hf_specific,
)
from vllm_omni.outputs import OmniRequestOutput

logger = init_logger(__name__)
Expand All @@ -68,14 +71,24 @@ def _dummy_snapshot_download(model_id):


def omni_snapshot_download(model_id) -> str:
# If it's already a local path, just return it
if os.path.exists(model_id):
return model_id
# TODO: this is just a workaround for quickly using modelscope; we should support
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 77 has trailing whitespace. Remove the trailing space after "return model_id".

Copilot uses AI. Check for mistakes.
# modelscope in the weight loading feature instead of using `snapshot_download`
if os.environ.get("VLLM_USE_MODELSCOPE", False):
from modelscope.hub.snapshot_download import snapshot_download

return snapshot_download(model_id)
else:
return _dummy_snapshot_download(model_id)
# For other cases (Hugging Face), perform a real download to ensure all
# necessary files (including *.pt for audio/diffusion) are available locally
Copy link

Copilot AI Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 84 has trailing whitespace. Remove the trailing space after "return snapshot_download(model_id)".

Copilot uses AI. Check for mistakes.
# before stage workers are spawned. This prevents initialization timeouts.
return download_weights_from_hf_specific(
model_name_or_path=model_id,
cache_dir=None,
allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", "**/*.txt", "**/*.model", "**/*.yaml"],
require_all=True,
)


class OmniBase:
Expand Down
8 changes: 6 additions & 2 deletions vllm_omni/model_executor/model_loader/weight_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def download_weights_from_hf_specific(
allow_patterns: list[str],
revision: str | None = None,
ignore_patterns: str | list[str] | None = None,
require_all: bool = False,
) -> str:
"""Download model weights from Hugging Face Hub. Users can specify the
allow_patterns to download only the necessary weights.
Expand All @@ -35,6 +36,9 @@ def download_weights_from_hf_specific(
ignore_patterns (Optional[Union[str, list[str]]]): The patterns to
filter out the weight files. Files matched by any of the patterns
will be ignored.
require_all (bool): If True, will iterate through and download files
matching all patterns in allow_patterns. If False, will stop after
the first pattern that matches any files.

Returns:
str: The path to the downloaded model weights.
Expand All @@ -59,8 +63,8 @@ def download_weights_from_hf_specific(
**download_kwargs,
)
# If we have downloaded weights for this allow_pattern,
# we don't need to check the rest.
if any(Path(hf_folder).glob(allow_pattern)):
# we don't need to check the rest, unless require_all is set.
if not require_all and any(Path(hf_folder).glob(allow_pattern)):
break
time_taken = time.perf_counter() - start_time
if time_taken > 0.5:
Expand Down