Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
60fe271
feat: add MobiusModelBuilder Olive pass
Apr 9, 2026
5921223
test: extend EP_MAP coverage to all four EPs; add CPU example config
Apr 9, 2026
9d77132
docs: correct Gemma4 model IDs and annotate audio support
Apr 9, 2026
5ba5b1f
fix: correct example config format (engine.target, remove invalid fie…
Apr 9, 2026
cd86ba3
fix: readability improvements for MobiusModelBuilder pass
Apr 10, 2026
3ee4a23
fix: additional readability fixes for MobiusModelBuilder pass
Apr 10, 2026
c82f407
feat: add output validation and trust_remote_code warning to MobiusMo…
Apr 10, 2026
8c1259c
docs: clarify _patch_build comment on lazy import patch target
Apr 10, 2026
2eb7de5
fix: address all open PR review comments on MobiusModelBuilder
Apr 10, 2026
209b616
fix: update mobius PyPI package name to mobius-ai
Apr 10, 2026
0c4a3cf
fix: remove unused noqa directives (RUF100)
Apr 10, 2026
be13f27
fix: get trust_remote_code from model load_kwargs and add additional_…
Copilot Apr 10, 2026
ee7fbd4
fix: use .get(key, default) over or False for trust_remote_code; clar…
Copilot Apr 10, 2026
e02b3f3
fix: remove unsupported 'comment' field from Gemma4 example configs
justinchuby Apr 23, 2026
dca7795
fix: use OnnxBlockWiseRtnQuantization for Gemma4 INT4 pipeline
justinchuby Apr 23, 2026
2af889f
fix: add MobiusEP enum for execution_provider validation
justinchuby Apr 23, 2026
f1c0a1a
Merge origin/main
justinchuby Apr 23, 2026
16f74dd
chore: move gemma4 example configs to olive-recipes
justinchuby Apr 23, 2026
68ed349
feat: generate ORT GenAI configs by default in MobiusModelBuilder
justinchuby Apr 24, 2026
8efcec5
Address comments
justinchuby May 4, 2026
9d519e4
test: stabilize mobius model builder CI cases
justinchuby May 4, 2026
797f466
fix: fallback to mobius default EP for unsupported providers
justinchuby May 4, 2026
3cccb83
test: add fallback EP test for None execution_provider
justinchuby May 4, 2026
d66515d
lint
justinchuby May 4, 2026
8e5b90f
Address comments
justinchuby May 7, 2026
66d91c8
fix
justinchuby May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,15 @@
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
},
"MobiusModelBuilder": {
"module_path": "olive.passes.onnx.mobius_model_builder.MobiusModelBuilder",
"supported_providers": [ "*" ],
"supported_accelerators": [ "*" ],
"supported_precisions": [ "fp32", "fp16", "bf16" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "mobius-ai", "onnx-ir" ]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need onnx-ir here. onnx-ir is olive dependency

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed

},
Comment thread
justinchuby marked this conversation as resolved.
"LoftQ": {
"module_path": "olive.passes.pytorch.lora.LoftQ",
"supported_providers": [ "*" ],
Expand Down Expand Up @@ -682,6 +691,8 @@
"inc": [ "neural-compressor" ],
"lora": [ "accelerate>=0.30.0", "peft", "scipy" ],
"diffusers": [ "accelerate>=0.30.0", "peft", "diffusers" ],
"mobius-ai": [ "mobius-ai" ],
"onnx-ir": [ "onnx-ir" ],
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed

"nvmo": [ "nvidia-modelopt[onnx]" ],
"openvino": [
"openvino>=2025.4.1",
Expand Down
250 changes: 250 additions & 0 deletions olive/passes/onnx/mobius_model_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
# -------------------------------------------------------------------------

Check warning

Code scanning / lintrunner

RUFF-FORMAT/format Warning

Run lintrunner -a to apply this patch.
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Build ONNX models from HuggingFace model IDs using the mobius package."""

from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar

from olive.common.utils import StrEnumBase
from olive.constants import Precision
from olive.hardware.constants import ExecutionProvider
from olive.model import HfModelHandler, ONNXModelHandler
from olive.model.handler.composite import CompositeModelHandler
from olive.passes import Pass
from olive.passes.olive_pass import PassConfigParam

if TYPE_CHECKING:
from olive.hardware.accelerator import AcceleratorSpec
from olive.passes.pass_config import BasePassConfig

logger = logging.getLogger(__name__)

# Maps Olive Precision values to mobius dtype strings.
#   "f32"  = torch.float32 — standard full precision.
#   "f16"  = torch.float16 — half precision, good for GPU inference.
#   "bf16" = torch.bfloat16 — brain float, preferred over f16 on newer hardware.
# INT4/INT8 precisions are deliberately absent: quantize with a downstream
# Olive quantization pass (e.g. OnnxMatMulNBits) after this pass instead of
# setting precision here.
_PRECISION_TO_DTYPE: dict[str, str] = dict(
    [
        (Precision.FP32, "f32"),
        (Precision.FP16, "f16"),
        (Precision.BF16, "bf16"),
    ]
)


class MobiusModelBuilder(Pass):
    """Olive pass that uses mobius to build ONNX models from HuggingFace model IDs.

    Supports all model architectures registered in mobius (LLMs, VLMs, speech
    models, diffusion models). For multi-component models (e.g. vision-language
    models that produce ``model``, ``vision``, and ``embedding`` sub-graphs) the
    pass returns a :class:`~olive.model.handler.composite.CompositeModelHandler`
    whose components are individual :class:`~olive.model.ONNXModelHandler` objects.
    Single-component models return a plain :class:`~olive.model.ONNXModelHandler`.

    Requires ``mobius-ai`` to be installed::

        pip install mobius-ai

    See https://github.com/microsoft/mobius
    """

    class MobiusRuntime(StrEnumBase):
        """Target runtimes for genai config generation."""

        NONE = "none"  # skip runtime config generation entirely
        ORT_GENAI = "ort-genai"  # emit genai_config.json + tokenizer/processor files

    class MobiusEP(StrEnumBase):
        """Execution providers supported by mobius."""

        DEFAULT = "default"  # let mobius choose its own default EP
        CPU = "cpu"
        CUDA = "cuda"
        DML = "dml"
        WEBGPU = "webgpu"
        TRT_RTX = "trt-rtx"
        # presumably emits only standard ONNX ops with no EP-specific fusions
        # — confirm against mobius documentation
        ONNX_STANDARD = "onnx-standard"

    # Maps Olive ExecutionProvider enum values to mobius EP names.
    # Providers absent from this map have no mobius-specific lowering; the
    # fallback behavior for them is decided in _run_for_config.
    EP_MAP: ClassVar[dict[ExecutionProvider, str]] = {
        ExecutionProvider.CPUExecutionProvider: "cpu",
        ExecutionProvider.CUDAExecutionProvider: "cuda",
        ExecutionProvider.DmlExecutionProvider: "dml",
        ExecutionProvider.WebGpuExecutionProvider: "webgpu",
    }

@classmethod
def is_accelerator_agnostic(cls, accelerator_spec: AcceleratorSpec) -> bool:
# EP selection determines which fused ops are emitted, so this pass is
# EP-specific.
return False

@classmethod
def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
return {
"precision": PassConfigParam(
type_=Precision,
required=False,
default_value=Precision.FP32,
description=(
"Model weight / compute precision. One of: fp32, fp16, bf16. "
"Defaults to fp32. For INT4 quantization, run an Olive "
"quantization pass (e.g. OnnxMatMulNBits) after this pass."
),
),
"execution_provider": PassConfigParam(
type_=MobiusModelBuilder.MobiusEP,
Comment thread
justinchuby marked this conversation as resolved.
Outdated
required=False,
default_value=None,
description=(
"Override the mobius execution provider. "
"When None (default), the EP is auto-detected from the Olive "
"accelerator spec."
),
),
"runtime": PassConfigParam(
type_=MobiusModelBuilder.MobiusRuntime,
required=False,
default_value=MobiusModelBuilder.MobiusRuntime.ORT_GENAI,
description=(
"Target runtime. 'ort-genai' (default) generates "
"genai_config.json, tokenizer files, and processor "
"configs alongside the ONNX models. 'none' to skip."
),
),
}

def _run_for_config(
self,
model: HfModelHandler,
config: type[BasePassConfig],
output_model_path: str,
) -> ONNXModelHandler | CompositeModelHandler:
try:
from mobius import build
except ImportError as exc:
raise ImportError(
"mobius-ai is required to run MobiusModelBuilder. Install with: pip install mobius-ai"
) from exc
Comment thread
justinchuby marked this conversation as resolved.

if not isinstance(model, HfModelHandler):
raise ValueError(f"MobiusModelBuilder requires an HfModelHandler input, got {type(model).__name__}.")

# Resolve EP: explicit config override > accelerator spec > fallback to cpu.
ep_str: str = config.execution_provider or self.EP_MAP.get(self.accelerator_spec.execution_provider, "cpu")

Comment thread
justinchuby marked this conversation as resolved.
dtype_str: str = _PRECISION_TO_DTYPE.get(config.precision, "f32")
model_id: str = model.model_name_or_path

# Read trust_remote_code from the model's HuggingFace load kwargs.
trust_remote_code: bool = model.get_load_kwargs().get("trust_remote_code", False)

logger.info(
"MobiusModelBuilder: building '%s' (ep=%s, dtype=%s)",
model_id,
ep_str,
dtype_str,
)

if trust_remote_code:
logger.warning("MobiusModelBuilder: trust_remote_code=True — only use with trusted model sources.")

output_dir = Path(output_model_path)
output_dir.mkdir(parents=True, exist_ok=True)

pkg = build(
model_id,
dtype=dtype_str,
execution_provider=ep_str,
load_weights=True,
trust_remote_code=trust_remote_code,
)

# ModelPackage.save() handles both single and multi-component layouts:
# single component → <output_dir>/model.onnx
# multi-component → <output_dir>/<name>/model.onnx for each key
pkg.save(str(output_dir))

# Generate ORT GenAI config artifacts (genai_config.json, tokenizer
# files, processor configs) when runtime is set to ort-genai.
if config.runtime == self.MobiusRuntime.ORT_GENAI:
self._write_genai_config(pkg, str(output_dir), model_id, ep_str)

package_keys = list(pkg.keys())
logger.info("MobiusModelBuilder: saved components %s to '%s'", package_keys, output_dir)

if len(package_keys) == 1:
# Single-component model (most LLMs): return a plain ONNXModelHandler.
onnx_path = output_dir / "model.onnx"
if not onnx_path.exists():
raise RuntimeError(
f"MobiusModelBuilder: expected output file not found: {onnx_path}. "
"mobius.build() may have failed silently or saved to an unexpected path."
)
additional_files = sorted(
{str(fp) for fp in output_dir.iterdir()} - {str(onnx_path), str(onnx_path) + ".data"}
)
return ONNXModelHandler(
model_path=str(output_dir),
onnx_file_name="model.onnx",
model_attributes={
"mobius_package_keys": package_keys,
Comment thread
jambayk marked this conversation as resolved.
"additional_files": additional_files,
**(model.model_attributes or {}),
},
)

# Multi-component model (VLMs, encoder-decoders, diffusion pipelines):
# mobius saves each component to <output_dir>/<key>/model.onnx.
components = []
for key in package_keys:
component_dir = output_dir / key
onnx_path = component_dir / "model.onnx"
if not onnx_path.exists():
raise RuntimeError(
f"MobiusModelBuilder: expected output file not found: {onnx_path}. "
f"mobius.build() may have failed silently for component '{key}'."
)
additional_files = sorted(
{str(fp) for fp in component_dir.iterdir()} - {str(onnx_path), str(onnx_path) + ".data"}
)
components.append(
ONNXModelHandler(
model_path=str(component_dir),
onnx_file_name="model.onnx",
model_attributes={
"mobius_component": key,
"additional_files": additional_files,
**(model.model_attributes or {}),
},
)
)

return CompositeModelHandler(
model_components=components,
model_component_names=package_keys,
model_path=str(output_dir),
model_attributes={
"mobius_package_keys": package_keys,
**(model.model_attributes or {}),
},
)

@staticmethod
def _write_genai_config(pkg, output_dir: str, model_id: str, ep: str) -> None:
"""Generate ORT GenAI config artifacts alongside the ONNX models."""
from mobius.integrations.ort_genai import write_ort_genai_config

genai_artifacts = write_ort_genai_config(
pkg, output_dir, hf_model_id=model_id, ep=ep,
)
logger.info(
"MobiusModelBuilder: wrote ORT GenAI config: %s",
list(genai_artifacts.keys()),
)
Loading
Loading