
Commit f677edb

preliminary support for submodule export (#341)
* preliminary support for submodule export * fix mypy * spell * fix * fix * fix * fix
1 parent 0ee1179 commit f677edb

8 files changed: +158 −61 lines changed


CHANGELOGS.rst

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@ Change Logs
 0.8.4
 +++++
 
+* :pr:`341`: preliminary support to export submodule
 * :pr:`340`: supports devices in onnx plugs
 * :pr:`338`: fixes ReplayConfiguration.dump, add function to select of part of a model
 * :pr:`337`: fixes extract_subset_of_nodes

_doc/cmds/validate.rst

Lines changed: 20 additions & 1 deletion
@@ -124,7 +124,7 @@ of function :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`.
 
     main("validate -m arnir0/Tiny-LLM --run -v 1 --export onnx-dynamo -o dump_models --patch --opt ir --ortfusiontype ALL".split())
 
-Sdpa or Eager implementation or Use a StaticCache
+SDPA or Eager implementation or Use a StaticCache
 +++++++++++++++++++++++++++++++++++++++++++++++++
 
 Add ``--mop cache_implementation=static --iop cls_cache=StaticCache`` to use a StaticCache instead of a DynamicCache (default).
@@ -147,3 +147,22 @@ Add ``--mop attn_implementation=eager`` to explicitly select eager implementatio
         --mop attn_implementation=eager \
         --mop cache_implementation=static \
         --iop cls_cache=StaticCache
+
+Frequent examples used to test
+++++++++++++++++++++++++++++++
+
+.. code-block:: bash
+
+    python -m onnx_diagnostic validate -m arnir0/Tiny-LLM --run -v 1 --device cuda --dtype float16 -o dump_models --patch --opt default+onnxruntime --export custom
+
+About the exporter 'custom'
++++++++++++++++++++++++++++
+
+It is used to investigate issues or scenarios. It is usually very strict
+and fails whenever it encounters an unexpected situation.
+It calls :func:`experimental_experiment.torch_interpreter.to_onnx`.
+Some useful environment variables can be set before running the command line:
+
+* ``DROPPATTERN=<pattern1,pattern2,...>``: do not apply those patterns when optimizing a model
+* ``DUMPPATTERNS=<folder>``: dumps all matched and applied nodes when a pattern is applied
+* ``PATTERN=<pattern1,pattern2,...>``: increases verbosity for specific patterns, to understand why a pattern was not applied
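A minimal sketch, not part of this commit, of how the three variables above can be combined with the validate command documented on this page; the pattern names are hypothetical placeholders and the GPU-specific flags are omitted:

.. code-block:: python

    # Set the optimizer-related environment variables described above, then run
    # the validate command from this page in a subprocess.
    import os
    import subprocess

    os.environ["DROPPATTERN"] = "PatternA,PatternB"  # hypothetical patterns to skip during optimization
    os.environ["DUMPPATTERNS"] = "dump_patterns"     # folder receiving matched/applied nodes
    os.environ["PATTERN"] = "PatternA"               # more verbose output for this pattern only

    subprocess.run(
        "python -m onnx_diagnostic validate -m arnir0/Tiny-LLM --run -v 1 "
        "-o dump_models --patch --opt default+onnxruntime --export custom".split(),
        check=True,
    )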

_unittests/ut_tasks/test_tasks.py

Lines changed: 18 additions & 0 deletions
@@ -47,6 +47,24 @@ def test_text_generation(self):
             model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
         )
 
+    @hide_stdout()
+    def test_submodule(self):
+        mid = "arnir0/Tiny-LLM::model"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text-generation")
+        self.assertIn("inputs", data)
+        self.assertIn("inputs2", data)
+        self.assertIn("inputs_batch1", data)
+        self.assertIn("inputs_empty_cache", data)
+        self.assertIn((data["size"], data["n_weights"]), [(27379968, 6844992)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        model(**data["inputs2"])
+        with torch_export_patches(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
     @hide_stdout()
     def test_text_generation_empty_cache(self):
         mid = "arnir0/Tiny-LLM"

onnx_diagnostic/export/api.py

Lines changed: 60 additions & 46 deletions
@@ -3,6 +3,52 @@
 from .onnx_plug import EagerDirectReplacementWithOnnx
 
 
+def get_main_dispatcher(
+    use_control_flow_dispatcher: bool = False,
+    onnx_plugs: Optional[List[EagerDirectReplacementWithOnnx]] = None,
+) -> Any:  # Dispatcher
+    """Creates a custom dispatcher for the custom exporter."""
+    from experimental_experiment.torch_interpreter import Dispatcher
+
+    if use_control_flow_dispatcher:
+        from .control_flow_onnx import create_global_dispatcher
+
+        control_flow_dispatcher = create_global_dispatcher()
+    else:
+        control_flow_dispatcher = None
+
+    class MainDispatcher(Dispatcher):
+        def __init__(self, previous_dispatcher=None):
+            super().__init__({})
+            self.previous_dispatcher = previous_dispatcher
+
+        @property
+        def supported(self):
+            if self.previous_dispatcher:
+                return set(self.registered_functions) | self.previous_dispatcher.supported
+            return set(self.registered_functions)
+
+        def find_function(self, name: Any):
+            if self.previous_dispatcher:
+                find = self.previous_dispatcher.find_function(name)
+                if find:
+                    return find
+            return Dispatcher.find_function(self, name)
+
+        def find_method(self, name: Any):
+            if self.previous_dispatcher:
+                find = self.previous_dispatcher.find_method(name)
+                if find:
+                    return find
+            return Dispatcher.find_method(self, name)
+
+    main_dispatcher = MainDispatcher(control_flow_dispatcher)
+    if onnx_plugs:
+        for plug in onnx_plugs:
+            main_dispatcher.registered_functions[plug.target_name] = plug.custom_converter()
+    return main_dispatcher
+
+
 def to_onnx(
     mod: Union["torch.nn.Module", "torch.fx.GraphModule"],  # noqa: F821
     args: Optional[Sequence["torch.Tensor"]] = None,  # noqa: F821
@@ -82,51 +128,11 @@ def to_onnx(
     options = exporter_kwargs.pop("options", None)
     if options is None:
         options = OptimizationOptions(patterns="default+onnxruntime")
-    if onnx_plugs or use_control_flow_dispatcher:
-        from experimental_experiment.torch_interpreter import Dispatcher
-
-        if use_control_flow_dispatcher:
-            from .control_flow_onnx import create_global_dispatcher
-
-            control_flow_dispatcher = create_global_dispatcher()
-        else:
-            control_flow_dispatcher = None
-
-        class MainDispatcher(Dispatcher):
-            def __init__(self, previous_dispatcher=None):
-                super().__init__({})
-                self.previous_dispatcher = previous_dispatcher
-
-            @property
-            def supported(self):
-                if self.previous_dispatcher:
-                    return (
-                        set(self.registered_functions) | self.previous_dispatcher.supported
-                    )
-                return set(self.registered_functions)
-
-            def find_function(self, name: Any):
-                if self.previous_dispatcher:
-                    find = self.previous_dispatcher.find_function(name)
-                    if find:
-                        return find
-                return Dispatcher.find_function(self, name)
-
-            def find_method(self, name: Any):
-                if self.previous_dispatcher:
-                    find = self.previous_dispatcher.find_method(name)
-                    if find:
-                        return find
-                return Dispatcher.find_method(self, name)
-
-        main_dispatcher = MainDispatcher(control_flow_dispatcher)
-        if onnx_plugs:
-            for plug in onnx_plugs:
-                main_dispatcher.registered_functions[plug.target_name] = (
-                    plug.custom_converter()
-                )
-    else:
-        main_dispatcher = None
+    main_dispatcher = (
+        get_main_dispatcher(use_control_flow_dispatcher, onnx_plugs)
+        if onnx_plugs or use_control_flow_dispatcher
+        else None
+    )
 
     return _to_onnx(
         mod,
@@ -181,9 +187,17 @@ def find_method(self, name: Any):
     import onnx_ir as ir
     import onnx_ir.passes.common as common_passes
 
+    opset = (
+        18
+        if target_opset is None
+        else (target_opset if isinstance(target_opset, int) else target_opset[""])
+    )
+
     irfunctions = [
         ir.from_proto(
-            plug.get_function_proto(*flatten_object((args, kwargs), drop_keys=True))
+            plug.get_function_proto(
+                opset, *flatten_object((args, kwargs), drop_keys=True)
+            )
         )
         for plug in onnx_plugs
     ]

onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py

Lines changed: 9 additions & 5 deletions
@@ -262,12 +262,14 @@ def qwen_version_selector(opset: int, *args: torch.Tensor) -> Tuple[str, torch.d
     itype = torch_dtype_to_onnx_dtype(dtype)
     if strategy is not None:
         return strategy, itype
-    if dtype == torch.float32:
+    if dtype == torch.float32 or itype == onnx.TensorProto.FLOAT:
         if opset >= 24:
             return "LOOPA24", itype
         return "LOOPMHA", itype
-    if dtype == torch.float16:
-        if first_tensor.is_cuda:
+    if dtype == torch.float16 or itype == onnx.TensorProto.FLOAT16:
+        # first_tensor may be a SymbolicTensor (onnx).
+        # is_cuda is not available.
+        if hasattr(first_tensor, "is_cuda") and first_tensor.is_cuda:
             return "PACKED", itype
         return "LOOPMHA", itype
     raise AssertionError(
@@ -638,12 +640,14 @@ def forward(
             self.config._attn_implementation
         ]
 
-        is_sdpa = (
+        is_sdpa_or_eager = (
             attention_interface
             is transformers.integrations.sdpa_attention.sdpa_attention_forward
             or attention_interface is patched_sdpa_attention_forward
+            or attention_interface
+            is transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.eager_attention_forward
         )
-        if is_sdpa:
+        if is_sdpa_or_eager:
             attn_output = qwen_sdpa_attention_versatile(
                 query_states,
                 key_states,

onnx_diagnostic/torch_models/code_sample.py

Lines changed: 2 additions & 1 deletion
@@ -236,7 +236,7 @@ def code_sample(
         )
     )
     """
-    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+    model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(
         model_id,
         subfolder,
         same_as_pretrained=same_as_pretrained,
@@ -256,6 +256,7 @@ def code_sample(
         model_kwargs=mop,
         subfolder=subfolder,
         add_second_input=False,
+        submodule=submodule,
     )
     if drop_inputs:
         update = {}

onnx_diagnostic/torch_models/hghub/model_inputs.py

Lines changed: 34 additions & 7 deletions
@@ -26,17 +26,26 @@ def _code_needing_rewriting(model: Any) -> Any:
 
 
 def _preprocess_model_id(
-    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
-) -> Tuple[str, Optional[str], bool, bool]:
+    model_id: str,
+    subfolder: Optional[str],
+    same_as_pretrained: bool,
+    use_pretrained: bool,
+    submodule: Optional[str] = None,
+) -> Tuple[str, Optional[str], bool, bool, Optional[str]]:
+    if "::" in model_id:
+        assert (
+            not submodule
+        ), f"submodule={submodule!r} cannot be defined in model_id={model_id!r} as well"
+        model_id, submodule = model_id.split("::", maxsplit=1)
     if subfolder or "//" not in model_id:
-        return model_id, subfolder, same_as_pretrained, use_pretrained
+        return model_id, subfolder, same_as_pretrained, use_pretrained, submodule
     spl = model_id.split("//")
     if spl[-1] == "pretrained":
-        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
+        return _preprocess_model_id("//".join(spl[:-1]), "", True, True, submodule)
     if spl[-1] in {"transformer", "vae"}:
         # known subfolder
-        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
-    return model_id, subfolder, same_as_pretrained, use_pretrained
+        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained, submodule
+    return model_id, subfolder, same_as_pretrained, use_pretrained, submodule
 
 
 def get_untrained_model_with_inputs(
@@ -54,6 +63,7 @@ def get_untrained_model_with_inputs(
     subfolder: Optional[str] = None,
     use_only_preinstalled: bool = False,
     config_reduction: Optional[Callable[[Any, str], Dict]] = None,
+    submodule: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Gets a non initialized model similar to the original model
@@ -82,6 +92,7 @@ def get_untrained_model_with_inputs(
         <onnx_diagnostic.torch_models.hghub.reduce_model_config>`,
         this function takes a configuration and a task (string)
         as arguments
+    :param submodule: use a submodule instead of the main model
     :return: dictionary with a model, inputs, dynamic shapes, and the configuration,
         some necessary rewriting as well
 
@@ -108,11 +119,12 @@ def get_untrained_model_with_inputs(
             f"model_id={model_id!r}, preinstalled model is only available "
             f"if use_only_preinstalled is False."
         )
-    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+    model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(
         model_id,
         subfolder,
         same_as_pretrained=same_as_pretrained,
         use_pretrained=use_pretrained,
+        submodule=submodule,
     )
     if verbose:
         print(
@@ -147,6 +159,8 @@ def get_untrained_model_with_inputs(
     if verbose:
         print(f"[get_untrained_model_with_inputs] architecture={arch!r}")
         print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
+        if submodule:
+            print(f"[get_untrained_model_with_inputs] submodule={submodule!r}")
     if task is None:
         task = task_from_arch(arch, model_id=model_id, subfolder=subfolder)
     if verbose:
@@ -357,6 +371,19 @@ def get_untrained_model_with_inputs(
     if diff_config is not None:
         res["dump_info"] = dict(config_diff=diff_config)
 
+    if submodule:
+        path = submodule.split("::") if "::" in submodule else [submodule]
+        for p in path:
+            assert hasattr(model, p), (
+                f"Unable to find submodule {p!r} in class {type(model)}, "
+                f"submodule={submodule!r}, possible candidates: "
+                f"{[k for k in dir(model) if isinstance(getattr(model, k), torch.nn.Module)]}"
+            )
+            model = getattr(model, p)
+
+    if verbose:
+        print(f"[get_untrained_model_with_inputs] model class={model.__class__.__name__!r}")
+
     sizes = compute_model_size(model)
     res["model"] = model
     res["configuration"] = config
