
Commit b1db94e

Authored by justinchuby and Copilot
Support saving external data as safetensors (onnx#306)
Supports saving a model to safetensors as an external data file with `ir.save_safetensors()`. The function closely follows the standard `ir.save` API and adds **sharding support** compatible with the Hugging Face Transformers convention. Initializers in subgraphs are also handled, as in `ir.load`.

Bug fixes:

- Fixed an error when converting an external 2-bit tensor to numpy.
- Fixed an error in `ir.load()` where the base directory was not set for initializers in subgraphs.

---------

Signed-off-by: Justin Chu <justinchuby@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 9be787c commit b1db94e
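For orientation, a minimal usage sketch. The call below assumes `save_safetensors` takes the model and a destination path the way `ir.save` does; the exact parameter names, the safetensors file naming, and any sharding threshold are assumptions, not the confirmed API.

```python
import onnx_ir as ir

# Load an existing model (external data, if any, is resolved relative to the model file).
model = ir.load("model.onnx")

# Hypothetical call: write the graph to disk and store initializer data in a
# safetensors external-data file next to it. Argument names are illustrative only.
ir.save_safetensors(model, "exported/model.onnx")
```

When sharding applies, the Hugging Face Transformers convention splits weights across files named like `model-00001-of-00002.safetensors` with a JSON index mapping tensor names to shards; whether this PR reuses that exact naming is not shown in this diff.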

9 files changed: 1,175 additions, 10 deletions

docs/api/core.md

Lines changed: 1 addition & 0 deletions

@@ -14,6 +14,7 @@

 onnx_ir.load
 onnx_ir.save
+onnx_ir.save_safetensors
 onnx_ir.from_proto
 onnx_ir.from_onnx_text
 onnx_ir.to_proto

noxfile.py

Lines changed: 1 addition & 0 deletions

@@ -29,6 +29,7 @@
     "typing_extensions>=4.10",
     "ml-dtypes",
     "onnxruntime",
+    "safetensors",
 )
 ONNX = "onnx==1.18"
 ONNXSCRIPT = "onnxscript"

requirements-dev.txt

Lines changed: 1 addition & 0 deletions

@@ -28,6 +28,7 @@ pyyaml
 torch>=2.3
 torchvision>=0.18.0
 transformers>=4.37.2
+safetensors

 # Lint
 lintrunner>=0.10.7

src/onnx_ir/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -84,6 +84,7 @@
     # IO
     "load",
     "save",
+    "save_safetensors",
     # Flags
     "DEBUG",
     # Others
@@ -156,6 +157,7 @@
     TypeProtocol,
     ValueProtocol,
 )
+from onnx_ir._safetensors import save_safetensors
 from onnx_ir.serde import TensorProtoTensor, from_onnx_text, from_proto, to_onnx_text, to_proto

 DEBUG = False

src/onnx_ir/_core.py

Lines changed: 7 additions & 1 deletion

@@ -757,7 +757,8 @@ def _load(self):
             _enums.DataType.UINT2,
         }:
             # Use uint8 to read in the full byte. Otherwise ml_dtypes.int4 will clip the values
-            dt = np.dtype(np.uint8).newbyteorder("<")
+            # No need to set endianness for uint8
+            dt = np.dtype(np.uint8)
             count = self.size // 2 + self.size % 2
         else:
             # Handle the byte order correctly by always using little endian
@@ -772,6 +773,11 @@ def _load(self):
             self._array = _type_casting.unpack_4bitx2(self._array, shape).view(
                 self.dtype.numpy()
             )
+        elif self.dtype.bitwidth == 2:
+            # Unpack the 2bit arrays
+            self._array = _type_casting.unpack_2bitx4(self._array, shape).view(
+                self.dtype.numpy()
+            )
         else:
             self._array = self._array.reshape(shape)

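For context on the 2-bit fix above, here is a rough numpy sketch of what unpacking four 2-bit values per byte can look like. It is not the actual `_type_casting.unpack_2bitx4` implementation, and the assumed bit order (lowest bits of each byte first) may differ.

```python
import numpy as np

def unpack_2bitx4_sketch(packed: np.ndarray, shape: tuple[int, ...]) -> np.ndarray:
    """Unpack uint8 bytes, each assumed to hold four 2-bit values."""
    flat = packed.reshape(-1).astype(np.uint8)
    # Extract the four 2-bit fields of every byte (lowest bits first, by assumption).
    unpacked = np.stack([(flat >> s) & 0b11 for s in (0, 2, 4, 6)], axis=-1).reshape(-1)
    # The final byte may contain padding, so trim to the requested element count.
    return unpacked[: int(np.prod(shape))].reshape(shape)
```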
src/onnx_ir/_io.py

Lines changed: 4 additions & 4 deletions

@@ -106,11 +106,11 @@ def callback(tensor: ir.TensorProtocol, metadata: ir.external_data.CallbackInfo)
         base_dir = os.path.dirname(path)

         # Store the original initializer values so they can be restored if modify_model=False
-        initializer_values: list[_core.Value] = []
+        initialized_values: list[_core.Value] = []
         for graph in model.graphs():
             # Collect from all subgraphs as well
-            initializer_values.extend(graph.initializers.values())
-        tensors = [v.const_value for v in initializer_values]
+            initialized_values.extend(graph.initializers.values())
+        tensors = [v.const_value for v in initialized_values]

         try:
             model = _external_data.unload_from_model(
@@ -125,7 +125,7 @@

         finally:
             # Restore the original initializer values so the model is unchanged
-            for initializer, tensor in zip(initializer_values, tensors, strict=True):
+            for initializer, tensor in zip(initialized_values, tensors, strict=True):
                 initializer.const_value = tensor

     else:

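The `_io.py` change above hinges on a snapshot-and-restore pattern: initializer `const_value`s from every graph and subgraph are collected, temporarily replaced while the external data is written, and restored in `finally` so the in-memory model is left untouched. A standalone sketch of that idea follows; the helper name is illustrative and not part of the library.

```python
from contextlib import contextmanager

@contextmanager
def preserve_initializer_values(model):
    """Illustrative helper: snapshot initializer const_values across all (sub)graphs
    and restore them on exit, so the caller's model is left unchanged."""
    values = []
    for graph in model.graphs():  # walks the main graph and all subgraphs
        values.extend(graph.initializers.values())
    saved = [v.const_value for v in values]
    try:
        yield values
    finally:
        for value, tensor in zip(values, saved, strict=True):
            value.const_value = tensor
```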