Support the complex64 data type (#660)

danieldk · web-flow · commit 547d0a7019b9 · 2025-10-28T17:56:49.000+01:00
* Support the complex64 data type

* Add more tests, paddle support

* Work around an old Torch bug where complex <- scalar segfaults

* Fix big endian lookup

* Complex64 -> C64

* More Complex64 -> C64

* Fixes
diff --git a/bindings/python/py_src/safetensors/numpy.py b/bindings/python/py_src/safetensors/numpy.py
@@ -154,6 +154,7 @@ def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:
     "I8": np.int8,
     "U8": np.uint8,
     "BOOL": bool,
+    "C64": np.complex64,
 }
 
 
diff --git a/bindings/python/py_src/safetensors/paddle.py b/bindings/python/py_src/safetensors/paddle.py
@@ -168,6 +168,7 @@ def _paddle2np(paddle_dict: Dict[str, paddle.Tensor]) -> Dict[str, np.array]:
     paddle.float64: 8,
     paddle.float8_e4m3fn: 1,
     paddle.float8_e5m2: 1,
+    paddle.complex64: 8,
     # XXX: These are not supported yet in paddle
     # paddle.uint64: 8,
     # paddle.uint32: 4,
diff --git a/bindings/python/py_src/safetensors/torch.py b/bindings/python/py_src/safetensors/torch.py
@@ -383,6 +383,7 @@ def load(data: bytes) -> Dict[str, torch.Tensor]:
     torch.int8: 1,
     torch.bool: 1,
     torch.float64: 8,
+    torch.complex64: 8,
     _float8_e4m3fn: 1,
     _float8_e5m2: 1,
     _float8_e8m0: 1,
@@ -410,6 +411,7 @@ def load(data: bytes) -> Dict[str, torch.Tensor]:
     "BOOL": torch.bool,
     "F8_E4M3": _float8_e4m3fn,
     "F8_E5M2": _float8_e5m2,
+    "C64": torch.complex64,
 }
 if Version(torch.__version__) >= Version("2.3.0"):
     _TYPES.update(
@@ -493,6 +495,7 @@ def _tobytes(tensor: torch.Tensor, name: str) -> bytes:
             # XXX: This is ok because both have the same width and byteswap is a no-op anyway
             _float8_e4m3fn: np.uint8,
             _float8_e5m2: np.uint8,
+            torch.complex64: np.complex64,
         }
         npdtype = NPDTYPES[tensor.dtype]
         # Not in place as that would potentially modify a live running model
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
@@ -84,6 +84,7 @@ fn prepare(tensor_dict: HashMap<String, PyBound<PyDict>>) -> PyResult<HashMap<St
             "float8_e5m2" => Dtype::F8_E5M2,
             "float8_e8m0fnu" => Dtype::F8_E8M0,
             "float4_e2m1fn_x2" => Dtype::F4,
+            "complex64" => Dtype::C64,
             dtype_str => {
                 return Err(SafetensorError::new_err(format!(
                     "dtype {dtype_str} is not covered",
@@ -1467,6 +1468,7 @@ fn get_pydtype(module: &PyBound<'_, PyModule>, dtype: Dtype, is_numpy: bool) ->
             Dtype::F8_E5M2 => module.getattr(intern!(py, "float8_e5m2"))?.into(),
             Dtype::F8_E8M0 => module.getattr(intern!(py, "float8_e8m0fnu"))?.into(),
             Dtype::F4 => module.getattr(intern!(py, "float4_e2m1fn_x2"))?.into(),
+            Dtype::C64 => module.getattr(intern!(py, "complex64"))?.into(),
             dtype => {
                 return Err(SafetensorError::new_err(format!(
                     "Dtype not understood: {dtype}"
diff --git a/bindings/python/src/view.rs b/bindings/python/src/view.rs
@@ -92,6 +92,7 @@ pub fn prepare(tensor_dict: HashMap<String, PyBound<PyDict>>) -> PyResult<HashMa
             "float8_e5m2" => Dtype::F8_E5M2,
             "float8_e8m0fnu" => Dtype::E8M0,
             "float4_e2m1fn_x2" => Dtype::F4,
+            "complex64" => Dtype::C64,
             dtype_str => {
                 return Err(SafetensorError::new_err(format!(
                     "dtype {dtype_str} is not covered",
diff --git a/bindings/python/tests/test_flax_comparison.py b/bindings/python/tests/test_flax_comparison.py
@@ -22,6 +22,7 @@ def setUp(self):
             "test": random.normal(key, (1024, 1024), dtype=jnp.float32),
             "test2": random.normal(key, (1024, 1024), dtype=jnp.float16),
             "test3": random.normal(key, (1024, 1024), dtype=jnp.bfloat16),
+            "test4": random.normal(key, (1024, 1024), dtype=jnp.complex64),
         }
         self.flax_filename = "./tests/data/flax_load.msgpack"
         self.sf_filename = "./tests/data/flax_load.safetensors"
diff --git a/bindings/python/tests/test_mlx_comparison.py b/bindings/python/tests/test_mlx_comparison.py
@@ -26,6 +26,9 @@ def setUp(self):
             "test": mx.random.uniform(shape=(1024, 1024), dtype=mx.float32),
             "test2": mx.random.uniform(shape=(1024, 1024), dtype=mx.float32),
             "test3": mx.random.uniform(shape=(1024, 1024), dtype=mx.float32),
+            "test4": mx.random.uniform(shape=(1024, 1024), dtype=mx.float32).astype(
+                mx.complex64
+            ),
             # This doesn't work because bfloat16 is not implemented
             # with similar workarounds as jax/tensorflow.
             # https://github.com/ml-explore/mlx/issues/1296
diff --git a/bindings/python/tests/test_paddle_comparison.py b/bindings/python/tests/test_paddle_comparison.py
@@ -20,6 +20,7 @@ def setUp(self):
             "test": paddle.zeros((1024, 1024), dtype=paddle.float32),
             "test2": paddle.zeros((1024, 1024), dtype=paddle.float32),
             "test3": paddle.zeros((1024, 1024), dtype=paddle.float32),
+            "test4": paddle.zeros((1024, 1024), dtype=paddle.complex64),
         }
         self.paddle_filename = "./tests/data/paddle_load.pdparams"
         self.sf_filename = "./tests/data/paddle_load.safetensors"
diff --git a/bindings/python/tests/test_pt_comparison.py b/bindings/python/tests/test_pt_comparison.py
@@ -64,6 +64,7 @@ def test_odd_dtype(self):
             "test": torch.randn((2, 2), dtype=torch.bfloat16),
             "test2": torch.randn((2, 2), dtype=torch.float16),
             "test3": torch.zeros((2, 2), dtype=torch.bool),
+            "test4": torch.zeros((2, 2), dtype=torch.complex64),
         }
 
         # Modify bool to have both values.
@@ -75,6 +76,31 @@ def test_odd_dtype(self):
         self.assertTrue(torch.equal(data["test"], reloaded["test"]))
         self.assertTrue(torch.equal(data["test2"], reloaded["test2"]))
         self.assertTrue(torch.equal(data["test3"], reloaded["test3"]))
+        self.assertTrue(torch.equal(data["test4"], reloaded["test4"]))
+
+    def test_complex(self):
+        # Test complex separately. Each value consists of two numbers
+        # and we want to validate that the representation is the same
+        # across platforms.
+        data = torch.zeros((2, 2), dtype=torch.complex64)
+        out = save({"test": data})
+
+        self.assertEqual(
+            out,
+            b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"C64","shape":[2,2],"data_offsets":[0,32]}}    '
+            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+        )
+
+        real = torch.tensor([-1.0])
+        imag = torch.tensor([1.0])
+        data[1][1] = torch.complex(real, imag)
+        out = save({"test": data})
+
+        self.assertEqual(
+            out,
+            b'@\x00\x00\x00\x00\x00\x00\x00{"test":{"dtype":"C64","shape":[2,2],"data_offsets":[0,32]}}    '
+            b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\xbf\x00\x00\x80?",
+        )
 
     def test_odd_dtype_fp8(self):
         if torch.__version__ < "2.1":
diff --git a/bindings/python/tests/test_tf_comparison.py b/bindings/python/tests/test_tf_comparison.py
@@ -33,6 +33,7 @@ def setUp(self):
             "test": tf.zeros((1024, 1024), dtype=tf.float32),
             "test2": tf.zeros((1024, 1024), dtype=tf.float32),
             "test3": tf.zeros((1024, 1024), dtype=tf.float32),
+            "test4": tf.zeros((1024, 1024), dtype=tf.complex64),
         }
         self.tf_filename = "./tests/data/tf_load.h5"
         self.sf_filename = "./tests/data/tf_load.safetensors"
diff --git a/safetensors/src/tensor.rs b/safetensors/src/tensor.rs
@@ -806,6 +806,8 @@ pub enum Dtype {
     U32,
     /// Floating point (32-bit)
     F32,
+    /// Complex (32-bit parts)
+    C64,
     /// Floating point (64-bit)
     F64,
     /// Signed integer (64-bit)
@@ -837,6 +839,7 @@ impl Dtype {
             Dtype::BF16 => 16,
             Dtype::F32 => 32,
             Dtype::F64 => 64,
+            Dtype::C64 => 64,
         }
     }
     /// Gives out the size (in bytes) of 1 element of this dtype.
@@ -871,6 +874,7 @@ impl Display for Dtype {
             Dtype::BF16 => "BF16",
             Dtype::F32 => "F32",
             Dtype::F64 => "F64",
+            Dtype::C64 => "C64",
         })
     }
 }
@@ -908,6 +912,7 @@ mod tests {
             Just(Dtype::BF16),
             Just(Dtype::F32),
             Just(Dtype::F64),
+            Just(Dtype::C64),
         ]
     }
 

Original file line number	Diff line number	Diff line change
`@@ -154,6 +154,7 @@ def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:`
`154`	`154`	`"I8": np.int8,`
`155`	`155`	`"U8": np.uint8,`
`156`	`156`	`"BOOL": bool,`
	`157`	`+ "C64": np.complex64,`
`157`	`158`	`}`
`158`	`159`
`159`	`160`
Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@ def setUp(self):`
`22`	`22`	`"test": random.normal(key, (1024, 1024), dtype=jnp.float32),`
`23`	`23`	`"test2": random.normal(key, (1024, 1024), dtype=jnp.float16),`
`24`	`24`	`"test3": random.normal(key, (1024, 1024), dtype=jnp.bfloat16),`
	`25`	`+ "test4": random.normal(key, (1024, 1024), dtype=jnp.complex64),`
`25`	`26`	`}`
`26`	`27`	`self.flax_filename = "./tests/data/flax_load.msgpack"`
`27`	`28`	`self.sf_filename = "./tests/data/flax_load.safetensors"`
Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@ def setUp(self):`
`20`	`20`	`"test": paddle.zeros((1024, 1024), dtype=paddle.float32),`
`21`	`21`	`"test2": paddle.zeros((1024, 1024), dtype=paddle.float32),`
`22`	`22`	`"test3": paddle.zeros((1024, 1024), dtype=paddle.float32),`
	`23`	`+ "test4": paddle.zeros((1024, 1024), dtype=paddle.complex64),`
`23`	`24`	`}`
`24`	`25`	`self.paddle_filename = "./tests/data/paddle_load.pdparams"`
`25`	`26`	`self.sf_filename = "./tests/data/paddle_load.safetensors"`
Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@ def setUp(self):`
`33`	`33`	`"test": tf.zeros((1024, 1024), dtype=tf.float32),`
`34`	`34`	`"test2": tf.zeros((1024, 1024), dtype=tf.float32),`
`35`	`35`	`"test3": tf.zeros((1024, 1024), dtype=tf.float32),`
	`36`	`+ "test4": tf.zeros((1024, 1024), dtype=tf.complex64),`
`36`	`37`	`}`
`37`	`38`	`self.tf_filename = "./tests/data/tf_load.h5"`
`38`	`39`	`self.sf_filename = "./tests/data/tf_load.safetensors"`