@@ -6,6 +6,7 @@
 import numpy as np

 from keras.src import activations
+from keras.src import backend
 from keras.src import constraints
 from keras.src import dtype_policies
 from keras.src import initializers
@@ -741,12 +742,27 @@ def grad_fn(*args, upstream=None):
                 inputs_scale = self._adjust_scale_for_quant(
                     inputs_scale, "input"
                 )
+                x = ops.einsum(self.equation, inputs, kernel)
+                # De-scale outputs
+                x = ops.cast(x, self.compute_dtype)
+                x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale))
             else:
-                inputs_scale = ops.ones((1,), dtype=self.compute_dtype)
-            x = ops.einsum(self.equation, inputs, kernel)
-            # De-scale outputs
-            x = ops.cast(x, self.compute_dtype)
-            x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale))
+                # Weight-only quantization: dequantize kernel and use float
+                # einsum. This is a workaround for PyTorch's einsum which
+                # doesn't support mixed-precision inputs (float input,
+                # int8 kernel).
+                if backend.backend() == "torch":
+                    kernel_scale = self._adjust_scale_for_dequant(kernel_scale)
+                    float_kernel = ops.divide(
+                        ops.cast(kernel, dtype=self.compute_dtype),
+                        kernel_scale,
+                    )
+                    x = ops.einsum(self.equation, inputs, float_kernel)
+                else:
+                    x = ops.einsum(self.equation, inputs, kernel)
+                    # De-scale outputs
+                    x = ops.cast(x, self.compute_dtype)
+                    x = ops.divide(x, kernel_scale)
             return x, grad_fn

         x = einsum_with_inputs_gradient(
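Aside: a minimal standalone sketch of the int8 weight-only branch above, assuming a made-up equation, shapes, and per-channel scale (none of these come from the diff). It illustrates why the torch branch dequantizes the kernel before the einsum, and that the result matches the generic einsum-then-de-scale path.

import torch

equation = "ab,bc->ac"                       # hypothetical einsum equation
inputs = torch.randn(2, 4)                   # float32 activations
kernel = torch.randint(-127, 128, (4, 3), dtype=torch.int8)
kernel_scale = torch.full((1, 3), 0.05)      # hypothetical per-channel scale

# torch.einsum(equation, inputs, kernel) rejects the mixed float/int8
# operands (the limitation noted in the comment above), so the torch
# branch dequantizes the kernel first.
float_kernel = kernel.to(inputs.dtype) / kernel_scale
out_torch_branch = torch.einsum(equation, inputs, float_kernel)

# Generic branch: einsum first, then de-scale the output. The kernel is
# cast to float here only so this reference also runs under torch.
out_generic = torch.einsum(equation, inputs, kernel.to(inputs.dtype))
out_generic = out_generic / kernel_scale

assert torch.allclose(out_torch_branch, out_generic, atol=1e-5)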
@@ -823,16 +839,29 @@ def grad_fn(*args, upstream=None):
                 inputs_scale = self._adjust_scale_for_quant(
                     inputs_scale, "input"
                 )
+                x = ops.einsum(self.equation, inputs_q, unpacked_kernel)
+                # De-scale outputs
+                x = ops.cast(x, self.compute_dtype)
+                x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale))
             else:
-                inputs_q = inputs
-                inputs_scale = ops.ones((1,), dtype=self.compute_dtype)
-
-            # Compute einsum on quantized inputs and unpacked int4 kernel.
-            x = ops.einsum(self.equation, inputs_q, unpacked_kernel)
-
-            # De-scale outputs.
-            x = ops.cast(x, self.compute_dtype)
-            x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale))
+                # Weight-only quantization: dequantize kernel and use float
+                # einsum. This is a workaround for PyTorch's einsum which
+                # doesn't support mixed-precision inputs (float input,
+                # int4 kernel).
+                if backend.backend() == "torch":
+                    # Align `kernel_scale` to the same layout as
+                    # `unpacked_kernel`.
+                    kernel_scale = self._adjust_scale_for_dequant(kernel_scale)
+                    float_kernel = ops.divide(
+                        ops.cast(unpacked_kernel, dtype=self.compute_dtype),
+                        kernel_scale,
+                    )
+                    x = ops.einsum(self.equation, inputs, float_kernel)
+                else:
+                    x = ops.einsum(self.equation, inputs, unpacked_kernel)
+                    # De-scale outputs
+                    x = ops.cast(x, self.compute_dtype)
+                    x = ops.divide(x, kernel_scale)
             return x, grad_fn

         x = einsum_with_inputs_gradient(
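Aside: the int4 path additionally works on a kernel that is stored packed, two 4-bit values per int8 byte, which is why it uses `unpacked_kernel` and why `_adjust_scale_for_dequant` has to align `kernel_scale` with the unpacked layout. The sketch below is a generic nibble-unpacking illustration only, not the exact keras.src.quantizers packing scheme; the byte values, low-nibble-first layout, and scale are assumptions.

import numpy as np

packed = np.array([[0x21, 0xF3]], dtype=np.uint8)  # two 4-bit values per byte
low = (packed & 0x0F).astype(np.int8)              # low nibbles
high = (packed >> 4).astype(np.int8)               # high nibbles
# Sign-extend the 4-bit two's-complement values into int8.
low = np.where(low >= 8, low - 16, low)
high = np.where(high >= 8, high - 16, high)
unpacked = np.concatenate([low, high], axis=-1)    # [[1, 3, 2, -1]]

kernel_scale = np.float32(0.25)                    # hypothetical per-tensor scale
float_kernel = unpacked.astype(np.float32) / kernel_scale
print(float_kernel)                                # values: 4, 12, 8, -4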