 from keras.src import quantizers
 from keras.src import regularizers
 from keras.src.api_export import keras_export
+from keras.src.dtype_policies import QuantizedFloat8DTypePolicy
 from keras.src.layers.input_spec import InputSpec
 from keras.src.layers.layer import Layer
+from keras.src.quantizers.quantization_config import QuantizationConfig
+from keras.src.quantizers.quantization_config import validate_and_resolve_config
 from keras.src.quantizers.quantizers import dequantize_with_sz_map


@@ -370,9 +373,9 @@ def variable_serialization_spec(self):

     def quantized_build(self, kernel_shape, mode, config=None):
         if mode == "int8":
-            self._int8_build(kernel_shape)
+            self._int8_build(kernel_shape, config)
         elif mode == "int4":
-            self._int4_build(kernel_shape)
+            self._int4_build(kernel_shape, config)
         elif mode == "float8":
             self._float8_build()
         elif mode == "gptq":
@@ -381,8 +384,14 @@ def quantized_build(self, kernel_shape, mode, config=None):
             raise self._quantization_mode_error(mode)
         self._is_quantized = True

-    def _int8_build(self, kernel_shape):
-        self.inputs_quantizer = quantizers.AbsMaxQuantizer(axis=-1)
+    def _int8_build(self, kernel_shape, config=None):
+        # Per-channel int8 quantizer for the last axis (features).
+        self.inputs_quantizer = (
+            QuantizationConfig.activation_quantizer_or_default(
+                config, quantizers.AbsMaxQuantizer(axis=-1)
+            )
+        )
+
         self._kernel = self.add_weight(
             name="kernel",
             shape=kernel_shape,
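
The `activation_quantizer_or_default` helper is only consumed at call sites like the one above, so its exact semantics are inferred rather than shown in this diff: it must return the caller-supplied default when no config is given, and it may return `None` for weight-only configs, which is what the `if self.inputs_quantizer:` guards later in this patch rely on. A minimal sketch under those assumptions (hypothetical reconstruction, not the actual implementation in keras.src.quantizers.quantization_config):

    @staticmethod
    def activation_quantizer_or_default(config, default):
        if config is None:
            # No config: preserve the pre-existing default behavior.
            return default
        # A weight-only config carries no activation quantizer; returning
        # None makes the quantized call paths skip input quantization.
        return config.activation_quantizer
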
@@ -481,7 +490,7 @@ def _gptq_call(self, inputs, training=False):
             y = self.activation(y)
         return y

-    def _int4_build(self, kernel_shape):
+    def _int4_build(self, kernel_shape, config=None):
         """Build variables for int4 quantization.

         `kernel_shape` is the *original* float32 kernel shape
@@ -490,8 +499,10 @@ def _int4_build(self, kernel_shape):
         int8 byte.
         """
         # Per-channel int8 quantizer for the last axis (features).
-        self.inputs_quantizer = quantizers.AbsMaxQuantizer(
-            axis=-1,
+        self.inputs_quantizer = (
+            QuantizationConfig.activation_quantizer_or_default(
+                config, quantizers.AbsMaxQuantizer(axis=-1)
+            )
         )
         input_dim, output_dim = kernel_shape
         packed_rows = (input_dim + 1) // 2  # ceil for odd dims
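
`packed_rows = (input_dim + 1) // 2` is the ceiling of `input_dim / 2` because `quantizers.pack_int4` stores two int4 values (range [-8, 7]) per int8 byte. A toy illustration of the nibble arithmetic involved (not the real `pack_int4`, which operates on whole tensors):

    def pack_pair(low, high):
        # Two signed int4 values in [-8, 7] -> one byte: keep the low
        # nibble of each and place `high` in the upper four bits.
        return ((high & 0x0F) << 4) | (low & 0x0F)

    def unpack_pair(byte):
        # Sign-extend each nibble back to a signed int4 value.
        def sign_extend(v):
            return v - 16 if v >= 8 else v
        return sign_extend(byte & 0x0F), sign_extend((byte >> 4) & 0x0F)

    assert unpack_pair(pack_pair(-3, 5)) == (-3, 5)
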
@@ -515,8 +526,6 @@ def _int4_build(self, kernel_shape):
         self._orig_input_dim = input_dim

     def _float8_build(self):
-        from keras.src.dtype_policies import QuantizedFloat8DTypePolicy
-
         # If `self.dtype_policy` is not QuantizedFloat8DTypePolicy, then set
         # `amax_history_length` to its default value.
         amax_history_length = getattr(
@@ -580,7 +589,15 @@ def grad_fn(*args, upstream=None):
             inputs_grad = ops.matmul(upstream, ops.transpose(float_kernel))
             return (inputs_grad, None, None)

-        inputs, inputs_scale = self.inputs_quantizer(inputs)
+        if self.inputs_quantizer:
+            inputs, inputs_scale = self.inputs_quantizer(inputs)
+        else:
+            # Weight-only quantization: inputs are not quantized.
+            # We still need inputs_scale for the formula:
+            #     x = x / (inputs_scale * kernel_scale)
+            # If inputs are not quantized, inputs_scale should be 1.
+            inputs_scale = ops.ones((1,), dtype=self.compute_dtype)
+
         x = ops.matmul(inputs, kernel)
         # De-scale outputs
         x = ops.cast(x, self.compute_dtype)
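
The `inputs_scale = 1` fallback works because of how abs-max de-scaling composes. A standalone NumPy check of the identity the comment above refers to (toy example, not Keras code):

    import numpy as np

    rng = np.random.default_rng(0)
    inputs = rng.normal(size=(2, 4)).astype("float32")
    kernel = rng.normal(size=(4, 3)).astype("float32")

    # Symmetric abs-max quantization of the kernel per output channel
    # (axis=0 reduction, matching AbsMaxQuantizer(axis=0) below).
    kernel_scale = 127.0 / np.abs(kernel).max(axis=0)
    q_kernel = np.round(kernel * kernel_scale)

    # Weight-only: inputs stay float, so inputs_scale is 1 and the
    # de-scaling formula reduces to dividing by kernel_scale alone.
    inputs_scale = 1.0
    x = (inputs @ q_kernel) / (inputs_scale * kernel_scale)
    assert np.allclose(x, inputs @ kernel, atol=1e-1)
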
@@ -631,7 +648,10 @@ def grad_fn(*args, upstream=None):
             inputs_grad = ops.matmul(upstream, ops.transpose(float_kernel))
             return (inputs_grad, None, None)

-        inputs, inputs_scale = self.inputs_quantizer(inputs)
+        if self.inputs_quantizer:
+            inputs, inputs_scale = self.inputs_quantizer(inputs)
+        else:
+            # Weight-only quantization: see the int8 call path above.
+            inputs_scale = ops.ones((1,), dtype=self.compute_dtype)
         x = ops.matmul(inputs, unpacked_kernel)
         x = ops.cast(x, self.compute_dtype)
         x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale))
@@ -746,38 +766,53 @@ def grad(*args, upstream=None, variables=None):
         x = self.activation(x)
         return x

-    def quantize(self, mode, type_check=True, config=None):
+    def quantize(self, mode=None, type_check=True, config=None):
         # Prevent quantization of the subclasses
         if type_check and (type(self) is not Dense):
             raise self._not_implemented_error(self.quantize)

+        config = validate_and_resolve_config(mode, config)
+        mode = config.mode
+
         kernel_shape = self._kernel.shape
         if mode == "int8":
-            kernel_value, kernel_scale = quantizers.abs_max_quantize(
-                self._kernel, axis=0, to_numpy=True
+            # Handle weight quantization.
+            # Quantize `self._kernel` to int8 and compute the corresponding
+            # scale.
+            weight_quantizer = QuantizationConfig.weight_quantizer_or_default(
+                config, quantizers.AbsMaxQuantizer(axis=0)
             )
-            kernel_scale = ops.squeeze(kernel_scale, axis=0)
+            kernel_value, kernel_scale = weight_quantizer(
+                self._kernel, to_numpy=True
+            )
+
+            if len(kernel_scale.shape) > 0:
+                kernel_scale = ops.squeeze(kernel_scale, axis=0)
+
             del self._kernel
             # Build variables for int8 mode
-            self.quantized_build(kernel_shape, mode)
+            self.quantized_build(kernel_shape, mode, config)
             self._kernel.assign(kernel_value)
             self.kernel_scale.assign(kernel_scale)
         elif mode == "int4":
             # 1. Quantize to int4 values (still int8 dtype, range [-8, 7]).
-            kernel_value_int4, kernel_scale = quantizers.abs_max_quantize(
-                self._kernel,
-                axis=0,
-                value_range=(-8, 7),
-                dtype="int8",
-                to_numpy=True,
+            weight_quantizer = QuantizationConfig.weight_quantizer_or_default(
+                config,
+                quantizers.AbsMaxQuantizer(
+                    axis=0, value_range=(-8, 7), output_dtype="int8"
+                ),
             )
-            kernel_scale = ops.squeeze(kernel_scale, axis=0)
+            kernel_value_int4, kernel_scale = weight_quantizer(
+                self._kernel, to_numpy=True
+            )
+
+            if len(kernel_scale.shape) > 0:
+                kernel_scale = ops.squeeze(kernel_scale, axis=0)
             # 2. Pack two int4 values into a single int8 byte.
             packed_kernel_value, _, _ = quantizers.pack_int4(kernel_value_int4)
             del self._kernel
             # Build variables using the original kernel shape; _int4_build
             # will compute the packed shape internally.
-            self.quantized_build(kernel_shape, mode)
+            self.quantized_build(kernel_shape, mode, config)
             # Assign packed values.
             self._kernel.assign(packed_kernel_value)
             self.kernel_scale.assign(kernel_scale)
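
Taken together, the reworked signature keeps the old string-mode entry point while letting a config object drive everything. A usage sketch (the `QuantizationConfig` constructor arguments are not shown in this diff, so that call is left indicative only):

    from keras import layers

    layer = layers.Dense(16)
    layer.build((None, 8))

    # Legacy path: a bare mode string is resolved into a default config
    # by validate_and_resolve_config(), so existing callers keep working.
    layer.quantize("int8")

    # Config-driven path: the mode comes from config.mode, and the config
    # may supply custom weight/activation quantizers (or omit the
    # activation quantizer for weight-only quantization). Constructor
    # arguments are hypothetical:
    # layer.quantize(config=QuantizationConfig(...))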