22 changes: 22 additions & 0 deletions keras/src/layers/preprocessing/normalization.py
@@ -6,7 +6,9 @@
from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.layers.preprocessing.data_layer import DataLayer
from keras.src.trainers.data_adapters import get_data_adapter
from keras.src.utils.module_utils import tensorflow as tf
from keras.src.trainers.data_adapters.py_dataset_adapter import PyDataset


@keras_export("keras.layers.Normalization")
@@ -229,6 +231,26 @@ def adapt(self, data):
            # Batch dataset if it isn't batched
            data = data.batch(128)
            input_shape = tuple(data.element_spec.shape)
        elif isinstance(data, PyDataset):
            # A PyDataset may yield bare `x` batches or (x, y) /
            # (x, y, sample_weight) tuples; keep only `x`.
            adapter = get_data_adapter(data)
            tf_dataset = adapter.get_tf_dataset()
            if isinstance(tf_dataset.element_spec, tuple):
                # (x, y) or (x, y, sample_weight) tuples.
                data = tf_dataset.map(lambda *batch: batch[0])
            else:
                # Just x.
                data = tf_dataset
            input_shape = tuple(data.element_spec.shape)
Comment on lines +237 to +247

@limzikiki commented on Nov 3, 2025:
Following up on your comment, what I did in my solution is:

        elif isinstance(data, keras.utils.PyDataset):
            # A PyDataset should return a tuple whose first element is the data.
            sample_input = data[0][0]
            if isinstance(sample_input, np.ndarray) or backend.is_tensor(sample_input):
                input_shape = sample_input.shape
            else:
                raise ValueError(
                    f"Unsupported data type: {type(sample_input)} "
                    "returned from the PyDataset"
                )

The advantage of my option is that we don't need to perform excessive transformations to TF tensors just for the sake of shape estimation. PyDataset is also used for experimentation, and when a dataset is too large to fit in RAM, which is common on workstations and personal devices, transforming the PyDataset into a TF tensor will fail due to memory allocation. The drawback of my solution, on the other hand, is that it retrieves the first batch, and fetching the first batch a second time might not return the same output (that would require a non-idempotent PyDataset, but I think that is then a user problem). Retrieving the first batch is nevertheless a feasible solution, because the element shape must be identical across all batches for normalization to work correctly.

Considering the strategic direction of Keras to move away from being solely dependent on TensorFlow, adding a transformation to TensorFlow creates technical debt that the Keras team will have to take care of later.

I am open to discussion.
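
For reference, a self-contained sketch of this first-batch approach, assuming `backend.is_tensor` from `keras.src.backend` (as used in the snippet above) and extending it to also handle a PyDataset that returns a bare `x`; the helper name `infer_pydataset_input_shape` is hypothetical:

    import numpy as np

    from keras.src import backend

    def infer_pydataset_input_shape(data):
        # One batch suffices: every batch of a PyDataset must share the
        # same per-element shape for normalization to work correctly.
        batch = data[0]
        # Unwrap (x,), (x, y), or (x, y, sample_weight) down to x.
        sample_input = batch[0] if isinstance(batch, tuple) else batch
        if isinstance(sample_input, np.ndarray) or backend.is_tensor(
            sample_input
        ):
            return tuple(sample_input.shape)
        raise ValueError(
            f"Unsupported data type: {type(sample_input)} "
            "returned from the PyDataset"
        )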

Contributor (Author) replied:
Thank you for the thorough response! Yes, I will defer to the core developers' judgement for this. Happy to revise and infer the shape based on sampling a batch if we think that's the better approach.
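
A minimal sketch of what that revision might look like inside `adapt` (hypothetical, not the merged code; it reuses the diff's `PyDataset` and `get_data_adapter` imports and assumes the first tuple element is `x`):

        elif isinstance(data, PyDataset):
            # Infer the input shape from the first batch rather than
            # from tf.data element specs.
            batch = data[0]
            sample_input = batch[0] if isinstance(batch, tuple) else batch
            input_shape = tuple(sample_input.shape)
            # The statistics pass still iterates a tf.data.Dataset,
            # keeping only `x` from (x, y) or (x, y, sample_weight).
            tf_dataset = get_data_adapter(data).get_tf_dataset()
            if isinstance(tf_dataset.element_spec, tuple):
                data = tf_dataset.map(lambda *batch: batch[0])
            else:
                data = tf_dataset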

        else:
            raise TypeError(
                f"Unsupported data type: {type(data)}. `adapt` supports "
                "`np.ndarray`, backend tensors, `tf.data.Dataset`, and "
                "`keras.utils.PyDataset`."
            )

        if not self.built:
            self.build(input_shape)
32 changes: 32 additions & 0 deletions keras/src/layers/preprocessing/normalization_test.py
@@ -169,3 +169,35 @@ def test_normalization_with_scalar_mean_var(self):
        input_data = np.array([[1, 2, 3]], dtype="float32")
        layer = layers.Normalization(mean=3.0, variance=2.0)
        layer(input_data)

    @parameterized.parameters([("x",), ("x_and_y",), ("x_y_and_weights",)])
    def test_adapt_pydataset_compat(self, pydataset_type):
        import keras

        class CustomDataset(keras.utils.PyDataset):
            def __len__(self):
                return 100

            def __getitem__(self, idx):
                x = np.random.rand(32, 32, 3)
                y = np.random.randint(0, 10, size=(1,))
                weights = np.random.randint(0, 10, size=(1,))
                if pydataset_type == "x":
                    return x
                elif pydataset_type == "x_and_y":
                    return x, y
                elif pydataset_type == "x_y_and_weights":
                    return x, y, weights
                else:
                    raise NotImplementedError(pydataset_type)

        normalizer = keras.layers.Normalization()
        normalizer.adapt(CustomDataset())
        self.assertTrue(normalizer.built)
        self.assertIsNotNone(normalizer.mean)
        self.assertIsNotNone(normalizer.variance)
        self.assertEqual(normalizer.mean.shape[-1], 3)
        self.assertEqual(normalizer.variance.shape[-1], 3)
        sample_input = np.random.rand(1, 32, 32, 3)
        output = normalizer(sample_input)
        self.assertEqual(output.shape, (1, 32, 32, 3))