
Commit 45657ab

nhuetducoffeM authored and committed
Fix BatchNormalization
- layer_pos and layer_neg must not take into account the bias (already added in forward_ibp_propagate())
  => center and add_moving_mean default to False in BatchNormalizationKernelConstraint
  => we update the docstring for layer_neg and layer_pos in DecomonLayer to make it explicit
- if scale=False, we set layer_neg to 0 (as gamma is None but should be considered to be 1, so only layer_pos has to be considered)
- in tests, when randomizing layer weights, we must be careful not to assign a negative number to BatchNormalization.moving_variance
1 parent 9f59ee9 commit 45657ab
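
For intuition, here is a minimal numpy sketch (illustrative only, not decomon code; all names below are made up) of the mixed-monotonicity idea behind layer_pos / layer_neg, and of why the BatchNormalization bias must be applied exactly once when propagating affine bounds:

import numpy as np

# Inference-mode BatchNormalization is the elementwise affine map y = w * x + b.
gamma, beta = np.array([1.5, -0.5]), np.array([0.1, 0.2])
mean, var, eps = np.array([0.0, 1.0]), np.array([1.0, 4.0]), 1e-3
w = gamma / np.sqrt(var + eps)                  # effective "kernel"
b = beta - mean * w                             # effective "bias"

# Roles of layer_pos / layer_neg: keep only the positive / negative part of w.
w_pos, w_neg = np.maximum(w, 0.0), np.minimum(w, 0.0)

# Incoming elementwise affine bounds on the BN input x: W_l*z + b_l <= x <= W_u*z + b_u.
W_l, b_l = np.array([1.0, 1.0]), np.array([0.0, 0.0])
W_u, b_u = np.array([1.0, 1.0]), np.array([0.5, 0.5])

# Lower bound on the output: the positive part of w uses the lower input bound, the
# negative part uses the upper one, and the bias b is added exactly once; if layer_pos
# and layer_neg also added it, it would be counted twice.
W_lower_out = w_pos * W_l + w_neg * W_u
b_lower_out = w_pos * b_l + w_neg * b_u + b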

File tree

4 files changed (+20 / -10 lines)


src/decomon/layers/layer.py

Lines changed: 2 additions & 2 deletions
@@ -441,8 +441,8 @@ def _forward_affine_propagate_linear(

         Args:
             layer: linear layer used for standard propagation.
-            layer_pos: Layer variant that applies positive weights only, for mixed monotonicity handling.
-            layer_neg: Layer variant that applies negative weights only, for mixed monotonicity handling.
+            layer_pos: Layer variant that applies positive weights only, for mixed monotonicity handling, does not apply bias.
+            layer_neg: Layer variant that applies negative weights only, for mixed monotonicity handling, does not apply bias.
             layer_input_shape_wo_batchsize: Input shape of the layer, excluding the batch size.
             layer_output_shape_wo_batchsize: Output shape of the layer, excluding the batch size.
             input_affine_bounds: List of four tensors [W_lower, b_lower, W_upper, b_upper] representing

src/decomon/layers/normalization/batch_normalization.py

Lines changed: 2 additions & 2 deletions
@@ -17,5 +17,5 @@ def __init__(self, *args: Any, **kwargs: Any):
         super().__init__(*args, **kwargs)
         # create positive and negative version

-        self.layer_pos = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.maximum, center=self.layer.center)
-        self.layer_neg = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.minimum, center=False)
+        self.layer_pos = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.maximum)
+        self.layer_neg = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.minimum, null_if_noscale=True)
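
As a rough sketch of what these two wrapped layers presumably compute (a hypothetical helper, not the actual BatchNormalizationKernelConstraint implementation): the ops argument clips the effective BatchNormalization kernel elementwise, with K.maximum keeping its positive part and K.minimum its negative part.

import keras.ops as K

def split_effective_bn_kernel(gamma, moving_variance, epsilon=1e-3):
    # Effective elementwise kernel of inference-mode BatchNormalization.
    kernel = K.divide(gamma, K.sqrt(moving_variance + epsilon))
    # Positive part (role of layer_pos) and negative part (role of layer_neg).
    return K.maximum(kernel, 0.0), K.minimum(kernel, 0.0)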

src/decomon/layers/normalization/utils.py

Lines changed: 8 additions & 2 deletions
@@ -14,11 +14,13 @@ def __init__(
         self,
         layer: BatchNormalization,
         ops: Callable[[Tensor, Tensor], Tensor] = K.maximum,
-        add_moving_mean: bool = True,
-        center: bool = True,
+        add_moving_mean: bool = False,
+        center: bool = False,
+        null_if_noscale: bool = False,
         **kwargs: Any,
     ):
         super().__init__(layer=layer, **kwargs)
+        self.null_if_noscale = null_if_noscale
         self.ops = ops
         self.add_moving_mean = add_moving_mean
         self.moving_mean_ = self.layer.moving_mean
@@ -55,6 +57,10 @@ def call(self, inputs: Tensor, training: Optional[bool] = None, mask: Optional[T
                     f"mask.shape={mask.shape}, inputs.shape={inputs.shape}"
                 )

+        # special case: no scale => layer_neg should not exist
+        if not self.scale and self.null_if_noscale:
+            return K.zeros_like(inputs)
+
         compute_dtype = backend.result_type(inputs.dtype, "float32")
         # BN is prone to overflow with float16/bfloat16 inputs, so we upcast to
         # float32 for the subsequent computations.
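
For intuition on the new scale=False special case (a standalone numpy check, not decomon code): with scale=False, gamma is None and acts as 1, so the effective kernel 1 / sqrt(moving_variance + eps) is strictly positive; its negative part, which is exactly what layer_neg would contribute, is identically zero, hence the zeros_like early return.

import numpy as np

moving_variance, eps = np.array([0.5, 2.0, 4.0]), 1e-3
kernel = 1.0 / np.sqrt(moving_variance + eps)   # gamma treated as 1
assert np.all(np.minimum(kernel, 0.0) == 0.0)   # the "negative" part vanishes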

tests/test_unary_layers.py

Lines changed: 8 additions & 4 deletions
@@ -166,7 +166,8 @@ def data_format_kwargs(data_format):
     (DecomonAveragePooling2D, {}, AveragePooling2D, dict(pool_size=2)),
     (DecomonGlobalAveragePooling2D, {}, GlobalAveragePooling2D, data_format_kwargs),
     (DecomonGlobalAveragePooling2D, {}, GlobalAveragePooling2D, data_format_kwargs),
-    # (DecomonBatchNormalization, {}, BatchNormalization, dict()),  # lower_affine not ok
+    (DecomonBatchNormalization, {}, BatchNormalization, dict()),
+    (DecomonBatchNormalization, {}, BatchNormalization, dict(center=False, scale=False)),
     (DecomonConv2D, {}, Conv2D, dict(filters=2, kernel_size=2)),
     (DecomonDepthwiseConv2D, {}, DepthwiseConv2D, dict(kernel_size=2)),
     # (DecomonMax, {}, Max, dict(axis=1)),  # to be fixed
@@ -233,15 +234,18 @@ def test_decomon_unary_layer(
     # build keras layer
     layer(keras_symbolic_layer_input)

-    # randomize weights between -1 and 1 => non-zero biases
+    # randomize weights (e.g. to test non-zero biases)
     for w in layer.weights:
-        w.assign(2.0 * np.random.random(w.shape) - 1.0)
+        # positive-only weights ?
+        if "variance" in w.name:  # like BatchNormalization.moving_variance
+            w.assign(np.random.random(w.shape) + 0.5)  # between 0.5 and 1.5
+        else:
+            w.assign(2.0 * np.random.random(w.shape) - 1.0)  # between -1 and 1

     # init + build decomon layer
     output_shape = layer.output.shape[1:]
     model_output_shape = output_shape
     model_input_shape = keras_symbolic_model_input.shape[1:]
-
     decomon_layer = decomon_layer_class(
         layer=layer,
         ibp=ibp,
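
To see why the test must keep moving_variance positive (a standalone numpy check, not part of the commit): BatchNormalization divides by sqrt(moving_variance + epsilon), so a variance drawn uniformly from [-1, 1) can be negative and produce NaNs.

import numpy as np

eps = 1e-3
bad_variance = np.array([-0.7])               # what "2 * random - 1" could produce
good_variance = np.random.random(3) + 0.5     # in [0.5, 1.5), as in the updated test

print(1.0 / np.sqrt(bad_variance + eps))      # nan (square root of a negative number)
print(1.0 / np.sqrt(good_variance + eps))     # finite values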
