
Commit 45657ab

nhuetducoffeM authored and committed
Fix BatchNormalization
- layer_pos and layer_neg must not take into account the bias (already added in forward_ibp_propagate())
  => center and add_moving_mean default to False in BatchNormalizationKernelConstraint
  => we update the docstring for layer_neg and layer_pos in DecomonLayer to make it explicit
- if scale=False, we set layer_neg to 0 (as gamma is None but should be considered to be 1, so only layer_pos has to be considered)
- in tests, when randomizing layer weights, we must be careful not to assign a negative number to BatchNormalization.moving_variance
1 parent 9f59ee9 commit 45657ab
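
For intuition, here is a minimal numpy sketch (illustrative only, not decomon code; all names below are made up) of the mixed-monotonicity idea behind layer_pos / layer_neg, and of why the BatchNormalization bias must be applied exactly once when propagating affine bounds:

import numpy as np

# Inference-mode BatchNormalization is the elementwise affine map y = w * x + b.
gamma, beta = np.array([1.5, -0.5]), np.array([0.1, 0.2])
mean, var, eps = np.array([0.0, 1.0]), np.array([1.0, 4.0]), 1e-3
w = gamma / np.sqrt(var + eps)                  # effective "kernel"
b = beta - mean * w                             # effective "bias"

# Roles of layer_pos / layer_neg: keep only the positive / negative part of w.
w_pos, w_neg = np.maximum(w, 0.0), np.minimum(w, 0.0)

# Incoming elementwise affine bounds on the BN input x: W_l*z + b_l <= x <= W_u*z + b_u.
W_l, b_l = np.array([1.0, 1.0]), np.array([0.0, 0.0])
W_u, b_u = np.array([1.0, 1.0]), np.array([0.5, 0.5])

# Lower bound on the output: the positive part of w uses the lower input bound, the
# negative part uses the upper one, and the bias b is added exactly once; if layer_pos
# and layer_neg also added it, it would be counted twice.
W_lower_out = w_pos * W_l + w_neg * W_u
b_lower_out = w_pos * b_l + w_neg * b_u + b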

File tree

4 files changed (+20 / -10 lines)


src/decomon/layers/layer.py

Lines changed: 2 additions & 2 deletions
@@ -441,8 +441,8 @@ def _forward_affine_propagate_linear(

         Args:
             layer: linear layer used for standard propagation.
-            layer_pos: Layer variant that applies positive weights only, for mixed monotonicity handling.
-            layer_neg: Layer variant that applies negative weights only, for mixed monotonicity handling.
+            layer_pos: Layer variant that applies positive weights only, for mixed monotonicity handling, does not apply bias.
+            layer_neg: Layer variant that applies negative weights only, for mixed monotonicity handling, does not apply bias.
             layer_input_shape_wo_batchsize: Input shape of the layer, excluding the batch size.
             layer_output_shape_wo_batchsize: Output shape of the layer, excluding the batch size.
             input_affine_bounds: List of four tensors [W_lower, b_lower, W_upper, b_upper] representing

src/decomon/layers/normalization/batch_normalization.py

Lines changed: 2 additions & 2 deletions
@@ -17,5 +17,5 @@ def __init__(self, *args: Any, **kwargs: Any):
         super().__init__(*args, **kwargs)
         # create positive and negative version

-        self.layer_pos = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.maximum, center=self.layer.center)
-        self.layer_neg = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.minimum, center=False)
+        self.layer_pos = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.maximum)
+        self.layer_neg = BatchNormalizationKernelConstraint(layer=self.layer, ops=K.minimum, null_if_noscale=True)
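
As a rough sketch of what these two wrapped layers presumably compute (a hypothetical helper, not the actual BatchNormalizationKernelConstraint implementation): the ops argument clips the effective BatchNormalization kernel elementwise, with K.maximum keeping its positive part and K.minimum its negative part.

import keras.ops as K

def split_effective_bn_kernel(gamma, moving_variance, epsilon=1e-3):
    # Effective elementwise kernel of inference-mode BatchNormalization.
    kernel = K.divide(gamma, K.sqrt(moving_variance + epsilon))
    # Positive part (role of layer_pos) and negative part (role of layer_neg).
    return K.maximum(kernel, 0.0), K.minimum(kernel, 0.0)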

src/decomon/layers/normalization/utils.py

Lines changed: 8 additions & 2 deletions
@@ -14,11 +14,13 @@ def __init__(
         self,
         layer: BatchNormalization,
         ops: Callable[[Tensor, Tensor], Tensor] = K.maximum,
-        add_moving_mean: bool = True,
-        center: bool = True,
+        add_moving_mean: bool = False,
+        center: bool = False,
+        null_if_noscale: bool = False,
         **kwargs: Any,
     ):
         super().__init__(layer=layer, **kwargs)
+        self.null_if_noscale = null_if_noscale
         self.ops = ops
         self.add_moving_mean = add_moving_mean
         self.moving_mean_ = self.layer.moving_mean
@@ -55,6 +57,10 @@ def call(self, inputs: Tensor, training: Optional[bool] = None, mask: Optional[T
                     f"mask.shape={mask.shape}, inputs.shape={inputs.shape}"
                 )

+        # special case: no scale => layer_neg should not exist
+        if not self.scale and self.null_if_noscale:
+            return K.zeros_like(inputs)
+
         compute_dtype = backend.result_type(inputs.dtype, "float32")
         # BN is prone to overflow with float16/bfloat16 inputs, so we upcast to
         # float32 for the subsequent computations.
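
For intuition on the new scale=False special case (a standalone numpy check, not decomon code): with scale=False, gamma is None and acts as 1, so the effective kernel 1 / sqrt(moving_variance + eps) is strictly positive; its negative part, which is exactly what layer_neg would contribute, is identically zero, hence the zeros_like early return.

import numpy as np

moving_variance, eps = np.array([0.5, 2.0, 4.0]), 1e-3
kernel = 1.0 / np.sqrt(moving_variance + eps)   # gamma treated as 1
assert np.all(np.minimum(kernel, 0.0) == 0.0)   # the "negative" part vanishes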

tests/test_unary_layers.py

Lines changed: 8 additions & 4 deletions
@@ -166,7 +166,8 @@ def data_format_kwargs(data_format):
     (DecomonAveragePooling2D, {}, AveragePooling2D, dict(pool_size=2)),
     (DecomonGlobalAveragePooling2D, {}, GlobalAveragePooling2D, data_format_kwargs),
     (DecomonGlobalAveragePooling2D, {}, GlobalAveragePooling2D, data_format_kwargs),
-    # (DecomonBatchNormalization, {}, BatchNormalization, dict()),  # lower_affine not ok
+    (DecomonBatchNormalization, {}, BatchNormalization, dict()),
+    (DecomonBatchNormalization, {}, BatchNormalization, dict(center=False, scale=False)),
     (DecomonConv2D, {}, Conv2D, dict(filters=2, kernel_size=2)),
     (DecomonDepthwiseConv2D, {}, DepthwiseConv2D, dict(kernel_size=2)),
     # (DecomonMax, {}, Max, dict(axis=1)),  # to be fixed
@@ -233,15 +234,18 @@ def test_decomon_unary_layer(
     # build keras layer
     layer(keras_symbolic_layer_input)

-    # randomize weights between -1 and 1 => non-zero biases
+    # randomize weights (e.g. to test non-zero biases)
     for w in layer.weights:
-        w.assign(2.0 * np.random.random(w.shape) - 1.0)
+        # positive-only weights ?
+        if "variance" in w.name:  # like BatchNormalization.moving_variance
+            w.assign(np.random.random(w.shape) + 0.5)  # between 0.5 and 1.5
+        else:
+            w.assign(2.0 * np.random.random(w.shape) - 1.0)  # between -1 and 1

     # init + build decomon layer
     output_shape = layer.output.shape[1:]
     model_output_shape = output_shape
     model_input_shape = keras_symbolic_model_input.shape[1:]
-
     decomon_layer = decomon_layer_class(
         layer=layer,
         ibp=ibp,
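
To see why the test must keep moving_variance positive (a standalone numpy check, not part of the commit): BatchNormalization divides by sqrt(moving_variance + epsilon), so a variance drawn uniformly from [-1, 1) can be negative and produce NaNs.

import numpy as np

eps = 1e-3
bad_variance = np.array([-0.7])               # what "2 * random - 1" could produce
good_variance = np.random.random(3) + 0.5     # in [0.5, 1.5), as in the updated test

print(1.0 / np.sqrt(bad_variance + eps))      # nan (square root of a negative number)
print(1.0 / np.sqrt(good_variance + eps))     # finite values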
