icenet-mp/icenet_mp/callbacks/ema_weight_averaging_callback.py at 4dbf8d76bc56d0585f7f1efb1699f17f5e3e5cd2 · alan-turing-institute/icenet-mp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from typing import Any

from lightning.pytorch import LightningModule, Trainer
from lightning.pytorch.callbacks import WeightAveraging
from torch.optim.swa_utils import get_ema_multi_avg_fn


class EMAWeightAveragingCallback(WeightAveraging):
    """Keep an Exponential Moving Average (EMA) copy of the model weights.

    The averaged model is refreshed on a configurable cadence: every
    ``every_n_epochs`` epochs and/or every ``every_n_steps`` steps, as decided
    by :meth:`should_update`. Modules without any parameters are skipped.
    """

    def __init__(
        self,
        *,
        decay_rate: float,
        every_n_epochs: int | None = None,
        every_n_steps: int | None = None,
    ) -> None:
        """Configure the EMA averaging function and the update cadence.

        Args:
            decay_rate: Decay rate handed to ``get_ema_multi_avg_fn``.
            every_n_epochs: Update when the epoch index is a non-zero multiple
                of this value. ``None`` (or ``0``) disables epoch-based updates.
            every_n_steps: Update when the step index is a non-zero multiple
                of this value. ``None`` (or ``0``) disables step-based updates.

        Note:
            If both intervals are left unset, ``should_update`` always returns
            ``False``.

        """
        ema_update_fn = get_ema_multi_avg_fn(decay_rate)
        super().__init__(multi_avg_fn=ema_update_fn, use_buffers=True)
        self.every_n_epochs = every_n_epochs
        self.every_n_steps = every_n_steps

    def on_train_batch_end(
        self, trainer: Trainer, pl_module: LightningModule, *args: Any, **kwargs: Any
    ) -> None:
        """Run the parent's batch-end hook unless the module has no parameters."""
        # Presumably there is nothing to average for a parameterless module.
        if next(pl_module.parameters(), None) is None:
            return
        super().on_train_batch_end(trainer, pl_module, *args, **kwargs)

    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
        """Run the parent's epoch-end hook unless the module has no parameters."""
        # Presumably there is nothing to average for a parameterless module.
        if next(pl_module.parameters(), None) is None:
            return
        super().on_train_epoch_end(trainer, pl_module)

    def should_update(
        self, step_idx: int | None = None, epoch_idx: int | None = None
    ) -> bool:
        """Decide whether the averaged model should be updated now.

        The epoch schedule is consulted first, then the step schedule. A
        schedule only applies when both its interval and the matching index
        are truthy, so an index of ``0`` (or ``None``) never triggers an
        update through that schedule.

        Args:
            step_idx: Index of the step that just finished, if any.
                NOTE(review): presumably zero-based, as supplied by the
                parent callback; confirm against Lightning.
            epoch_idx: Index of the epoch that just finished, if any.

        Returns:
            ``True`` when the first applicable index is a multiple of its
            interval, ``False`` otherwise or when no schedule applies.

        """
        schedules = (
            (self.every_n_epochs, epoch_idx),
            (self.every_n_steps, step_idx),
        )
        for interval, index in schedules:
            if interval and index:
                return index % interval == 0
        return False