Skip to content

Commit 1fdfc74

Browse files
author
Jan Michelfeit
committed
#625 use RunningNorm instead of RunningMeanAndVar
1 parent 50ec092 commit 1fdfc74

File tree

4 files changed

+9
-84
lines changed

4 files changed

+9
-84
lines changed

src/imitation/policies/replay_buffer_wrapper.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from imitation.rewards.reward_function import RewardFn
1111
from imitation.util import util
12+
from imitation.util.networks import RunningNorm
1213

1314

1415
def _samples_to_reward_fn_input(
@@ -148,7 +149,7 @@ def __init__(
148149
self.sample_count = 0
149150
self.k = k
150151
# TODO support n_envs > 1
151-
self.entropy_stats = util.RunningMeanAndVar(shape=(1,))
152+
self.entropy_stats = RunningNorm(1)
152153
self.entropy_as_reward_samples = entropy_as_reward_samples
153154

154155
def sample(self, *args, **kwargs):
@@ -173,10 +174,8 @@ def sample(self, *args, **kwargs):
173174
self.k,
174175
)
175176

176-
# Normalize to have mean of 0 and standard deviation of 1
177-
self.entropy_stats.update(entropies)
178-
entropies -= self.entropy_stats.running_mean
179-
entropies /= self.entropy_stats.std
177+
# Normalize to have mean of 0 and standard deviation of 1 according to running stats
178+
entropies = self.entropy_stats.forward(entropies)
180179

181180
entropies_th = (
182181
util.safe_to_tensor(entropies)

src/imitation/util/networks.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,12 +121,12 @@ def update_stats(self, batch: th.Tensor) -> None:
121121
tot_count = self.count + batch_count
122122
self.running_mean += delta * batch_count / tot_count
123123

124-
self.running_var *= self.count
125-
self.running_var += batch_var * batch_count
126-
self.running_var += th.square(delta) * self.count * batch_count / tot_count
127-
self.running_var /= tot_count
124+
m_a = self.running_var * self.count
125+
m_b = batch_var * batch_count
126+
M2 = m_a + m_b + th.square(delta) * self.count * batch_count / tot_count
127+
self.running_var = M2 / tot_count
128128

129-
self.count += batch_count
129+
self.count = tot_count
130130

131131

132132
class EMANorm(BaseNorm):

src/imitation/util/util.py

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -361,46 +361,6 @@ def get_first_iter_element(iterable: Iterable[T]) -> Tuple[T, Iterable[T]]:
361361
return first_element, return_iterable
362362

363363

364-
class RunningMeanAndVar:
365-
"""Stores a running mean and variance using Welford's algorithm."""
366-
367-
def __init__(
368-
self,
369-
shape: Tuple[int, ...] = (),
370-
device: Optional[str] = None,
371-
) -> None:
372-
"""Initialize blank mean, variance, count."""
373-
self.running_mean = th.zeros(shape, device=device)
374-
self.M2 = th.zeros(shape, device=device)
375-
self.count = 0
376-
377-
def update(self, batch: th.Tensor) -> None:
378-
"""Update the mean and variance with a batch `x`."""
379-
with th.no_grad():
380-
batch_mean = th.mean(batch, dim=0)
381-
batch_var = th.var(batch, dim=0, unbiased=False)
382-
batch_count = batch.shape[0]
383-
384-
delta = batch_mean - self.running_mean
385-
tot_count = self.count + batch_count
386-
self.running_mean += delta * batch_count / tot_count
387-
388-
self.M2 += batch_var * batch_count
389-
self.M2 += th.square(delta) * self.count * batch_count / tot_count
390-
391-
self.count += batch_count
392-
393-
@property
394-
def var(self) -> th.Tensor:
395-
"""Returns the unbiased estimate of the variances."""
396-
return self.M2 / (self.count - 1)
397-
398-
@property
399-
def std(self) -> th.Tensor:
400-
"""Returns the unbiased estimate of the standard deviations."""
401-
return np.sqrt(self.var)
402-
403-
404364
def compute_state_entropy(
405365
obs: th.Tensor,
406366
all_obs: th.Tensor,

tests/util/test_util.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -120,40 +120,6 @@ def test_tensor_iter_norm():
120120
util.tensor_iter_norm(tensor_list, ord=0.0)
121121

122122

123-
def test_RunningMeanAndVar():
124-
running_stats = util.RunningMeanAndVar(shape=(3, 4))
125-
data = th.normal(mean=10 * th.ones(size=(20, 3, 4), dtype=th.double))
126-
127-
first_half = data[:10]
128-
running_stats.update(first_half)
129-
np.testing.assert_allclose(
130-
running_stats.running_mean,
131-
first_half.mean(dim=0),
132-
atol=1e-5,
133-
rtol=1e-4,
134-
)
135-
np.testing.assert_allclose(
136-
running_stats.var,
137-
first_half.var(dim=0),
138-
atol=1e-5,
139-
rtol=1e-4,
140-
)
141-
142-
running_stats.update(data[10:])
143-
np.testing.assert_allclose(
144-
running_stats.running_mean,
145-
data.mean(dim=0),
146-
atol=1e-5,
147-
rtol=1e-4,
148-
)
149-
np.testing.assert_allclose(
150-
running_stats.var,
151-
data.var(dim=0),
152-
atol=1e-5,
153-
rtol=1e-4,
154-
)
155-
156-
157123
def test_compute_state_entropy_1d():
158124
all_obs = th.arange(10, dtype=th.float).unsqueeze(1)
159125
obs = all_obs[4:6]

0 commit comments

Comments (0)