#625 fix entropy_reward.py

Jan Michelfeit · Jan Michelfeit · commit ec7b853cf0c9 · 2022-12-01T16:26:16.000+01:00
diff --git a/src/imitation/algorithms/pebble/entropy_reward.py b/src/imitation/algorithms/pebble/entropy_reward.py
@@ -35,13 +35,14 @@ def __call__(
 
         all_observations = self.replay_buffer_view.observations
         # ReplayBuffer sampling flattens the venv dimension, let's adapt to that
-        all_observations = all_observations.reshape((-1, *self.obs_shape))
+        all_observations = all_observations.reshape((-1, *state.shape[1:]))  # TODO #625: fix self.obs_shape
+        # TODO #625: deal with the conversion back and forth between np and torch
         entropies = util.compute_state_entropy(
-            state,
-            all_observations,
+            th.tensor(state),
+            th.tensor(all_observations),
             self.nearest_neighbor_k,
         )
-        normalized_entropies = self.entropy_stats.forward(th.as_tensor(entropies))
+        normalized_entropies = self.entropy_stats.forward(entropies)
         return normalized_entropies.numpy()
 
     def __getstate__(self):