fix gaussian policy double squashing (#155)

cpnota · web-flow · commit 2010acaab129 · 2020-07-04T14:48:10.000-04:00
* fix gaussian policy double squashing: pass the raw mean outputs to self._squash instead of applying torch.tanh to them first

* fix gaussian_test: update the expected mean values in test_eval to match the policy's new output
diff --git a/all/policies/gaussian.py b/all/policies/gaussian.py
@@ -48,7 +48,7 @@ def __init__(self, model, space):
     def forward(self, state):
         outputs = super().forward(state)
         action_dim = outputs.shape[1] // 2
-        means = self._squash(torch.tanh(outputs[:, 0:action_dim]))
+        means = self._squash(outputs[:, 0:action_dim])
 
         if not self.training:
             return means
diff --git a/all/policies/gaussian_test.py b/all/policies/gaussian_test.py
@@ -59,10 +59,10 @@ def test_converge(self):
     def test_eval(self):
         state = State(torch.randn(1, STATE_DIM))
         dist = self.policy.no_grad(state)
-        tt.assert_almost_equal(dist.mean, torch.tensor([[-0.229, 0.43, -0.058]]), decimal=3)
+        tt.assert_almost_equal(dist.mean, torch.tensor([[-0.233, 0.459, -0.058]]), decimal=3)
         tt.assert_almost_equal(dist.entropy(), torch.tensor([4.251]), decimal=3)
         best = self.policy.eval(state)
-        tt.assert_almost_equal(best, torch.tensor([[-0.229, 0.43, -0.058]]), decimal=3)
+        tt.assert_almost_equal(best, torch.tensor([[-0.233, 0.459, -0.058]]), decimal=3)
 
 
 if __name__ == '__main__':