From 70bc864156e12327eef979c964528e185ded4aca Mon Sep 17 00:00:00 2001
From: muupan
Date: Wed, 26 May 2021 23:55:11 +0900
Subject: [PATCH 1/3] Add a test of ACER with fixed covariance, which fails for now

---
 tests/agents_tests/test_acer.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/tests/agents_tests/test_acer.py b/tests/agents_tests/test_acer.py
index 25fbebc2d..9a0aacf40 100644
--- a/tests/agents_tests/test_acer.py
+++ b/tests/agents_tests/test_acer.py
@@ -15,7 +15,11 @@
 from pfrl.experiments.evaluator import run_evaluation_episodes
 from pfrl.experiments.train_agent_async import train_agent_async
 from pfrl.nn import ConcatObsAndAction
-from pfrl.policies import GaussianHeadWithDiagonalCovariance, SoftmaxCategoricalHead
+from pfrl.policies import (
+    GaussianHeadWithDiagonalCovariance,
+    GaussianHeadWithFixedCovariance,
+    SoftmaxCategoricalHead,
+)
 from pfrl.q_functions import DiscreteActionValueHead
 from pfrl.replay_buffers import EpisodicReplayBuffer
 
@@ -263,6 +267,15 @@ def test_compute_loss_with_kl_constraint_gaussian():
     _test_compute_loss_with_kl_constraint(policy)
 
 
+def test_compute_loss_with_kl_constraint_gaussian_with_fixed_covariance():
+    action_size = 3
+    policy = nn.Sequential(
+        nn.Linear(1, action_size),
+        GaussianHeadWithFixedCovariance(),
+    )
+    _test_compute_loss_with_kl_constraint(policy)
+
+
 def test_compute_loss_with_kl_constraint_softmax():
     n_actions = 3
     policy = nn.Sequential(

From 711bff6b7943974c9ce6c34955ef93291133eb44 Mon Sep 17 00:00:00 2001
From: muupan
Date: Wed, 26 May 2021 23:57:07 +0900
Subject: [PATCH 2/3] Support fixed covariance by filtering out non-learnable params

Loss function used in the test is changed because maximizing the entropy
has no effect for Gaussian with fixed covariance
---
 pfrl/agents/acer.py             | 9 ++++++++-
 tests/agents_tests/test_acer.py | 8 +++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/pfrl/agents/acer.py b/pfrl/agents/acer.py
index 47ef07d5d..1f80997e3 100644
--- a/pfrl/agents/acer.py
+++ b/pfrl/agents/acer.py
@@ -170,12 +170,19 @@ def evaluator(action):
 
 
 def get_params_of_distribution(distrib):
+    """Returns learnable parameters of a given distribution."""
     if isinstance(distrib, torch.distributions.Independent):
         return get_params_of_distribution(distrib.base_dist)
     elif isinstance(distrib, torch.distributions.Categorical):
+        assert distrib._param.requires_grad
         return (distrib._param,)
     elif isinstance(distrib, torch.distributions.Normal):
-        return distrib.loc, distrib.scale
+        # Either loc or scale must be learnable
+        params = tuple(
+            param for param in [distrib.loc, distrib.scale] if param.requires_grad
+        )
+        assert len(params) > 0
+        return params
     else:
         raise NotImplementedError("{} is not supported by ACER".format(type(distrib)))
 
diff --git a/tests/agents_tests/test_acer.py b/tests/agents_tests/test_acer.py
index 9a0aacf40..8f9db360a 100644
--- a/tests/agents_tests/test_acer.py
+++ b/tests/agents_tests/test_acer.py
@@ -295,11 +295,13 @@ def _test_compute_loss_with_kl_constraint(base_policy):
     with torch.no_grad():
         # Compute KL divergence against the original distribution
         base_distrib = base_policy(x)
+        some_action = base_distrib.sample()
 
     def base_loss_func(distrib):
-        # Any loss that tends to increase KL divergence should be ok
-        kl = torch.distributions.kl_divergence(base_distrib, distrib)
-        return -(kl + distrib.entropy())
+        # Any loss that tends to increase KL divergence should be ok.
+        # Here I choose to minimize the log probability of some fixed action.
+        # The loss is clipped to avoid NaN.
+        return torch.max(distrib.log_prob(some_action), torch.as_tensor(-20.))
 
     def compute_kl_after_update(loss_func, n=100):
         policy = copy.deepcopy(base_policy)

From 5e99719f2dc381e1effb3d1543f1f44a90aadd94 Mon Sep 17 00:00:00 2001
From: muupan
Date: Thu, 27 May 2021 00:36:12 +0900
Subject: [PATCH 3/3] Black

---
 tests/agents_tests/test_acer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/agents_tests/test_acer.py b/tests/agents_tests/test_acer.py
index 8f9db360a..721290042 100644
--- a/tests/agents_tests/test_acer.py
+++ b/tests/agents_tests/test_acer.py
@@ -301,7 +301,7 @@ def base_loss_func(distrib):
         # Any loss that tends to increase KL divergence should be ok.
         # Here I choose to minimize the log probability of some fixed action.
         # The loss is clipped to avoid NaN.
-        return torch.max(distrib.log_prob(some_action), torch.as_tensor(-20.))
+        return torch.max(distrib.log_prob(some_action), torch.as_tensor(-20.0))
 
     def compute_kl_after_update(loss_func, n=100):
         policy = copy.deepcopy(base_policy)
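Not part of the patch: a minimal standalone PyTorch sketch, outside PFRL, of why the test loss had to change. It builds a distribution of roughly the shape GaussianHeadWithFixedCovariance is expected to produce (learnable mean, fixed scale) and shows that the entropy carries no gradient while the clipped log probability of a fixed action still does.

import torch
from torch import distributions

# Gaussian with a learnable mean and a fixed (non-learnable) covariance.
loc = torch.zeros(3, requires_grad=True)
scale = torch.ones(3)
distrib = distributions.Independent(distributions.Normal(loc, scale), 1)

# The entropy of a Gaussian depends only on its scale, so with a fixed
# covariance there is nothing for an entropy bonus to optimize.
print(distrib.entropy().requires_grad)  # False

# The log probability of a fixed action still depends on the learnable mean,
# so minimizing it (clipped, as in the patch, to avoid NaN) moves the
# distribution and therefore increases the KL divergence from the original.
some_action = distrib.sample()
loss = torch.max(distrib.log_prob(some_action), torch.as_tensor(-20.0))
loss.backward()
print(loc.grad)  # gradient flows to the learnable mean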