Commit 4338e6d

add temperature sampling to grus
1 parent 2d451f1 commit 4338e6d

File tree

5 files changed (+322 −53 lines)

acegen/models/gru.py

Lines changed: 29 additions & 6 deletions
@@ -34,6 +34,22 @@ def forward(self, inputs: torch.Tensor) -> torch.Tensor:
         return out


+class Temperature(torch.nn.Module):
+    """Implements a temperature layer.
+
+    Simple Module that applies a temperature value to the logits for RL inference.
+
+    Args:
+        temperature (torch.Tensor): The temperature value (passed to ``forward``, not ``__init__``).
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, logits: torch.Tensor, temperature: torch.Tensor) -> torch.Tensor:
+        return logits / temperature
+
+
 def create_gru_components(
     vocabulary_size: int,
     embedding_size: int = 256,
@@ -47,7 +63,7 @@ def create_gru_components(
     recurrent_state: str = "recurrent_state",
     python_based: bool = False,
 ):
-    """Create all GRU model components: embedding, GRU, and head.
+    """Create all GRU model components: embedding, GRU, head and temperature.

     These modules handle the case of having a time dimension (RL training)
     and not having it (RL inference).
@@ -97,8 +113,13 @@ def create_gru_components(
         in_keys=["features"],
         out_keys=[out_key],
     )
+    temperature = TensorDictModule(
+        Temperature(),
+        in_keys=[out_key, "temperature"],
+        out_keys=[out_key],
+    )

-    return embedding_module, gru_module, head
+    return embedding_module, gru_module, head, temperature


 def create_gru_actor(
@@ -139,7 +160,7 @@ def create_gru_actor(
         training_actor, inference_actor = create_gru_actor(10)
         ```
     """
-    embedding, gru, head = create_gru_components(
+    embedding, gru, head, temperature = create_gru_components(
         vocabulary_size,
         embedding_size,
         hidden_size,
@@ -153,7 +174,7 @@ def create_gru_actor(
         python_based,
     )

-    actor_inference_model = TensorDictSequential(embedding, gru, head)
+    actor_inference_model = TensorDictSequential(embedding, gru, head, temperature)
     actor_training_model = TensorDictSequential(
         embedding,
         gru.set_recurrent_mode(True),
@@ -217,7 +238,7 @@ def create_gru_critic(
     output_size = vocabulary_size if critic_value_per_action else 1
     out_key = "action_value" if critic_value_per_action else "state_value"

-    embedding, gru, head = create_gru_components(
+    embedding, gru, head, _ = create_gru_components(
         vocabulary_size,
         embedding_size,
         hidden_size,
@@ -281,7 +302,7 @@ def create_gru_actor_critic(
         inference_critic) = create_gru_actor_critic(10)
         ```
     """
-    embedding, gru, actor_head = create_gru_components(
+    embedding, gru, actor_head, temperature = create_gru_components(
         vocabulary_size,
         embedding_size,
         hidden_size,
@@ -295,6 +316,8 @@ def create_gru_actor_critic(
         python_based,
     )

+    actor_head = TensorDictSequential(actor_head, temperature)
+
     actor_head = ProbabilisticActor(
         module=actor_head,
         in_keys=["logits"],
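
For context, the new `Temperature` module simply divides the logits by the supplied temperature before they reach the `ProbabilisticActor`: values below 1 sharpen the sampling distribution and values above 1 flatten it. A minimal illustrative sketch of that effect (not part of the commit):

```python
# Illustrative sketch: effect of dividing logits by a temperature before softmax.
import torch

logits = torch.tensor([2.0, 1.0, 0.1])

for temperature in (0.5, 1.0, 2.0):
    probs = torch.softmax(logits / temperature, dim=-1)
    print(f"T={temperature}: {probs.tolist()}")

# T < 1 concentrates probability on the highest logit (greedier sampling);
# T > 1 spreads probability more evenly (more exploration).
```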

acegen/rl_env/token_env.py

Lines changed: 8 additions & 18 deletions
@@ -2,12 +2,7 @@

 import torch
 from tensordict.tensordict import TensorDict, TensorDictBase
-from torchrl.data import (
-    Composite,
-    Categorical,
-    OneHotDiscreteTensorSpec,
-    Unbounded,
-)
+from torchrl.data import Categorical, Composite, OneHotDiscreteTensorSpec, Unbounded
 from torchrl.data.utils import DEVICE_TYPING
 from torchrl.envs import EnvBase

@@ -109,6 +104,9 @@ def __init__(
                 "terminated": torch.zeros(
                     self.num_envs, 1, device=self.device, dtype=torch.bool
                 ),
+                "temperature": torch.ones(
+                    self.num_envs, 1, device=self.device, dtype=torch.float32
+                ),
                 "sequence": self.sequence.clone(),
                 "sequence_mask": self.sequence_mask.clone(),
             },
@@ -181,9 +179,7 @@ def _set_seed(self, seed: Optional[int] = -1) -> None:

     def _set_specs(self) -> None:
         obs_spec = (
-            OneHotDiscreteTensorSpec
-            if self.one_hot_obs_encoding
-            else Categorical
+            OneHotDiscreteTensorSpec if self.one_hot_obs_encoding else Categorical
         )
         self.observation_spec = Composite(
             {
@@ -220,9 +216,7 @@ def _set_specs(self) -> None:
             }
         ).expand(self.num_envs)
         action_spec = (
-            OneHotDiscreteTensorSpec
-            if self.one_hot_action_encoding
-            else Categorical
+            OneHotDiscreteTensorSpec if self.one_hot_action_encoding else Categorical
         )
         self.action_spec = Composite(
             {
@@ -246,12 +240,8 @@ def _set_specs(self) -> None:
         self.done_spec = (
             Composite(
                 {
-                    "done": Categorical(
-                        n=2, dtype=torch.bool, device=self.device
-                    ),
-                    "truncated": Categorical(
-                        n=2, dtype=torch.bool, device=self.device
-                    ),
+                    "done": Categorical(n=2, dtype=torch.bool, device=self.device),
+                    "truncated": Categorical(n=2, dtype=torch.bool, device=self.device),
                     "terminated": Categorical(
                         n=2, dtype=torch.bool, device=self.device
                     ),
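
The environment now also seeds every reset observation with a per-env `temperature` entry initialised to ones, so downstream policies that read the key see plain, unscaled logits unless the caller overrides it. A small hypothetical sketch of that default (shapes assumed, not taken from the repo):

```python
# Hypothetical sketch of the default "temperature" entry added at reset.
import torch
from tensordict import TensorDict

num_envs = 4
reset_td = TensorDict(
    {
        "observation": torch.zeros(num_envs, dtype=torch.int64),
        "temperature": torch.ones(num_envs, 1, dtype=torch.float32),
    },
    batch_size=[num_envs],
)
print(reset_td["temperature"].squeeze(-1))  # tensor([1., 1., 1., 1.]) -> logits left unchanged
```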

acegen/rl_env/utils.py

Lines changed: 6 additions & 0 deletions
@@ -37,6 +37,7 @@ def generate_complete_smiles(
     prompt: Union[str, list] = None,
     end_of_episode_key: str = "done",
     exploration_type: ExplorationType = ExplorationType.RANDOM,
+    temperature: float | torch.Tensor = 1.0,
     promptsmiles: str = None,
     promptsmiles_optimize: bool = True,
     promptsmiles_shuffle: bool = True,
@@ -68,6 +69,7 @@ def generate_complete_smiles(
             indicates the end of an episode. Defaults to "done".
         exploration_type (ExplorationType, optional): Exploration type to use. Defaults to
             :class:`~torchrl.envs.utils.ExplorationType.RANDOM`.
+        temperature (float, optional): Temperature to use when sampling actions from the policy.
         promptsmiles (str, optional): SMILES string of scaffold with attachment points or fragments separated
             by "." with one attachment point each.
         promptsmiles_optimize (bool, optional): Optimize the prompt for the model being used.
@@ -335,6 +337,7 @@ def generate_complete_smiles(

     initial_observation = initial_observation.to(policy_device)
     tensordict_ = initial_observation
+    initial_temperature = tensordict_["temperature"].clone()
     finished = (
         torch.zeros(batch_size, dtype=torch.bool).unsqueeze(-1).to(policy_device)
     )
@@ -352,6 +355,9 @@ def generate_complete_smiles(
             if prompt:
                 enforce_mask = enc_prompts[:, _] != vocabulary.end_token_index

+            # Define temperature tensor
+            tensordict_.set("temperature", initial_temperature * temperature)
+
             # Execute policy
             tensordict_ = tensordict_.to(policy_device)
             policy_sample(tensordict_)
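
At every generation step, the ones-tensor cloned at reset is multiplied by the user-supplied `temperature`, so the argument can be a plain float (same temperature for every environment) or a per-env tensor. A minimal sketch of that broadcast, with hypothetical shapes:

```python
# Minimal sketch (hypothetical shapes): broadcasting the user-supplied temperature
# over the per-env ones-tensor stored at reset.
import torch

num_envs = 3
initial_temperature = torch.ones(num_envs, 1)  # as cloned from the reset tensordict

scalar = initial_temperature * 0.7                                    # one value for all envs
per_env = initial_temperature * torch.tensor([[0.5], [1.0], [2.0]])  # one value per env

print(scalar.squeeze(-1))   # tensor([0.7000, 0.7000, 0.7000])
print(per_env.squeeze(-1))  # tensor([0.5000, 1.0000, 2.0000])
```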
