Acellera
diff --git a/acegen/models/common.py b/acegen/models/common.py
Lines changed: 16 additions & 0 deletions
diff --git a/acegen/models/gru.py b/acegen/models/gru.py
Lines changed: 14 additions & 19 deletions
diff --git a/acegen/rl_env/token_env.py b/acegen/rl_env/token_env.py
Lines changed: 6 additions & 0 deletions
diff --git a/acegen/rl_env/utils.py b/acegen/rl_env/utils.py
Lines changed: 44 additions & 9 deletions
diff --git a/acegen/vocabulary/vocabulary.py b/acegen/vocabulary/vocabulary.py
Lines changed: 22 additions & 9 deletions
@@ -0,0 +1,16 @@
+import torch
+
+class Temperature(torch.nn.Module):
+    """Implements a temperature layer.
+
+    Simple Module that divides the logits by a temperature value for RL inference.
+
+    The constructor takes no arguments; the temperature is passed to ``forward``
+    together with the logits on every call.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, logits: torch.Tensor, temperature: torch.tensor) -> torch.Tensor:
+        return logits / temperature
@@ -3,7 +3,9 @@
 import torch
 from tensordict.nn import TensorDictModule, TensorDictSequential
 from torchrl.envs import ExplorationType
-from torchrl.modules import ActorValueOperator, GRUModule, MLP, ProbabilisticActor
+from torchrl.modules import ActorValueOperator, GRUModule, MLP, ProbabilisticActor, MaskedCategorical
+
+from acegen.models.common import Temperature
 
 
 class Embed(torch.nn.Module):
@@ -34,22 +36,6 @@ def forward(self, inputs: torch.Tensor) -> torch.Tensor:
         return out
 
 
-class Temperature(torch.nn.Module):
-    """Implements a temperature layer.
-
-    Simple Module that applies a temperature value to the logits for RL inference.
-
-    Args:
-        temperature (float): The temperature value.
-    """
-
-    def __init__(self):
-        super().__init__()
-
-    def forward(self, logits: torch.Tensor, temperature: torch.tensor) -> torch.Tensor:
-        return logits / temperature
-
-
 def create_gru_components(
     vocabulary_size: int,
     embedding_size: int = 256,
@@ -133,6 +119,7 @@ def create_gru_actor(
     return_log_prob=True,
     in_key: str = "observation",
     out_key: str = "logits",
+    action_mask_key: str = "action_mask",
     recurrent_state: str = "recurrent_state_actor",
     python_based: bool = False,
 ):
@@ -151,6 +138,7 @@ def create_gru_actor(
             of the action.
         in_key (str): The input key name.
         out_key (str): The output key name.
+        action_mask_key (str): The action mask key name.
         recurrent_state (str): The name of the recurrent state.
         python_based (bool): Whether to use the Python-based GRU module.
             Default is False, a CuDNN-based GRU module is used.
@@ -181,11 +169,18 @@ def create_gru_actor(
         head,
     )
 
+    if action_mask_key:
+        inf_keys = {"logits": "logits", "mask": action_mask_key}
+        inf_dist = MaskedCategorical
+    else:
+        inf_keys = ["logits"]
+        inf_dist = distribution_class
+        
     actor_inference_model = ProbabilisticActor(
         module=actor_inference_model,
-        in_keys=["logits"],
+        in_keys=inf_keys,
         out_keys=["action"],
-        distribution_class=distribution_class,
+        distribution_class=inf_dist,
         return_log_prob=return_log_prob,
         default_interaction_type=ExplorationType.RANDOM,
     )
 
@@ -91,6 +91,10 @@ def __init__(
             self.num_envs, self.max_length, device=self.device, dtype=torch.bool
         )
         self.sequence_mask[:, 0] = True
+        
+        self.action_mask = torch.ones(
+            self.num_envs, self.length_vocabulary, device=self.device, dtype=torch.bool
+        )
 
         self._reset_tensordict = TensorDict(
             {
@@ -109,6 +113,7 @@ def __init__(
                 ),
                 "sequence": self.sequence.clone(),
                 "sequence_mask": self.sequence_mask.clone(),
+                "action_mask": self.action_mask.clone()
             },
             device=self.device,
             batch_size=self.batch_size,
@@ -167,6 +172,7 @@ def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
                 "observation": obs,
                 "sequence": self.sequence.clone(),
                 "sequence_mask": self.sequence_mask.clone(),
+                "action_mask": self.action_mask.clone()
             },
             device=self.device,
             batch_size=self.batch_size,
 
@@ -1,4 +1,6 @@
+import re
 import warnings
+from pathlib import Path
 from functools import partial
 from typing import Callable, Union
 
@@ -12,6 +14,7 @@
 from torchrl.envs.utils import ExplorationType, step_mdp
 
 from acegen.data.utils import smiles_to_tensordict
+from acegen.data.smiles_dataset import load_dataset
 from acegen.vocabulary import Vocabulary
 
 try:
@@ -134,12 +137,24 @@ def generate_complete_smiles(
             vocabulary=vocabulary,
             max_length=max_length,
         )
-        # Split fragments into a list if there are multiple
-        promptsmiles = promptsmiles.split(".")
-        if len(promptsmiles) == 1:
-            promptsmiles = promptsmiles[0]
-
+        # Deduce type of prompt
         if isinstance(promptsmiles, str):
+            if Path(promptsmiles).exists():
+                promptsmiles = load_dataset(promptsmiles)
+                prompt_type = "scaffold"
+            elif "." in promptsmiles:
+                promptsmiles = promptsmiles.split(".")
+                prompt_type = "fragment"
+            else:
+                prompt_type = "scaffold"
+        elif isinstance(promptsmiles, list):
+            prompt_type = "scaffold"
+        else:
+            raise ValueError(
+                "PromptSMILES must be a string or a list of strings, or a path to a file."
+                )
+
+        if prompt_type == "scaffold":
             # We are decorating a Scaffold
             PS = ScaffoldDecorator(
                 scaffold=promptsmiles,
@@ -152,7 +167,7 @@ def generate_complete_smiles(
                 return_all=True,
             )
 
-        if isinstance(promptsmiles, list):
+        if prompt_type == "fragment": 
             # We are linking fragments
             PS = FragmentLinker(
                 fragments=promptsmiles,
@@ -297,8 +312,20 @@ def generate_complete_smiles(
 
         failed_encodings = []
         if prompt:
+            print(prompt)
             if isinstance(prompt, str):
                 prompt = [prompt] * batch_size[0]
+                
+            # Add X to vocabulary for substitution
+            vocabulary.add_characters("X")
+            free_sample_tokens = torch.tensor([vocabulary["X"], vocabulary.end_token_index]).to(policy_device)
+            
+            # Create action mask of atoms
+            atom_patt = re.compile(r"(\[[^\]]*\]|Br|Cl|[a-wyzA-WYZ])")
+            atom_tokens = torch.tensor([vocabulary[t] for t in vocabulary.chars if atom_patt.fullmatch(t)]).to(policy_device)
+            atom_mask = torch.zeros(len(vocabulary)-1, dtype=torch.bool).to(policy_device)
+            atom_mask = atom_mask.scatter(0, atom_tokens, True)
+            
             # Encode the prompt(s)
             tokens = []
             for i, smi in enumerate(prompt):
@@ -353,7 +380,12 @@ def generate_complete_smiles(
                     tensordict_.set("mask", torch.ones_like(finished))
                     tensordict_.set(("next", "mask"), torch.ones_like(finished))
                     if prompt:
-                        enforce_mask = enc_prompts[:, _] != vocabulary.end_token_index
+                        enforce_mask = ~torch.isin(
+                            enc_prompts[:, _], free_sample_tokens
+                            )
+                        # Where the prompt token is X, restrict the action mask to atom tokens
+                        if any(enc_prompts[:, _] == vocabulary["X"]):
+                            tensordict_["action_mask"][enc_prompts[:, _] == vocabulary["X"]] = atom_mask
 
                     # Define temperature tensor
                     tensordict_.set("temperature", initial_temperature * temperature)
@@ -365,17 +397,20 @@ def generate_complete_smiles(
 
                     # Enforce prompt
                     if prompt:
-                        new_action = (~enforce_mask * tensordict_.get("action")) + (
+                        prompt_action = (~enforce_mask * tensordict_.get("action")) + (
                             enforce_mask * enc_prompts[:, _]
                         ).long()
-                        tensordict_.set("action", new_action)
+                        tensordict_.set("action", prompt_action)
 
                     # Step forward in the environment
                     tensordict_ = environment.step(tensordict_)
 
                     # Mask out finished environments
                     if finished.any():
                         tensordict_.masked_fill_(finished.squeeze(), 0)
+                        # masked_fill_ above also zeroed action_mask; reset finished rows to 1
+                        tensordict_["action_mask"][finished.squeeze()] = 1
+                        tensordict_["next"]["action_mask"][finished.squeeze()] = 1
 
                     # Extend list of tensordicts
                     tensordicts.append(tensordict_.clone())
 
@@ -44,7 +44,7 @@ def __init__(
         self.special_tokens = [end_token, start_token]
         self.special_tokens += list(set(special_tokens))
         self.additional_chars = set()
-        self.chars = self.special_tokens
+        self.chars = deepcopy(self.special_tokens)
         self.vocab_size = len(self.chars)
         self.vocab = dict(zip(self.chars, range(len(self.chars))))
         self.reversed_vocab = {v: k for k, v in self.vocab.items()}
@@ -104,26 +104,39 @@ def decode(self, encoded_string, ignore_indices=()):
         return string
 
     def add_characters(self, chars):
-        """Adds characters to the vocabulary.
+        """Adds characters to the end of the vocabulary.
 
         Args:
             chars (list[str]): A list of characters to add to the vocabulary.
         """
+        additional_chars = set()
         for char in chars:
             if char not in self.chars:
-                self.additional_chars.add(char)
-        char_list = list(self.additional_chars)
-        char_list.sort()
-        self.chars = self.special_tokens + char_list
-        self.vocab_size = len(self.chars)
-        self.vocab = dict(zip(self.chars, range(len(self.chars))))
+                additional_chars.add(char)
+        additional_chars = list(additional_chars)
+        additional_chars.sort()
+        n_prev = len(self.chars)
+        n_new = n_prev + len(additional_chars)
+        self.chars += additional_chars
+        self.additional_chars.update(additional_chars)
+        self.vocab.update(dict(zip(additional_chars, range(n_prev, n_new))))
         self.reversed_vocab = {v: k for k, v in self.vocab.items()}
+        self.vocab_size = len(self.chars)
 
     def __len__(self):
         return len(self.chars)
 
     def __str__(self):
-        return "Vocabulary containing {} tokens: {}".format(len(self), self.chars)
+        return f"Vocabulary(len={len(self)}, tokens={self.vocab})"
+    
+    def __repr__(self):
+        return f"Vocabulary(len={len(self)}, tokens={self.vocab})"
+    
+    def __getitem__(self, key):
+        if isinstance(key, int):
+            return self.reversed_vocab[key]
+        if isinstance(key, str):
+            return self.vocab[key]
 
     @classmethod
     def create_from_strings(