mila-iqia
diff --git a/‎experiments/active_learning_si_sw/excise_and_repaint_config.yaml‎
Lines changed: 59 additions & 0 deletions b/‎experiments/active_learning_si_sw/excise_and_repaint_config.yaml‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎src/diffusion_for_multi_scale_molecular_dynamics/active_learning_loop/configuration_parsing.py‎
Lines changed: 194 additions & 0 deletions b/‎src/diffusion_for_multi_scale_molecular_dynamics/active_learning_loop/configuration_parsing.py‎
Lines changed: 194 additions & 0 deletions
diff --git a/‎src/diffusion_for_multi_scale_molecular_dynamics/active_learning_loop/sample_maker/excise_and_repaint_sample_maker.py‎
Lines changed: 66 additions & 1 deletion b/‎src/diffusion_for_multi_scale_molecular_dynamics/active_learning_loop/sample_maker/excise_and_repaint_sample_maker.py‎
Lines changed: 66 additions & 1 deletion
@@ -0,0 +1,59 @@
+#================================================================================
+# Configuration file for an active learning run
+#================================================================================
+exp_name: excise_and_repaint_sample_maker
+
+seed: 42
+
+elements: [Si]
+
+uncertainty_thresholds: [0.001, 0.0001, 0.00001, 0.000001]
+
+flare:
+  cutoff: 5.0
+  n_radial: 12
+  lmax: 3
+  initial_sigma: 1000.0
+  initial_sigma_e: 1.0
+  initial_sigma_f: 0.050
+  initial_sigma_s: 1.0
+  variance_type: local
+
+  flare_optimizer:
+    optimize_on_the_fly: False
+    # optimization_method: "nelder-mead"
+    # max_optimization_iterations: 10
+    # optimize_sigma: False
+    # optimize_sigma_e: False
+    # optimize_sigma_f: False
+    # optimize_sigma_s: False
+
+oracle:
+  name: stillinger_weber
+  sw_coeff_filename: Si.sw
+
+sampling:
+  algorithm: excise_and_repaint
+  sample_box_strategy: fixed
+  sample_box_size: [ 10.86, 10.86, 10.86 ]
+  sample_edit_radius: 5.0 # in Angstrom: generated atoms within this radius from the central atom will be removed.
+  excision:
+    algorithm: spherical_cutoff
+    radial_cutoff: 5.0  # radial cutoff in Angstrom
+  noise:
+    total_time_steps: 500
+    sigma_min: 0.0001
+    sigma_max: 0.2
+    schedule_type: linear
+    corrector_step_epsilon: 2.5e-8
+  repaint_generator:
+    number_of_atoms: 64
+    number_of_corrector_steps: 2
+    one_atom_type_transition_per_step: False
+    atom_type_greedy_sampling: False
+    atom_type_transition_in_corrector: False
+    record_samples: False
+
+lammps:
+  mpi_processors: 4
+  openmp_threads: 2
@@ -0,0 +1,194 @@
+from typing import Any, AnyStr, Dict, List, Optional, Tuple, Union
+
+import torch
+
+from diffusion_for_multi_scale_molecular_dynamics.active_learning_loop.atom_selector.atom_selector_factory import \
+    create_atom_selector_parameters
+from diffusion_for_multi_scale_molecular_dynamics.active_learning_loop.excisor.excisor_factory import \
+    create_excisor_parameters
+from diffusion_for_multi_scale_molecular_dynamics.active_learning_loop.sample_maker.base_sample_maker import \
+    BaseSampleMaker
+from diffusion_for_multi_scale_molecular_dynamics.active_learning_loop.sample_maker.sample_maker_factory import (
+    create_sample_maker, create_sample_maker_parameters)
+from diffusion_for_multi_scale_molecular_dynamics.generators.predictor_corrector_axl_generator import \
+    PredictorCorrectorSamplingParameters
+from diffusion_for_multi_scale_molecular_dynamics.models.score_networks import \
+    ScoreNetwork
+from diffusion_for_multi_scale_molecular_dynamics.noise_schedulers.noise_parameters import \
+    NoiseParameters
+from diffusion_for_multi_scale_molecular_dynamics.sample_diffusion import \
+    get_axl_network
+
+
+def get_repaint_parameters(
+    sampling_dictionary: Dict[AnyStr, Any],
+    element_list: List[str],
+    path_to_score_network_checkpoint: Optional[str] = None,
+) -> Tuple[
+    Union[NoiseParameters, None],
+    Union[PredictorCorrectorSamplingParameters, None],
+    Union[ScoreNetwork, None],
+    str,
+]:
+    """Get repaint parameters.
+
+    This convenience method is responsible for extracting the relevant configuration objects in the
+    case that the sample maker algorithm is "Excise and Repaint", and to return a "None" default for
+    these configuration objects if a different algorithm is used.
+
+    Args:
+        sampling_dictionary: Dictionary of sampling parameters, as read in from a yaml configuration file.
+        element_list: List of element names.
+        path_to_score_network_checkpoint: Path to score network checkpoint.
+
+    Returns:
+        noise_parameters: a NoiseParameters object if the config is present, otherwise None.
+        sampling_parameters: a PredictorCorrectorSamplingParameters object if the config is present, otherwise None.
+        axl_network: a Score Network object to draw constrained samples if the config is present, otherwise None.
+        device: a string indicating which device should be used: either cpu or cuda.
+    """
+    algorithm = sampling_dictionary["algorithm"]
+    # Default values
+    device = "cpu"
+    axl_network = None
+    noise_parameters = None
+    sampling_parameters = None
+    if algorithm != "excise_and_repaint":
+        return noise_parameters, sampling_parameters, axl_network, device
+
+    if torch.cuda.is_available():
+        device = "cuda"
+    assert (
+        path_to_score_network_checkpoint is not None
+    ), "A path to a valid score network checkpoint must be provided to use 'excise_and_repaint'."
+    axl_network = get_axl_network(path_to_score_network_checkpoint)
+
+    assert (
+        "noise" in sampling_dictionary
+    ), "A 'noise' configuration must be defined in the 'sampling' field in order to use 'excise_and_repaint'."
+
+    noise_dictionary = sampling_dictionary["noise"]
+    noise_parameters = NoiseParameters(**noise_dictionary)
+
+    assert "repaint_generator" in sampling_dictionary, (
+        "A 'repaint_generator' configuration must be defined in the 'sampling' field in order to use "
+        "'excise_and_repaint'."
+    )
+
+    sampling_generator_dictionary = sampling_dictionary["repaint_generator"]
+
+    assert "algorithm" not in sampling_generator_dictionary, (
+        "Do not specify the 'algorithm' for the repaint generator: only the predictor_corrector repaint generator "
+        "algorithm is valid and will be automatically selected."
+    )
+    sampling_generator_dictionary["algorithm"] = "predictor_corrector"
+
+    assert "num_atom_types" not in sampling_generator_dictionary, (
+        "Do not specify the 'num_atom_types' for the repaint generator: the value will be inferred from "
+        "the element list."
+    )
+    sampling_generator_dictionary["num_atom_types"] = len(element_list)
+
+    assert "number_of_samples" not in sampling_generator_dictionary, (
+        "Do not specify the 'number_of_samples' for the repaint generator: the value will be inferred from "
+        "the 'number_of_samples_per_substructure' sampling field."
+    )
+    sampling_generator_dictionary["number_of_samples"] = sampling_dictionary.get(
+        "number_of_samples_per_substructure", 1
+    )
+
+    assert (
+        "use_fixed_lattice_parameters" not in sampling_generator_dictionary
+        and "cell_dimensions" not in sampling_generator_dictionary
+    ), (
+        "Do not specify 'use_fixed_lattice_parameters' or 'cell_dimensions' for the repaint generator: these values "
+        "will be inferred from the sampling field."
+    )
+    sampling_generator_dictionary["use_fixed_lattice_parameters"] = (
+        sampling_dictionary.get("sample_box_strategy", "fixed")
+    )
+
+    if sampling_generator_dictionary["use_fixed_lattice_parameters"] == "fixed":
+        sampling_generator_dictionary["cell_dimensions"] = sampling_dictionary[
+            "sample_box_size"
+        ]
+
+    sampling_parameters = PredictorCorrectorSamplingParameters(
+        **sampling_generator_dictionary
+    )
+
+    return noise_parameters, sampling_parameters, axl_network, device
+
+
+def get_sample_maker_from_configuration(
+    sampling_dictionary: Dict,
+    uncertainty_threshold: float,
+    element_list: List[str],
+    path_to_score_network_checkpoint: Optional[str] = None,
+) -> BaseSampleMaker:
+    """Get sample maker from configuration.
+
+    the sampling dictionary should have the following structure:
+
+        sampling:
+            algorithm: ...
+            (other sample maker parameters)
+
+            excision [Only if using Excise and *]:
+                (excision parameters)
+
+            noise [Only if using Excise and Repaint]:
+                (noise parameters)
+
+          repaint_generator [Only if using Excise and Repaint]:
+                (constrained sampling parameters)
+
+    Args:
+        sampling_dictionary: Dictionary of sampling parameters, as read in from a yaml configuration file.
+        uncertainty_threshold: Uncertainty threshold.
+        element_list: List of element names.
+        path_to_score_network_checkpoint: Path to score network checkpoint.
+
+    Returns:
+        sample_maker: A configured Sample Maker instance.
+    """
+    # Let's make sure we don't modify the input, which would lead to undesirable side effects!
+    sampling_dict = sampling_dictionary.copy()
+
+    noise_parameters, sampling_parameters, axl_network, device = get_repaint_parameters(
+        sampling_dictionary=sampling_dict,
+        element_list=element_list,
+        path_to_score_network_checkpoint=path_to_score_network_checkpoint,
+    )
+
+    atom_selector_parameter_dictionary = dict(
+        algorithm="threshold", uncertainty_threshold=uncertainty_threshold
+    )
+    atom_selector_parameters = create_atom_selector_parameters(
+        atom_selector_parameter_dictionary
+    )
+
+    excisor_parameter_dictionary = sampling_dict.pop("excision", None)
+    if excisor_parameter_dictionary is not None:
+        excisor_parameters = create_excisor_parameters(excisor_parameter_dictionary)
+    else:
+        excisor_parameters = None
+
+    # Let's extract only the sample_maker configuration, popping out components that don't belong.
+    sample_maker_dictionary = sampling_dict.copy()
+    sample_maker_dictionary["element_list"] = element_list
+    sample_maker_dictionary.pop("noise", None)
+    sample_maker_dictionary.pop("repaint_generator", None)
+
+    sample_maker_parameters = create_sample_maker_parameters(sample_maker_dictionary)
+
+    sample_maker = create_sample_maker(
+        sample_maker_parameters=sample_maker_parameters,
+        atom_selector_parameters=atom_selector_parameters,
+        excisor_parameters=excisor_parameters,
+        noise_parameters=noise_parameters,
+        sampling_parameters=sampling_parameters,
+        diffusion_model=axl_network,
+        device=device,
+    )
+    return sample_maker
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
+import numpy as np
 import torch
 
 from diffusion_for_multi_scale_molecular_dynamics.active_learning_loop.atom_selector.base_atom_selector import \
@@ -9,6 +10,8 @@
     BaseEnvironmentExcision
 from diffusion_for_multi_scale_molecular_dynamics.active_learning_loop.sample_maker.base_sample_maker import (
     BaseExciseSampleMaker, BaseExciseSampleMakerArguments)
+from diffusion_for_multi_scale_molecular_dynamics.active_learning_loop.utils import \
+    get_distances_from_reference_point
 from diffusion_for_multi_scale_molecular_dynamics.generators.axl_generator import \
     SamplingParameters
 from diffusion_for_multi_scale_molecular_dynamics.generators.constrained_langevin_generator import (
@@ -28,6 +31,9 @@ class ExciseAndRepaintSampleMakerArguments(BaseExciseSampleMakerArguments):
 
     algorithm: str = "excise_and_repaint"
 
+    # in Angstrom: generated atoms within this radius from the central atom will be removed.
+    sample_edit_radius: Optional[float] = None
+
 
 class ExciseAndRepaintSampleMaker(BaseExciseSampleMaker):
     """Sample maker for the excise and repaint approach.
@@ -67,6 +73,11 @@ def __init__(
              "substructure requested in the sample_maker configuration (ie 'number_of_samples_per_substructure'). "
              "The configuration currently asks for inconsistent things. Review input.")
 
+        self.samples_should_be_edited = False
+        if sample_maker_arguments.sample_edit_radius is not None:
+            self.samples_should_be_edited = True
+            self.sample_edit_radius = sample_maker_arguments.sample_edit_radius
+
         self.sample_noise_parameters = noise_parameters
         self.sampling_parameters = sampling_parameters
         self.diffusion_model = diffusion_model
@@ -143,6 +154,11 @@ def make_samples_from_constrained_substructure(
                 atom at the center of the excised region.
             list_info: list of samples additional information.
         """
+        number_of_constrained_atoms = len(substructure.X)
+        assert active_atom_index < number_of_constrained_atoms, \
+            ("The active atom index is larger than the number of constrained atoms: "
+             "this should be impossible, something is wrong. Review code!")
+
         sampling_constraints = self.create_sampling_constraints(substructure)
         generator = ConstrainedLangevinGenerator(
             noise_parameters=self.sample_noise_parameters,
@@ -160,6 +176,14 @@ def make_samples_from_constrained_substructure(
         new_structures = self.torch_batch_axl_to_list_of_numpy_axl(
             generated_samples["original_axl"]
         )
+        if self.samples_should_be_edited:
+            # Edit the sampled structures in place.
+            new_structures = [self.edit_generated_structure(sampled_structure,
+                                                            active_atom_index,
+                                                            number_of_constrained_atoms,
+                                                            self.sample_edit_radius)
+                              for sampled_structure in new_structures]
+
         # Since the order of the atoms in the constrained substructure are
         # explicitly enforced, the index of the active atom is the same in the
         # constrained substructure and in the sample.
@@ -173,3 +197,44 @@ def make_samples_from_constrained_substructure(
     def filter_made_samples(self, structures: List[AXL]) -> List[AXL]:
         """Return identical structures."""
         return structures
+
+    @staticmethod
+    def edit_generated_structure(sampled_structure: AXL,
+                                 active_atom_index: int,
+                                 number_of_constrained_atoms: int,
+                                 sample_edit_radius: float) -> AXL:
+        """Edit generated structure.
+
+        This method removes generated atoms that are within a sphere of radius "sample_edit_radius" around
+        the active atom. It is assumed that the first "number_of_constrained_atoms" are the constrained atoms;
+        these should not be edited out!
+
+        Args:
+            sampled_structure: generated sampled structure
+            number_of_constrained_atoms: number of atoms that are constrained and should not be removed.
+            active_atom_index: index of the "active atom" in the input sample.
+            sample_edit_radius: radius of exclusion sphere around the active index where
+                generated atoms must be removed.
+
+        Returns:
+            edited_sampled_structure: the edited sampled structure
+        """
+        central_atom_relative_coordinates = sampled_structure.X[active_atom_index]
+        distances_from_central_atom = get_distances_from_reference_point(
+            sampled_structure.X, central_atom_relative_coordinates, sampled_structure.L
+        )
+
+        number_of_atoms = len(sampled_structure.X)
+
+        constrained_atoms_mask = np.zeros(number_of_atoms, dtype=bool)
+        constrained_atoms_mask[:number_of_constrained_atoms] = True
+
+        outside_radius_mask = distances_from_central_atom > sample_edit_radius
+
+        keep_mask = np.logical_or(constrained_atoms_mask, outside_radius_mask)
+
+        edited_structure = AXL(A=sampled_structure.A[keep_mask],
+                               X=sampled_structure.X[keep_mask],
+                               L=sampled_structure.L)
+
+        return edited_structure