Merge pull request #223 from macrocosm-os/staging

mccrindlebrian · web-flow · commit 0290dfd7a2a9 · 2024-09-28T15:42:44.000-04:00
Staging
diff --git a/folding/__init__.py b/folding/__init__.py
@@ -1,7 +1,7 @@
 from .protocol import JobSubmissionSynapse
 from .validators.protein import Protein
 
-__version__ = "1.0.1"
+__version__ = "1.0.2"
 version_split = __version__.split(".")
 __spec_version__ = (
     (10000 * int(version_split[0]))
diff --git a/folding/miners/folding_miner.py b/folding/miners/folding_miner.py
@@ -395,17 +395,19 @@ def submit_simulation(
         with open(os.path.join(output_dir, f"{pdb_id}.pdb"), "w") as f:
             f.write(synapse.pdb_contents)
 
-        system_config = SimulationConfig(**synapse.system_config)
+        system_config = copy.deepcopy(synapse.system_config)
+        if system_config["seed"] is None:
+            system_config["seed"] = self.generate_random_seed()
+
+        system_config = SimulationConfig(**system_config)
         write_pkl(system_config, system_config_filepath)
 
         # Create the job and submit it to the executor
         simulation_manager = SimulationManager(
             pdb_id=pdb_id,
             output_dir=output_dir,
             system_config=system_config.to_dict(),
-            seed=self.generate_random_seed()
-            if system_config.seed is None
-            else system_config.seed,
+            seed=system_config.seed,
         )
 
         future = self.executor.submit(
diff --git a/folding/store.py b/folding/store.py
@@ -153,8 +153,8 @@ class Job:
     gro_hash: str = None
     update_interval: pd.Timedelta = pd.Timedelta(minutes=10)
     updated_count: int = 0
+    min_updates: int = 5
     max_time_no_improvement: pd.Timedelta = pd.Timedelta(minutes=25)
-    min_updates: int = 10
     epsilon: float = 0.05  # percentage.
     event: dict = None
 
@@ -185,14 +185,21 @@ def update(
             raise ValueError(f"Hotkey {hotkey!r} is not a valid choice")
 
         percent_improvement = (
-            (self.best_loss - loss) / self.best_loss
+            (loss - self.best_loss) / self.best_loss
             if not np.isinf(self.best_loss) and not self.best_loss == 0
-            else 1
+            else np.nan
         )
         self.updated_at = pd.Timestamp.now().floor("s")
         self.updated_count += 1
 
-        if (np.isinf(self.best_loss)) or percent_improvement >= self.epsilon:
+        never_updated_better_loss = (
+            np.isnan(percent_improvement) and loss < self.best_loss
+        )  # only happens if best_loss is 0 or inf
+        better_loss = (
+            percent_improvement >= self.epsilon
+        )  # only happens if best_loss is not 0 or inf
+
+        if never_updated_better_loss or better_loss:
             self.best_loss = loss
             self.best_loss_at = pd.Timestamp.now().floor("s")
             self.best_hotkey = hotkey
diff --git a/folding/utils/config.py b/folding/utils/config.py
@@ -194,7 +194,7 @@ def add_args(cls, parser):
         "--neuron.events_retention_size",
         type=str,
         help="Events retention size.",
-        default="2 GB",
+        default="25 MB",
     )
 
     parser.add_argument(
diff --git a/folding/utils/logging.py b/folding/utils/logging.py
@@ -4,6 +4,7 @@
 from loguru import logger
 from dataclasses import asdict, dataclass
 import datetime as dt
+import os
 
 import folding
 import bittensor as bt
@@ -113,6 +114,7 @@ def log_event(self, event, failed=False, pdb_location: str = None):
 
     # Log the event to wandb.
     run.log(event)
+    wandb.save(os.path.join(self.config.neuron.full_path, f"events.log"))
 
     if pdb_location is not None:
         log_protein(run, pdb_id_path=pdb_location)
diff --git a/folding/validators/forward.py b/folding/validators/forward.py
@@ -68,11 +68,14 @@ def run_step(
     # Get the list of uids to query for this step.
     axons = [self.metagraph.axons[uid] for uid in uids]
 
+    system_config = protein.system_config.to_dict()
+    system_config["seed"] = None  # We don't want to pass the seed to miners.
+
     synapse = JobSubmissionSynapse(
         pdb_id=protein.pdb_id,
         md_inputs=protein.md_inputs,
         pdb_contents=protein.pdb_contents,
-        system_config=protein.system_config.to_dict(),
+        system_config=system_config,
     )
 
     # Make calls to the network with the prompt - this is synchronous.
diff --git a/folding/validators/protein.py b/folding/validators/protein.py
@@ -63,7 +63,7 @@ def __init__(
         self.water: str = water
 
         self.system_config = SimulationConfig(
-            ff=self.ff, water=self.water, box=self.box, seed=25
+            ff=self.ff, water=self.water, box=self.box, seed=1337
         )
 
         self.config = config
diff --git a/folding/validators/reward.py b/folding/validators/reward.py
@@ -1,3 +1,4 @@
+import time
 from typing import List
 
 import bittensor as bt
@@ -10,45 +11,57 @@
 def get_energies(
     protein: Protein, responses: List[JobSubmissionSynapse], uids: List[int]
 ):
-    """Takes all the data from reponse synapses, applies the reward pipeline, and aggregates the rewards
-    into a single torch.FloatTensor. Also aggregates the RMSDs for logging.
+    """Takes all the data from response synapses, checks if the data is valid, and returns the energies.
+
+    Args:
+        protein (Protein): instance of the Protein class
+        responses (List[JobSubmissionSynapse]): list of JobSubmissionSynapse objects
+        uids (List[int]): list of uids
 
     Returns:
-        tuple:
-            torch.FloatTensor: A tensor of rewards for each miner.
-            torch.FloatTensor: A tensor of RMSDs for each miner.
+        Tuple: Tuple containing the energies and the event dictionary
     """
     event = {}
     event["is_valid"] = [False] * len(uids)
     event["checked_energy"] = [0] * len(uids)
     event["reported_energy"] = [0] * len(uids)
     event["miner_energy"] = [0] * len(uids)
     event["rmsds"] = [0] * len(uids)
+    event["process_md_output_time"] = [0] * len(uids)
+    event["is_run_valid"] = [0] * len(uids)
+
     energies = np.zeros(len(uids))
+
     for i, (uid, resp) in enumerate(zip(uids, responses)):
         # Ensures that the md_outputs from the miners are parsed correctly
         try:
-            if not protein.process_md_output(
+            start_time = time.time()
+            can_process = protein.process_md_output(
                 md_output=resp.md_output,
                 hotkey=resp.axon.hotkey,
                 state=resp.miner_state,
                 seed=resp.miner_seed,
-            ):
+            )
+            event["process_md_output_time"][i] = time.time() - start_time
+
+            if not can_process:
                 continue
 
             if resp.dendrite.status_code != 200:
                 bt.logging.info(
                     f"uid {uid} responded with status code {resp.dendrite.status_code}"
                 )
                 continue
+
             energy = protein.get_energy()
-            # rmsd = protein.get_rmsd().iloc[-1]["rmsd"]
             rmsd = protein.get_rmsd()
 
             if energy == 0:
                 continue
 
+            start_time = time.time()
             is_valid, checked_energy, miner_energy = protein.is_run_valid()
+            event["is_run_valid"][i] = time.time() - start_time
 
             energies[i] = energy if is_valid else 0
 
diff --git a/neurons/validator.py b/neurons/validator.py
@@ -304,7 +304,6 @@ def prepare_event_for_logging(event: Dict):
             rewards.numpy()
         )  # add the rewards to the logging event.
 
-        bt.logging.success(f"Event information: {merged_events}")
         event = prepare_event_for_logging(merged_events)
 
         # If the job is finished, remove the pdb directory
@@ -319,6 +318,11 @@ def prepare_event_for_logging(event: Dict):
         else:
             bt.logging.error(f"Protein.from_job returns NONE for protein {job.pdb}")
 
+        # Remove these keys from the log because they pollute the terminal.
+        merged_events.pop("checked_energy")
+        merged_events.pop("miner_energy")
+
+        bt.logging.success(f"Event information: {merged_events}")
         log_event(self, event=event, pdb_location=pdb_location)
 
 

Original file line number	Diff line number	Diff line change
`@@ -194,7 +194,7 @@ def add_args(cls, parser):`
`194`	`194`	`"--neuron.events_retention_size",`
`195`	`195`	`type=str,`
`196`	`196`	`help="Events retention size.",`
`197`		`- default="2 GB",`
	`197`	`+ default="25 MB",`
`198`	`198`	`)`
`199`	`199`
`200`	`200`	`parser.add_argument(`
Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ def __init__(`
`63`	`63`	`self.water: str = water`
`64`	`64`
`65`	`65`	`self.system_config = SimulationConfig(`
`66`		`- ff=self.ff, water=self.water, box=self.box, seed=25`
	`66`	`+ ff=self.ff, water=self.water, box=self.box, seed=1337`
`67`	`67`	`)`
`68`	`68`
`69`	`69`	`self.config = config`