Merge pull request #168 from macrocosm-os/staging

mccrindlebrian · web-flow · commit 104c47bace18 · 2024-07-31T14:00:56.000-05:00
Staging
diff --git a/demo.ipynb b/demo.ipynb
@@ -46,7 +46,7 @@
     "from neurons.validator import Validator\n",
     "from folding.store import Job\n",
     "from folding.validators.protein import Protein\n",
-    "from folding.protocol import FoldingSynapse\n",
+    "from folding.protocol import JobSubmissionSynapse\n",
     "from folding.validators.reward import get_energies\n",
     "from folding.utils.ops import get_response_info\n",
     "from folding.rewards.reward_pipeline import reward_pipeline\n",
@@ -769,7 +769,7 @@
    "source": [
     "\n",
     "# Create a synapse to query the network\n",
-    "synapse = FoldingSynapse(\n",
+    "synapse = JobSubmissionSynapse(\n",
     "    pdb_id=protein.pdb_id, md_inputs=protein.md_inputs, mdrun_args=\"\"\n",
     ")\n",
     "\n",
diff --git a/folding/__init__.py b/folding/__init__.py
@@ -1,7 +1,7 @@
-from .protocol import FoldingSynapse
+from .protocol import JobSubmissionSynapse
 from .validators.protein import Protein
 
-__version__ = "0.2.0"
+__version__ = "0.3.0"
 version_split = __version__.split(".")
 __spec_version__ = (
     (10000 * int(version_split[0]))
diff --git a/folding/base/miner.py b/folding/base/miner.py
@@ -21,10 +21,10 @@
 import argparse
 import traceback
 
-from typing import Tuple
 import bittensor as bt
 
 from folding.base.neuron import BaseNeuron
+from folding.protocol import PingSynapse
 from folding.utils.config import add_miner_args
 
 
@@ -60,6 +60,8 @@ def __init__(self, config=None):
             forward_fn=self.forward,
             blacklist_fn=self.blacklist,
             priority_fn=self.priority,
+        ).attach(
+            forward_fn=self.ping_forward,  # TODO: decide whether ping requests also need a blacklist_fn.
         )
         bt.logging.info(f"Axon created: {self.axon}")
 
@@ -69,6 +71,23 @@ def __init__(self, config=None):
         self.thread: threading.Thread = None
         self.lock = asyncio.Lock()
 
+    def ping_forward(self, synapse: PingSynapse):
+        """Respond to the validator with the necessary information about serving
+
+        Args:
+            synapse (PingSynapse): must attach "can_serve" and "available_compute"
+        """
+
+        bt.logging.info(f"Received ping request from {synapse.dendrite.hotkey[:8]}")
+
+        synapse.available_compute = self.max_workers - len(self.simulations)
+
+        # TODO: add more conditions.
+        if synapse.available_compute > 0:
+            synapse.can_serve = True
+            bt.logging.success("Telling validator you can serve ✅")
+        return synapse
+
     def run(self):
         """
         Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors.
diff --git a/folding/miners/folding_miner.py b/folding/miners/folding_miner.py
@@ -9,7 +9,7 @@
 
 # import base miner class which takes care of most of the boilerplate
 from folding.base.miner import BaseMinerNeuron
-from folding.protocol import FoldingSynapse
+from folding.protocol import JobSubmissionSynapse
 from folding.utils.logging import log_event
 from folding.utils.ops import (
     run_cmd_commands,
@@ -23,7 +23,9 @@
 BASE_DATA_PATH = os.path.join(ROOT_DIR, "miner-data")
 
 
-def attach_files(files_to_attach: List, synapse: FoldingSynapse) -> FoldingSynapse:
+def attach_files(
+    files_to_attach: List, synapse: JobSubmissionSynapse
+) -> JobSubmissionSynapse:
     """function that parses a list of files and attaches them to the synapse object"""
     bt.logging.info(f"Sending files to validator: {files_to_attach}")
     for filename in files_to_attach:
@@ -45,14 +47,14 @@ def attach_files(files_to_attach: List, synapse: FoldingSynapse) -> FoldingSynap
 
 
 def attach_files_to_synapse(
-    synapse: FoldingSynapse,
+    synapse: JobSubmissionSynapse,
     data_directory: str,
     state: str,
-) -> FoldingSynapse:
+) -> JobSubmissionSynapse:
     """load the output files as bytes and add to synapse.md_output
 
     Args:
-        synapse (FoldingSynapse): Recently received synapse object
+        synapse (JobSubmissionSynapse): Recently received synapse object
         data_directory (str): directory where the miner is holding the necessary data for the validator.
         state (str): the current state of the simulation
 
@@ -72,7 +74,7 @@ def attach_files_to_synapse(
 
 
     Returns:
-        FoldingSynapse: synapse with md_output attached
+        JobSubmissionSynapse: synapse with md_output attached
     """
 
     synapse.md_output = {}  # ensure that the initial state is empty
@@ -110,8 +112,8 @@ def attach_files_to_synapse(
 
 
 def check_synapse(
-    self, synapse: FoldingSynapse, output_dir: str, event: Dict = None
-) -> FoldingSynapse:
+    self, synapse: JobSubmissionSynapse, output_dir: str, event: Dict = None
+) -> JobSubmissionSynapse:
     """Utility function to remove md_inputs if they exist"""
     if len(synapse.md_inputs) > 0:
         event["md_inputs_sizes"] = list(map(len, synapse.md_inputs.values()))
@@ -240,7 +242,7 @@ def check_and_remove_simulations(self, event: Dict) -> Dict:
 
         return event
 
-    def forward(self, synapse: FoldingSynapse) -> FoldingSynapse:
+    def forward(self, synapse: JobSubmissionSynapse) -> JobSubmissionSynapse:
         """
         The main async function that is called by the dendrite to run the simulation.
         There are a set of default behaviours the miner should carry out based on the form the synapse comes in as:
@@ -252,7 +254,7 @@ def forward(self, synapse: FoldingSynapse) -> FoldingSynapse:
                 - If the number of active processes is less than the number of CPUs and the pdb_id is unique, start a new process
 
         Returns:
-            FoldingSynapse: synapse with md_output attached
+            JobSubmissionSynapse: synapse with md_output attached
         """
         # If we are already running a process with the same identifier, return intermediate information
         bt.logging.debug(f"⌛ Query from validator for protein: {synapse.pdb_id} ⌛")
@@ -330,9 +332,7 @@ def forward(self, synapse: FoldingSynapse) -> FoldingSynapse:
                     f"❗ Cannot start new process: job limit reached. ({len(self.simulations)}/{self.max_workers}).❗"
                 )
 
-                bt.logging.warning(
-                    f"❗ Removing miner from job pool ❗"
-                )
+                bt.logging.warning(f"❗ Removing miner from job pool ❗")
 
                 event["condition"] = "cpu_limit_reached"
                 synapse.miner_serving = False
@@ -378,7 +378,7 @@ def forward(self, synapse: FoldingSynapse) -> FoldingSynapse:
             self=self, synapse=synapse, event=event, output_dir=output_dir
         )
 
-    async def blacklist(self, synapse: FoldingSynapse) -> Tuple[bool, str]:
+    async def blacklist(self, synapse: JobSubmissionSynapse) -> Tuple[bool, str]:
         if (
             not self.config.blacklist.allow_non_registered
             and synapse.dendrite.hotkey not in self.metagraph.hotkeys
@@ -406,7 +406,7 @@ async def blacklist(self, synapse: FoldingSynapse) -> Tuple[bool, str]:
         )
         return False, "Hotkey recognized!"
 
-    async def priority(self, synapse: FoldingSynapse) -> float:
+    async def priority(self, synapse: JobSubmissionSynapse) -> float:
         caller_uid = self.metagraph.hotkeys.index(
             synapse.dendrite.hotkey
         )  # Get the caller index.
diff --git a/folding/protocol.py b/folding/protocol.py
@@ -21,7 +21,13 @@
 import bittensor as bt
 
 
-class FoldingSynapse(bt.Synapse):
+class PingSynapse(bt.Synapse):
+    """ Responsible for determining if a miner can accept a request """
+    can_serve: bool = False
+    available_compute: typing.Optional[int] = None  # TODO: number of threads / GPUs?
+    
+
+class JobSubmissionSynapse(bt.Synapse):
     """
     A protocol representation which uses bt.Synapse as its base.
     This protocol helps in handling request and response communication between
@@ -59,7 +65,7 @@ def deserialize(self) -> int:
             f"Deserializing response from miner, I am: {self.pdb_id}, hotkey: {self.axon.hotkey[:8]}"
         )
         # Right here we perform validation that the reponse has expected hash
-        if type(self.md_output) != dict:
+        if not isinstance(self.md_output, dict):
             self.md_output = {}
         else:
             md_output = {}
diff --git a/folding/utils/config.py b/folding/utils/config.py
@@ -70,7 +70,7 @@ def add_args(cls, parser):
         help="Device to run on.",
         default="cpu",
     )
-    
+
     parser.add_argument(
         "--neuron.metagraph_resync_length",
         type=int,
@@ -302,6 +302,12 @@ def add_validator_args(cls, parser):
         help="The timeout for each forward call. (seconds)",
         default=45,
     )
+    parser.add_argument(
+        "--neuron.ping_timeout",
+        type=float,
+        help="Controls the timeout for the PingSynapse call",
+        default=3,
+    )
 
     parser.add_argument(
         "--neuron.update_interval",
diff --git a/folding/utils/ops.py b/folding/utils/ops.py
@@ -1,19 +1,19 @@
+import hashlib
 import os
+import pickle as pkl
+import random
 import re
-import sys
-import tqdm
 import shutil
-import random
-import hashlib
 import subprocess
+import sys
 import traceback
-import pickle as pkl
+from typing import Dict, List
 
-from typing import List, Dict
+import bittensor as bt
 import requests
+import tqdm
 
-import bittensor as bt
-from folding.protocol import FoldingSynapse
+from folding.protocol import JobSubmissionSynapse
 
 # Recommended force field-water pairs, retrieved from gromacs-2024.1/share/top
 FF_WATER_PAIRS = {
@@ -244,7 +244,7 @@ def is_pdb_complete(pdb_text: str) -> bool:
     return True
 
 
-def get_response_info(responses: List[FoldingSynapse]) -> Dict:
+def get_response_info(responses: List[JobSubmissionSynapse]) -> Dict:
     """Gather all desired response information from the set of miners."""
 
     response_times = []
diff --git a/folding/validators/forward.py b/folding/validators/forward.py
@@ -3,11 +3,12 @@
 import bittensor as bt
 from pathlib import Path
 from typing import List, Dict
+from collections import defaultdict
 
 from folding.validators.protein import Protein
 from folding.utils.logging import log_event
 from folding.validators.reward import get_energies
-from folding.protocol import FoldingSynapse
+from folding.protocol import PingSynapse, JobSubmissionSynapse
 
 from folding.utils.ops import select_random_pdb_id, load_pdb_ids, get_response_info
 from folding.validators.hyperparameters import HyperParameters
@@ -17,6 +18,31 @@
     root_dir=ROOT_DIR, filename="pdb_ids.pkl"
 )  # TODO: Currently this is a small list of PDBs without MISSING flags.
 
+def run_ping_step(
+        self, 
+        uids: List[int], 
+        timeout: float
+) -> Dict:
+    """ Report a dictionary of ping information from all miners that were
+    randomly sampled for this batch.
+    """
+    axons = [self.metagraph.axons[uid] for uid in uids]
+    synapse = PingSynapse()
+
+    bt.logging.info(f"Pinging {len(axons)} uids")
+    responses: List[PingSynapse] = self.dendrite.query(
+        axons=axons,
+        synapse=synapse,
+        timeout=timeout,
+    )
+
+    ping_report = defaultdict(list)
+    for resp in responses:
+        ping_report['miner_status'].append(resp.can_serve)
+        ping_report['reported_compute'].append(resp.available_compute)
+
+    return ping_report
+
 
 def run_step(
     self,
@@ -29,13 +55,13 @@ def run_step(
 
     # Get the list of uids to query for this step.
     axons = [self.metagraph.axons[uid] for uid in uids]
-    synapse = FoldingSynapse(
+    synapse = JobSubmissionSynapse(
         pdb_id=protein.pdb_id, md_inputs=protein.md_inputs, mdrun_args=mdrun_args
     )
 
     # Make calls to the network with the prompt - this is synchronous.
     bt.logging.warning("waiting for responses....")
-    responses: List[FoldingSynapse] = self.dendrite.query(
+    responses: List[JobSubmissionSynapse] = self.dendrite.query(
         axons=axons,
         synapse=synapse,
         timeout=timeout,
diff --git a/folding/validators/reward.py b/folding/validators/reward.py
@@ -1,18 +1,15 @@
-import os
-import pandas as pd
-import numpy as np
+from typing import List
+
 import bittensor as bt
-from typing import List, Dict
+import numpy as np
 
+from folding.protocol import JobSubmissionSynapse
 from folding.validators.protein import Protein
-from folding.utils.data import DataExtractor
-from folding.protocol import FoldingSynapse
-from folding.rewards.reward import RewardEvent
-from folding.rewards.energy import EnergyRewardModel
-from folding.rewards.rmsd import RMSDRewardModel
 
 
-def get_energies(protein: Protein, responses: List[FoldingSynapse], uids: List[int]):
+def get_energies(
+    protein: Protein, responses: List[JobSubmissionSynapse], uids: List[int]
+):
     """Takes all the data from reponse synapses, applies the reward pipeline, and aggregates the rewards
     into a single torch.FloatTensor. Also aggregates the RMSDs for logging.
 
diff --git a/min_compute.yml b/min_compute.yml
@@ -7,7 +7,7 @@
 
 # NOTE: Specification for miners may be different from validators
 
-version: '1.0' # update this version key as needed, ideally should match your release version
+version: '0.3.0' # update this version key as needed, ideally should match your release version
 
 compute_spec:
 
@@ -20,6 +20,12 @@ compute_spec:
       recommended_speed: 3.5  # Recommended speed per core (GHz)
       architecture: "x86_64"  # Architecture type (e.g., x86_64, arm64)
 
+    gpu:
+      required: True                       # Does the application require a GPU?
+      min_vram: 24                         # Minimum GPU VRAM (GB)
+      min_compute_capability: 6.0          # Minimum CUDA compute capability
+      recommended_compute_capability: 7.0  # Recommended CUDA compute capability
+
     memory:
       min_ram: 16          # Minimum RAM (GB)
       min_swap: 4          # Minimum swap space (GB)
diff --git a/neurons/validator.py b/neurons/validator.py
diff --git a/scripts/run_validator.sh b/scripts/run_validator.sh
diff --git a/tests/test_miner.py b/tests/test_miner.py