macrocosm-os
diff --git a/folding/__init__.py b/folding/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/folding/registries/evaluation_registry.py b/folding/registries/evaluation_registry.py
Lines changed: 72 additions & 23 deletions
diff --git a/folding/utils/constants.py b/folding/utils/constants.py
Lines changed: 3 additions & 0 deletions
diff --git a/folding/utils/ops.py b/folding/utils/ops.py
Lines changed: 21 additions & 0 deletions
diff --git a/folding_api/main.py b/folding_api/main.py
Lines changed: 2 additions & 1 deletion
diff --git a/folding_api/schemas.py b/folding_api/schemas.py
Lines changed: 78 additions & 0 deletions
@@ -1,4 +1,4 @@
-__version__ = "2.4.1"
+__version__ = "2.4.2"
 version_split = __version__.split(".")
 __spec_version__ = (
     (10000 * int(version_split[0]))
 
@@ -1,12 +1,12 @@
 import os
-from typing import Any, Dict, List, Union
-import traceback
+import random
+from typing import Any, Dict
+
+import bittensor as bt
 
 import numpy as np
 import pandas as pd
-from openmm import app
-import bittensor as bt
-import plotly.graph_objects as go
+from openmm import app, unit
 
 from folding.base.evaluation import BaseEvaluator
 from folding.base.simulation import OpenMMSimulation
@@ -20,6 +20,7 @@
     save_files,
     save_pdb,
     write_pkl,
+    check_uniqueness,
 )
 from folding.utils.opemm_simulation_config import SimulationConfig
 from folding.protocol import IntermediateSubmissionSynapse
@@ -62,6 +63,7 @@ def __init__(
         )
 
         self.intermediate_checkpoint_files = {}
+        self.miner_reported_energies = {}
 
     def process_md_output(self) -> bool:
         """Method to process molecular dynamics data from a miner and recreate the simulation.
@@ -282,6 +284,21 @@ def compare_state_to_cpt(
             return False
         return True
 
+    def select_stratified_checkpoints(
+        self, num_checkpoints: int, num_samples: int
+    ) -> list[int]:
+        """Draw one random checkpoint index from each leading stratum.
+
+        The range ``[0, num_checkpoints]`` is split into ``num_samples`` integer
+        bins and the last bin is dropped, so ``num_samples - 1`` indices are
+        returned (an empty list when ``num_samples <= 1``). The caller in this
+        file passes one extra sample ("+1 for Final") so that the dropped bin
+        is the one at the end of the run.
+
+        Args:
+            num_checkpoints: Upper bound of the checkpoint index range.
+            num_samples: Number of bins to build; one fewer index is returned.
+
+        Returns:
+            One index per retained bin, in ascending bin order.
+        """
+
+        # linspace gives num_samples + 1 edges; slicing off the last leaves
+        # num_samples edges, i.e. num_samples - 1 (lower, upper) pairs below.
+        bin_edges = np.linspace(0, num_checkpoints, num_samples + 1, dtype=int)[:-1]
+
+        picks = []
+        for lower, upper in zip(bin_edges[:-1], bin_edges[1:]):
+            # randint is inclusive on both ends; max() keeps the range valid
+            # for empty bins (lower == upper) when checkpoints are scarce.
+            picks.append(random.randint(lower, max(lower, upper - 1)))
+        return picks
+
     async def is_run_valid(self, validator=None, job_id=None, axon=None):
         """
         Checks if the run is valid by evaluating a set of logical conditions:
@@ -306,7 +323,7 @@ async def is_run_valid(self, validator=None, job_id=None, axon=None):
             miner_energies_dict = {}
 
             logger.info(f"Checking if run is valid for {self.hotkey_alias}...")
-            logger.info(f"Checking final checkpoint...")
+            logger.info("Checking final checkpoint...")
             # Check the final checkpoint
             (
                 is_valid,
@@ -326,11 +343,10 @@ async def is_run_valid(self, validator=None, job_id=None, axon=None):
 
             # Check the intermediate checkpoints
             if validator is not None and job_id is not None and axon is not None:
-                checkpoint_numbers = np.random.choice(
-                    range(self.number_of_checkpoints),
-                    size=c.MAX_CHECKPOINTS_TO_VALIDATE,
-                    replace=False,
-                ).tolist()
+                checkpoint_numbers = self.select_stratified_checkpoints(
+                    num_checkpoints=self.number_of_checkpoints,
+                    num_samples=c.MAX_CHECKPOINTS_TO_VALIDATE + 1,  # +1 for Final
+                )
 
                 # Get intermediate checkpoints from the miner
                 intermediate_checkpoints = await self.get_intermediate_checkpoints(
@@ -385,11 +401,33 @@ async def is_run_valid(self, validator=None, job_id=None, axon=None):
                     if not is_valid:
                         return False, checked_energies_dict, miner_energies_dict, result
 
+                # Check if the miner's checkpoint is similar to the validator's checkpoint.
+                miner_reported_energies = []
+                checkpoint_length = len(
+                    self.miner_reported_energies[str(checkpoint_numbers[0])]
+                )
+                for _, energy in self.miner_reported_energies.items():
+                    miner_reported_energies.append(
+                        energy[:checkpoint_length]
+                    )  # final cpt is larger in length.
+
+                if not check_uniqueness(
+                    vectors=miner_reported_energies,
+                    tol=c.MINER_CHECKPOINT_SIMILARITY_TOLERANCE,
+                ):
+                    logger.warning("Miner checkpoints not unique")
+                    return (
+                        False,
+                        checked_energies_dict,
+                        miner_energies_dict,
+                        "miner-checkpoint-similarity",
+                    )
+
                 return True, checked_energies_dict, miner_energies_dict, "valid"
 
-        except ValidationError as E:
-            logger.warning(f"{E}")
-            return False, {}, {}, E.message
+        except ValidationError as e:
+            logger.warning(f"{e}")
+            return False, {}, {}, e.message
 
         return True, checked_energies_dict, miner_energies_dict, "valid"
 
@@ -429,6 +467,7 @@ async def validate(self, validator=None, job_id=None, axon=None):
 
         # Use the final checkpoint's energy for the score
         if "final" in checked_energies_dict and checked_energies_dict["final"]:
+            logger.success(f"Hotkey {self.hotkey_alias} passed validation!")
             final_energies = checked_energies_dict["final"]
             # Take the median of the last ENERGY_WINDOW_SIZE values
             median_energy = np.median(final_energies[-c.ENERGY_WINDOW_SIZE :])
@@ -481,6 +520,17 @@ async def get_intermediate_checkpoints(
     def name(self) -> str:
         return "SyntheticMD"
 
+    def get_miner_log_file_energies(
+        self, start_index: int, end_index: int
+    ) -> np.ndarray:
+        """Return the miner-logged potential energies inside a step window.
+
+        Rows of ``self.log_file`` are kept when their ``#"Step"`` value is
+        strictly greater than ``start_index`` and at most ``end_index``.
+
+        Args:
+            start_index: Exclusive lower bound on the step column.
+            end_index: Inclusive upper bound on the step column.
+
+        Returns:
+            Values of the "Potential Energy (kJ/mole)" column for those rows.
+        """
+        steps = self.log_file['#"Step"']
+        in_window = (steps > start_index) & (steps <= end_index)
+        return self.log_file[in_window]["Potential Energy (kJ/mole)"].values
+
+
     def is_checkpoint_valid(
         self,
         checkpoint_path: str,
@@ -544,7 +594,6 @@ def is_checkpoint_valid(
 
         try:
             if not self.check_gradient(check_energies=np.array(state_energies)):
-                logger.warning(f"state energies: {state_energies}")
                 logger.warning(
                     f"hotkey {self.hotkey_alias} failed state-gradient check for {self.pdb_id}, checkpoint_num: {checkpoint_num}, ... Skipping!"
                 )
@@ -589,10 +638,11 @@ def is_checkpoint_valid(
 
             max_step = current_cpt_step + steps_to_run
 
-            miner_energies: np.ndarray = self.log_file[
-                (self.log_file['#"Step"'] > current_cpt_step)
-                & (self.log_file['#"Step"'] <= max_step)
-            ]["Potential Energy (kJ/mole)"].values
+            miner_energies: np.ndarray = self.get_miner_log_file_energies(
+                start_index=current_cpt_step, end_index=max_step
+            )
+
+            self.miner_reported_energies[checkpoint_num] = miner_energies
 
             if len(np.unique(check_energies)) == 1:
                 logger.warning(
@@ -601,7 +651,6 @@ def is_checkpoint_valid(
                 raise ValidationError(message="reprod-energies-identical")
 
             if not self.check_gradient(check_energies=np.array(check_energies)):
-                logger.warning(f"check_energies: {check_energies}")
                 logger.warning(
                     f"hotkey {self.hotkey_alias} failed cpt-gradient check for {self.pdb_id}, checkpoint_num: {checkpoint_num}, ... Skipping!"
                 )
@@ -641,9 +690,9 @@ def is_checkpoint_valid(
 
             return True, check_energies.tolist(), miner_energies.tolist(), "valid"
 
-        except ValidationError as E:
-            logger.warning(f"{E}")
-            return False, [], [], E.message
+        except ValidationError as e:
+            logger.warning(f"{e}")
+            return False, [], [], e.message
 
 
 class OrganicMDEvaluator(SyntheticMDEvaluator):
 
@@ -11,6 +11,9 @@
 ENERGY_WINDOW_SIZE = (
     10  # Number of steps to compute median/mean energies when comparing
 )
+MINER_CHECKPOINT_SIMILARITY_TOLERANCE = (
+    # Relative tolerance for treating two miner checkpoint energy vectors as
+    # near-duplicates (passed as `tol` to check_uniqueness, which applies it
+    # as np.allclose's rtol). It is a fraction, not a percentage: 0.05 == 5%.
+    0.05
+)
 
 # MinerRegistry constants
 MAX_JOBS_IN_MEMORY = 1000
 
@@ -443,3 +443,24 @@ def write_pdb_file(
         positions=positions,
         file=open(pdb_location_path, "w"),
     )
+
+
+def are_vectors_too_similar(vec1, vec2, tol=0.01):
+    """Return True when two vectors are identical or element-wise nearly equal.
+
+    "Nearly equal" means ``|vec1 - vec2| <= tol * |vec2|`` for every element
+    (``np.allclose`` with ``rtol=tol`` and ``atol=0``), so the check is relative
+    to ``vec2`` and is not symmetric in its arguments.
+
+    Args:
+        vec1: First array-like of values.
+        vec2: Second array-like of values; the relative tolerance scales with it.
+        tol: Relative tolerance expressed as a fraction (0.01 == 1%).
+
+    Returns:
+        True if the vectors are equal or within tolerance, False otherwise.
+        Vectors of different shapes are never considered similar.
+    """
+    first = np.asarray(vec1)
+    second = np.asarray(vec2)
+
+    if np.array_equal(first, second):
+        return True  # Identical
+
+    # np.allclose raises ValueError for non-broadcastable shapes and silently
+    # broadcasts a length-1 vector against a longer one; neither is a
+    # meaningful element-wise comparison, so report "not similar" instead.
+    if first.shape != second.shape:
+        return False
+
+    # Too close within the relative tolerance.
+    return bool(np.allclose(first, second, rtol=tol, atol=0))
+
+
+def check_uniqueness(vectors, tol=0.01):
+    """Report whether no two vectors in ``vectors`` are too similar.
+
+    Every unordered pair is compared with ``are_vectors_too_similar`` using
+    ``tol``; the search stops at the first pair that is flagged.
+
+    Args:
+        vectors: Iterable of array-likes to compare pairwise.
+        tol: Tolerance forwarded to ``are_vectors_too_similar``.
+
+    Returns:
+        False as soon as one pair is too similar, True otherwise (including
+        when fewer than two vectors are given).
+    """
+    arrays = [np.array(item) for item in vectors]
+
+    for position, left in enumerate(arrays):
+        # Only look at later entries so each pair is checked exactly once.
+        for right in arrays[position + 1 :]:
+            if are_vectors_too_similar(left, right, tol):
+                return False
+    return True
@@ -9,6 +9,7 @@
 
 from folding_api.chain import SubtensorService
 from folding_api.protein import router
+from folding_api.utility_endpoints import router as utility_router
 from folding_api.validator_registry import ValidatorRegistry
 from folding_api.auth import APIKeyManager, get_api_key, api_key_router
 from folding_api.vars import (
@@ -79,6 +80,6 @@ async def lifespan(app: FastAPI):
 # Include routes
 app.include_router(router, dependencies=[Depends(get_api_key)])
 app.include_router(api_key_router)  # API key management routes
-
+app.include_router(utility_router)  # Utility endpoints
 if __name__ == "__main__":
     uvicorn.run("main:app", host="0.0.0.0", port=8029)
@@ -176,3 +176,81 @@ class APIKeyCreate(BaseModel):
 
 class APIKeyResponse(APIKeyBase):
     key: str
+
+
+class PDB(BaseModel):
+    # One PDB entry; used as the element type of PDBSearchResponse.matches.
+    pdb_id: str
+    # NOTE(review): presumably the database the entry was found in — confirm.
+    source: str
+
+
+class PDBSearchResponse(BaseModel):
+    """
+    Represents a response from a PDB search.
+    """
+
+    # NOTE(review): the description says "PDB IDs", but each match is a PDB
+    # object (pdb_id + source), not a bare ID string.
+    matches: List[PDB] = Field(..., description="List of matching PDB IDs")
+    # NOTE(review): nothing in this model ties `total` to len(matches) — confirm
+    # against the endpoint that fills it.
+    total: int = Field(..., description="Total number of matches found")
+
+
+class PDBInfoResponse(BaseModel):
+    """
+    Represents detailed information about a PDB structure from RCSB.
+    """
+
+    # Only pdb_id is required; every other field defaults to None when absent.
+    pdb_id: str = Field(..., description="PDB ID")
+    molecule_name: Optional[str] = Field(
+        None, description="Name of the molecule/protein"
+    )
+    classification: Optional[str] = Field(None, description="Structural classification")
+    organism: Optional[str] = Field(None, description="Source organism")
+    expression_system: Optional[str] = Field(None, description="Expression system used")
+
+
+class Job(BaseModel):
+    # One job entry; used as the element type of JobPoolResponse.jobs.
+    # All fields are required (no defaults).
+    # NOTE(review): `id` and `job_id` are both strings; how they differ is not
+    # visible from this model — confirm with the producer of these records.
+    id: str
+    type: Literal["organic", "synthetic"]
+    job_id: str
+    pdb_id: str
+    # NOTE(review): timestamp kept as a plain string; format not enforced here.
+    created_at: str
+    status: Literal["active", "inactive", "failed"]
+    priority: int
+    validator_hotkey: str
+    best_hotkey: str
+    # String-to-string mapping; NOTE(review): presumably name -> S3 URL — confirm.
+    s3_links: dict[str, str]
+
+
+class JobPoolResponse(BaseModel):
+    """
+    Represents a response from a job pool.
+    """
+
+    jobs: List[Job] = Field(..., description="List of jobs")
+    # NOTE(review): nothing in this model ties `total` to len(jobs) — confirm
+    # against the endpoint that fills it.
+    total: int = Field(..., description="Total number of jobs")
+
+
+class Miner(BaseModel):
+    # One miner entry; used as the element type of JobResponse.miners.
+    # Note that uid is declared as a string, not an int.
+    uid: str
+    hotkey: str
+    # Untyped dict; NOTE(review): key/value schema is not defined here — confirm.
+    energy: dict
+
+
+class JobResponse(BaseModel):
+    # Detailed view of a single job: the PDB it targets, the simulation
+    # parameters, the participating miners, and status/timestamps.
+    pdb_id: str
+    pdb_file_link: str
+    # Optional metadata; each of these defaults to None when not supplied.
+    classification: Optional[str] = Field(None, description="Structural classification")
+    expression_system: Optional[str] = Field(None, description="Expression system used")
+    mutations: Optional[bool] = Field(None, description="Mutations in the PDB")
+    source: str
+    # Required simulation settings. NOTE(review): units are not stated in this
+    # model — confirm before displaying them.
+    temperature: float
+    friction: float
+    pressure: float
+    time_to_live: float
+    ff: str
+    water: str
+    box: str
+    miners: List[Miner]
+    # Status falls back to "inactive" and both timestamps to "" when omitted.
+    status: Literal["active", "inactive", "failed"] = Field(
+        "inactive", description="Job status"
+    )
+    created_at: str = Field("", description="Job creation timestamp")
+    updated_at: str = Field("", description="Job last update timestamp")
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = "2.4.1"`
	`1`	`+__version__ = "2.4.2"`
`2`	`2`	`version_split = __version__.split(".")`
`3`	`3`	`__spec_version__ = (`
`4`	`4`	`(10000 * int(version_split[0]))`
Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,9 @@`
`11`	`11`	`ENERGY_WINDOW_SIZE = (`
`12`	`12`	`10 # Number of steps to compute median/mean energies when comparing`
`13`	`13`	`)`
	`14`	`+MINER_CHECKPOINT_SIMILARITY_TOLERANCE = (`
	`15`	`+ 0.05 # Tolerance for cpts to be considered similar. NOT in percent.`
	`16`	`+)`
`14`	`17`
`15`	`18`	`# MinerRegistry constants`
`16`	`19`	`MAX_JOBS_IN_MEMORY = 1000`