Skip to content

Commit 605a1fe

Browse files
Merge pull request #305 from macrocosm-os/staging
Staging
2 parents 8749d88 + 307ddb2 commit 605a1fe

15 files changed

+646
-180
lines changed

.env.example

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
S3_REGION = "nyc3"
2+
S3_ENDPOINT = "https://nyc3.digitaloceanspaces.com"
3+
S3_KEY = "s3_key"
4+
S3_SECRET = "secret_key"

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,13 @@ Validators are heavily recommended to run the autoprocess script to ensure that
161161
bash run_autoprocess.sh
162162
```
163163

164+
The following environment variables need to be set in your system or application environment (`.env` file):
165+
- `S3_REGION = "nyc3"`: The AWS region or S3-compatible region where the bucket is located.
166+
- `S3_ENDPOINT = "https://nyc3.digitaloceanspaces.com"`: The endpoint URL for your S3-compatible service.
167+
- `S3_KEY`: Your S3 access key ID.
168+
- `S3_SECRET`: Your S3 secret access key.
169+
See `.env.example` for an example of how to set these variables.
170+
164171
### Miner
165172
There are many parameters that one can configure for a simulation. The base command-line args that are needed to run the miner are below.
166173
```bash

folding/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from .protocol import JobSubmissionSynapse
22
from .validators.protein import Protein
33

4-
__version__ = "1.3.4"
4+
__version__ = "1.4.0"
55
version_split = __version__.split(".")
66
__spec_version__ = (
77
(10000 * int(version_split[0]))

folding/store.py

+105-53
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1-
import json
21
import os
2+
import json
3+
import string
34
import random
45
import sqlite3
5-
import string
6-
from dataclasses import asdict, dataclass
7-
from datetime import datetime
6+
import requests
87
from queue import Queue
9-
from typing import List
8+
from typing import Dict, List
9+
10+
from datetime import datetime, timezone
11+
from dataclasses import asdict, dataclass
1012

1113
import numpy as np
1214
import pandas as pd
13-
import requests
15+
1416
from atom.epistula.epistula import Epistula
17+
from folding.utils.epistula_utils import get_epistula_body
1518

1619
DB_DIR = os.path.join(os.path.dirname(__file__), "db")
1720

@@ -53,7 +56,10 @@ def init_db(self):
5356
water TEXT,
5457
epsilon REAL,
5558
system_kwargs TEXT,
56-
min_updates INTEGER
59+
min_updates INTEGER,
60+
job_id TEXT,
61+
s3_links TEXT,
62+
best_cpt_links TEXT
5763
)
5864
"""
5965
)
@@ -67,9 +73,7 @@ def _row_to_job(self, row) -> "Job":
6773
# Convert stored JSON strings back to Python objects
6874
data["hotkeys"] = json.loads(data["hotkeys"])
6975
data["event"] = json.loads(data["event"]) if data["event"] else None
70-
data["system_kwargs"] = (
71-
json.loads(data["system_kwargs"]) if data["system_kwargs"] else None
72-
)
76+
data["system_kwargs"] = json.loads(data["system_kwargs"]) if data["system_kwargs"] else None
7377

7478
# Convert timestamps
7579
for field in ["created_at", "updated_at", "best_loss_at"]:
@@ -80,9 +84,7 @@ def _row_to_job(self, row) -> "Job":
8084

8185
# Convert intervals
8286
data["update_interval"] = pd.Timedelta(seconds=data["update_interval"])
83-
data["max_time_no_improvement"] = pd.Timedelta(
84-
seconds=data["max_time_no_improvement"]
85-
)
87+
data["max_time_no_improvement"] = pd.Timedelta(seconds=data["max_time_no_improvement"])
8688

8789
# Convert boolean
8890
data["active"] = bool(data["active"])
@@ -93,12 +95,13 @@ def _job_to_dict(self, job: "Job") -> dict:
9395
"""Convert a Job object to a dictionary for database storage."""
9496
data = job.to_dict()
9597

96-
# Convert Python objects to JSON strings
97-
data["hotkeys"] = json.dumps(data["hotkeys"])
98-
data["event"] = json.dumps(data["event"]) if data["event"] else None
99-
data["system_kwargs"] = (
100-
json.dumps(data["system_kwargs"]) if data["system_kwargs"] else None
101-
)
98+
# Convert Python list or dict objects to JSON strings for sqlite
99+
data_to_update = {}
100+
for k, v in data.items():
101+
if isinstance(v, (list,dict)):
102+
data_to_update[k] = json.dumps(v)
103+
104+
data.update(data_to_update)
102105

103106
# Convert timestamps to strings
104107
for field in ["created_at", "updated_at", "best_loss_at"]:
@@ -109,9 +112,7 @@ def _job_to_dict(self, job: "Job") -> dict:
109112

110113
# Convert intervals to seconds
111114
data["update_interval"] = int(data["update_interval"].total_seconds())
112-
data["max_time_no_improvement"] = int(
113-
data["max_time_no_improvement"].total_seconds()
114-
)
115+
data["max_time_no_improvement"] = int(data["max_time_no_improvement"].total_seconds())
115116

116117
# Convert boolean to integer
117118
data["active"] = int(data["active"])
@@ -154,6 +155,7 @@ def insert(
154155
hotkeys: List[str],
155156
epsilon: float,
156157
system_kwargs: dict,
158+
job_id: str,
157159
**kwargs,
158160
):
159161
"""Insert a new job into the database."""
@@ -176,6 +178,7 @@ def insert(
176178
updated_at=pd.Timestamp.now().floor("s"),
177179
epsilon=epsilon,
178180
system_kwargs=system_kwargs,
181+
job_id=job_id,
179182
**kwargs,
180183
)
181184

@@ -202,6 +205,34 @@ def update(self, job: "Job"):
202205

203206
cur.execute(query, list(data.values()) + [pdb])
204207

208+
def update_gjp_job(self, job: "Job", gjp_address: str, keypair, job_id: str):
    """Push the current state of ``job`` to the global job pool (GJP) server.

    Args:
        job (Job): Job whose fields are serialized into the request payload.
        gjp_address (str): Host (and port) of the GJP server, without scheme.
        keypair (Keypair): Keypair used to sign the Epistula request headers.
        job_id (str): Identifier of the GJP job being updated.

    Returns:
        str: The job ID echoed back by the server.

    Raises:
        ValueError: If the server responds with a non-200 status code.
    """
    payload = get_epistula_body(job=job)
    serialized = self.epistula.create_message_body(payload)
    signed_headers = self.epistula.generate_header(hotkey=keypair, body=serialized)

    update_url = f"http://{gjp_address}/jobs/update/{job_id}"
    resp = requests.post(update_url, headers=signed_headers, data=serialized)

    if resp.status_code != 200:
        raise ValueError(f"Failed to upload job: {resp.text}")

    return resp.json()["job_id"]
235+
205236
def get_all_pdbs(self) -> list:
206237
"""
207238
Retrieve all PDB IDs from the job store.
@@ -229,32 +260,55 @@ def upload_job(
229260
water: str,
230261
hotkeys: list,
231262
system_kwargs: dict,
232-
hotkey,
263+
keypair,
233264
gjp_address: str,
265+
epsilon: float,
266+
s3_links: Dict[str, str],
267+
**kwargs,
234268
):
235-
"""Upload a job to the database."""
236-
237-
body = {
238-
"pdb_id": pdb,
239-
"hotkeys": hotkeys,
240-
"system_config": {
241-
"ff": ff,
242-
"box": box,
243-
"water": water,
244-
"system_kwargs": system_kwargs,
245-
},
246-
"em_s3_link": "s3://path/to/em",
247-
"priority": 1,
248-
"organic": False
249-
}
250-
body_bytes = self.epistula.create_message_body(body)
251-
headers = self.epistula.generate_header(hotkey=hotkey, body=body_bytes)
269+
"""
270+
Upload a job to the global job pool database.
271+
272+
Args:
273+
pdb (str): The PDB ID of the job.
274+
ff (str): The force field configuration.
275+
box (str): The box configuration.
276+
water (str): The water configuration.
277+
hotkeys (list): A list of hotkeys.
278+
system_kwargs (dict): Additional system configuration arguments.
279+
keypair (Keypair): The keypair for generating headers.
280+
gjp_address (str): The address of the api server.
281+
event (dict): Additional event data.
252282
253-
response = requests.post(
254-
f"http://{gjp_address}/jobs", headers=headers, data=body_bytes
283+
Returns:
284+
str: The job ID of the uploaded job.
285+
286+
Raises:
287+
ValueError: If the job upload fails.
288+
"""
289+
job = Job(
290+
pdb=pdb,
291+
ff=ff,
292+
box=box,
293+
water=water,
294+
hotkeys=hotkeys,
295+
created_at=pd.Timestamp.now().floor("s"),
296+
updated_at=pd.Timestamp.now().floor("s"),
297+
epsilon=epsilon,
298+
system_kwargs=system_kwargs,
299+
s3_links=s3_links,
300+
**kwargs,
255301
)
302+
303+
body = get_epistula_body(job=job)
304+
305+
body_bytes = self.epistula.create_message_body(body)
306+
headers = self.epistula.generate_header(hotkey=keypair, body=body_bytes)
307+
308+
response = requests.post(f"http://{gjp_address}/jobs", headers=headers, data=body_bytes)
256309
if response.status_code != 200:
257310
raise ValueError(f"Failed to upload job: {response.text}")
311+
return response.json()["job_id"]
258312

259313

260314
# Keep the Job and MockJob classes as they are, they work well with both implementations
@@ -280,6 +334,9 @@ class Job:
280334
epsilon: float = 5 # percentage.
281335
event: dict = None
282336
system_kwargs: dict = None
337+
job_id: str = None
338+
s3_links: Dict[str, str] = None
339+
best_cpt_links: list = None
283340

284341
def to_dict(self):
285342
return asdict(self)
@@ -301,25 +358,21 @@ async def update(self, loss: float, hotkey: str, hotkeys: List[str] = None):
301358
self.updated_at = pd.Timestamp.now().floor("s")
302359
self.updated_count += 1
303360

304-
never_updated_better_loss = (
305-
np.isnan(percent_improvement) and loss < self.best_loss
306-
)
361+
never_updated_better_loss = np.isnan(percent_improvement) and loss < self.best_loss
307362
better_loss = percent_improvement >= self.epsilon
308363

309364
if never_updated_better_loss or better_loss:
310365
self.best_loss = loss
311366
self.best_loss_at = pd.Timestamp.now().floor("s")
312367
self.best_hotkey = hotkey
313368
elif (
314-
pd.Timestamp.now().floor("s") - self.best_loss_at
315-
> self.max_time_no_improvement
369+
pd.Timestamp.now().floor("s") - self.best_loss_at > self.max_time_no_improvement
316370
and self.updated_count >= self.min_updates
317371
):
318372
self.active = False
319373
elif (
320374
isinstance(self.best_loss_at, pd._libs.tslibs.nattype.NaTType)
321-
and pd.Timestamp.now().floor("s") - self.created_at
322-
> self.max_time_no_improvement
375+
and pd.Timestamp.now().floor("s") - self.created_at > self.max_time_no_improvement
323376
):
324377
self.active = False
325378

@@ -339,10 +392,9 @@ def __init__(self, n_hotkeys=5, update_seconds=5, stop_after_seconds=3600 * 24):
339392
self.box = "cube"
340393
self.water = "tip3p"
341394
self.hotkeys = self._make_hotkeys(n_hotkeys)
342-
self.created_at = (
343-
pd.Timestamp.now().floor("s")
344-
- pd.Timedelta(seconds=random.randint(0, 3600 * 24))
345-
).floor("s")
395+
self.created_at = (pd.Timestamp.now().floor("s") - pd.Timedelta(seconds=random.randint(0, 3600 * 24))).floor(
396+
"s"
397+
)
346398
self.updated_at = self.created_at
347399
self.best_loss = 0
348400
self.best_hotkey = random.choice(self.hotkeys)

folding/utils/epistula_utils.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import numpy as np
2+
from datetime import datetime, timezone
3+
import pandas as pd
4+
import json
5+
6+
7+
def get_epistula_body(job: "Job") -> dict:
    """Build the request payload for uploading or updating a job on the GJP server.

    Flattens a Job into the wire format the server expects: renames ``pdb`` to
    ``pdb_id``, nests the simulation settings under ``system_config``, JSON-encodes
    container fields, converts timedeltas to seconds, and normalizes sentinel
    values (``NaT``, ``None``, ``inf``) into serializable defaults.

    Args:
        job (Job): a Job object containing the job details.

    Returns:
        dict: the body of information needed to upload or update a job on the
            GJP server.
    """
    payload = job.to_dict()

    # The server keys the structure by "pdb_id", not "pdb".
    payload["pdb_id"] = payload.pop("pdb")

    # Fold the simulation settings into a nested "system_config" mapping.
    system_config = {}
    for field in ("ff", "box", "water", "system_kwargs"):
        system_config[field] = payload.pop(field)
    payload["system_config"] = system_config

    # Container fields travel as JSON strings.
    payload["s3_links"] = json.dumps(payload.pop("s3_links"))

    # Defaults for fields a Job may not carry.
    payload["priority"] = payload.get("priority", 1)
    payload["is_organic"] = payload.get("is_organic", False)

    # Timedeltas are transmitted as plain seconds.
    for field in ("update_interval", "max_time_no_improvement"):
        payload[field] = payload.pop(field).total_seconds()

    # A job that has never improved has NaT here; substitute the current UTC time.
    if not pd.notna(payload["best_loss_at"]):
        payload["best_loss_at"] = datetime.now(timezone.utc).isoformat()

    payload["event"] = json.dumps(payload.pop("event"))

    # Normalize unset sentinels into serializable values.
    if payload["best_hotkey"] is None:
        payload["best_hotkey"] = ""
    if payload["best_loss"] == np.inf:
        payload["best_loss"] = 0

    return payload

0 commit comments

Comments
 (0)