Skip to content

Commit 621182b

Browse files
authored
Merge pull request #375 from macrocosm-os/staging
Staging
2 parents 22a8839 + 9b0d93c commit 621182b

File tree

11 files changed

+69
-44
lines changed

11 files changed

+69
-44
lines changed

.env.example

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
S3_REGION=nyc3
22
S3_ENDPOINT=https://nyc3.digitaloceanspaces.com
3+
S3_BUCKET=sn25-folding-mainnet
34
S3_KEY=s3_key
45
S3_SECRET=secret_key
56
RQLITE_HTTP_ADDR=0.0.0.0:4001

documentation/get_started.md

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ Bash install will use poetry to build the environment correctly.
5151
SN25 uses DigitalOcean S3 data buckets for data transfer. Therefore, the following environment variables need to be set in your system or application environment (`.env` file):
5252
- `S3_REGION = "nyc3"`: The AWS region or S3-compatible region where the bucket is located.
5353
- `S3_ENDPOINT = "https://nyc3.digitaloceanspaces.com"`: The endpoint URL for your S3-compatible service.
54+
- `S3_BUCKET = "sn25-folding-mainnet"`: The name of the S3 bucket.
5455
- `S3_KEY`: Your S3 access key ID.
5556
- `S3_SECRET`: Your S3 secret access key.
5657

folding/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "2.1.2"
1+
__version__ = "2.1.3"
22
version_split = __version__.split(".")
33
__spec_version__ = (
44
(10000 * int(version_split[0]))

folding/registries/miner_registry.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,29 @@ def add_credibilities(self, miner_uid: int, task: str, credibilities: List[float
5656

5757
def update_credibility(self, miner_uid: int, task: str):
5858
"""
59-
Updates the credibility of a miner based:
60-
1. The credibility of the miner's previous results. Intially set as STARTING_CREDIBILITY
59+
Updates the credibility of a miner based on:
60+
1. The credibility of the miner's previous results. Initially set as STARTING_CREDIBILITY
6161
2. The credibility of the miner's current results.
6262
3. The number of previous and current entries to act as a weighting factor
6363
4. The EMA with credibility_alpha as the smoothing factor
64+
65+
If the miner_uid doesn't exist in the registry, it will be instantiated first.
66+
67+
Args:
68+
miner_uid (int): The unique identifier of the miner
69+
task (str): The task name to update credibility for
6470
"""
71+
# Check if miner_uid exists, if not instantiate it
72+
if miner_uid not in self.registry:
73+
self.registry[miner_uid] = {}
74+
self.registry[miner_uid]["overall_credibility"] = c.STARTING_CREDIBILITY
75+
for task_name in self.tasks:
76+
self.registry[miner_uid][task_name] = {
77+
"credibility": c.STARTING_CREDIBILITY,
78+
"credibilities": [],
79+
"score": 0.0,
80+
"results": [],
81+
}
6582

6683
task_credibilities = list(
6784
chain.from_iterable(self.registry[miner_uid][task]["credibilities"])

folding/utils/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def add_args(cls, parser):
254254
"--s3.bucket_name",
255255
type=str,
256256
help="The name of the S3 bucket to log to.",
257-
default="vali-s3-demo-do",
257+
default="sn25-folding-mainnet",
258258
)
259259

260260

folding/utils/s3_utils.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ async def upload_to_s3(
8686
try:
8787
s3_links = {}
8888
input_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
89-
89+
s3_endpoint = os.getenv("S3_ENDPOINT")
90+
s3_bucket = os.getenv("S3_BUCKET")
9091
for file_type in ["pdb", "cpt"]:
9192
if file_type == "cpt":
9293
file_path = os.path.join(validator_directory, simulation_cpt)
@@ -104,9 +105,7 @@ async def upload_to_s3(
104105
location=location,
105106
public=True,
106107
)
107-
s3_links[file_type] = os.path.join(
108-
"https://nyc3.digitaloceanspaces.com/vali-s3-demo-do/", key
109-
)
108+
s3_links[file_type] = os.path.join(f"{s3_endpoint}/{s3_bucket}/", key)
110109
await asyncio.sleep(0.10)
111110

112111
return s3_links
@@ -138,6 +137,9 @@ async def upload_output_to_s3(
138137
Raises:
139138
Exception: If any error occurs during file upload.
140139
"""
140+
s3_endpoint = os.getenv("S3_ENDPOINT")
141+
s3_bucket = os.getenv("S3_BUCKET")
142+
141143
try:
142144
output_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
143145
location = os.path.join(
@@ -149,7 +151,7 @@ async def upload_output_to_s3(
149151
location=location,
150152
public=True,
151153
)
152-
return os.path.join("https://nyc3.digitaloceanspaces.com/vali-s3-demo-do/", key)
154+
return os.path.join(f"{s3_endpoint}/{s3_bucket}/", key)
153155
except Exception as e:
154156
logger.error(f"Exception during output file upload: {str(e)}")
155157
raise

folding/validators/forward.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ async def try_prepare_md_challenge(self, config, pdb_id: str) -> Dict:
297297
if "validator_search_status" not in event:
298298
if not config.s3.off:
299299
try:
300-
logger.info(f"Uploading to {self.handler}")
300+
logger.info(f"Uploading to {self.handler.bucket_name}")
301301
s3_links = await upload_to_s3(
302302
handler=self.handler,
303303
pdb_location=protein.pdb_location,

folding/validators/reward.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,10 @@ def evaluate(
5050
responses: List[JobSubmissionSynapse],
5151
uids: List[int],
5252
job_type: str,
53-
event: dict,
5453
):
5554
reported_energies = np.zeros(len(uids))
5655
evaluators = [None] * len(uids)
57-
seed = []
56+
seed = [-1] * len(uids)
5857
best_cpt = [""] * len(uids)
5958
process_md_output_time = [0.0] * len(uids)
6059

@@ -67,6 +66,7 @@ def evaluate(
6766
continue
6867

6968
start_time = time.time()
69+
seed[i] = resp.miner_seed
7070
evaluator = EVALUATION_REGISTRY[job_type](
7171
pdb_id=protein.pdb_id,
7272
pdb_location=protein.pdb_location,
@@ -82,7 +82,6 @@ def evaluate(
8282
can_process = evaluator.evaluate()
8383
if not can_process:
8484
continue
85-
seed.append(resp.miner_seed)
8685
best_cpt[i] = (
8786
evaluator.checkpoint_path
8887
if hasattr(evaluator, "checkpoint_path")
@@ -135,7 +134,7 @@ def get_energies(
135134

136135
# Get initial evaluations
137136
reported_energies, evaluators, seed, best_cpt, process_md_output_time = evaluate(
138-
protein, responses, uids, job_type, event
137+
protein, responses, uids, job_type
139138
)
140139

141140
# Sort all lists by reported energy
@@ -157,7 +156,7 @@ def get_energies(
157156
unique_energies = set() # Track unique energy values
158157

159158
# Process responses until we get TOP_K valid non-duplicate ones or run out of responses
160-
for i, (reported_energy, response, uid, evaluator, s, bc, pmt) in enumerate(
159+
for i, (reported_energy, response, uid, evaluator, seed, best_cpt, process_md_output_time) in enumerate(
161160
sorted_data
162161
):
163162
try:

neurons/validator.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from folding.store import Job, SQLiteJobStore
2727
from folding.utils.logger import logger
2828
from folding.utils.logging import log_event
29-
from folding.utils.uids import get_random_uids
29+
from folding.utils.uids import get_all_miner_uids
3030
from folding.utils.s3_utils import upload_output_to_s3
3131
from folding.utils.s3_utils import DigitalOceanS3Handler
3232
from folding.validators.forward import create_new_challenge, run_ping_step, run_step
@@ -46,9 +46,7 @@ def __init__(self, config=None):
4646

4747
# Sample all the uids on the network, and return only the uids that are non-valis.
4848
logger.info("Determining all miner uids...⏳")
49-
self.all_miner_uids: List = get_random_uids(
50-
self, k=int(self.metagraph.n), exclude=None
51-
).tolist()
49+
self.all_miner_uids: List = get_all_miner_uids(self)
5250

5351
# If we do not have any miner registry saved to the machine, create.
5452
if not hasattr(self, "miner_registry"):
@@ -316,14 +314,15 @@ async def update_job(self, job: Job):
316314
# If there is an exploit on the cpt file detected via the state-checkpoint, reduce score.
317315
if reason == "state-checkpoint":
318316
logger.warning(
319-
f"Setting uid {uid} score to zero, State-checkpoint check failed."
317+
f"Reducing uid {uid} score, State-checkpoint check failed."
320318
)
321319
self.scores[uid] = 0.5 * self.scores[uid]
322320

323-
credibility = [0.0] if reason != "" else [1.0]
321+
credibility = [0.0] if reason != "valid" else [1.0]
324322
self.miner_registry.add_credibilities(
325323
miner_uid=uid, task=job.job_type, credibilities=credibility
326324
)
325+
self.miner_registry.update_credibility(miner_uid=uid, task=job.job_type)
327326

328327
best_index = np.argmin(energies)
329328
best_loss = energies[best_index].item() # item because it's a torch.tensor
@@ -487,7 +486,6 @@ async def update_jobs(self):
487486
await asyncio.sleep(self.config.neuron.update_interval)
488487

489488
logger.info("Updating jobs.")
490-
logger.info(f"step({self.step}) block({self.block})")
491489

492490
for job in self.store.get_queue(
493491
ready=True, validator_hotkey=self.wallet.hotkey.ss58_address
@@ -513,6 +511,8 @@ async def update_jobs(self):
513511
# Determine the status of the job based on the current energy and the previous values (early stopping)
514512
# Update the DB with the current status
515513
await self.update_job(job=job)
514+
logger.info(f"step({self.step}) block({self.block})")
515+
516516
except Exception as e:
517517
logger.error(f"Error in update_jobs: {traceback.format_exc()}")
518518

@@ -571,7 +571,12 @@ async def monitor_db(self):
571571
while True:
572572
try:
573573
await asyncio.sleep(60)
574-
outdated = await self.store.monitor_db()
574+
try:
575+
outdated = await self.store.monitor_db()
576+
except Exception as e:
577+
logger.error(f"Error in monitor_db: {traceback.format_exc()}")
578+
await self.start_rqlite()
579+
575580
if outdated:
576581
logger.error("Database is outdated. Restarting rqlite.")
577582
await self.start_rqlite()

0 commit comments

Comments (0)