1
+ import os
1
2
import time
2
3
import glob
3
- import os
4
- import pickle
4
+ import base64
5
5
import random
6
- import re
7
6
import shutil
8
7
from collections import defaultdict
9
8
from dataclasses import dataclass
10
9
from pathlib import Path
11
10
from typing import Dict , List , Literal
12
- import base64
13
11
14
- import plotly .express as px
15
12
import bittensor as bt
16
- import openmm as mm
17
- import pandas as pd
18
13
import numpy as np
14
+ import pandas as pd
15
+ import plotly .express as px
19
16
from openmm import app , unit
20
17
from pdbfixer import PDBFixer
18
+
19
+ from folding .base .simulation import OpenMMSimulation
21
20
from folding .store import Job
22
21
from folding .utils .opemm_simulation_config import SimulationConfig
23
22
from folding .utils .ops import (
23
+ OpenMMException ,
24
+ ValidationError ,
24
25
check_and_download_pdbs ,
25
26
check_if_directory_exists ,
26
27
load_pdb_ids ,
27
28
select_random_pdb_id ,
28
29
write_pkl ,
29
30
)
30
- from folding .store import Job
31
- from folding .base .simulation import OpenMMSimulation
32
31
33
32
ROOT_DIR = Path (__file__ ).resolve ().parents [2 ]
34
33
@@ -118,8 +117,8 @@ def from_job(job: Job, config: Dict):
118
117
bt .logging .error (
119
118
f"from_job failed for { protein .pdb_id } with Exception { E } ."
120
119
)
121
- finally :
122
- return protein
120
+ return None
121
+ return protein
123
122
124
123
@staticmethod
125
124
def load_pdb_as_string (pdb_path : str ) -> str :
@@ -185,7 +184,7 @@ def read_and_return_files(self, filenames: List) -> Dict:
185
184
name
186
185
] = f .read () # This would be the pdb file.
187
186
188
- except Exception as E :
187
+ except Exception :
189
188
continue
190
189
return files_to_return
191
190
@@ -221,6 +220,13 @@ def setup_simulation(self):
221
220
222
221
self .pdb_complexity = Protein ._get_pdb_complexity (self .pdb_location )
223
222
self .init_energy = self .calc_init_energy ()
223
+
224
+ # Checking if init energy is nan
225
+ if np .isnan (self .init_energy ):
226
+ raise OpenMMException (
227
+ f"Failed to calculate initial energy for { self .pdb_id } "
228
+ )
229
+
224
230
self ._calculate_epsilon ()
225
231
226
232
def __str__ (self ):
@@ -355,6 +361,9 @@ def get_miner_data_directory(self, hotkey: str):
355
361
def process_md_output (
356
362
self , md_output : dict , seed : int , state : str , hotkey : str
357
363
) -> bool :
364
+ MIN_LOGGING_ENTRIES = 500
365
+ MIN_SIMULATION_STEPS = 5000
366
+
358
367
required_files_extensions = ["cpt" , "log" ]
359
368
hotkey_alias = hotkey [:8 ]
360
369
self .current_state = state
@@ -409,11 +418,16 @@ def process_md_output(
409
418
self .log_file = pd .read_csv (log_file_path )
410
419
self .log_step = self .log_file ['#"Step"' ].iloc [- 1 ]
411
420
421
+ # Checks to see if we have enough steps in the log file to start validation
422
+ if len (self .log_file ) < MIN_LOGGING_ENTRIES :
423
+ raise ValidationError (
424
+ f"Miner { hotkey_alias } did not run enough steps in the simulation... Skipping!"
425
+ )
426
+
412
427
# Make sure that we are enough steps ahead in the log file compared to the checkpoint file.
413
- # Checks if log_file is 5000 steps ahead of checkpoint AND that the log_file has at least 5000 steps
414
- if (
415
- self .log_step - self .simulation .currentStep
416
- ) < 5000 and len (self .log_file ) >= 5000 :
428
+ # Checks if log_file is at least MIN_SIMULATION_STEPS steps ahead of the checkpoint
429
+ if (self .log_step - self .simulation .currentStep ) < MIN_SIMULATION_STEPS :
430
+ # If the miner did not run enough steps, we will load the old checkpoint
417
431
checkpoint_path = os .path .join (
418
432
self .miner_data_directory , f"{ self .current_state } _old.cpt"
419
433
)
@@ -422,13 +436,17 @@ def process_md_output(
422
436
f"Miner { hotkey_alias } did not run enough steps since last checkpoint... Loading old checkpoint"
423
437
)
424
438
self .simulation .loadCheckpoint (checkpoint_path )
439
+ # Checking to see if the old checkpoint has enough steps to validate
440
+ if (
441
+ self .log_step - self .simulation .currentStep
442
+ ) < MIN_SIMULATION_STEPS :
443
+ raise ValidationError (
444
+ f"Miner { hotkey_alias } did not run enough steps in the simulation... Skipping!"
445
+ )
425
446
else :
426
- bt . logging . warning (
447
+ raise ValidationError (
427
448
f"Miner { hotkey_alias } did not run enough steps and no old checkpoint found... Skipping!"
428
449
)
429
- return False
430
- else :
431
- self .simulation .loadCheckpoint (checkpoint_path )
432
450
433
451
self .cpt_step = self .simulation .currentStep
434
452
self .checkpoint_path = checkpoint_path
@@ -444,6 +462,10 @@ def process_md_output(
444
462
write_mode = "wb" ,
445
463
)
446
464
465
+ except ValidationError as E :
466
+ bt .logging .warning (f"{ E } " )
467
+ return False
468
+
447
469
except Exception as e :
448
470
bt .logging .error (f"Failed to recreate simulation: { e } " )
449
471
return False
@@ -500,7 +522,6 @@ def is_run_valid(self):
500
522
501
523
# calculating absolute percent difference per step
502
524
percent_diff = abs (((check_energies - miner_energies ) / miner_energies ) * 100 )
503
- min_length = len (percent_diff )
504
525
505
526
# This is some debugging information for plotting the information from the miner.
506
527
df = pd .DataFrame ([check_energies , miner_energies ]).T
@@ -559,7 +580,7 @@ def remove_pdb_directory(self):
559
580
"""
560
581
shutil .rmtree (self .pdb_directory )
561
582
562
- def calc_init_energy (self ):
583
+ def calc_init_energy (self ) -> float :
563
584
"""Calculate the potential energy from an edr file using gmx energy.
564
585
Args:
565
586
output_dir (str): directory containing the edr file
0 commit comments