Skip to content

Commit 2537367

Browse files
committed
Run second pretraining iteration
1 parent 3ff862e commit 2537367

File tree

5 files changed

+219
-466
lines changed

5 files changed

+219
-466
lines changed

experiments/plantcad/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ rm -rf local_store/evaluation/dna-conservation*; python -m experiments.plantcad.
121121

122122
# Checkpoint upload
123123
find local_store | grep -E 'hf/step-[0-9]+$' | xargs -I {} echo "hf upload plantcad/_dev_marin_plantcad1_v2_train {} {} --repo-type model" | bash /dev/stdin
124+
125+
find local_store | grep -E 'checkpoints/step-[0-9]+$' | \
126+
grep -E 'step-26780$|step-24102$|step-21424$|step-18746$' | \
127+
xargs -I {} echo "hf upload plantcad/_dev_marin_plantcad1_v2_train {} {} --repo-type model" | \
128+
bash /dev/stdin
124129
```
125130

126131
```bash
@@ -151,6 +156,12 @@ Second iteration:
151156
0.604521 8034 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-8034
152157
0.626729 10712 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-10712
153158
0.631095 13390 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-13390
159+
0.607316 16068 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-16068
160+
0.622988 18746 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-18746
161+
0.639093 21424 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-21424
162+
0.650973 24102 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-24102
163+
0.657882 26780 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-26780
164+
0.657452 26782 hf://plantcad/_dev_marin_plantcad1_v2_train/local_store/checkpoints/plantcad-train-600m-r12-7ea0fc/hf/step-26782
154165
```
155166

156167
## EDA

experiments/plantcad/evaluation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ def evaluate_conservation_scores(scores: ConservationResult) -> dict[str, float]
660660
logger.info("EVALUATION RESULTS")
661661
logger.info("=" * 50)
662662
logger.info(f"Total examples: {results['n_total']}")
663-
logger.info(f"ROC AUC: {results['roc_auc']:.4f}")
663+
logger.info(f"ROC AUC: {results['roc_auc']:.6f}")
664664
logger.info(f"Balance: {results['balance']:.3f} ({results['n_positive']}/{results['n_total']})")
665665
logger.info("=" * 50)
666666

experiments/plantcad/misc/agg_eval_results.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121

2222
def main():
2323
data = []
24-
base_dir = "/home/ubuntu/sky_workdir/local_store/evaluation"
24+
base_dir = "~/sky_workdir/local_store/evaluation"
2525

26-
for eval_dir in Path(base_dir).glob("dna-conservation-*"):
26+
for eval_dir in Path(base_dir).expanduser().glob("dna-conservation-*"):
2727
with open(eval_dir / "results.json") as f:
2828
result = json.load(f)
2929
config = result.get("config", {})

0 commit comments

Comments
 (0)