Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions ci/benchmarks/partial-conv/evo2_finetuning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,11 @@ script: |-
--devices=${gpus} \
--num-nodes=${nodes} \
--val-check-interval=${val_check_interval} \
--wandb-project=${wandb_project_name} \
--wandb-group=${model}_${variant}_${config_name}_${task}_${target} \
--create-tensorboard-logger \
--activation-checkpoint-recompute-num-layers=${activation_checkpoint_layers} \
--disable-checkpointing \
--early-stop-on-step=${stop_steps} \
--wandb-project=${wandb_project_name} \
--wandb-group=${model}_${variant}_${config_name}_${task}_${target} \
--wandb-job-type=${pipeline_label} \
--garbage-collect-at-inference;
16 changes: 14 additions & 2 deletions ci/benchmarks/perf/geneformer_pretrain.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,20 @@ script_args:
batch_size: 32

script: |-
WANDB_API_KEY=$BIONEMO_WANDB_API_KEY ${variant}_${model} \
--data-dir ${data_path} \
# Stage the dataset into node-local shared memory once per node.
# Only the task whose SLURM_LOCALID is 0 performs the copy; every task then
# polls for a marker file before launching training.
# NOTE(review): the markers live in /tmp and are never removed here, so a
# marker left behind by an earlier job on the same node would short-circuit
# the wait - confirm /tmp is not shared across jobs.
COPY_FLAG="/tmp/copy_done_${{SLURMD_NODENAME}}"
COPY_FAILED_FLAG="$COPY_FLAG.failed"
NEW_DATA_PATH="/dev/shm/data_path_${{SLURMD_NODENAME}}"
if [ "$SLURM_LOCALID" = "0" ]; then
  df -h
  echo "$NEW_DATA_PATH"
  # Publish the success marker only if the copy really succeeded; otherwise
  # publish a failure marker so the waiting ranks stop instead of training on
  # a missing or partial dataset.
  if time cp -r "${data_path}/" "$NEW_DATA_PATH"; then
    touch "$COPY_FLAG"
  else
    echo "Copying ${data_path} to $NEW_DATA_PATH failed" >&2
    touch "$COPY_FAILED_FLAG"
    exit 1
  fi
fi
# All ranks wait until the copy marker appears, and abort if the copy failed.
while [ ! -f "$COPY_FLAG" ]; do
  if [ -f "$COPY_FAILED_FLAG" ]; then
    echo "Dataset copy failed on this node; aborting" >&2
    exit 1
  fi
  sleep 1
done
WANDB_API_KEY=$BIONEMO_WANDB_API_KEY ${variant}_${model} \
--data-dir $NEW_DATA_PATH \
--experiment-name ${batch_size}bs_${nodes}node_${gpus}gpu_${max_steps}s_${precision}prec \
--num-gpus ${gpus} \
--save-last-checkpoint \
Expand Down