17 changes: 14 additions & 3 deletions README.md
@@ -38,12 +38,23 @@ You can reference [this guide](https://github.com/ProteinGym2/pg2-dataset?tab=re

The benchmark is defined in the [benchmark](benchmark/) folder, which contains two games: supervised and zero-shot.

First, select the models and datasets for each game with the command below before running DVC:

- `-g` or `--game`: You can choose either `supervised` or `zero_shot` for the game.
- `-e` or `--env`: You can choose either `local` or `aws` environment to run the benchmarking.

```
$ uv run pg2-benchmark select models datasets -g supervised -e local
```

This is an interactive tool for choosing model and dataset permutations. Once you confirm your choices, `dvc.yaml` and `params.yaml`, the standard DVC configuration files, are generated in the environment-specific location, i.e., the `benchmark/<game>/<env>` folder.
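For reference, the generated `params.yaml` for the local environment looks roughly like this (a sketch assembled from the keys used elsewhere in this repo; values are illustrative):

```yaml
# Sketch of a generated params.yaml (local environment).
# Paths are relative to benchmark/<game>/local/.
source:
  datasets_dir: ../../../datasets
  models_dir: ../../../models

destination:
  output_dir: output
  metric_dir: metric
```

DVC then substitutes these values wherever `${source.datasets_dir}` and friends appear in `dvc.yaml`.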

### Local environment

There are two games to benchmark: supervised and zero-shot. Each game has its selected list of models and datasets defined in `dvc.yaml`.
There are two games to benchmark: supervised and zero-shot. Each game has its selected list of models and datasets defined in [benchmark/models](benchmark/models) and [benchmark/datasets](benchmark/datasets).

- Supervised game is defined in this [dvc.yaml](supervised/local/dvc.yaml)
- Zero-shot game is defined in this [dvc.yaml](zero_shot/local/dvc.yaml)
- Supervised game is defined in this Jinja template: [dvc.yaml.jinja](benchmark/supervised/local/dvc.yaml.jinja)
- Zero-shot game is defined in this Jinja template: [dvc.yaml.jinja](benchmark/zero_shot/local/dvc.yaml.jinja)
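To see how such a template turns selections into `dvc.yaml` entries, here is a minimal, self-contained sketch of the rendering step (the template excerpt is simplified from the real one; the dataset value is illustrative):

```python
from jinja2 import Template

# Tiny excerpt of the dvc.yaml.jinja pattern (simplified for illustration;
# the real template lives in benchmark/<game>/local/dvc.yaml.jinja).
template = Template(
    "datasets:\n"
    "{%- for dataset in datasets %}\n"
    "  - name: {{ dataset.name }}\n"
    "{%- endfor %}\n"
)

# `datasets` would come from your interactive selections.
rendered = template.render(datasets=[{"name": "ranganathan"}])
print(rendered)
```

The `{%-` markers strip the preceding newline so the rendered YAML stays compact.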

The models and datasets are defined in `vars` at the top, and DVC expands `vars` into a matrix, i.e., a loop over every model/dataset combination, as in the following pseudo-code:
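A minimal Python sketch of what that matrix loop amounts to (the model, dataset, and stage names here are illustrative, taken from the examples in this repo):

```python
from itertools import product

# Illustrative values only; the real lists live in params.yaml and are
# spliced into the `vars` section of dvc.yaml.
models = [{"name": "esm"}]
datasets = [{"name": "ranganathan"}]

# DVC's `matrix` keyword expands a stage into one instance per
# combination, roughly equivalent to this nested loop:
stage_names = [
    f"create_training_job@{d['name']}-{m['name']}"
    for d, m in product(datasets, models)
]
print(stage_names)
```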

195 changes: 0 additions & 195 deletions benchmark/supervised/aws/dvc.lock

This file was deleted.

@@ -1,24 +1,28 @@
vars:
- params.yaml

- datasets:
- name: ranganathan
aws_prefix: ranganathan
{%- for dataset in datasets %}
- name: {{ dataset.name }}
aws_prefix: {{ dataset.aws_prefix }}
{%- endfor %}

- models:
- name: esm
aws_prefix: esm
dockerfile: ../../../models/esm/Dockerfile
{%- for model in models %}
- name: {{ model.name }}
aws_prefix: {{ model.aws_prefix }}
dockerfile: {{ model.dockerfile }}
{%- endfor %}

stages:

setup:
cmd:
- mkdir -p logs ${destination.output_dir} ${destination.metric_dir}
- echo "Created local directories" > logs/setup.txt
outs:
- logs/setup.txt

upload_to_s3:
cmd:
- aws s3 cp ${source.datasets_dir}/ s3://${aws.s3_training_data_prefix}/datasets --recursive --exclude "*" --include "*.zip"
@@ -31,7 +35,7 @@ stages:
outs:
- logs/s3_upload_complete.txt:
cache: true

deploy_to_ecr:
matrix:
model: ${models}
@@ -51,10 +55,10 @@ stages:
cache: true

create_training_job:
matrix:
dataset: ${datasets}
model: ${models}

cmd: >
uv run pg2-benchmark sagemaker create-training-job
--model-name ${item.model.name}
@@ -74,35 +78,35 @@ stages:
- logs/s3_upload_complete.txt
- logs/ecr_push_complete.txt
- logs/image_uri.txt

outs:
- logs/create_job_${item.dataset.name}_${item.model.name}.txt:
cache: true

monitor_training_job:
matrix:
dataset: ${datasets}
model: ${models}

cmd: >
uv run pg2-benchmark sagemaker monitor-training-job
--region-name ${aws.region_name}
--job-name $(cat logs/create_job_${item.dataset.name}_${item.model.name}.txt)
> logs/monitor_job_${item.dataset.name}_${item.model.name}.txt

deps:
- ../../../src/pg2_benchmark/cli/sagemaker.py
- logs/create_job_${item.dataset.name}_${item.model.name}.txt

outs:
- logs/monitor_job_${item.dataset.name}_${item.model.name}.txt:
cache: true

calculate_metric:
matrix:
dataset: ${datasets}
model: ${models}

cmd:
- aws s3 cp s3://${aws.s3_output_prefix}/$(cat logs/create_job_${item.dataset.name}_${item.model.name}.txt)/output/model.tar.gz ${destination.output_dir}/
- tar -xzf ${destination.output_dir}/model.tar.gz -C ${destination.output_dir}/
@@ -11,9 +11,9 @@ git:
git_cache_bust: 1

source:
datasets_dir: ../../../datasets
models_dir: ../../../models
datasets_dir: {{ datasets_dir }}
models_dir: {{ models_dir }}

destination:
output_dir: output
metric_dir: metric