ProteinGym · tintinrevient · Oct 16, 2025 · Oct 10, 2025 · Oct 10, 2025 · Oct 10, 2025
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
diff --git a/.github/workflows/cml.yaml b/.github/workflows/cml.yaml
@@ -15,31 +15,37 @@ jobs:
 
       - uses: iterative/setup-cml@v2
 
-      - uses: astral-sh/setup-uv@v1
-
       - uses: actions/setup-python@v4
         with:
-          python-version-file: ".python-version"
+          python-version: '3.12'
+
+      - name: Create virtual environment
+        run: python -m venv .venv
+
+      - name: Activate virtual environment and install requirements
+        run: |
+          echo "$PWD/.venv/bin" >> "$GITHUB_PATH"  # each run step is a fresh shell; persist the venv on PATH for later steps
+          .venv/bin/pip install -r requirements.txt
 
       - name: Generate datasets and models
         run: |
           # Supervised game: Generate datasets and models
-          uv run proteingym-base list-datasets datasets | jq 'map(select(.name == "charge_ladder" or .name == "NEIME_2019")) | map({name: .name, input_filename: .input_filename}) | . = {"datasets": .}' > benchmark/supervised/local/datasets.json
-          uv run proteingym-base list-models models | jq 'map(select(.name == "pls")) | map({name: .name, input_filename: .input_filename}) | . = {"models": .}' > benchmark/supervised/local/models.json
+          proteingym-base list-datasets datasets | jq 'map(select(.name == "charge_ladder" or .name == "NEIME_2019")) | map({name: .name, input_filename: .input_filename}) | . = {"datasets": .}' > benchmark/supervised/local/datasets.json
+          proteingym-base list-models models | jq 'map(select(.name == "pls")) | map({name: .name, input_filename: .input_filename}) | . = {"models": .}' > benchmark/supervised/local/models.json
 
           # Zero-shot game: Generate datasets and models
-          uv run proteingym-base list-datasets datasets | jq 'map(select(.name == "ranganathan")) | map({name: .name, input_filename: .input_filename}) | . = {"datasets": .}' > benchmark/zero_shot/local/datasets.json
-          uv run proteingym-base list-models models | jq 'map(select(.name == "esm")) | map({name: .name, input_filename: .input_filename}) | . = {"models": .}' > benchmark/zero_shot/local/models.json
+          proteingym-base list-datasets datasets | jq 'map(select(.name == "ranganathan")) | map({name: .name, input_filename: .input_filename}) | . = {"datasets": .}' > benchmark/zero_shot/local/datasets.json
+          proteingym-base list-models models | jq 'map(select(.name == "esm")) | map({name: .name, input_filename: .input_filename}) | . = {"models": .}' > benchmark/zero_shot/local/models.json
 
       - name: Run model validation
         env:
           repo_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
         run: | 
           # Supervised game: Run DVC repro
-          uv run dvc repro benchmark/supervised/local/dvc.yaml
+          dvc repro benchmark/supervised/local/dvc.yaml
 
           # Zero-shot game: Run DVC repro
-          uv run dvc repro benchmark/zero_shot/local/dvc.yaml
+          dvc repro benchmark/zero_shot/local/dvc.yaml
 
           # Create report with metrics
           # Pick one supervised model

diff --git a/.gitignore b/.gitignore
@@ -4,23 +4,14 @@ metric
 logs
 
 # Python-generated files
-__pycache__/
-.pytest_cache/
-*.py[oc]
-build/
 dist/
-wheels/
-*.egg-info
-.ruff_cache/
+__pycache__/
 
 # Virtual environments
 .venv
 
-# MasOS
+# macOS
 .DS_Store
 
-# Git
-git-auth.txt
-
 # Environment variables
 .env
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -11,29 +11,3 @@ repos:
   rev: v5.0.0
   hooks:
     - id: check-merge-conflict
-
-- repo: https://github.com/abravalheri/validate-pyproject
-  rev: v0.24.1
-  hooks:
-    - id: validate-pyproject
-
-- repo: https://github.com/pycqa/isort
-  rev: 5.13.2
-  hooks:
-    - id: isort
-      args: [--profile=black]
-
-- repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.11.7
-  hooks:
-    - id: ruff-format
-    - id: ruff
-      args: [--fix, --exit-non-zero-on-fix]
-      types_or: [python, pyi]
-      require_serial: true
-
-- repo: https://github.com/astral-sh/uv-pre-commit
-  rev: 0.6.1
-  hooks:
-    - id: uv-lock
-    - id: uv-sync
diff --git a/.python-version b/.python-version
diff --git a/README.md b/README.md
@@ -1,13 +1,5 @@
 # ProteinGym2 Benchmark
 
-## Getting started
-
-Before you start, you need to create a `git-auth.txt` file in two folders respectively - [supervised](supervised) and [zero_shot](zero_shot):
-
-```
-https://username:token@github.com
-```
-
 ## Models
 
 The models are included in the [models](models/) folder, where each model occupies a subfolder as its repo.
@@ -18,14 +10,6 @@ A model repo contains its README.md as a model card, which comes in two parts:
 
 For more information, you can reference Hugging Face's [model cards](https://huggingface.co/docs/hub/en/model-cards).
 
-### Model validation
-
-You can validate if your model will work with Protein Gym benchmark:
-
-```shell
-$ uv run proteingym-benchmark validate <your_model_package_path>
-```
-
 ## Datasets
 
 The datasets are included in the [dataset](datasets/) folder, where each dataset goes into a subfolder.
@@ -61,14 +45,14 @@ for dataset in datasets:
 
 You can benchmark a group of supervised models:
 ```shell
-uv run dvc repro benchmark/supervised/local/dvc.yaml
+dvc repro benchmark/supervised/local/dvc.yaml
 ```
 
 #### Zero-shot
 
 You can benchmark a group of zero-shot models:
 ```shell
-uv run dvc repro benchmark/zero_shot/local/dvc.yaml
+dvc repro benchmark/zero_shot/local/dvc.yaml
 ```
 
 ### AWS environment
@@ -100,23 +84,12 @@ The difference of the AWS environment is that:
 
 You can benchmark a group of supervised models:
 ```shell
-AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy uv run dvc repro benchmark/supervised/aws/dvc.yaml
+AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy dvc repro benchmark/supervised/aws/dvc.yaml
 ```
 
 #### Zero-shot
 
 You can benchmark a group of zero-shot models:
 ```shell
-AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy uv run dvc repro benchmark/zero_shot/aws/dvc.yaml
-```
-
-## Generate dummy data
-
-You can generate dummy data by the following command:
-```shell
-uv run proteingym-benchmark dataset generate-dummy-data supervised/data/dummy/charge_ladder.csv --n-rows 5 --sequence-length 100
+AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy dvc repro benchmark/zero_shot/aws/dvc.yaml
 ```
-
-
-
-
diff --git a/models/esm/README.md b/models/esm/README.md
@@ -1,8 +1,10 @@
 ---
 # Model identifier used for referencing this model in the benchmark system
-name: "esm"
+name: esm
 
-hyper_params:
+tags: ["zero-shot"]
+
+hyper_parameters:
     # HuggingFace model checkpoint identifier for the specific ESM-2 variant
     location: "esm2_t30_150M_UR50D"
     # Scoring method: calculates marginal probabilities for wild-type amino acids

diff --git a/models/pls/README.md b/models/pls/README.md
@@ -1,8 +1,10 @@
 ---
 # Model identifier used for referencing this model in the benchmark system
-name: "pls"
+name: pls
 
-hyper_params:
+tags: ["supervised"]
+
+hyper_parameters:
     # Number of PLS components to extract (dimensionality of the reduced space)
     n_components: 2
     # Standard 20 amino acid single-letter codes

diff --git a/pyproject.toml b/pyproject.toml
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,6 @@
 polars[pyarrow]>=1.30.0,<2.0.0
 pycm==4.4
-boto3>=1.38.27
+boto3>=1.38.27
+dvc>=3.59.2
+pre-commit>=4.2.0
+dist/proteingym_base-0.1.0b1-py3-none-any.whl
diff --git a/src/proteingym/benchmark/__about__.py b/src/proteingym/benchmark/__about__.py
diff --git a/src/proteingym/benchmark/__init__.py b/src/proteingym/benchmark/__init__.py