Skip to content

Commit 64c972d

Browse files
committed
Add pg2-benchmark validate cmd
1 parent 74985a6 commit 64c972d

4 files changed

Lines changed: 162 additions & 8 deletions

File tree

README.md

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,21 @@ A model repo contains its README.md as a model card, which comes in two parts:
1818

1919
For more information, you can reference Hugging Face's [model cards](https://huggingface.co/docs/hub/en/model-cards).
2020

21+
In order to validate whether you containerise your model correctly, you can run:
22+
23+
```shell
24+
uv run pg2-benchmark validate <your_model_name>
25+
```
26+
27+
For example, after running `uv run pg2-benchmark validate esm`, you will get the following messages to ensure that the model [esm](models/esm/) is containerised correctly with the right model card and entrypoint:
28+
29+
```shell
30+
Uninstalled 34 packages in 504ms
31+
Installed 34 packages in 83ms
32+
✅ Loaded esm with hyper parameters {'location': 'esm2_t30_150M_UR50D', 'scoring_strategy': 'wt-marginals', 'nogpu': False, 'offset_idx': 24}.
33+
✅ Model esm has a valid 'train' entrypoint with required params: ['dataset_file', 'model_card_file']
34+
```
35+
2136
## Datasets
2237

2338
The datasets are included in the [dataset](datasets/) folder, where each dataset goes into a subfolder.
@@ -53,14 +68,14 @@ for dataset in datasets:
5368

5469
You can benchmark a group of supervised models:
5570
```shell
56-
dvc repro benchmark/supervised/local/dvc.yaml
71+
uv run dvc repro benchmark/supervised/local/dvc.yaml
5772
```
5873

5974
#### Zero-shot
6075

6176
You can benchmark a group of zero-shot models:
6277
```shell
63-
dvc repro benchmark/zero_shot/local/dvc.yaml
78+
uv run dvc repro benchmark/zero_shot/local/dvc.yaml
6479
```
6580

6681
### AWS environment
@@ -92,14 +107,14 @@ The difference of the AWS environment is that:
92107

93108
You can benchmark a group of supervised models:
94109
```shell
95-
AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy dvc repro benchmark/supervised/aws/dvc.yaml
110+
AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy uv run dvc repro benchmark/supervised/aws/dvc.yaml
96111
```
97112

98113
#### Zero-shot
99114

100115
You can benchmark a group of zero-shot models:
101116
```shell
102-
AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy dvc repro benchmark/zero_shot/aws/dvc.yaml
117+
AWS_ACCOUNT_ID=xxx AWS_PROFILE=yyy uv run dvc repro benchmark/zero_shot/aws/dvc.yaml
103118
```
104119

105120
## Generate dummy data

models/esm/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ build-backend = "hatchling.build"
2424

2525
[tool.uv.sources]
2626
pg2-dataset = { git = "https://github.com/ProteinGym2/pg2-dataset.git", rev = "58c327e13bade1effe1312eb2b8d5445016a5a8f" }
27-
pg2-benchmark = { path = "./pg2-benchmark", editable = true }
27+
pg2-benchmark = { git = "https://github.com/ProteinGym2/pg2-benchmark.git", rev = "main" }
2828

2929
[tool.hatch.build.targets.wheel]
3030
packages = ["src/pg2_model_esm"]

models/pls/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ build-backend = "hatchling.build"
2222

2323
[tool.uv.sources]
2424
pg2-dataset = { git = "https://github.com/ProteinGym2/pg2-dataset.git", rev = "58c327e13bade1effe1312eb2b8d5445016a5a8f" }
25-
pg2-benchmark = { path = "./pg2-benchmark", editable = true }
25+
pg2-benchmark = { git = "https://github.com/ProteinGym2/pg2-benchmark.git", rev = "main" }
2626

2727
[tool.hatch.build.targets.wheel]
2828
packages = ["src/pg2_model_pls"]

src/pg2_benchmark/__main__.py

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,25 @@
1+
import json
2+
import subprocess
3+
from pathlib import Path
4+
from typing import Annotated
5+
16
import typer
7+
28
from pg2_benchmark.cli.dataset import dataset_app
39
from pg2_benchmark.cli.metric import metric_app
410
from pg2_benchmark.cli.sagemaker import sagemaker_app
11+
from pg2_benchmark.model_card import ModelCard
12+
13+
14+
class ModelPath:
15+
ROOT_PATH = Path("models")
16+
SRC_PATH = Path("src")
17+
PACKAGE_PREFIX = "pg2_model"
18+
MODEL_CARD_PATH = Path("README.md")
19+
MAIN_PY_PATH = Path("__main__.py")
20+
COMMAND_NAME = "train"
21+
COMMAND_PARAMS = ["dataset_file", "model_card_file"]
22+
523

624
app = typer.Typer(
725
name="benchmark",
@@ -15,8 +33,129 @@
1533

1634

1735
@app.command()
18-
def ping():
19-
typer.echo("pong")
36+
def validate(
37+
model_name: Annotated[
38+
str, typer.Argument(help="The model name listed in the `models` folder")
39+
],
40+
):
41+
model_card_path = ModelPath.ROOT_PATH / model_name / ModelPath.MODEL_CARD_PATH
42+
43+
if not model_card_path.exists():
44+
typer.echo(
45+
f"❌ Model {model_name} does not have a model card at {model_card_path}"
46+
)
47+
raise typer.Exit(1)
48+
49+
try:
50+
model_card = ModelCard.from_path(model_card_path)
51+
typer.echo(
52+
f"✅ Loaded {model_card.name} with hyper parameters {model_card.hyper_params}."
53+
)
54+
55+
except Exception as e:
56+
typer.echo(f"❌ Error loading model card from {model_card_path}: {e}")
57+
raise typer.Exit(1)
58+
59+
main_py_path = (
60+
ModelPath.ROOT_PATH
61+
/ model_name
62+
/ ModelPath.SRC_PATH
63+
/ f"{ModelPath.PACKAGE_PREFIX}_{model_name}"
64+
/ ModelPath.MAIN_PY_PATH
65+
)
66+
67+
if not main_py_path.exists():
68+
typer.echo(
69+
f"❌ Model {model_name} does not have a {ModelPath.MAIN_PY_PATH} file at {main_py_path}"
70+
)
71+
raise typer.Exit(1)
72+
73+
try:
74+
result = subprocess.run(
75+
[
76+
"uv",
77+
"run",
78+
"--active",
79+
"python",
80+
"-c",
81+
f"""
82+
import importlib.util
83+
import inspect
84+
import json
85+
import sys
86+
87+
try:
88+
spec = importlib.util.spec_from_file_location('{ModelPath.PACKAGE_PREFIX}_{model_name}.__main__', '{ModelPath.PACKAGE_PREFIX}_{model_name}/__main__.py')
89+
module = importlib.util.module_from_spec(spec)
90+
spec.loader.exec_module(module)
91+
92+
app = getattr(module, "app")
93+
94+
entrypoint_command_found = False
95+
entrypoint_params_found = False
96+
97+
for command in app.registered_commands:
98+
if '{ModelPath.COMMAND_NAME}' == command.callback.__name__:
99+
entrypoint_command_found = True
100+
101+
sig = inspect.signature(command.callback)
102+
103+
if {ModelPath.COMMAND_PARAMS} == list(sig.parameters.keys()):
104+
entrypoint_params_found = True
105+
106+
break
107+
108+
validation_result = {{
109+
'success': True,
110+
'entrypoint_command_found': entrypoint_command_found,
111+
'entrypoint_params_found': entrypoint_params_found,
112+
'module_loaded': True
113+
}}
114+
115+
print(json.dumps(validation_result))
116+
117+
except Exception as e:
118+
error_result = {{
119+
'success': False,
120+
'entrypoint_command_found': False,
121+
'entrypoint_params_found': False,
122+
'module_loaded': False,
123+
'error': str(e)
124+
}}
125+
print(json.dumps(error_result))
126+
sys.exit(1)
127+
""",
128+
],
129+
cwd=ModelPath.ROOT_PATH / model_name / ModelPath.SRC_PATH,
130+
capture_output=True,
131+
text=True,
132+
)
133+
134+
if result.returncode == 0:
135+
validation_data = json.loads(result.stdout.strip())
136+
137+
if not validation_data["entrypoint_command_found"]:
138+
typer.echo(
139+
f"❌ Model {model_name} does not have a '{ModelPath.COMMAND_NAME}' command"
140+
)
141+
raise typer.Exit(1)
142+
143+
if not validation_data["entrypoint_params_found"]:
144+
typer.echo(
145+
f"❌ Model {model_name}'s '{ModelPath.COMMAND_NAME}' command does not have the required params: {ModelPath.COMMAND_PARAMS}"
146+
)
147+
raise typer.Exit(1)
148+
149+
typer.echo(
150+
f"✅ Model {model_name} has a valid '{ModelPath.COMMAND_NAME}' entrypoint with required params: {ModelPath.COMMAND_PARAMS}"
151+
)
152+
else:
153+
typer.echo(f"❌ Error loading module {main_py_path}: {result.stderr}")
154+
raise typer.Exit(1)
155+
156+
except Exception as e:
157+
typer.echo(f"❌ Error running validation subprocess: {e}")
158+
raise typer.Exit(1)
20159

21160

22161
if __name__ == "__main__":

0 commit comments

Comments
 (0)