Skip to content

Commit a94c750

Browse files
Merge pull request #124 from ProteinGym/chore/move-metric-calc
PR1: Move metric.py to scripts
2 parents 65bedcd + 7dac9b4 commit a94c750

File tree

7 files changed

+145
-38
lines changed

7 files changed

+145
-38
lines changed

benchmark/supervised/local/dvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ stages:
3434
dataset: ${datasets}
3535
model: ${models}
3636

37-
cmd: uv run proteingym-benchmark metric calc --output-path ${output.prediction}/${item.dataset.name}_${item.model.name}.csv --metric-path ${output.metric}/${item.dataset.name}_${item.model.name}.csv
37+
cmd: python $(dvc root)/scripts/metric.py --output ${output.prediction}/${item.dataset.name}_${item.model.name}.csv --metric ${output.metric}/${item.dataset.name}_${item.model.name}.csv --actual-vector-col "test" --predict-vector-col "pred"
3838
deps:
3939
- ${output.prediction}/${item.dataset.name}_${item.model.name}.csv
4040
outs:

benchmark/zero_shot/local/dvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ stages:
3434
dataset: ${datasets}
3535
model: ${models}
3636

37-
cmd: uv run proteingym-benchmark metric calc --output-path ${output.prediction}/${item.dataset.name}_${item.model.name}.csv --metric-path ${output.metric}/${item.dataset.name}_${item.model.name}.csv
37+
cmd: python $(dvc root)/scripts/metric.py --output ${output.prediction}/${item.dataset.name}_${item.model.name}.csv --metric ${output.metric}/${item.dataset.name}_${item.model.name}.csv --actual-vector-col "test" --predict-vector-col "pred"
3838
deps:
3939
- ${output.prediction}/${item.dataset.name}_${item.model.name}.csv
4040
outs:

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
polars[pyarrow]>=1.30.0,<2.0.0
2+
pycm==4.4

scripts/README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Scripts
2+
3+
This directory contains utility scripts for the ProteinGym benchmark.
4+
5+
## Dependencies
6+
7+
Make sure to install the required dependencies:
8+
9+
```shell
10+
pip install -r requirements.txt
11+
```
12+
13+
## metric.py
14+
15+
The [metric.py](metric.py) script calculates performance metrics for machine learning models by comparing actual and predicted values.
16+
17+
### Arguments
18+
19+
- `--output`: Path to the input CSV file containing the prediction results to be scored (read by the script, despite the flag name)
20+
- `--metric`: Path where the calculated metrics CSV will be saved
21+
- `--actual-vector-col`: Column name containing actual/ground truth values
22+
- `--predict-vector-col`: Column name containing predicted values
23+
24+
### Example
25+
26+
```shell
27+
python metric.py \
28+
--output predictions.csv \
29+
--metric metrics.csv \
30+
--actual-vector-col "true_values" \
31+
--predict-vector-col "predicted_values"
32+
```

scripts/metric.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""
2+
Metric calculation script for ProteinGym benchmark evaluation.
3+
4+
This script provides functionality to calculate performance metrics for machine learning models
5+
by comparing actual and predicted values. It computes classification metrics via confusion
6+
matrix from CSV output files.
7+
8+
The main function `calc` reads prediction results from a CSV file, generates a confusion matrix
9+
with comprehensive classification statistics, and outputs all metrics to a CSV file for further analysis.
10+
11+
Example output CSV:
12+
| Metric | Value |
13+
|--------------|------------|
14+
| Overall ACC | 0.85 |
15+
| PPV Macro | 'None' |
16+
| Kappa 95% CI | (0.0, 0.0) |
17+
18+
Functions:
19+
calc: Calculate and save performance metrics from prediction output files
20+
"""
21+
22+
import argparse
23+
from pathlib import Path
24+
25+
import polars as pl
26+
from pycm import ConfusionMatrix
27+
28+
29+
def calc(
    output: Path, metric: Path, actual_vector_col: str, predict_vector_col: str
) -> Path:
    """Calculate performance metrics from prediction output and save to CSV.

    Reads prediction results from a CSV file, computes classification metrics
    using a pycm confusion matrix, and writes every overall statistic to a CSV
    file with two columns: ``metric_name`` and ``metric_value``.

    Args:
        output: Path to the CSV file containing prediction results (input).
        metric: Path where the calculated metrics CSV will be saved.
        actual_vector_col: Column name containing actual/ground truth values.
        predict_vector_col: Column name containing predicted values.

    Returns:
        The path the metrics CSV was written to (same as ``metric``).

    Raises:
        ValueError: If either requested column is missing from the input CSV.
    """

    print("Start to calculate metrics.")

    output_dataframe = pl.read_csv(output)

    # Fail fast with a clear, actionable message instead of surfacing a
    # library-specific error from the column lookup below.
    missing = [
        col
        for col in (actual_vector_col, predict_vector_col)
        if col not in output_dataframe.columns
    ]
    if missing:
        raise ValueError(f"Column(s) not found in {output}: {missing}")

    cm = ConfusionMatrix(
        actual_vector=output_dataframe[actual_vector_col].to_list(),
        predict_vector=output_dataframe[predict_vector_col].to_list(),
    )

    # overall_stat values are heterogeneous (floats, tuples, None, strings),
    # so everything is stringified to fit a single CSV column.
    metrics_data = [
        {"metric_name": key, "metric_value": str(value)}
        for key, value in cm.overall_stat.items()
    ]

    metric_dataframe = pl.DataFrame(
        data=metrics_data,
        schema={"metric_name": pl.String, "metric_value": pl.String},
    )

    metric_dataframe.write_csv(metric)

    return metric
66+
67+
68+
def main():
    """Entry point: parse the CLI arguments and delegate to ``calc``.

    Returns:
        The path of the written metrics CSV, as returned by ``calc``.
    """
    parser = argparse.ArgumentParser(
        description="Calculate metric for ProteinGym benchmark evaluation."
    )

    # The two file-path options, declared table-style: (flag, help text).
    for flag, help_text in (
        ("--output", "Path to the CSV file containing prediction results"),
        ("--metric", "Path where the calculated metrics CSV will be saved"),
    ):
        parser.add_argument(flag, type=Path, required=True, help=help_text)

    # The two column-name options.
    for flag, help_text in (
        (
            "--actual-vector-col",
            "Column name containing actual/ground truth values",
        ),
        (
            "--predict-vector-col",
            "Column name containing predicted values",
        ),
    ):
        parser.add_argument(flag, type=str, required=True, help=help_text)

    namespace = parser.parse_args()

    return calc(
        output=namespace.output,
        metric=namespace.metric,
        actual_vector_col=namespace.actual_vector_col,
        predict_vector_col=namespace.predict_vector_col,
    )
106+
107+
108+
if __name__ == "__main__":
    # Run the CLI, then report where the metrics were written.
    saved_path = main()
    print(f"Metrics have been saved to {saved_path}.")

src/proteingym/benchmark/__main__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import typer
55

66
from .__about__ import __version__
7-
from .cli.metric import metric_app
87
from .cli.sagemaker import sagemaker_app
98

109
app = typer.Typer(
@@ -13,7 +12,6 @@
1312
add_completion=False,
1413
)
1514

16-
app.add_typer(metric_app, name="metric", help="Metric operations")
1715
app.add_typer(sagemaker_app, name="sagemaker", help="SageMaker operations")
1816

1917

src/proteingym/benchmark/cli/metric.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)