Skip to content

Commit 8e436e5

Browse files
roc and p-r curves generation for classification task
Signed-off-by: Lukasz Cmielowski <lcmielow@redhat.com> Assisted-by: Cursor
1 parent d3460be commit 8e436e5

12 files changed

Lines changed: 1219 additions & 399 deletions

File tree

components/training/automl/autogluon_models_training/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ output artifact so the pipeline does not require a ParallelFor loop. Each model
3030
| `sampling_config` | `Optional[dict]` | `None` | Data sampling config stored in artifact metadata. |
3131
| `split_config` | `Optional[dict]` | `None` | Data split config stored in artifact metadata. |
3232
| `extra_train_data_path` | `str` | `""` | Optional path to extra training CSV passed to ``refit_full``. |
33+
| `positive_class` | `Optional[str]` | `None` | **Binary only.** Positive class label (``int``/``str``). Passed to ``TabularPredictor`` when set; if omitted, AutoGluon infers it as the **second sorted unique class**. Ignored for multiclass/regression. |
3334

3435
## Outputs 📤
3536

@@ -47,7 +48,7 @@ output artifact so the pipeline does not require a ParallelFor loop. Each model
4748
- **Tags**:
4849
- training
4950
- automl
50-
- **Last Verified**: 2026-04-21 12:00:00+00:00
51+
- **Last Verified**: 2026-05-20 12:00:00+00:00
5152
- **Owners**:
5253
- Approvers:
5354
- LukaszCmielowski
@@ -129,7 +130,8 @@ models_artifact/
129130
├── metrics/
130131
│ ├── metrics.json # Evaluation results on test data (metric names → values)
131132
│ ├── feature_importance.json
132-
│ └── confusion_matrix.json # Classification tasks only
133+
│ ├── confusion_matrix.json # Classification tasks only
134+
│ └── curves.json # Classification tasks only (ROC + precision-recall)
133135
└── notebooks/
134136
└── automl_predictor_notebook.ipynb # Pre-filled inference notebook
135137
```

components/training/automl/autogluon_models_training/component.py

Lines changed: 328 additions & 33 deletions
Large diffs are not rendered by default.

components/training/automl/autogluon_models_training/metadata.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ dependencies:
88
tags:
99
- training
1010
- automl
11-
lastVerified: 2026-04-21T12:00:00Z
11+
lastVerified: 2026-05-20T12:00:00Z

components/training/automl/autogluon_models_training/notebook_templates/classification_notebook.ipynb

Lines changed: 418 additions & 342 deletions
Large diffs are not rendered by default.

components/training/automl/autogluon_models_training/tests/test_component_unit.py

Lines changed: 277 additions & 15 deletions
Large diffs are not rendered by default.

pipelines/training/automl/autogluon_tabular_training_pipeline/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ The pipeline leverages AutoGluon's unique ensembling strategy that combines mult
5454
| `label_column` | `str` | `None` | Name of the target/label column in the dataset. |
5555
| `task_type` | `str` | `None` | "binary", "multiclass", or "regression"; drives metrics and model types. |
5656
| `top_n` | `int` | `3` | Number of top models to select and refit (default: 3); positive integer from range [1, 10]. |
57+
| `positive_class` | `Optional[str]` | `None` | **Binary only.** Label value treated as the positive class (e.g. ``"1"``, ``"yes"``). If omitted, AutoGluon infers it at fit time as the **second unique class after sorting** (e.g. ``[0, 1]`` → ``1``; ``['abc', 'def']`` → ``'def'``). Ignored for multiclass and regression. |
5758

5859
## Metadata 🗂️
5960

@@ -69,7 +70,7 @@ The pipeline leverages AutoGluon's unique ensembling strategy that combines mult
6970
- pipeline
7071
- automl
7172
- autogluon-tabular-training-pipeline
72-
- **Last Verified**: 2026-05-07 12:00:00+00:00
73+
- **Last Verified**: 2026-05-20 12:00:00+00:00
7374
- **Owners**:
7475
- Approvers:
7576
- LukaszCmielowski
@@ -98,7 +99,8 @@ Pipeline outputs are written to the artifact store (S3-compatible storage config
9899
│ │ ├── metrics/
99100
│ │ │ ├── metrics.json # model evaluation metrics (eval_metric, etc.)
100101
│ │ │ ├── feature_importance.json
101-
│ │ │ └── confusion_matrix.json # for classification tasks only
102+
│ │ │ ├── confusion_matrix.json # for classification tasks only
103+
│ │ │ └── curves.json # for classification tasks only (ROC + PR)
102104
│ │ └── notebooks/
103105
│ │ └── automl_predictor_notebook.ipynb # Jupyter notebook for inference & exploration
104106
│ └── <ModelName>_FULL/

pipelines/training/automl/autogluon_tabular_training_pipeline/metadata.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ tags:
1313
- pipeline
1414
- automl
1515
- autogluon-tabular-training-pipeline
16-
lastVerified: 2026-05-07T12:00:00Z
16+
lastVerified: 2026-05-20T12:00:00Z

pipelines/training/automl/autogluon_tabular_training_pipeline/pipeline.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Optional
2+
13
from kfp import dsl
24
from kfp_components.components.data_processing.automl.tabular_data_loader import automl_data_loader
35
from kfp_components.components.training.automl.autogluon_leaderboard_evaluation import leaderboard_evaluation
@@ -37,6 +39,7 @@ def autogluon_tabular_training_pipeline(
3739
label_column: str,
3840
task_type: str,
3941
top_n: int = 3,
42+
positive_class: Optional[str] = None,
4043
):
4144
"""AutoGluon Tabular Training Pipeline.
4245
@@ -111,6 +114,10 @@ def autogluon_tabular_training_pipeline(
111114
label_column: Name of the target/label column in the dataset.
112115
task_type: "binary", "multiclass", or "regression"; drives metrics and model types.
113116
top_n: Number of top models to select and refit (default: 3); positive integer from range [1, 10].
117+
positive_class: Optional label value for the positive class in binary classification (e.g.
118+
``"1"`` or ``"yes"``). If omitted (``None``), AutoGluon infers it at ``TabularPredictor.fit``
119+
time as the **second unique class after sorting** label values (see AutoGluon
120+
``TabularPredictor`` docs). Ignored for multiclass and regression.
114121
115122
Returns:
116123
HTML artifact with leaderboard of refitted models ranked by task_type metric (e.g. accuracy, r2).
@@ -165,6 +172,7 @@ def autogluon_tabular_training_pipeline(
165172
label_column=label_column,
166173
task_type=task_type,
167174
top_n=top_n,
175+
positive_class=positive_class,
168176
train_data_path=data_loader_task.outputs["models_selection_train_data_path"],
169177
test_data=data_loader_task.outputs["sampled_test_dataset"],
170178
workspace_path=dsl.WORKSPACE_PATH_PLACEHOLDER,

pipelines/training/automl/autogluon_tabular_training_pipeline/tests/test_pipeline_integration.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,11 @@ def _run_succeeded(detail):
6969

7070

7171
def _find_artifacts_in_s3(s3_client, bucket, prefix):
72-
"""List object keys under prefix; return lists of keys ending in .pkl, .ipynb, and keys containing 'leaderboard' or 'html_artifact'.""" # noqa: E501
72+
"""List object keys under prefix; return lists of keys by type (.pkl, .ipynb, .json, leaderboard)."""
7373
pkl_keys = []
7474
ipynb_keys = []
7575
leaderboard_keys = []
76+
json_keys = []
7677
try:
7778
paginator = s3_client.get_paginator("list_objects_v2")
7879
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
@@ -82,11 +83,13 @@ def _find_artifacts_in_s3(s3_client, bucket, prefix):
8283
pkl_keys.append(key)
8384
elif key.endswith(".ipynb"):
8485
ipynb_keys.append(key)
86+
elif key.endswith(".json"):
87+
json_keys.append(key)
8588
elif "leaderboard" in key.lower() or "html_artifact" in key.lower():
8689
leaderboard_keys.append(key)
8790
except Exception:
8891
pass
89-
return pkl_keys, ipynb_keys, leaderboard_keys
92+
return pkl_keys, ipynb_keys, leaderboard_keys, json_keys
9093

9194

9295
@pytest.mark.integration
@@ -130,9 +133,34 @@ def test_autogluon_pipeline_with_config(
130133
if s3_client and config.get("s3_bucket_artifacts"):
131134
bucket = config["s3_bucket_artifacts"]
132135
prefix = f"{PIPELINE_DISPLAY_NAME}/{run_id}"
133-
pkl_keys, ipynb_keys, leaderboard_keys = _find_artifacts_in_s3(s3_client, bucket, prefix)
136+
pkl_keys, ipynb_keys, leaderboard_keys, json_keys = _find_artifacts_in_s3(s3_client, bucket, prefix)
134137
assert len(pkl_keys) >= 1, f"Expected at least one .pkl model artifact under {prefix}; found {pkl_keys}"
135138
assert len(ipynb_keys) >= 1, f"Expected at least one .ipynb notebook under {prefix}; found {ipynb_keys}"
136139
assert len(leaderboard_keys) >= 1, (
137140
f"Expected leaderboard/html artifact under {prefix}; found {leaderboard_keys}"
138141
)
142+
143+
# Verify core metric files (all task types)
144+
metrics_json = [k for k in json_keys if k.endswith("metrics/metrics.json")]
145+
feature_imp_json = [k for k in json_keys if k.endswith("metrics/feature_importance.json")]
146+
147+
assert len(metrics_json) >= 1, (
148+
f"Expected at least one metrics.json under {prefix}; found {metrics_json}"
149+
)
150+
assert len(feature_imp_json) >= 1, (
151+
f"Expected at least one feature_importance.json under {prefix}; found {feature_imp_json}"
152+
)
153+
154+
# Verify classification-specific metric files
155+
if test_config.task_type in {"binary", "multiclass"}:
156+
cm_json = [k for k in json_keys if k.endswith("metrics/confusion_matrix.json")]
157+
curves_json = [k for k in json_keys if k.endswith("metrics/curves.json")]
158+
159+
assert len(cm_json) >= 1, (
160+
f"Expected at least one confusion_matrix.json for {test_config.task_type} task "
161+
f"under {prefix}; found {cm_json}"
162+
)
163+
assert len(curves_json) >= 1, (
164+
f"Expected at least one curves.json for {test_config.task_type} task "
165+
f"under {prefix}; found {curves_json}"
166+
)

pipelines/training/automl/autogluon_tabular_training_pipeline/tests/test_pipeline_unit.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def test_pipeline_signature(self):
5252
"label_column",
5353
"task_type",
5454
"top_n",
55+
"positive_class",
5556
}
5657
inputs = autogluon_tabular_training_pipeline.component_spec.inputs
5758
params = set(inputs.keys())
@@ -76,6 +77,7 @@ def test_compiled_pipeline_has_expected_inputs(self):
7677
"label_column",
7778
"task_type",
7879
"top_n",
80+
"positive_class",
7981
):
8082
assert name in content, f"Expected pipeline input '{name}' in compiled YAML"
8183
except Exception as e:
@@ -112,6 +114,7 @@ def test_compiled_pipeline_wires_loader_outputs_to_training_task(self):
112114
assert "outputParameterKey: split_config" in content
113115
assert "outputParameterKey: sample_config" in content
114116
assert "outputParameterKey: extra_train_data_path" in content
117+
assert "positive_class" in content
115118

116119
def test_compiled_pipeline_data_loader_declares_task_type_and_label(self):
117120
"""Tabular data loader component exposes task_type and label_column inputs."""

0 commit comments

Comments
 (0)