Skip to content

Commit fa8eb42

Browse files
fix the feedback from Mateusz
Signed-off-by: Lukasz Cmielowski <lcmielow@redhat.com>
Assisted-by: Cursor
1 parent fd5199f commit fa8eb42

18 files changed

Lines changed: 403 additions & 148414 deletions

File tree

components/training/automl/autogluon_leaderboard_evaluation/tests/test_component_unit.py

Lines changed: 69 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,12 @@
11
"""Tests for the leaderboard_evaluation component."""
22

33
import json
4-
import sys
54
from pathlib import Path
65
from unittest import mock
76

87
import pytest
98

10-
11-
@pytest.fixture(autouse=True, scope="module")
12-
def isolated_sys_modules():
13-
"""Patch pandas in sys.modules only for this test module; restored on module teardown."""
14-
with mock.patch.dict(sys.modules, clear=False) as mocked_modules:
15-
mocked_modules["pandas"] = mock.MagicMock()
16-
yield
17-
18-
19-
from ..component import leaderboard_evaluation # noqa: E402
9+
from ..component import leaderboard_evaluation
2010

2111

2212
def _make_models_artifact(
@@ -296,3 +286,71 @@ def test_component_imports_correctly(self):
296286
assert callable(leaderboard_evaluation)
297287
assert hasattr(leaderboard_evaluation, "python_func")
298288
assert hasattr(leaderboard_evaluation, "component_spec")
289+
290+
291+
def _write_model_metrics(base_path: Path, model_name: str, metrics: dict) -> None:
292+
metrics_dir = base_path / model_name / "metrics"
293+
metrics_dir.mkdir(parents=True, exist_ok=True)
294+
(metrics_dir / "metrics.json").write_text(json.dumps(metrics), encoding="utf-8")
295+
296+
297+
class TestLeaderboardMetricSorting:
298+
"""Verify AutoGluon negated-metric convention produces correct best-model ranking."""
299+
300+
def test_negated_rmse_ranks_higher_value_first(self, tmp_path):
301+
"""Flipped RMSE (-0.3 beats -0.8) selects the better model as best_model."""
302+
combined_root = tmp_path / "models"
303+
_write_model_metrics(combined_root, "ModelA", {"root_mean_squared_error": -0.8})
304+
_write_model_metrics(combined_root, "ModelB", {"root_mean_squared_error": -0.3})
305+
306+
models_artifact = mock.MagicMock()
307+
models_artifact.path = str(combined_root)
308+
models_artifact.uri = "http://example.com/artifacts"
309+
models_artifact.metadata = {"model_names": json.dumps(["ModelA", "ModelB"])}
310+
311+
html_artifact = mock.MagicMock()
312+
html_artifact.path = str(tmp_path / "leaderboard.html")
313+
html_artifact.metadata = {}
314+
315+
component_status = mock.MagicMock()
316+
component_status.path = str(tmp_path / "status")
317+
component_status.metadata = {}
318+
319+
result = leaderboard_evaluation.python_func(
320+
models_artifact=models_artifact,
321+
eval_metric="root_mean_squared_error",
322+
html_artifact=html_artifact,
323+
component_status=component_status,
324+
)
325+
326+
assert result.best_model == "ModelB"
327+
html = Path(html_artifact.path).read_text(encoding="utf-8")
328+
assert html.index("ModelB") < html.index("ModelA")
329+
330+
def test_mase_ranks_higher_value_first(self, tmp_path):
331+
"""Timeseries MASE values rank with higher-is-better AutoGluon convention."""
332+
combined_root = tmp_path / "models"
333+
_write_model_metrics(combined_root, "DeepAR", {"MASE": -0.55})
334+
_write_model_metrics(combined_root, "TFT", {"MASE": -0.21})
335+
336+
models_artifact = mock.MagicMock()
337+
models_artifact.path = str(combined_root)
338+
models_artifact.uri = "http://example.com/artifacts"
339+
models_artifact.metadata = {"model_names": json.dumps(["DeepAR", "TFT"])}
340+
341+
html_artifact = mock.MagicMock()
342+
html_artifact.path = str(tmp_path / "leaderboard_ts.html")
343+
html_artifact.metadata = {}
344+
345+
component_status = mock.MagicMock()
346+
component_status.path = str(tmp_path / "status_ts")
347+
component_status.metadata = {}
348+
349+
result = leaderboard_evaluation.python_func(
350+
models_artifact=models_artifact,
351+
eval_metric="MASE",
352+
html_artifact=html_artifact,
353+
component_status=component_status,
354+
)
355+
356+
assert result.best_model == "TFT"

components/training/automl/autogluon_leaderboard_evaluation/tests/test_leaderboard_metric_sorting.py

Lines changed: 0 additions & 75 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
"""Expected Kubernetes CPU/memory tiers for the documents indexing pipeline."""
2+
3+
from kfp_components.utils.pipeline_task_resources import ExecutorResources
4+
5+
WORKLOAD_RESOURCES = ExecutorResources("2", "8Gi", "32", "64Gi")
6+
7+
AUTORAG_INDEXING_EXECUTOR_RESOURCES = {
8+
"documents-discovery": WORKLOAD_RESOURCES,
9+
"text-extraction": WORKLOAD_RESOURCES,
10+
"documents-indexing": WORKLOAD_RESOURCES,
11+
}
Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
"""Unit tests for documents indexing pipeline executor resource tiers."""
2+
3+
from kfp_components.utils.pipeline_task_resources import (
4+
assert_executor_resources,
5+
compile_executor_resources,
6+
)
7+
8+
from ..pipeline import documents_indexing_pipeline
9+
from .pipeline_resource_expectations import AUTORAG_INDEXING_EXECUTOR_RESOURCES
10+
11+
12+
class TestDocumentsIndexingPipelineResourceRequirements:
13+
"""Documents indexing pipeline sets workload-tier resources on all three steps."""
14+
15+
def test_documents_indexing_pipeline_executor_resources(self):
16+
"""Documents indexing pipeline sets workload-tier resources on all three steps."""
17+
assert_executor_resources(
18+
compile_executor_resources(documents_indexing_pipeline),
19+
AUTORAG_INDEXING_EXECUTOR_RESOURCES,
20+
pipeline_name="documents_indexing_pipeline",
21+
)

pipelines/data_processing/autorag/documents_indexing_pipeline/tests/test_pipeline_unit.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,11 +116,12 @@ def test_compiled_pipeline_wires_s3_and_ogx_secrets(self):
116116
def test_compiled_pipeline_declares_component_resource_tiers(self):
117117
"""All indexing pipeline steps declare the workload CPU/memory tier."""
118118
from kfp_components.utils.pipeline_task_resources import (
119-
AUTORAG_INDEXING_EXECUTOR_RESOURCES,
120119
assert_executor_resources,
121120
compile_executor_resources,
122121
)
123122

123+
from .pipeline_resource_expectations import AUTORAG_INDEXING_EXECUTOR_RESOURCES
124+
124125
assert_executor_resources(
125126
compile_executor_resources(documents_indexing_pipeline),
126127
AUTORAG_INDEXING_EXECUTOR_RESOURCES,
Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
"""Expected Kubernetes CPU/memory tiers for the tabular training pipeline."""
2+
3+
from kfp_components.utils.pipeline_task_resources import ExecutorResources
4+
5+
STAGE_MAP_RESOURCES = ExecutorResources("0.5", "512Mi", "1", "1Gi")
6+
WORKLOAD_RESOURCES = ExecutorResources("2", "8Gi", "32", "64Gi")
7+
LEADERBOARD_RESOURCES = ExecutorResources("1", "4Gi", "32", "64Gi")
8+
TRAINING_SPEED_RESOURCES = ExecutorResources("4", "16Gi", "32", "64Gi")
9+
TRAINING_BALANCED_RESOURCES = ExecutorResources("8", "32Gi", "32", "64Gi")
10+
11+
AUTOML_TABULAR_EXECUTOR_RESOURCES = {
12+
"publish-component-stage-map": STAGE_MAP_RESOURCES,
13+
"automl-data-loader": WORKLOAD_RESOURCES,
14+
"autogluon-models-training": TRAINING_BALANCED_RESOURCES,
15+
"leaderboard-evaluation": LEADERBOARD_RESOURCES,
16+
"autogluon-models-training-2": TRAINING_SPEED_RESOURCES,
17+
"leaderboard-evaluation-2": LEADERBOARD_RESOURCES,
18+
}
Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
"""Unit tests for pipeline executor resource helpers and tabular tier matrix."""
2+
3+
import pytest
4+
from kfp_components.utils.pipeline_task_resources import (
5+
ExecutorResources,
6+
assert_executor_resources,
7+
compile_executor_resources,
8+
normalize_executor_name,
9+
)
10+
11+
from ..pipeline import autogluon_tabular_training_pipeline
12+
from .pipeline_resource_expectations import (
13+
AUTOML_TABULAR_EXECUTOR_RESOURCES,
14+
TRAINING_BALANCED_RESOURCES,
15+
TRAINING_SPEED_RESOURCES,
16+
)
17+
18+
19+
class TestPipelineTaskResourcesHelpers:
20+
"""Tests for compile/assert helpers in pipeline_task_resources."""
21+
22+
def test_normalize_executor_name_strips_prefix(self):
23+
"""Executor keys drop the ``exec-`` prefix for stable task names."""
24+
assert normalize_executor_name("exec-automl-data-loader") == "automl-data-loader"
25+
26+
def test_assert_executor_resources_detects_cpu_change(self):
27+
"""Mismatched CPU requests fail with the task name in the error."""
28+
actual = {
29+
"exec-automl-data-loader": ExecutorResources("2", "8Gi", "32", "64Gi"),
30+
}
31+
expected = {
32+
"automl-data-loader": ExecutorResources("4", "8Gi", "32", "64Gi"),
33+
}
34+
with pytest.raises(AssertionError, match="automl-data-loader"):
35+
assert_executor_resources(actual, expected, pipeline_name="test-pipeline")
36+
37+
def test_assert_executor_resources_allow_extra_executors(self):
38+
"""Partial expected maps can ignore additional executors when allow_extra is set."""
39+
actual = {
40+
"exec-automl-data-loader": ExecutorResources("2", "8Gi", "32", "64Gi"),
41+
"exec-leaderboard-evaluation": ExecutorResources("1", "4Gi", "32", "64Gi"),
42+
}
43+
expected = {
44+
"automl-data-loader": ExecutorResources("2", "8Gi", "32", "64Gi"),
45+
}
46+
assert_executor_resources(actual, expected, pipeline_name="test-pipeline", allow_extra=True)
47+
48+
49+
class TestAutogluonTabularPipelineResourceRequirements:
50+
"""Tabular pipeline declares preset-dependent training tiers plus shared loader/leaderboard tiers."""
51+
52+
def test_tabular_pipeline_executor_resources(self):
53+
"""All tabular pipeline executors match the declared CPU/memory matrix."""
54+
assert_executor_resources(
55+
compile_executor_resources(autogluon_tabular_training_pipeline),
56+
AUTOML_TABULAR_EXECUTOR_RESOURCES,
57+
pipeline_name="autogluon_tabular_training_pipeline",
58+
)
59+
60+
def test_default_speed_preset_uses_lower_training_tier(self):
61+
"""Default speed preset branch requests less CPU/memory than balanced."""
62+
actual = compile_executor_resources(autogluon_tabular_training_pipeline)
63+
speed_keys = [name for name in actual if name.endswith("-2") and "models-training" in name]
64+
balanced_keys = [name for name in actual if "models-training" in name and not name.endswith("-2")]
65+
assert len(speed_keys) == 1
66+
assert len(balanced_keys) == 1
67+
speed = actual[speed_keys[0]]
68+
balanced = actual[balanced_keys[0]]
69+
assert speed == TRAINING_SPEED_RESOURCES
70+
assert balanced == TRAINING_BALANCED_RESOURCES
71+
assert float(speed.cpu_request) < float(balanced.cpu_request)

pipelines/training/automl/autogluon_tabular_training_pipeline/tests/test_pipeline_unit.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,11 +175,12 @@ def test_compiled_pipeline_wires_preset_to_training_task(self):
175175
def test_compiled_pipeline_declares_speed_and_balanced_resource_tiers(self):
176176
"""Speed and balanced preset branches request different training CPU/memory."""
177177
from kfp_components.utils.pipeline_task_resources import (
178-
AUTOML_TABULAR_EXECUTOR_RESOURCES,
179178
assert_executor_resources,
180179
compile_executor_resources,
181180
)
182181

182+
from .pipeline_resource_expectations import AUTOML_TABULAR_EXECUTOR_RESOURCES
183+
183184
actual = compile_executor_resources(autogluon_tabular_training_pipeline)
184185
assert_executor_resources(
185186
actual,

0 commit comments

Comments
 (0)