Skip to content

Commit 1f97d20

Browse files
Merge upstream/main into autox_clean_up_stages
Resolved conflict in timeseries_data_loader/component.py:
- Kept enhanced sample_rows logic with ISO timestamp conversion (from PR opendatahub-io#132)
- Kept display_name metadata at start of context (from this PR)
- Kept write_outputs status tracking (from PR opendatahub-io#132)

Merged changes:
- PR opendatahub-io#132: AutoML timeseries notebook backtesting charts
- PR opendatahub-io#138: ai4rag 0.6.4 and ogx-client 1.1.0 updates

Signed-off-by: Lukasz Cmielowski <lcmielow@redhat.com>
Assisted-by: Cursor
2 parents 01c4d67 + 2040846 commit 1f97d20

29 files changed

Lines changed: 1470 additions & 505 deletions

File tree

components/data_processing/automl/timeseries_data_loader/component.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ def timeseries_data_loader(
6161
NamedTuple: sample_config, split_config, sample_rows, models_selection_train_data_path, extra_train_data_path.
6262
"""
6363
import io
64+
import json
6465
import logging
6566
import os
6667
from pathlib import Path
@@ -421,8 +422,19 @@ def _concat_sorted(parts: list, sort_by: list) -> pd.DataFrame:
421422
"selection_train_size": selection_train_size,
422423
}
423424

424-
# Sample row for downstream use (JSON string to avoid NaN issues)
425-
sample_rows = test_df.tail(min(5, len(test_df))).to_json(orient="records")
425+
status.record("write_outputs", "started")
426+
status.record("write_outputs", "completed")
427+
428+
# Sample rows for downstream use (ISO timestamps when supported; JSON string to avoid NaN issues)
429+
sample_tail = test_df.tail(min(5, len(test_df)))
430+
if hasattr(sample_tail, "to_dict"):
431+
from kfp_components.components.training.automl.shared.timeseries_notebook_utils import (
432+
_json_records,
433+
)
434+
435+
sample_rows = json.dumps(_json_records(sample_tail))
436+
else:
437+
sample_rows = sample_tail.to_json(orient="records")
426438

427439
return NamedTuple(
428440
"outputs",

components/data_processing/automl/timeseries_data_loader/tests/test_component_unit.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,7 @@ def test_sample_rows_json_matches_test_tail(self, tmp_path):
279279
assert isinstance(parsed, list)
280280
assert len(parsed) == 5
281281
assert parsed[-1]["target"] == "99"
282+
assert isinstance(parsed[0]["timestamp"], str)
282283

283284
@mock.patch.dict(os.environ, mocked_env_variables, clear=True)
284285
def test_sampling_truncates_below_minimum_raises(self, tmp_path):

components/data_processing/autorag/documents_discovery/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ def example_pipeline(
6363
- Name: Pipelines, Version: >=2.15.2
6464
- External Services:
6565
- Name: RHOAI Connections API, Version: >=1.0.0
66-
- Name: ai4rag, Version: ~=0.6.3
66+
- Name: ai4rag, Version: ~=0.6.4
6767
- **Tags**:
6868
- data-processing
6969
- autorag

components/data_processing/autorag/documents_discovery/metadata.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dependencies:
99
- name: RHOAI Connections API
1010
version: ">=1.0.0"
1111
- name: ai4rag
12-
version: "~=0.6.3"
12+
version: "~=0.6.4"
1313
tags:
1414
- data-processing
1515
- autorag

components/data_processing/autorag/documents_indexing/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ def example_pipeline(
7979
- Name: Pipelines, Version: >=2.15.2
8080
- External Services:
8181
- Name: RHOAI Connections API, Version: >=1.0.0
82-
- Name: ai4rag, Version: ~=0.6.3
82+
- Name: ai4rag, Version: ~=0.6.4
8383
- **Tags**:
8484
- data-indexing
8585
- autorag

components/data_processing/autorag/documents_indexing/metadata.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dependencies:
99
- name: RHOAI Connections API
1010
version: ">=1.0.0"
1111
- name: ai4rag
12-
version: "~=0.6.3"
12+
version: "~=0.6.4"
1313
tags:
1414
- data-indexing
1515
- autorag

components/training/automl/autogluon_timeseries_models_training/README.md

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,4 @@ Under each ``{model_name}_FULL/metrics/`` directory:
140140
- **`back_testing.json`**: Multi-window backtest with ``per_window_metrics`` and ``series_analysis``
141141
(best/worst forecast timelines). Window error metrics use **natural positive** signs via
142142
``filter_finite_metrics``. Best-effort after refit; omitted if backtest APIs or history are
143-
insufficient.
144-
145-
The timeseries notebook template loads ``back_testing.json`` when present for model insights.
143+
insufficient. Visualized in the generated inference notebook when present.

components/training/automl/autogluon_timeseries_models_training/component.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,8 +83,12 @@ def autogluon_timeseries_models_training(
8383
logger = logging.getLogger(__name__)
8484

8585
from kfp_components.components.training.automl.shared.back_testing import build_back_testing_json
86+
from kfp_components.components.training.automl.shared.back_testing_charts import (
87+
notebook_backtest_charts_source,
88+
)
8689
from kfp_components.components.training.automl.shared.timeseries_notebook_utils import (
8790
build_predict_sample_artifact,
91+
notebook_timeseries_sample_helpers_source,
8892
)
8993

9094
status = ComponentStatusTracker(component_status.path, "autogluon_timeseries_models_training")
@@ -399,6 +403,8 @@ def replace_placeholder_in_notebook(notebook, replacements):
399403
"<REPLACE_PIPELINE_NAME>": pipeline_name_trimmed,
400404
"<REPLACE_MODEL_NAME>": model_name_full,
401405
"<REPLACE_PREDICT_SAMPLE>": str(predict_sample),
406+
"<REPLACE_BACKTEST_PLOT_HELPERS>": notebook_backtest_charts_source(),
407+
"<REPLACE_TIMESERIES_SAMPLE_HELPERS>": notebook_timeseries_sample_helpers_source(),
402408
}
403409
notebook = replace_placeholder_in_notebook(notebook, replacements)
404410

components/training/automl/shared/back_testing.py

Lines changed: 21 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -140,34 +140,30 @@ def _mean_prediction_column(predictions: pd.DataFrame) -> str:
140140

141141

142142
def _quantile_bounds(predictions: pd.DataFrame) -> tuple[str | None, str | None]:
143-
"""Extract lower/upper quantile columns for uncertainty visualization.
143+
"""Extract lower/upper quantile columns aligned with ``TimeSeriesPredictor.plot`` defaults.
144144
145-
Selects the first quantile <= 0.2 for lower bound and >= 0.8 for upper bound.
146-
These thresholds provide a ~60% coverage interval suitable for chart rendering
147-
without overwhelming the visualization with too many bands.
148-
149-
AutoGluon typically generates quantiles at [0.1, 0.2, ..., 0.9]. If present,
150-
this function will select 0.2 and 0.8. If AutoGluon uses different levels
151-
(e.g., 0.1/0.9), the function adapts by picking the closest match.
152-
153-
Args:
154-
predictions: Forecast DataFrame with quantile columns (numeric names like "0.1")
155-
156-
Returns:
157-
Tuple of (lower_quantile_column, upper_quantile_column) as strings, or None if not found
145+
Prefers quantile levels closest to 0.1 and 0.9 (AutoGluon's typical P10/P90 band).
158146
"""
159-
lower = None
160-
upper = None
147+
levels: list[tuple[float, str]] = []
161148
for col in predictions.columns:
162-
col_text = str(col)
163149
try:
164-
level = float(col)
150+
levels.append((float(col), str(col)))
165151
except (TypeError, ValueError):
166152
continue
167-
if level <= 0.2 and lower is None:
168-
lower = col_text
169-
if level >= 0.8 and upper is None:
170-
upper = col_text
153+
if not levels:
154+
return None, None
155+
156+
def _closest(candidates: list[tuple[float, str]], target: float) -> str | None:
157+
if not candidates:
158+
return None
159+
return min(candidates, key=lambda item: abs(item[0] - target))[1]
160+
161+
lower = _closest(levels, 0.1)
162+
if lower is None:
163+
return None, None
164+
165+
remaining = [(value, name) for value, name in levels if name != lower]
166+
upper = _closest(remaining, 0.9)
171167
return lower, upper
172168

173169

@@ -244,10 +240,12 @@ def _forecast_data_for_item(
244240
lower = to_finite_float(pred_item.loc[ts, lower_col])
245241
if lower is not None:
246242
row["lower_bound"] = lower
243+
row["lower_quantile"] = float(lower_col)
247244
if upper_col is not None:
248245
upper = to_finite_float(pred_item.loc[ts, upper_col])
249246
if upper is not None:
250247
row["upper_bound"] = upper
248+
row["upper_quantile"] = float(upper_col)
251249
rows.append(row)
252250
return rows
253251

@@ -502,6 +500,7 @@ def build_back_testing_json(
502500
per_window_metrics.append(
503501
{
504502
"window_id": window_id,
503+
"cutoff": cutoff,
505504
"test_start": test_start,
506505
"test_end": test_end,
507506
"metrics": window_scores,

0 commit comments

Comments
 (0)