Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/data_processing/dataset_download/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


@dsl.component(
base_image="quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:cpu-3.3",
base_image="quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4",
packages_to_install=["datasets>=2.14.0", "huggingface-hub>=0.20.0", "s3fs>=2023.1.0"],
)
def dataset_download(
Expand Down
4 changes: 0 additions & 4 deletions components/deployment/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,3 @@ This directory contains components in the **Deployment** category:
- [Deploy Embedding Model](./deploy_embedding_model/README.md): Deploy a text embedding model using KServe InferenceService.
- [Kubeflow Model Registry](./kubeflow_model_registry/README.md): Register model to Kubeflow Model Registry with full provenance tracking.
- [Model Deployment](./model_deployment/README.md): Deploy a model on OpenShift AI using vLLM InferenceService.

## Subcategories

- [Autorag](./autorag/README.md)
2 changes: 1 addition & 1 deletion components/deployment/kubeflow_model_registry/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


@dsl.component(
base_image="quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:cpu-3.3",
base_image="quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4",
packages_to_install=["model-registry==0.3.4"],
)
def kubeflow_model_registry(
Expand Down
7 changes: 2 additions & 5 deletions components/evaluation/lm_eval/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,10 @@ def universal_llm_evaluator(
import torch

# Delayed imports for lm-eval
from lm_eval import tasks
from lm_eval.api.instance import Instance
from lm_eval.api.metrics import mean
from lm_eval.api.registry import get_model
from lm_eval.api.task import TaskConfig
from lm_eval.api.task import Task, TaskConfig
from lm_eval.evaluator import evaluate
from lm_eval.tasks import get_task_dict
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Expand Down Expand Up @@ -167,7 +166,7 @@ def validate_chat_format(data: list, logger) -> tuple:
# =========================================================================
# Custom Chat Holdout Task
# =========================================================================
class ChatHoldoutTask(tasks.Task):
class ChatHoldoutTask(Task):
"""A custom lm-eval task for evaluating on chat-format holdout data."""

VERSION = 0
Expand All @@ -181,14 +180,12 @@ def __init__(
prompts_log: list = None,
):
self.dataset_path = dataset_path
self.task_name = task_name
self.max_gen_toks = max_gen_toks
self.log_prompts = log_prompts
self.prompts_log = [] if prompts_log is None else prompts_log

config = TaskConfig(task=task_name, dataset_path=dataset_path)
super().__init__(config=config)
self.config.task = task_name
self.fewshot_rnd = random.Random()

def download(self, data_dir=None, cache_dir=None, download_mode=None, **kwargs) -> None:
Expand Down
6 changes: 3 additions & 3 deletions components/training/finetuning/lora/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


@dsl.component(
base_image="quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:cpu-3.3",
base_image="quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4",
packages_to_install=[
"kubernetes",
"olot",
Expand Down Expand Up @@ -347,13 +347,13 @@ def _params() -> Dict:

params = _params()

def _train_func(p):
def _train_func(**p):
import os

from training_hub import lora_sft as tr

print("[PY] Launching LoRA training...", flush=True)
result = tr(**(p or {}))
result = tr(**p)

# Merge LoRA adapter weights into base model for eval/deployment compatibility.
# LoRA training saves adapter-only files (adapter_config.json, adapter_model.safetensors)
Expand Down
6 changes: 3 additions & 3 deletions components/training/finetuning/osft/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


@dsl.component(
base_image="quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:cpu-3.3",
base_image="quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4",
packages_to_install=[
"kubernetes",
"olot",
Expand Down Expand Up @@ -236,8 +236,8 @@ def _params() -> Dict:

params = _params()

def _train_func(p):
a = dict(p or {})
def _train_func(**p):
a = dict(p)
fsdp = a.pop("fsdp_sharding_strategy", None)
from training_hub import osft as tr

Expand Down
6 changes: 3 additions & 3 deletions components/training/finetuning/sft/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


@dsl.component(
base_image="quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:cpu-3.3",
base_image="quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4",
packages_to_install=[
"kubernetes",
"olot",
Expand Down Expand Up @@ -206,8 +206,8 @@ def _params() -> Dict:

params = _params()

def _train_func(p):
a = dict(p or {})
def _train_func(**p):
a = dict(p)
fsdp = a.pop("fsdp_sharding_strategy", None)
from training_hub import sft as tr

Expand Down
2 changes: 1 addition & 1 deletion scripts/validate_base_images/base_image_allowlist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
# Schema:
# - allowed_images: exact image strings
# - allowed_image_patterns: list of regex patterns matched against the full image
allowed_images:

Check warning on line 12 in scripts/validate_base_images/base_image_allowlist.yaml

View workflow job for this annotation

GitHub Actions / yaml-lint

12:1 [document-start] missing document start "---"
- 'quay.io/opendatahub/odh-training-th04-cpu-torch29-py312-rhel9:cpu-3.3'
- 'quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4'

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Tag-based references in touched files:"
rg -n 'odh-th06-cpu-torch291-py312:odh-3\.4' \
  scripts/validate_base_images/base_image_allowlist.yaml \
  components/data_processing/dataset_download/component.py \
  components/deployment/kubeflow_model_registry/component.py \
  components/training/finetuning/lora/component.py \
  components/training/finetuning/osft/component.py \
  components/training/finetuning/sft/component.py

echo
echo "Digest-pinned references in touched files (expected after fix):"
rg -n '@sha256:' \
  scripts/validate_base_images/base_image_allowlist.yaml \
  components/data_processing/dataset_download/component.py \
  components/deployment/kubeflow_model_registry/component.py \
  components/training/finetuning/lora/component.py \
  components/training/finetuning/osft/component.py \
  components/training/finetuning/sft/component.py || true

Repository: opendatahub-io/pipelines-components

Length of output: 950


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Locate allowlist loader + validator:"
rg -n "load_base_image_allowlist|is_valid_base_image|validate_base_images\.py" -S \
  scripts/validate_base_images || true

echo
echo "Show implementation details (if present):"
rg -n "def load_base_image_allowlist|def is_valid_base_image" -S \
  scripts/validate_base_images/validate_base_images.py || true

echo
echo "Print the allowlist-loading snippet area (around matched lines):"
rg -n "load_base_image_allowlist" -n scripts/validate_base_images/validate_base_images.py -C 3 || true

Repository: opendatahub-io/pipelines-components

Length of output: 8766


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Find lib/base_image implementation files:"
fd -a "base_image*" scripts/validate_base_images -t f

echo
echo "Locate load_base_image_allowlist + is_valid_base_image definitions:"
rg -n "def load_base_image_allowlist|def is_valid_base_image" -S \
  scripts/validate_base_images/lib || true

echo
echo "Show the relevant implementation blocks (around definitions):"
for f in $(rg -l "def load_base_image_allowlist|def is_valid_base_image" -S scripts/validate_base_images/lib); do
  echo "---- $f ----"
  rg -n "def load_base_image_allowlist|def is_valid_base_image" -n "$f" -C 3
done

echo
echo "Check tests for digest usage/behavior:"
rg -n "@sha256:|digest|digests" scripts/validate_base_images/tests/test_validate_base_images.py -n || true

Repository: opendatahub-io/pipelines-components

Length of output: 686


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Search for BaseImageAllowlist / load_base_image_allowlist definitions across repo:"
rg -n "BaseImageAllowlist|load_base_image_allowlist|is_valid_base_image" -S . || true

echo
echo "Show where base_image.py lives (if any):"
fd -i "base_image.py" . || true

echo
echo "Search for '@sha256:' handling in code:"
rg -n "@sha256:|digest(s)?|sha256" -S . || true

echo
echo "Inspect allowlist matcher logic by opening candidate file(s):"
for f in $(rg -l "load_base_image_allowlist|is_valid_base_image" -S .); do
  echo "---- $f ----"
  sed -n '1,220p' "$f" | sed -n '1,220p' || true
done

Repository: opendatahub-io/pipelines-components

Length of output: 50392


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== scripts/validate_base_images/base_image_allowlist.yaml (top) ==="
sed -n '1,120p' scripts/validate_base_images/base_image_allowlist.yaml

echo
echo "=== scripts/lib/base_image.py: show load_base_image_allowlist + matcher ==="
sed -n '1,260p' scripts/lib/base_image.py

echo
echo "=== scripts/lib/base_image.py: show is_valid_base_image ==="
sed -n '260,420p' scripts/lib/base_image.py

echo
echo "=== scripts/validate_base_images/tests/test_validate_base_images.py: digest-related assertions ==="
rg -n "sha256:|digest" scripts/validate_base_images/tests/test_validate_base_images.py || true

echo
echo "=== scripts/validate_base_images/tests/test_validate_base_images.py: allowlist parsing behavior ==="
rg -n "load_base_image_allowlist|BaseImageAllowlist|allowlist" scripts/validate_base_images/tests/test_validate_base_images.py -n | head -n 80

Repository: opendatahub-io/pipelines-components

Length of output: 21121


Pin allowlisted base images to immutable digests (CWE-494, major).

The validator allows base images via exact string match / regex match only (image in allowed_images or allowed_image_patterns), with no digest/content integrity enforcement—so mutable tags like quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4 will keep passing even if upstream retags.

Exploit scenario: the registry retags :odh-3.4 (or attacker swaps bytes for that tag) and component runtime images change while validate_base_images still accepts the original tag string.

Remediation code
 allowed_images:
-  - 'quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4'
+  - 'quay.io/opendatahub/odh-th06-cpu-torch291-py312@sha256:<approved_digest>'

Update the base_image="..." literals currently using the tag:

  • components/data_processing/dataset_download/component.py
  • components/deployment/kubeflow_model_registry/component.py
  • components/training/finetuning/lora/component.py
  • components/training/finetuning/osft/component.py
  • components/training/finetuning/sft/component.py

to quay.io/opendatahub/odh-th06-cpu-torch291-py312@sha256:<approved_digest>.

Also remove/replace the tag-based allowed_image_patterns entry that matches ^quay\.io/opendatahub/odh-[\w-]+:odh-.+$ (otherwise unrelated retags can still pass via the regex).

</details>

<details>
<summary>🤖 Prompt for AI Agents</summary>

Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In @scripts/validate_base_images/base_image_allowlist.yaml at line 13, Update
the allowlist and source files to require immutable digests: replace the
tag-based entry 'quay.io/opendatahub/odh-th06-cpu-torch291-py312:odh-3.4' in
base_image_allowlist.yaml with the approved digest form
quay.io/opendatahub/odh-th06-cpu-torch291-py312@sha256:<approved_digest>, change
the hardcoded base_image literals in
components/data_processing/dataset_download/component.py,
components/deployment/kubeflow_model_registry/component.py,
components/training/finetuning/{lora,osft,sft}/component.py to use the same
@sha256 digest form, and remove or tighten the tag-matching
allowed_image_patterns entry (the regex
^quay\.io/opendatahub/odh-[\w-]+:odh-.+$) so the validator
(validate_base_images) no longer accepts mutable tag-based images.


</details>

<!-- fingerprinting:phantom:poseidon:hawk -->

<!-- This is an auto-generated comment by CodeRabbit -->

- 'registry.access.redhat.com/ubi9/python-311:latest'

@Fiona-Waters Fiona-Waters May 29, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think 'registry.access.redhat.com/ubi9/python-311:latest' should be removed?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I will put it back. I should have asked about it before removing it.

allowed_image_patterns:
- '^ghcr\.io/kubeflow/.*$'
Expand Down
Loading