Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions clients/python/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion clients/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ aiohttp-retry = "^2.8.3"
nest-asyncio2 = "^1.7.1"

huggingface-hub = { version = ">=0.20.1,<1.8.0", optional = true }
olot = { version = "^0.1.6", optional = true }
olot = { version = "^0.1.17", optional = true }
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Upgrade needed for root_dir kwarg support in oci_layers_on_top

boto3 = { version = "^1.37.34", optional = true }
rh-model-signing = { version = "0.1.0", optional = true }
platformdirs = { version = "^4.0.0", optional = true }
Expand Down
9 changes: 3 additions & 6 deletions clients/python/src/model_registry/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,9 @@ def save_to_oci_registry( # noqa: C901 ( complex args >8 )
if auth_file is not None:
params["authfile"] = auth_file.name
backend_def.pull(base_image, local_image_path, **params)
# Pass top-level entries (dir/*) instead of individual leaf files (dir/**/*).
# olot's tarball_from_file preserves subdirectory structure when given a
# directory, but flattens everything to basename when given individual files.
model_path = Path(model_files_path)
files = [model_path] if model_path.is_file() else sorted(model_path.iterdir())
oci_layers_on_top(local_image_path, files, modelcard)
# Extract the absolute path from the files found in the path
files = [file[0] for file in _get_files_from_path(model_files_path)] # type: ignore[arg-type]
oci_layers_on_top(local_image_path, files, modelcard, root_dir=model_files_path)
Comment on lines +396 to +398
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverts back to what we add originally here, before a mitigation: https://github.com/kubeflow/model-registry/pull/2498/changes#diff-cd394845b1d31bff2290364330b210311c758b6f0b10ce787adaf91f7d6a6010L396-R400
and adds root_dir kwarg:

- oci_layers_on_top(local_image_path, files, modelcard)
+ oci_layers_on_top(local_image_path, files, modelcard, root_dir=model_files_path)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

love how simple it become

backend_def.push(local_image_path, oci_ref, **params)

# Return the OCI URI
Expand Down
46 changes: 46 additions & 0 deletions clients/python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,52 @@ def get_temp_dir_with_nested_models():
os.rmdir(temp_dir)


@pytest.fixture
def get_temp_dir_with_deeply_nested_models():
"""Model directory with multiple levels of nesting to verify structure preservation.

Creates:
my-model/
├── README.md
├── onnx/
│ ├── model.onnx
│ └── weights/
│ └── quantized.bin
└── tokenizer/
├── config.json
└── vocab.txt
"""
temp_dir = tempfile.mkdtemp()
model_dir = os.path.join(temp_dir, "my-model")
os.makedirs(model_dir)

dirs = [
os.path.join(model_dir, "onnx"),
os.path.join(model_dir, "onnx", "weights"),
os.path.join(model_dir, "tokenizer"),
]
for d in dirs:
os.makedirs(d)

file_entries = [
("README.md", b"# My Model"),
("onnx/model.onnx", b"fake-onnx-data"),
("onnx/weights/quantized.bin", b"fake-weights-data"),
("tokenizer/config.json", b'{"key": "value"}'),
("tokenizer/vocab.txt", b"hello\nworld"),
]
file_paths = []
for rel_path, content in file_entries:
full_path = os.path.join(model_dir, rel_path)
with open(full_path, "wb") as f:
f.write(content)
file_paths.append(full_path)

yield model_dir, file_paths

shutil.rmtree(temp_dir)


@pytest.fixture
def get_large_model_dir():
"""Creates a directory containing a large model file (300-500MB) for testing file size extremes."""
Expand Down
92 changes: 83 additions & 9 deletions clients/python/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import tarfile
from contextlib import contextmanager
from pathlib import Path

Expand Down Expand Up @@ -83,16 +84,24 @@ def test_s3_uri_builder_with_complete_env():


@pytest.mark.e2e(type="oci")
def test_save_to_oci_registry_with_skopeo(get_temp_dir_with_models, get_temp_dir):
def test_save_to_oci_registry_with_skopeo(get_temp_dir_with_deeply_nested_models, get_temp_dir):
"""Verify OCI layers preserve directory structure with one layer per file.

Uses a real skopeo backend and local OCI registry. After save_to_oci_registry
runs, inspects the local OCI layout to confirm:
- Each model file is in its own layer (one layer per file)
- Subdirectory paths are preserved in tar arcnames (not flattened)
"""
base_image = "quay.io/mmortari/hello-world-wait:latest"
dest_dir, _ = get_temp_dir_with_models
model_dir, model_files = get_temp_dir_with_deeply_nested_models
oci_ref = "localhost:5001/foo/bar:latest"
oci_dest = get_temp_dir

save_to_oci_registry(
base_image=base_image,
oci_ref=oci_ref,
model_files_path=dest_dir,
dest_dir=get_temp_dir,
model_files_path=model_dir,
dest_dir=oci_dest,
custom_oci_backend=utils._get_skopeo_backend(
push_args=[
"--dest-tls-verify=false",
Expand All @@ -102,6 +111,58 @@ def test_save_to_oci_registry_with_skopeo(get_temp_dir_with_models, get_temp_dir
),
)

# Inspect the OCI layout to verify layer structure.
# Walk from index.json to an image manifest with layers, handling both
# single-arch images (index -> manifest) and multi-arch images
# (index -> image index -> manifest).
oci_path = Path(oci_dest)
blobs = oci_path / "blobs" / "sha256"
index = json.loads((oci_path / "index.json").read_text())
digest = index["manifests"][0]["digest"].replace("sha256:", "")
manifest = json.loads((blobs / digest).read_text())
if "layers" not in manifest:
# Multi-arch: manifest is an image index, follow to first platform
digest = manifest["manifests"][0]["digest"].replace("sha256:", "")
manifest = json.loads((blobs / digest).read_text())

# Identify model layers added by olot (they have olot annotations).
# Base image layers don't have these annotations.
model_layers = [
layer for layer in manifest["layers"]
if "olot.layer.content.inlayerpath" in layer.get("annotations", {})
]

# Each model file should be in its own layer (one layer per file)
assert len(model_layers) == len(model_files), (
f"Expected {len(model_files)} model layers (one per file), got {len(model_layers)}"
)

# Verify directory structure is preserved by checking the in-layer paths
in_layer_paths = sorted(
layer["annotations"]["olot.layer.content.inlayerpath"]
for layer in model_layers
)
expected_paths = sorted(
"/models/" + os.path.relpath(f, model_dir) for f in model_files
)
assert in_layer_paths == expected_paths, (
f"Directory structure not preserved.\n"
f"Expected: {expected_paths}\n"
f"Found: {in_layer_paths}"
)

# Also verify the actual tar contents match the annotations
for layer in model_layers:
digest = layer["digest"].replace("sha256:", "")
blob_path = blobs / digest
with tarfile.open(blob_path, "r:") as tar:
file_entries = [m.name for m in tar.getmembers() if m.isfile()]
assert len(file_entries) == 1, (
f"Expected one file per layer tar, got {file_entries}"
)
expected_tar_path = layer["annotations"]["olot.layer.content.inlayerpath"].lstrip("/")
assert file_entries[0] == expected_tar_path


def test_save_to_oci_registry_with_custom_backend(
get_temp_dir_with_models, get_temp_dir, get_mock_custom_oci_backend
Expand Down Expand Up @@ -167,9 +228,12 @@ def temp_auth_file_wrapper(auth):


def test_save_to_oci_registry_preserves_dir_structure(mocker, tmp_path):
"""Verify that subdirectories are passed as top-level entries, not flattened leaf files.
"""Verify one layer per file with correct directory structure via root_dir.

Regression test for https://github.com/kubeflow/model-registry/issues/2437
With root_dir support we expect:
- One layer per individual file (not one layer per top-level subdir)
- Original directory structure preserved (paths relative to root_dir)
"""
model_files_path = tmp_path / "my-model"
model_files_path.mkdir()
Expand All @@ -195,11 +259,21 @@ def test_save_to_oci_registry_preserves_dir_structure(mocker, tmp_path):
backend="skopeo",
)

# oci_layers_on_top should receive top-level entries (directories + files),
# NOT recursively flattened individual files.
# oci_layers_on_top should receive individual files (one layer per file),
# NOT top-level directories (which would bundle multiple files per layer).
called_files = mock_layers.call_args.args[1]
called_names = sorted(p.name for p in called_files)
assert called_names == ["README.md", "onnx", "tokenizer"]
called_rel_paths = sorted(
str(Path(f).relative_to(model_files_path)) for f in called_files
)
assert called_rel_paths == [
"README.md",
"onnx/model.onnx",
"onnx/weights/quantized.bin",
"tokenizer/vocab.txt",
]

# root_dir must be passed so olot preserves the directory structure in layers
assert mock_layers.call_args.kwargs["root_dir"] == model_files_path


@pytest.mark.e2e(type="oci")
Expand Down
Loading