Skip to content

Commit cd8ceb3

Browse files
authored
fix: use root_dir in save_to_oci_registry for correct OCI layer structure (#2552)
Pass individual file paths with root_dir to oci_layers_on_top instead of top-level directory entries. This produces one layer per file (instead of one layer per subdirectory) while preserving the original directory structure in layer arcnames via olot's root_dir support. Update the unit test to assert the new root_dir-based calling convention and add e2e test verification that inspects the actual OCI layout to confirm one-layer-per-file and correct path preservation. Related to #2498 Resolves #2520 AI-assisted: Claude Code (Opus 4.6) Signed-off-by: Jon Burdo <jon@jonburdo.com>
1 parent 1c88c27 commit cd8ceb3

5 files changed

Lines changed: 147 additions & 25 deletions

File tree

clients/python/poetry.lock

Lines changed: 14 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ aiohttp-retry = "^2.8.3"
2525
nest-asyncio2 = "^1.7.1"
2626

2727
huggingface-hub = { version = ">=0.20.1,<1.8.0", optional = true }
28-
olot = { version = "^0.1.6", optional = true }
28+
olot = { version = "^0.1.17", optional = true }
2929
boto3 = { version = "^1.37.34", optional = true }
3030
rh-model-signing = { version = "0.1.0", optional = true }
3131
platformdirs = { version = "^4.0.0", optional = true }

clients/python/src/model_registry/utils.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -393,12 +393,9 @@ def save_to_oci_registry( # noqa: C901 ( complex args >8 )
393393
if auth_file is not None:
394394
params["authfile"] = auth_file.name
395395
backend_def.pull(base_image, local_image_path, **params)
396-
# Pass top-level entries (dir/*) instead of individual leaf files (dir/**/*).
397-
# olot's tarball_from_file preserves subdirectory structure when given a
398-
# directory, but flattens everything to basename when given individual files.
399-
model_path = Path(model_files_path)
400-
files = [model_path] if model_path.is_file() else sorted(model_path.iterdir())
401-
oci_layers_on_top(local_image_path, files, modelcard)
396+
# Extract the absolute path from the files found in the path
397+
files = [file[0] for file in _get_files_from_path(model_files_path)] # type: ignore[arg-type]
398+
oci_layers_on_top(local_image_path, files, modelcard, root_dir=model_files_path)
402399
backend_def.push(local_image_path, oci_ref, **params)
403400

404401
# Return the OCI URI

clients/python/tests/conftest.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,52 @@ def get_temp_dir_with_nested_models():
281281
os.rmdir(temp_dir)
282282

283283

284+
@pytest.fixture
285+
def get_temp_dir_with_deeply_nested_models():
286+
"""Model directory with multiple levels of nesting to verify structure preservation.
287+
288+
Creates:
289+
my-model/
290+
├── README.md
291+
├── onnx/
292+
│ ├── model.onnx
293+
│ └── weights/
294+
│ └── quantized.bin
295+
└── tokenizer/
296+
├── config.json
297+
└── vocab.txt
298+
"""
299+
temp_dir = tempfile.mkdtemp()
300+
model_dir = os.path.join(temp_dir, "my-model")
301+
os.makedirs(model_dir)
302+
303+
dirs = [
304+
os.path.join(model_dir, "onnx"),
305+
os.path.join(model_dir, "onnx", "weights"),
306+
os.path.join(model_dir, "tokenizer"),
307+
]
308+
for d in dirs:
309+
os.makedirs(d)
310+
311+
file_entries = [
312+
("README.md", b"# My Model"),
313+
("onnx/model.onnx", b"fake-onnx-data"),
314+
("onnx/weights/quantized.bin", b"fake-weights-data"),
315+
("tokenizer/config.json", b'{"key": "value"}'),
316+
("tokenizer/vocab.txt", b"hello\nworld"),
317+
]
318+
file_paths = []
319+
for rel_path, content in file_entries:
320+
full_path = os.path.join(model_dir, rel_path)
321+
with open(full_path, "wb") as f:
322+
f.write(content)
323+
file_paths.append(full_path)
324+
325+
yield model_dir, file_paths
326+
327+
shutil.rmtree(temp_dir)
328+
329+
284330
@pytest.fixture
285331
def get_large_model_dir():
286332
"""Creates a directory containing a large model file (300-500MB) for testing file size extremes."""

clients/python/tests/test_utils.py

Lines changed: 83 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import os
3+
import tarfile
34
from contextlib import contextmanager
45
from pathlib import Path
56

@@ -83,16 +84,24 @@ def test_s3_uri_builder_with_complete_env():
8384

8485

8586
@pytest.mark.e2e(type="oci")
86-
def test_save_to_oci_registry_with_skopeo(get_temp_dir_with_models, get_temp_dir):
87+
def test_save_to_oci_registry_with_skopeo(get_temp_dir_with_deeply_nested_models, get_temp_dir):
88+
"""Verify OCI layers preserve directory structure with one layer per file.
89+
90+
Uses a real skopeo backend and local OCI registry. After save_to_oci_registry
91+
runs, inspects the local OCI layout to confirm:
92+
- Each model file is in its own layer (one layer per file)
93+
- Subdirectory paths are preserved in tar arcnames (not flattened)
94+
"""
8795
base_image = "quay.io/mmortari/hello-world-wait:latest"
88-
dest_dir, _ = get_temp_dir_with_models
96+
model_dir, model_files = get_temp_dir_with_deeply_nested_models
8997
oci_ref = "localhost:5001/foo/bar:latest"
98+
oci_dest = get_temp_dir
9099

91100
save_to_oci_registry(
92101
base_image=base_image,
93102
oci_ref=oci_ref,
94-
model_files_path=dest_dir,
95-
dest_dir=get_temp_dir,
103+
model_files_path=model_dir,
104+
dest_dir=oci_dest,
96105
custom_oci_backend=utils._get_skopeo_backend(
97106
push_args=[
98107
"--dest-tls-verify=false",
@@ -102,6 +111,58 @@ def test_save_to_oci_registry_with_skopeo(get_temp_dir_with_models, get_temp_dir
102111
),
103112
)
104113

114+
# Inspect the OCI layout to verify layer structure.
115+
# Walk from index.json to an image manifest with layers, handling both
116+
# single-arch images (index -> manifest) and multi-arch images
117+
# (index -> image index -> manifest).
118+
oci_path = Path(oci_dest)
119+
blobs = oci_path / "blobs" / "sha256"
120+
index = json.loads((oci_path / "index.json").read_text())
121+
digest = index["manifests"][0]["digest"].replace("sha256:", "")
122+
manifest = json.loads((blobs / digest).read_text())
123+
if "layers" not in manifest:
124+
# Multi-arch: manifest is an image index, follow to first platform
125+
digest = manifest["manifests"][0]["digest"].replace("sha256:", "")
126+
manifest = json.loads((blobs / digest).read_text())
127+
128+
# Identify model layers added by olot (they have olot annotations).
129+
# Base image layers don't have these annotations.
130+
model_layers = [
131+
layer for layer in manifest["layers"]
132+
if "olot.layer.content.inlayerpath" in layer.get("annotations", {})
133+
]
134+
135+
# Each model file should be in its own layer (one layer per file)
136+
assert len(model_layers) == len(model_files), (
137+
f"Expected {len(model_files)} model layers (one per file), got {len(model_layers)}"
138+
)
139+
140+
# Verify directory structure is preserved by checking the in-layer paths
141+
in_layer_paths = sorted(
142+
layer["annotations"]["olot.layer.content.inlayerpath"]
143+
for layer in model_layers
144+
)
145+
expected_paths = sorted(
146+
"/models/" + os.path.relpath(f, model_dir) for f in model_files
147+
)
148+
assert in_layer_paths == expected_paths, (
149+
f"Directory structure not preserved.\n"
150+
f"Expected: {expected_paths}\n"
151+
f"Found: {in_layer_paths}"
152+
)
153+
154+
# Also verify the actual tar contents match the annotations
155+
for layer in model_layers:
156+
digest = layer["digest"].replace("sha256:", "")
157+
blob_path = blobs / digest
158+
with tarfile.open(blob_path, "r:") as tar:
159+
file_entries = [m.name for m in tar.getmembers() if m.isfile()]
160+
assert len(file_entries) == 1, (
161+
f"Expected one file per layer tar, got {file_entries}"
162+
)
163+
expected_tar_path = layer["annotations"]["olot.layer.content.inlayerpath"].lstrip("/")
164+
assert file_entries[0] == expected_tar_path
165+
105166

106167
def test_save_to_oci_registry_with_custom_backend(
107168
get_temp_dir_with_models, get_temp_dir, get_mock_custom_oci_backend
@@ -167,9 +228,12 @@ def temp_auth_file_wrapper(auth):
167228

168229

169230
def test_save_to_oci_registry_preserves_dir_structure(mocker, tmp_path):
170-
"""Verify that subdirectories are passed as top-level entries, not flattened leaf files.
231+
"""Verify one layer per file with correct directory structure via root_dir.
171232
172233
Regression test for https://github.com/kubeflow/model-registry/issues/2437
234+
With root_dir support we expect:
235+
- One layer per individual file (not one layer per top-level subdir)
236+
- Original directory structure preserved (paths relative to root_dir)
173237
"""
174238
model_files_path = tmp_path / "my-model"
175239
model_files_path.mkdir()
@@ -195,11 +259,21 @@ def test_save_to_oci_registry_preserves_dir_structure(mocker, tmp_path):
195259
backend="skopeo",
196260
)
197261

198-
# oci_layers_on_top should receive top-level entries (directories + files),
199-
# NOT recursively flattened individual files.
262+
# oci_layers_on_top should receive individual files (one layer per file),
263+
# NOT top-level directories (which would bundle multiple files per layer).
200264
called_files = mock_layers.call_args.args[1]
201-
called_names = sorted(p.name for p in called_files)
202-
assert called_names == ["README.md", "onnx", "tokenizer"]
265+
called_rel_paths = sorted(
266+
str(Path(f).relative_to(model_files_path)) for f in called_files
267+
)
268+
assert called_rel_paths == [
269+
"README.md",
270+
"onnx/model.onnx",
271+
"onnx/weights/quantized.bin",
272+
"tokenizer/vocab.txt",
273+
]
274+
275+
# root_dir must be passed so olot preserves the directory structure in layers
276+
assert mock_layers.call_args.kwargs["root_dir"] == model_files_path
203277

204278

205279
@pytest.mark.e2e(type="oci")

0 commit comments

Comments
 (0)