Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions clients/python/scripts/check_oci_dir_structure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
"""Test that save_to_oci_registry preserves nested directory structure

Ref: test_save_to_oci_registry_with_skopeo in tests/test_utils.py:86

Usage:

python scripts/check_oci_dir_structure.py

# or

poetry run python scripts/check_oci_dir_structure.py
"""

import json
import subprocess
import sys
import tarfile
import tempfile
from pathlib import Path

from model_registry.utils import save_to_oci_registry

OCI_REF = "quay.io/jburdo/test1:dir-structure-test"
BASE_IMAGE = "quay.io/mmortari/hello-world-wait:latest"

# Paths, relative to the model root, that must be present in the pulled image.
EXPECTED_PATHS = {
    "README.md",
    "onnx/model.onnx",
    "onnx/weights/quantized.bin",
    "tokenizer/vocab.txt",
    "tokenizer/config.json",
}

# Tar entries are only inspected when their name starts with this prefix.
MODEL_PREFIX = "models/"


def create_model_tree(model_dir: Path) -> None:
    """Create *model_dir* and fill it with the nested files in EXPECTED_PATHS."""
    model_dir.mkdir()
    (model_dir / "README.md").write_text("# Test Model\n")
    (model_dir / "onnx").mkdir()
    (model_dir / "onnx" / "model.onnx").write_bytes(b"\x00" * 64)
    (model_dir / "onnx" / "weights").mkdir()
    (model_dir / "onnx" / "weights" / "quantized.bin").write_bytes(b"\x01" * 32)
    (model_dir / "tokenizer").mkdir()
    (model_dir / "tokenizer" / "vocab.txt").write_text("hello\nworld\n")
    (model_dir / "tokenizer" / "config.json").write_text('{"type": "test"}\n')


def pull_image(oci_ref: str, pull_dir: Path) -> None:
    """Copy *oci_ref* into a fresh OCI layout at *pull_dir* using skopeo.

    Raises:
        subprocess.CalledProcessError: if skopeo exits with a non-zero status.
    """
    pull_dir.mkdir()
    try:
        subprocess.run(
            ["skopeo", "copy", f"docker://{oci_ref}", f"oci:{pull_dir}:latest"],
            check=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as err:
        # capture_output hides skopeo's diagnostics; show them before failing.
        if err.stderr:
            sys.stderr.write(err.stderr.decode(errors="replace"))
        raise


def render_layer_tree(model_entries: list[tarfile.TarInfo]) -> list[str]:
    """Return one tree line per entry, ordered by name.

    The closing branch marker is placed on the last line that is actually
    printed (the last entry in name order), not on the last entry in archive
    order.
    """
    ordered = sorted(model_entries, key=lambda m: m.name)
    last_index = len(ordered) - 1
    lines = []
    for position, member in enumerate(ordered):
        path = member.name.removeprefix(MODEL_PREFIX)
        kind = "dir" if member.isdir() else "file"
        branch = "└── " if position == last_index else "├── "
        lines.append(f" {branch}{path} ({kind})")
    return lines


def collect_model_paths(pull_dir: Path) -> set[str]:
    """Return every path found under ``models/`` in the layers of *pull_dir*.

    Reads ``index.json``, follows its first manifest, and opens each layer
    blob as a tar archive. Blobs that cannot be opened as tar archives and
    layers without any ``models/`` entry are skipped. A per-layer listing is
    printed along the way. Returned paths have the ``models/`` prefix removed
    and include directories as well as files.
    """
    index = json.loads((pull_dir / "index.json").read_text())
    algo, digest = index["manifests"][0]["digest"].split(":")
    manifest = json.loads((pull_dir / "blobs" / algo / digest).read_text())

    found_paths: set[str] = set()
    layer_num = 0  # counts only the layers that contain model entries
    print("\nModel layers:")
    for layer in manifest["layers"]:
        algo, digest = layer["digest"].split(":")
        blob = pull_dir / "blobs" / algo / digest
        try:
            with tarfile.open(blob, "r:*") as tar:
                model_entries = [
                    m for m in tar.getmembers() if m.name.startswith(MODEL_PREFIX)
                ]
        except tarfile.TarError:
            # Best effort: a blob that is not a readable tar is not a model layer.
            continue
        if not model_entries:
            continue
        print(f" layer {layer_num} ({digest[:12]})")
        for line in render_layer_tree(model_entries):
            print(line)
        found_paths.update(m.name.removeprefix(MODEL_PREFIX) for m in model_entries)
        layer_num += 1
    return found_paths


def main() -> int:
    """Push a nested model directory, pull it back, and verify its paths.

    Returns:
        0 when every path in EXPECTED_PATHS was found, 1 otherwise.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp = Path(tmpdir)
        model_dir = tmp / "my-model"

        # Create nested model structure
        create_model_tree(model_dir)

        # Push
        print(f"Pushing to {OCI_REF}...")
        save_to_oci_registry(
            base_image=BASE_IMAGE,
            oci_ref=OCI_REF,
            model_files_path=model_dir,
            dest_dir=tmp / "oci-dest",
            backend="skopeo",
        )

        # Pull back
        print("Pulling back...")
        pull_dir = tmp / "oci-pull"
        pull_image(OCI_REF, pull_dir)

        # Collect all paths under models/ from the tar layers
        found_paths = collect_model_paths(pull_dir)

    # Report
    print(f"\nExpected: {sorted(EXPECTED_PATHS)}")
    print(f"Found: {sorted(found_paths)}")
    missing = EXPECTED_PATHS - found_paths
    if missing:
        print(f"FAIL - missing paths: {sorted(missing)}")
        return 1
    print("PASS - all expected paths preserved")
    return 0


if __name__ == "__main__":
    sys.exit(main())
9 changes: 3 additions & 6 deletions clients/python/src/model_registry/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,9 @@ def save_to_oci_registry( # noqa: C901 ( complex args >8 )
if auth_file is not None:
params["authfile"] = auth_file.name
backend_def.pull(base_image, local_image_path, **params)
# Pass top-level entries (dir/*) instead of individual leaf files (dir/**/*).
# olot's tarball_from_file preserves subdirectory structure when given a
# directory, but flattens everything to basename when given individual files.
model_path = Path(model_files_path)
files = [model_path] if model_path.is_file() else sorted(model_path.iterdir())
oci_layers_on_top(local_image_path, files, modelcard)
# Extract the absolute path from the files found in the path
files = [file[0] for file in _get_files_from_path(model_files_path)] # type: ignore[arg-type]
oci_layers_on_top(local_image_path, files, modelcard, root_dir=model_files_path)
backend_def.push(local_image_path, oci_ref, **params)

# Return the OCI URI
Expand Down
Loading