11import json
22import os
3+ import tarfile
34from contextlib import contextmanager
45from pathlib import Path
56
@@ -83,16 +84,24 @@ def test_s3_uri_builder_with_complete_env():
8384
8485
8586@pytest .mark .e2e (type = "oci" )
86- def test_save_to_oci_registry_with_skopeo (get_temp_dir_with_models , get_temp_dir ):
87+ def test_save_to_oci_registry_with_skopeo (get_temp_dir_with_deeply_nested_models , get_temp_dir ):
88+ """Verify OCI layers preserve directory structure with one layer per file.
89+
90+ Uses a real skopeo backend and local OCI registry. After save_to_oci_registry
91+ runs, inspects the local OCI layout to confirm:
92+ - Each model file is in its own layer (one layer per file)
93+ - Subdirectory paths are preserved in tar arcnames (not flattened)
94+ """
8795 base_image = "quay.io/mmortari/hello-world-wait:latest"
88- dest_dir , _ = get_temp_dir_with_models
96+ model_dir , model_files = get_temp_dir_with_deeply_nested_models
8997 oci_ref = "localhost:5001/foo/bar:latest"
98+ oci_dest = get_temp_dir
9099
91100 save_to_oci_registry (
92101 base_image = base_image ,
93102 oci_ref = oci_ref ,
94- model_files_path = dest_dir ,
95- dest_dir = get_temp_dir ,
103+ model_files_path = model_dir ,
104+ dest_dir = oci_dest ,
96105 custom_oci_backend = utils ._get_skopeo_backend (
97106 push_args = [
98107 "--dest-tls-verify=false" ,
@@ -102,6 +111,58 @@ def test_save_to_oci_registry_with_skopeo(get_temp_dir_with_models, get_temp_dir
102111 ),
103112 )
104113
114+ # Inspect the OCI layout to verify layer structure.
115+ # Walk from index.json to an image manifest with layers, handling both
116+ # single-arch images (index -> manifest) and multi-arch images
117+ # (index -> image index -> manifest).
118+ oci_path = Path (oci_dest )
119+ blobs = oci_path / "blobs" / "sha256"
120+ index = json .loads ((oci_path / "index.json" ).read_text ())
121+ digest = index ["manifests" ][0 ]["digest" ].replace ("sha256:" , "" )
122+ manifest = json .loads ((blobs / digest ).read_text ())
123+ if "layers" not in manifest :
124+ # Multi-arch: manifest is an image index, follow to first platform
125+ digest = manifest ["manifests" ][0 ]["digest" ].replace ("sha256:" , "" )
126+ manifest = json .loads ((blobs / digest ).read_text ())
127+
128+ # Identify model layers added by olot (they have olot annotations).
129+ # Base image layers don't have these annotations.
130+ model_layers = [
131+ layer for layer in manifest ["layers" ]
132+ if "olot.layer.content.inlayerpath" in layer .get ("annotations" , {})
133+ ]
134+
135+ # Each model file should be in its own layer (one layer per file)
136+ assert len (model_layers ) == len (model_files ), (
137+ f"Expected { len (model_files )} model layers (one per file), got { len (model_layers )} "
138+ )
139+
140+ # Verify directory structure is preserved by checking the in-layer paths
141+ in_layer_paths = sorted (
142+ layer ["annotations" ]["olot.layer.content.inlayerpath" ]
143+ for layer in model_layers
144+ )
145+ expected_paths = sorted (
146+ "/models/" + os .path .relpath (f , model_dir ) for f in model_files
147+ )
148+ assert in_layer_paths == expected_paths , (
149+ f"Directory structure not preserved.\n "
150+ f"Expected: { expected_paths } \n "
151+ f"Found: { in_layer_paths } "
152+ )
153+
154+ # Also verify the actual tar contents match the annotations
155+ for layer in model_layers :
156+ digest = layer ["digest" ].replace ("sha256:" , "" )
157+ blob_path = blobs / digest
158+ with tarfile .open (blob_path , "r:" ) as tar :
159+ file_entries = [m .name for m in tar .getmembers () if m .isfile ()]
160+ assert len (file_entries ) == 1 , (
161+ f"Expected one file per layer tar, got { file_entries } "
162+ )
163+ expected_tar_path = layer ["annotations" ]["olot.layer.content.inlayerpath" ].lstrip ("/" )
164+ assert file_entries [0 ] == expected_tar_path
165+
105166
106167def test_save_to_oci_registry_with_custom_backend (
107168 get_temp_dir_with_models , get_temp_dir , get_mock_custom_oci_backend
@@ -167,9 +228,12 @@ def temp_auth_file_wrapper(auth):
167228
168229
169230def test_save_to_oci_registry_preserves_dir_structure (mocker , tmp_path ):
170- """Verify that subdirectories are passed as top-level entries, not flattened leaf files.
231+ """Verify one layer per file with correct directory structure via root_dir.
171232
172233 Regression test for https://github.com/kubeflow/model-registry/issues/2437
234+ With root_dir support we expect:
235+ - One layer per individual file (not one layer per top-level subdir)
236+ - Original directory structure preserved (paths relative to root_dir)
173237 """
174238 model_files_path = tmp_path / "my-model"
175239 model_files_path .mkdir ()
@@ -195,11 +259,21 @@ def test_save_to_oci_registry_preserves_dir_structure(mocker, tmp_path):
195259 backend = "skopeo" ,
196260 )
197261
198- # oci_layers_on_top should receive top-level entries (directories + files),
199- # NOT recursively flattened individual files.
262+ # oci_layers_on_top should receive individual files (one layer per file),
263+ # NOT top-level directories (which would bundle multiple files per layer).
200264 called_files = mock_layers .call_args .args [1 ]
201- called_names = sorted (p .name for p in called_files )
202- assert called_names == ["README.md" , "onnx" , "tokenizer" ]
265+ called_rel_paths = sorted (
266+ str (Path (f ).relative_to (model_files_path )) for f in called_files
267+ )
268+ assert called_rel_paths == [
269+ "README.md" ,
270+ "onnx/model.onnx" ,
271+ "onnx/weights/quantized.bin" ,
272+ "tokenizer/vocab.txt" ,
273+ ]
274+
275+ # root_dir must be passed so olot preserves the directory structure in layers
276+ assert mock_layers .call_args .kwargs ["root_dir" ] == model_files_path
203277
204278
205279@pytest .mark .e2e (type = "oci" )
0 commit comments