Skip to content

Commit 43aff64

Browse files
thesayynalexeagleaspect-workflows[bot]
authored
fix: adjust incremental loader to find the largest chain (#411)
* fix: adjust incremental loader to find the largest chain * Update oci_python_image/hello_world/integration_test.py Co-authored-by: aspect-workflows[bot] <143031405+aspect-workflows[bot]@users.noreply.github.com> --------- Co-authored-by: Alex Eagle <[email protected]> Co-authored-by: aspect-workflows[bot] <143031405+aspect-workflows[bot]@users.noreply.github.com>
1 parent 800d715 commit 43aff64

File tree

1 file changed

+91
-84
lines changed

1 file changed

+91
-84
lines changed

oci_python_image/hello_world/integration_test.py

Lines changed: 91 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,52 @@
33
# TODO: upstream some of this to the testcontainers library to simplify similar code for users.
44
from testcontainers.core.container import DockerContainer
55
from testcontainers.core.waiting_utils import wait_for_logs
6-
import docker
7-
import tarfile
8-
import json
9-
import os
10-
import tempfile
11-
import io
12-
13-
14-
def add_json_file(tar, name, contents):
15-
content = json.dumps(contents).encode("utf-8")
16-
info = tarfile.TarInfo(name=name)
17-
info.size = len(content)
18-
tar.addfile(info, fileobj=io.BytesIO(content))
19-
20-
21-
def add_file(tar, name, fileobj):
22-
info = tarfile.TarInfo(name=name)
23-
info.size = os.fstat(fileobj.fileno()).st_size
24-
tar.addfile(info, fileobj=fileobj)
25-
fileobj.close()
26-
27-
28-
def get_blob_path(image, digest):
29-
return "%s/blobs/%s" % (image, digest.replace(":", "/"))
6+
import logging
307

8+
# This function is self container for easy-to-use by just copy-pasting.
9+
def OCIImageContainer(image):
10+
import docker
11+
import tarfile
12+
import json
13+
import io
14+
import logging
15+
16+
logger = logging.getLogger("incremental_loader")
17+
def tar(*args):
18+
buffer = io.BytesIO()
19+
with tarfile.open(fileobj=buffer, mode="w:") as t:
20+
for name, size, contents in args:
21+
info = tarfile.TarInfo(name=name)
22+
info.size = size
23+
if isinstance(contents, list) or isinstance(contents, dict):
24+
content = json.dumps(contents).encode("utf-8")
25+
info.size = len(content)
26+
contents = io.BytesIO(content)
27+
28+
t.addfile(info, fileobj=contents)
29+
contents.close()
30+
return buffer
31+
32+
def config_json(diff_ids):
33+
return ("config.json", None, {
34+
"rootfs": {
35+
"type": "layers",
36+
"diff_ids": diff_ids
37+
}
38+
})
39+
40+
def manifest_json(layers):
41+
return ("manifest.json", None, [{
42+
"Config": "config.json",
43+
"RepoTags": [],
44+
"Layers": layers
45+
}])
3146

32-
def open_blob(image, digest):
33-
return open(get_blob_path(image, digest), "rb")
3447

48+
def open_blob(image, digest):
49+
blob_path = "%s/blobs/%s" % (image, digest.replace(":", "/"))
50+
return open(blob_path, "rb")
3551

36-
def OCIImageContainer(image):
3752
with open("%s/index.json" % image) as indexp:
3853
indexjson = json.load(indexp)
3954

@@ -45,72 +60,64 @@ def OCIImageContainer(image):
4560

4661
client = docker.from_env()
4762

48-
# Probe and layer loading phase
49-
layers = manifest["layers"]
50-
needed = []
51-
52-
# Probing phase
53-
for i, layer in enumerate(layers):
54-
tmp = tempfile.NamedTemporaryFile(suffix=".tar")
55-
tar = tarfile.open(fileobj=tmp, mode="w")
56-
add_json_file(
57-
tar,
58-
name="manifest.json",
59-
contents=[
60-
{
61-
"Config": "config.json",
62-
"RepoTags": [],
63-
"Layers": [layer["digest"]],
64-
}
65-
],
66-
)
67-
add_json_file(
68-
tar,
69-
name="config.json",
70-
contents={
71-
"rootfs": {
72-
"type": "layers",
73-
"diff_ids": [config["rootfs"]["diff_ids"][i]],
74-
}
75-
},
63+
# Happy path: check if the image exists in the cache
64+
try:
65+
probe = tar(
66+
manifest_json(layers=[layer["digest"] + ".tar" for layer in manifest["layers"]]),
67+
("config.json", manifest["config"]["size"], open_blob(image, manifest["config"]["digest"]))
7668
)
69+
client.images.load(probe.getvalue())
70+
logger.debug("Image was a cache hit")
71+
return DockerContainer(manifest["config"]["digest"])
72+
except docker.errors.ImageLoadError as e:
73+
logger.debug("Image was a cache miss because %s" % e)
74+
75+
76+
# Unhappy path: image is not in the cache, so we need to load it incrementally
77+
layers = manifest["layers"]
78+
start_from = 0
7779

78-
tar.close()
80+
# Probe the cache for the first layer that is not in the cache
81+
for i in range(1, len(layers)):
82+
previous_layer = layers[i-1]
83+
layer = layers[i]
7984

85+
probe = tar(
86+
manifest_json(layers=[layer["digest"] + ".tar" for layer in manifest["layers"][:i]]),
87+
config_json(diff_ids=config["rootfs"]["diff_ids"][:i])
88+
)
89+
8090
try:
81-
# os.system("tar -tvf %s" % tmp.name)
82-
client.images.load(
83-
open(tmp.name, "rb"),
84-
)
85-
except docker.errors.ImageLoadError:
86-
needed.append(layer["digest"])
87-
88-
# Loading phase
89-
tmp = tempfile.NamedTemporaryFile(suffix=".tar")
90-
tar = tarfile.open(fileobj=tmp, mode="w")
91-
add_json_file(
92-
tar,
93-
name="manifest.json",
94-
contents=[
95-
{
96-
"Config": "config.json",
97-
"RepoTags": [],
98-
"Layers": list(map(lambda x: x["digest"], manifest["layers"])),
99-
}
100-
],
101-
)
102-
add_file(
103-
tar, name="config.json", fileobj=open_blob(image, manifest["config"]["digest"])
91+
probe_res = client.images.load(probe.getvalue())
92+
logger.debug("Layer %s was a cache hit" % layer["digest"], probe_res[0].id)
93+
except docker.errors.ImageLoadError as e:
94+
logger.debug("Layer %s was a cache miss because %s" % (layer["digest"], e))
95+
# From this point on we need to load everything.
96+
start_from = i-1
97+
break
98+
99+
logger.debug("Need to load all the layers after: ", start_from)
100+
101+
# Load all layers at once
102+
load = tar(
103+
# Send only needed layers
104+
*[(layer["digest"] + ".tar", layer["size"], open_blob(image, layer["digest"])) for layer in layers[start_from:]],
105+
manifest_json(layers=[layer["digest"] + ".tar" for layer in manifest["layers"]]),
106+
("config.json", manifest["config"]["size"], open_blob(image, manifest["config"]["digest"]))
104107
)
105-
for layer in needed:
106-
add_file(tar, name=layer, fileobj=open_blob(image, layer))
107108

108-
tar.close()
109-
r = client.images.load(open(tmp.name, "rb"))
110-
return DockerContainer(r[0].id)
109+
load_res = client.images.load(load.getvalue())
110+
111+
# Extra safety check: assert that the final image id is equal to the one in the manifest
112+
assert load_res[0].id == manifest["config"]["digest"], "final image id should match manifest %s == %s" % (load_res[0].id, manifest["config"]["digest"])
113+
114+
return DockerContainer(manifest["config"]["digest"])
115+
116+
111117

112118

113119
def test_wait_for_hello():
120+
logging.basicConfig(level=logging.DEBUG)
114121
print("Starting container")
115122
with OCIImageContainer("oci_python_image/hello_world/image") as container:
116123
wait_for_logs(container, "hello py_image_layer!")

0 commit comments

Comments
 (0)