Skip to content

Commit 8ce5f10

Browse files
committed
Support concatenated gzip (mgzip/pigz) in ztoc generation and extraction
Signed-off-by: Praful Gupta <prafulgupta6@gmail.com>
1 parent b4baddb commit 8ce5f10

4 files changed

Lines changed: 389 additions & 7 deletions

File tree

integration/util_test.go

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package integration
3434

3535
import (
36+
"archive/tar"
3637
"bytes"
3738
"crypto/rand"
3839
"crypto/rsa"
@@ -43,8 +44,10 @@ import (
4344
"encoding/pem"
4445
"errors"
4546
"fmt"
47+
"io"
4648
"math/big"
4749
"os"
50+
"os/exec"
4851
"path/filepath"
4952
"runtime"
5053
"sort"
@@ -1040,3 +1043,125 @@ func withContentStoreConfig(opts ...store.Option) snapshotterConfigOpt {
10401043
c.ServiceConfig.FSConfig.ContentStoreConfig = store.NewStoreConfig(opts...).ContentStoreConfig
10411044
}
10421045
}
1046+
1047+
type pigzImageInfo struct {
1048+
ref string
1049+
files map[string]string
1050+
layerCount int
1051+
}
1052+
1053+
// buildPigzImage constructs a minimal OCI image with a single pigz-compressed layer
1054+
// and imports it into containerd via "ctr images import"
1055+
func buildPigzImage(t *testing.T, sh *shell.Shell, imageName string) pigzImageInfo {
1056+
t.Helper()
1057+
1058+
pigzPath, err := exec.LookPath("pigz")
1059+
if err != nil {
1060+
t.Fatal("pigz is required but not installed")
1061+
}
1062+
1063+
r := testutil.NewTestRand(t)
1064+
testFiles := map[string]string{
1065+
"testfile1.txt": "pigz-test-content-alpha-" + string(r.RandomByteData(1<<20)), // ~1 MB
1066+
"testfile2.txt": "pigz-test-content-beta-" + string(r.RandomByteData(1<<20)), // ~1 MB
1067+
}
1068+
1069+
entries := []testutil.TarEntry{
1070+
testutil.File("testfile1.txt", testFiles["testfile1.txt"]),
1071+
testutil.File("padding1.bin", string(r.RandomByteData(1<<23))), // 8 MB
1072+
testutil.File("testfile2.txt", testFiles["testfile2.txt"]),
1073+
testutil.File("padding2.bin", string(r.RandomByteData(1<<23))), // 8 MB
1074+
}
1075+
tarData, err := io.ReadAll(testutil.BuildTar(entries))
1076+
if err != nil {
1077+
t.Fatalf("failed to build tar: %v", err)
1078+
}
1079+
1080+
// Compress with pigz using 128KB block size (produces concatenated gzip members).
1081+
pigzCmd := exec.Command(pigzPath, "-b", "128", "-c")
1082+
pigzCmd.Stdin = bytes.NewReader(tarData)
1083+
compressedData, err := pigzCmd.Output()
1084+
if err != nil {
1085+
t.Fatalf("pigz compression failed: %v", err)
1086+
}
1087+
layerDigest := digest.FromBytes(compressedData)
1088+
1089+
platform := spec.Platform{Architecture: runtime.GOARCH, OS: "linux"}
1090+
1091+
// Build OCI image config.
1092+
configBytes, err := json.Marshal(spec.Image{
1093+
Platform: platform,
1094+
RootFS: spec.RootFS{Type: "layers", DiffIDs: []digest.Digest{digest.FromBytes(tarData)}},
1095+
})
1096+
if err != nil {
1097+
t.Fatalf("failed to marshal config: %v", err)
1098+
}
1099+
1100+
configDigest := digest.FromBytes(configBytes)
1101+
manifest := spec.Manifest{
1102+
MediaType: spec.MediaTypeImageManifest,
1103+
Config: spec.Descriptor{MediaType: spec.MediaTypeImageConfig, Digest: configDigest, Size: int64(len(configBytes))},
1104+
Layers: []spec.Descriptor{{MediaType: spec.MediaTypeImageLayerGzip, Digest: layerDigest, Size: int64(len(compressedData))}},
1105+
}
1106+
manifest.SchemaVersion = 2
1107+
manifestBytes, err := json.Marshal(manifest)
1108+
if err != nil {
1109+
t.Fatalf("failed to marshal manifest: %v", err)
1110+
}
1111+
manifestDigest := digest.FromBytes(manifestBytes)
1112+
1113+
// Build OCI index.
1114+
index := spec.Index{
1115+
MediaType: spec.MediaTypeImageIndex,
1116+
Manifests: []spec.Descriptor{
1117+
{
1118+
MediaType: spec.MediaTypeImageManifest,
1119+
Digest: manifestDigest,
1120+
Size: int64(len(manifestBytes)),
1121+
Platform: &platform,
1122+
Annotations: map[string]string{"io.containerd.image.name": imageName},
1123+
},
1124+
},
1125+
}
1126+
index.SchemaVersion = 2
1127+
indexBytes, err := json.Marshal(index)
1128+
if err != nil {
1129+
t.Fatalf("failed to marshal index: %v", err)
1130+
}
1131+
1132+
// Assemble OCI image layout as a tar archive.
1133+
var buf bytes.Buffer
1134+
tw := tar.NewWriter(&buf)
1135+
for name, data := range map[string][]byte{
1136+
"oci-layout": []byte(`{"imageLayoutVersion":"1.0.0"}`),
1137+
"index.json": indexBytes,
1138+
"blobs/sha256/" + layerDigest.Encoded(): compressedData,
1139+
"blobs/sha256/" + configDigest.Encoded(): configBytes,
1140+
"blobs/sha256/" + manifestDigest.Encoded(): manifestBytes,
1141+
} {
1142+
if err := tw.WriteHeader(&tar.Header{Name: name, Mode: 0644, Size: int64(len(data))}); err != nil {
1143+
t.Fatalf("failed to write tar header for %s: %v", name, err)
1144+
}
1145+
if _, err := tw.Write(data); err != nil {
1146+
t.Fatalf("failed to write tar data for %s: %v", name, err)
1147+
}
1148+
}
1149+
if err := tw.Close(); err != nil {
1150+
t.Fatalf("failed to close tar writer: %v", err)
1151+
}
1152+
ociTar := buf.Bytes()
1153+
1154+
// Write OCI tar to test container and import into containerd.
1155+
tmpPath := "/tmp/pigz-image-" + xid.New().String() + ".tar"
1156+
if err := testutil.WriteFileContents(sh, tmpPath, ociTar, 0644); err != nil {
1157+
t.Fatalf("failed to write OCI tar to container: %v", err)
1158+
}
1159+
sh.X("ctr", "images", "import", tmpPath)
1160+
sh.X("rm", "-f", tmpPath)
1161+
1162+
return pigzImageInfo{
1163+
ref: imageName,
1164+
files: testFiles,
1165+
layerCount: 1,
1166+
}
1167+
}

integration/ztoc_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,52 @@ func dedupeZtocBlobs(ztocBlobs []*v1.Descriptor) []*v1.Descriptor {
474474
return dedupedZtocBlobs
475475
}
476476

477+
func TestSociZtocWithPigzLayers(t *testing.T) {
478+
t.Parallel()
479+
480+
regConfig := newRegistryConfig()
481+
sh, done := newShellWithRegistry(t, regConfig)
482+
defer done()
483+
484+
rebootContainerd(t, sh, getContainerdConfigToml(t, false), getSnapshotterConfigToml(t))
485+
486+
pigzImg := buildPigzImage(t, sh, "pigz-test:latest")
487+
mirrorRef := regConfig.mirror("pigz-test:latest")
488+
sh.X("ctr", "i", "tag", pigzImg.ref, mirrorRef.ref)
489+
sh.X(append([]string{"nerdctl", "push", "-q"}, encodeImageInfoNerdctl(mirrorRef)[0]...)...)
490+
491+
indexDigest := buildIndex(sh, mirrorRef, withMinLayerSize(0))
492+
if indexDigest == "" {
493+
t.Fatal("failed to create SOCI index for pigz-compressed image")
494+
}
495+
496+
// Verify a ztoc was created for the pigz layer and extract files to check content
497+
sociIndex, err := sociIndexFromDigest(sh, indexDigest)
498+
if err != nil {
499+
t.Fatalf("failed to read SOCI index: %v", err)
500+
}
501+
for _, blob := range sociIndex.Blobs {
502+
if blob.MediaType != soci.SociLayerMediaType {
503+
continue
504+
}
505+
for fileName, expectedContent := range pigzImg.files {
506+
output, err := sh.OLog("soci", "ztoc", "get-file", blob.Digest.String(), fileName)
507+
if err != nil {
508+
t.Fatalf("soci ztoc get-file failed for %s: %v", fileName, err)
509+
}
510+
actual := strings.TrimRight(string(output), "\n")
511+
if actual != expectedContent {
512+
t.Fatalf("file %s content mismatch: expected %q, got %q", fileName, expectedContent, actual)
513+
}
514+
}
515+
}
516+
517+
sh.X("soci", "push", "--user", regConfig.creds(), mirrorRef.ref)
518+
sh.X("ctr", "i", "rm", mirrorRef.ref)
519+
sh.X(append(imagePullCmd, "--soci-index-digest", indexDigest, mirrorRef.ref)...)
520+
checkFuseMounts(t, sh, pigzImg.layerCount)
521+
}
522+
477523
func verifyInfoOutput(zinfo Info, ztoc *ztoc.Ztoc) error {
478524
if zinfo.Version != string(ztoc.Version) {
479525
return fmt.Errorf("different versions: expected %s got %s", ztoc.Version, zinfo.Version)

ztoc/compression/gzip_zinfo.c

Lines changed: 87 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@
4848
#include <stdlib.h>
4949
#include <string.h>
5050

51-
#define CHUNK (1 << 14) // file input buffer size
51+
#define CHUNK (1 << 14) // file input buffer size
52+
#define GZIP_TRAILER_SIZE 8 // gzip trailer: 4-byte CRC32 + 4-byte ISIZE
5253

5354

5455
// zinfo - internal helpers start.
@@ -253,8 +254,19 @@ int generate_zinfo_from_fp(FILE* in, offset_t span, struct gzip_zinfo** idx) {
253254
ret = Z_DATA_ERROR;
254255
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
255256
goto build_index_error;
256-
if (ret == Z_STREAM_END)
257+
if (ret == Z_STREAM_END) {
258+
/* Handle concatenated gzip streams (e.g., mgzip/pigz).
259+
If there's more data, reset inflate for the next member. */
260+
if (strm.avail_in > 0 ||
261+
ungetc(getc(in), in) != EOF) {
262+
ret = inflateReset2(&strm, 47);
263+
if (ret != Z_OK)
264+
goto build_index_error;
265+
if (strm.avail_in > 0)
266+
continue;
267+
}
257268
break;
269+
}
258270

259271
/* if at end of block, consider adding an index entry (note that if
260272
data_type indicates an end-of-block, then all of the
@@ -308,10 +320,11 @@ int generate_zinfo_from_file(const char *filepath, offset_t span, struct gzip_zi
308320

309321
int extract_data_from_fp(FILE *in, struct gzip_zinfo *index, offset_t offset, void *buffer, int len) {
310322
int ret, skip;
323+
int raw_mode = 1; // track if we're in initial raw inflate mode
311324
z_stream strm;
312325
struct gzip_checkpoint *here;
313326
unsigned char input[CHUNK], discard[WINSIZE];
314-
uchar* buf = buffer;
327+
uchar* buf = buffer;
315328

316329
/* proceed only if something reasonable to do */
317330
if (len < 0)
@@ -379,8 +392,43 @@ int extract_data_from_fp(FILE *in, struct gzip_zinfo *index, offset_t offset, vo
379392
ret = Z_DATA_ERROR;
380393
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
381394
goto extract_ret;
382-
if (ret == Z_STREAM_END)
395+
if (ret == Z_STREAM_END) {
396+
/* Handle concatenated gzip member boundary */
397+
if (skip || strm.avail_out > 0) {
398+
if (raw_mode) {
399+
/* Skip the 8-byte gzip trailer (CRC32 + ISIZE).
400+
In raw inflate mode (-15), the trailer is NOT
401+
consumed by inflate. After resetting to gzip mode
402+
(47), subsequent trailers are consumed automatically,
403+
so this is only needed once. */
404+
unsigned drop = GZIP_TRAILER_SIZE;
405+
if (strm.avail_in >= drop) {
406+
strm.avail_in -= drop;
407+
strm.next_in += drop;
408+
} else {
409+
drop -= strm.avail_in;
410+
strm.avail_in = 0;
411+
do {
412+
if (getc(in) == EOF) {
413+
ret = ferror(in) ? Z_ERRNO : Z_BUF_ERROR;
414+
goto extract_ret;
415+
}
416+
} while (--drop);
417+
}
418+
raw_mode = 0;
419+
}
420+
if (strm.avail_in > 0 ||
421+
ungetc(getc(in), in) != EOF) {
422+
ret = inflateReset2(&strm, 47);
423+
if (ret != Z_OK)
424+
goto extract_ret;
425+
if (strm.avail_out > 0)
426+
continue;
427+
break;
428+
}
429+
}
383430
break;
431+
}
384432
} while (strm.avail_out != 0);
385433

386434
/* if reach end of stream, then don't keep trying to get more */
@@ -414,6 +462,7 @@ int extract_data_from_buffer(void *d, offset_t datalen,
414462
struct gzip_zinfo *index, offset_t offset,
415463
void *buffer, offset_t len, int first_checkpoint) {
416464
int ret, skip;
465+
int raw_mode = 1; // track if we're in initial raw inflate mode
417466
z_stream strm;
418467
unsigned char input[CHUNK], discard[WINSIZE];
419468
uchar *buf = buffer;
@@ -430,8 +479,8 @@ int extract_data_from_buffer(void *d, offset_t datalen,
430479
return ret;
431480

432481
if (bits) {
433-
int ret = data[0];
434-
inflatePrime(&strm, bits, ret >> (8 - bits));
482+
int byte_val = data[0];
483+
inflatePrime(&strm, bits, byte_val >> (8 - bits));
435484
data++;
436485
}
437486
(void)inflateSetDictionary(&strm, index->list[first_checkpoint].window,
@@ -471,8 +520,39 @@ int extract_data_from_buffer(void *d, offset_t datalen,
471520
ret = Z_DATA_ERROR;
472521
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
473522
goto extract_ret;
474-
if (ret == Z_STREAM_END)
523+
if (ret == Z_STREAM_END) {
524+
/* Handle concatenated gzip member boundary */
525+
if (skip || strm.avail_out > 0) {
526+
if (raw_mode) {
527+
/* Skip the 8-byte gzip trailer (CRC32 + ISIZE).
528+
See comment in extract_data_from_fp. */
529+
unsigned drop = GZIP_TRAILER_SIZE;
530+
if (strm.avail_in >= drop) {
531+
strm.avail_in -= drop;
532+
strm.next_in += drop;
533+
} else {
534+
drop -= strm.avail_in;
535+
strm.avail_in = 0;
536+
if (remaining < (int)drop) {
537+
ret = Z_BUF_ERROR;
538+
goto extract_ret;
539+
}
540+
data += drop;
541+
remaining -= drop;
542+
}
543+
raw_mode = 0;
544+
}
545+
if (strm.avail_in > 0 || remaining > 0) {
546+
ret = inflateReset2(&strm, 47);
547+
if (ret != Z_OK)
548+
goto extract_ret;
549+
if (strm.avail_out > 0)
550+
continue;
551+
break; // let outer loop set up next buffer
552+
}
553+
}
475554
break;
555+
}
476556
} while (strm.avail_out != 0);
477557

478558
/* if reach end of stream, then don't keep trying to get more */

0 commit comments

Comments
 (0)