Skip to content

Commit c524373

Browse files
authored
apko: make apk cache safer for multi-writers (#1564)
Currently the cached files are `mv` to the final destination. While this works for a single process of `apko`, if there are multiple `apko` processes running, an existing file read can be invalidated if the file is later replaced (with another file with the same content). This PR changes to leverage symlink instead. Since file reads in Go by default will follow symlink at file open time, overwriting the symlink won't invalidate existing reads. We also check for the existence of the destination file before attempting to advertise the cached file, and we clean up the cached file if we didn't create a symlink at all. This is safe because there should be no way anyone else could read our version of the cache file. I've tested with both NFS and gcs-fuse FS. --------- Signed-off-by: Nghia Tran <[email protected]>
1 parent 3e63d01 commit c524373

File tree

4 files changed

+119
-37
lines changed

4 files changed

+119
-37
lines changed

pkg/apk/apk/cache.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ import (
2828

2929
"go.opentelemetry.io/otel"
3030
"golang.org/x/sync/singleflight"
31+
32+
"chainguard.dev/apko/pkg/paths"
3133
)
3234

3335
type Cache struct {
@@ -385,10 +387,9 @@ func (t *cacheTransport) retrieveAndSaveFile(ctx context.Context, request *http.
385387

386388
// Now that we have the file has been written, rename to atomically populate
387389
// the cache
388-
if err := os.Rename(tmp.Name(), cacheFile); err != nil {
389-
return "", fmt.Errorf("unable to populate cache: %w", err)
390+
if err := paths.AdvertiseCachedFile(tmp.Name(), cacheFile); err != nil {
391+
return "", err
390392
}
391-
392393
return cacheFile, nil
393394
}
394395

pkg/apk/apk/implementation.go

+11-34
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ import (
5353
"chainguard.dev/apko/pkg/apk/expandapk"
5454
apkfs "chainguard.dev/apko/pkg/apk/fs"
5555
"chainguard.dev/apko/pkg/apk/internal/tarfs"
56+
"chainguard.dev/apko/pkg/paths"
5657

5758
"github.com/chainguard-dev/clog"
5859
)
@@ -967,7 +968,7 @@ func (a *APK) cachePackage(ctx context.Context, pkg InstallablePackage, exp *exp
967968
ctlHex := hex.EncodeToString(exp.ControlHash)
968969
ctlDst := filepath.Join(cacheDir, ctlHex+".ctl.tar.gz")
969970

970-
if err := rename(exp.ControlFile, ctlDst); err != nil {
971+
if err := paths.AdvertiseCachedFile(exp.ControlFile, ctlDst); err != nil {
971972
return nil, err
972973
}
973974

@@ -976,7 +977,7 @@ func (a *APK) cachePackage(ctx context.Context, pkg InstallablePackage, exp *exp
976977
if exp.SignatureFile != "" {
977978
sigDst := filepath.Join(cacheDir, ctlHex+".sig.tar.gz")
978979

979-
if err := rename(exp.SignatureFile, sigDst); err != nil {
980+
if err := paths.AdvertiseCachedFile(exp.SignatureFile, sigDst); err != nil {
980981
return nil, err
981982
}
982983

@@ -986,7 +987,7 @@ func (a *APK) cachePackage(ctx context.Context, pkg InstallablePackage, exp *exp
986987
datHex := hex.EncodeToString(exp.PackageHash)
987988
datDst := filepath.Join(cacheDir, datHex+".dat.tar.gz")
988989

989-
if err := rename(exp.PackageFile, datDst); err != nil {
990+
if err := paths.AdvertiseCachedFile(exp.PackageFile, datDst); err != nil {
990991
return nil, err
991992
}
992993

@@ -998,7 +999,7 @@ func (a *APK) cachePackage(ctx context.Context, pkg InstallablePackage, exp *exp
998999

9991000
tarDst := strings.TrimSuffix(exp.PackageFile, ".gz")
10001001

1001-
if err := rename(exp.TarFile, tarDst); err != nil {
1002+
if err := paths.AdvertiseCachedFile(exp.TarFile, tarDst); err != nil {
10021003
return nil, err
10031004
}
10041005

@@ -1306,11 +1307,15 @@ func (a *APK) installPackage(ctx context.Context, pkg *Package, expanded *expand
13061307
log := clog.FromContext(ctx)
13071308
log.Infof("installing %s (%s)", pkg.Name, pkg.Version)
13081309

1310+
// We don't want to call `defer expanded.Close()` to to remove tempDir because our
1311+
// cached files are advertised by symlinks pointing into them.
1312+
//
1313+
// This is not a big deal because the temp files if not referred by
1314+
// a symlink will be cleaned up anyway.
1315+
13091316
ctx, span := otel.Tracer("go-apk").Start(ctx, "installPackage", trace.WithAttributes(attribute.String("package", pkg.Name)))
13101317
defer span.End()
13111318

1312-
defer expanded.Close()
1313-
13141319
var (
13151320
err error
13161321
installedFiles []tar.Header
@@ -1376,31 +1381,3 @@ func packageRefs(pkgs []*RepositoryPackage) []string {
13761381
}
13771382
return names
13781383
}
1379-
1380-
func rename(src, dst string) error {
1381-
if err := os.Rename(src, dst); err != nil {
1382-
return fmt.Errorf("renaming %s: %w", src, err)
1383-
}
1384-
1385-
// This feels dumb but I have a hunch that it's necessary when renaming
1386-
// a file on gcsfuse. We seem to get a "file not found" error when reading
1387-
// something immediately after renaming it.
1388-
if err := flush(dst); err != nil {
1389-
return fmt.Errorf("flushing %s: %w", dst, err)
1390-
}
1391-
1392-
return nil
1393-
}
1394-
1395-
func flush(filename string) error {
1396-
f, err := os.Open(filename)
1397-
if err != nil {
1398-
return err
1399-
}
1400-
1401-
if err := f.Sync(); err != nil {
1402-
return err
1403-
}
1404-
1405-
return f.Close()
1406-
}

pkg/paths/paths.go

+42
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
1+
// Copyright 2025 Chainguard, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
114
package paths
215

316
import (
17+
"errors"
18+
"fmt"
419
"os"
520
"path"
621
)
@@ -19,3 +34,30 @@ func ResolvePath(p string, includePaths []string) (string, error) {
1934
}
2035
return "", os.ErrNotExist
2136
}
37+
38+
// AdvertisedCachedFile will create a symlink at `dst` pointing to `src`.
39+
//
40+
// In the case that `dst` already exists, another process had already created the symlink
41+
// and we can safely move on. We will also perform a best-effort attempt to clean up the
42+
// unadvertised file at `src`.
43+
func AdvertiseCachedFile(src, dst string) error {
44+
// Check if the destination already exists.
45+
if _, err := os.Stat(dst); err == nil {
46+
// Since `src` is unadvertised, it is safe to remove it. Ideally we want this to succeeds,
47+
// but we don't want to fail a build just because we couldn't clean up. This will be
48+
// left for background clean up process based on age.
49+
_ = os.Remove(src)
50+
return nil
51+
}
52+
// Create the symlink.
53+
if err := os.Symlink(src, dst); err != nil {
54+
// Ignore already exists errors. We don't even want to do clean up here even when
55+
// the symlink is pointing somewhere elese, to avoid relying too much on file system
56+
// remantics/eventual consistency, etc.
57+
if errors.Is(err, os.ErrExist) {
58+
return nil
59+
}
60+
return fmt.Errorf("linking (cached) %s to %s: %w", src, dst, err)
61+
}
62+
return nil
63+
}

pkg/paths/paths_test.go

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright 2025 Chainguard, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
package paths
15+
16+
import (
17+
"os"
18+
"testing"
19+
)
20+
21+
func TestAdvertiseCachedFile(t *testing.T) {
22+
tmpDir := t.TempDir()
23+
src1 := tmpDir + "/src1.tmp"
24+
src2 := tmpDir + "/src2.tmp"
25+
content := "content"
26+
if err := os.WriteFile(src1, []byte(content), 0644); err != nil {
27+
t.Fatal(err)
28+
}
29+
if err := os.WriteFile(src2, []byte(content), 0644); err != nil {
30+
t.Fatal(err)
31+
}
32+
dst := tmpDir + "/target"
33+
t.Run("dst does not exists", func(t *testing.T) {
34+
if err := AdvertiseCachedFile(src1, dst); err != nil {
35+
t.Fatal(err)
36+
}
37+
dstContent, err := os.ReadFile(dst)
38+
if err != nil {
39+
t.Fatal(err)
40+
}
41+
if string(dstContent) != content {
42+
t.Fatalf("content mismatch: %s != %s", string(dstContent), content)
43+
}
44+
})
45+
46+
t.Run("dst exists", func(t *testing.T) {
47+
if err := AdvertiseCachedFile(src2, dst); err != nil {
48+
t.Fatal(err)
49+
}
50+
// check the symlink
51+
if l, err := os.Readlink(dst); err != nil {
52+
t.Fatal(err)
53+
} else if l != src1 {
54+
t.Fatalf("symlink should stay in tact: %s != %s", l, src2)
55+
}
56+
57+
// check that src2 is removed
58+
if _, err := os.Stat(src2); !os.IsNotExist(err) {
59+
t.Fatalf("src2 should be removed: %v", err)
60+
}
61+
})
62+
}

0 commit comments

Comments
 (0)