Skip to content

Commit 147541e

Browse files
committed
Improvements to removeCheckoutDir
1 parent 87f0175 commit 147541e

File tree

4 files changed

+134
-24
lines changed

4 files changed

+134
-24
lines changed

internal/job/checkout.go

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"io/fs"
78
"os"
89
"path/filepath"
910
"slices"
@@ -50,47 +51,45 @@ func (e *Executor) configureHTTPSInsteadOfSSH(ctx context.Context) error {
5051
).Run(ctx, shell.ShowPrompt(false))
5152
}
5253

53-
func (e *Executor) removeCheckoutDir() error {
54+
func (e *Executor) removeCheckoutDir(ctx context.Context) error {
5455
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")
5556

5657
// on windows, sometimes removing large dirs can fail for various reasons
5758
// for instance having files open
5859
// see https://github.com/golang/go/issues/20841
59-
for range 10 {
60+
return roko.NewRetrier(
61+
roko.WithMaxAttempts(10),
62+
roko.WithStrategy(roko.Constant(10*time.Second)),
63+
).DoWithContext(ctx, func(r *roko.Retrier) error {
6064
e.shell.Commentf("Removing %s", checkoutPath)
61-
if err := os.RemoveAll(checkoutPath); err != nil {
62-
e.shell.Errorf("Failed to remove \"%s\" (%s)", checkoutPath, err)
63-
} else {
64-
if _, err := os.Stat(checkoutPath); os.IsNotExist(err) {
65-
return nil
66-
} else {
67-
e.shell.Errorf("Failed to remove %s", checkoutPath)
68-
}
69-
}
70-
e.shell.Commentf("Waiting 10 seconds")
71-
<-time.After(time.Second * 10)
72-
}
7365

74-
return fmt.Errorf("failed to remove %s", checkoutPath)
66+
if err := hardRemoveAll(checkoutPath); err != nil {
67+
e.shell.Errorf("Failed to remove %q (%s)", checkoutPath, err)
68+
return err
69+
}
70+
if _, err := os.Stat(checkoutPath); err == nil || !errors.Is(err, fs.ErrNotExist) {
71+
e.shell.Errorf("Failed to remove %q", checkoutPath)
72+
return err
73+
}
74+
return nil
75+
})
7576
}
7677

7778
func (e *Executor) createCheckoutDir() error {
7879
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")
7980

8081
if !osutil.FileExists(checkoutPath) {
8182
e.shell.Commentf("Creating \"%s\"", checkoutPath)
82-
// Actual file permissions will be reduced by umask, and won't be 0o777 unless the user has manually changed the umask to 000
83-
if err := os.MkdirAll(checkoutPath, 0o777); err != nil {
83+
// Actual file permissions will be reduced by umask, and won't be 0777
84+
// unless the user has manually changed the umask to 000
85+
if err := os.MkdirAll(checkoutPath, 0777); err != nil {
8486
return err
8587
}
8688
}
8789

8890
if e.shell.Getwd() != checkoutPath {
89-
if err := e.shell.Chdir(checkoutPath); err != nil {
90-
return err
91-
}
91+
return e.shell.Chdir(checkoutPath)
9292
}
93-
9493
return nil
9594
}
9695

@@ -112,7 +111,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
112111
// Remove the checkout directory if BUILDKITE_CLEAN_CHECKOUT is present
113112
if e.CleanCheckout {
114113
e.shell.Headerf("Cleaning pipeline checkout")
115-
if err = e.removeCheckoutDir(); err != nil {
114+
if err = e.removeCheckoutDir(ctx); err != nil {
116115
return err
117116
}
118117
}
@@ -202,7 +201,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
202201
// Checkout can fail because of corrupted files in the checkout which can leave the agent in a state where it
203202
// keeps failing. This removes the checkout dir, which means the next checkout will be a lot slower (clone vs
204203
// fetch), but hopefully will allow the agent to self-heal
205-
if err := e.removeCheckoutDir(); err != nil {
204+
if err := e.removeCheckoutDir(ctx); err != nil {
206205
e.shell.Warningf("Failed to remove checkout dir while cleaning up after a checkout error: %v", err)
207206
}
208207

@@ -221,7 +220,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
221220
e.shell.Warningf("Checkout failed! %s (%s)", err, r)
222221

223222
// If it's some kind of error that we don't know about, clean the checkout dir just to be safe
224-
if err := e.removeCheckoutDir(); err != nil {
223+
if err := e.removeCheckoutDir(ctx); err != nil {
225224
e.shell.Warningf("Failed to remove checkout dir while cleaning up after a checkout error: %v", err)
226225
}
227226

internal/job/remove_all_nonunix.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
//go:build !unix
2+
3+
package job
4+
5+
import "os"
6+
7+
// hardRemoveAll only does more than os.RemoveAll on Unix-likes.
8+
func hardRemoveAll(path string) error {
9+
return os.RemoveAll(path)
10+
}

internal/job/remove_all_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//go:build unix
2+
3+
package job
4+
5+
import (
6+
"errors"
7+
"io/fs"
8+
"os"
9+
"path/filepath"
10+
"testing"
11+
)
12+
13+
func TestHardRemoveAll(t *testing.T) {
14+
container, err := os.MkdirTemp("", "TestHardRemoveAll")
15+
if err != nil {
16+
t.Fatalf("os.MkdirTemp(TestHardRemoveAll) error = %v", err)
17+
}
18+
t.Cleanup(func() { os.RemoveAll(container) }) // lol but if hardRemoveAll doesn't work...
19+
20+
dirA := filepath.Join(container, "a")
21+
dirB := filepath.Join(dirA, "b")
22+
fileC := filepath.Join(dirB, "c")
23+
if err := os.MkdirAll(dirB, 0o777); err != nil {
24+
t.Fatalf("os.MkdirAll(c%q, 0o777) = %v", dirB, err)
25+
}
26+
if err := os.WriteFile(fileC, []byte("hello!\n"), 0o664); err != nil {
27+
t.Fatalf("os.WriteFile(%q, hello!, 0o664) = %v", fileC, err)
28+
}
29+
30+
// break directory perms
31+
if err := os.Chmod(dirB, 0o666); err != nil {
32+
t.Fatalf("os.Chmod(%q, 0o666) = %v", dirB, err)
33+
}
34+
if err := os.Chmod(dirA, 0o444); err != nil {
35+
t.Fatalf("os.Chmod(%q, 0o444) = %v", dirA, err)
36+
}
37+
38+
if err := hardRemoveAll(dirA); err != nil {
39+
t.Errorf("hardRemoveAll(%q) = %v", dirA, err)
40+
}
41+
if _, err := os.Stat(dirA); !errors.Is(err, fs.ErrNotExist) {
42+
t.Errorf("os.Stat(%q) = %v, want %v", dirA, err, fs.ErrNotExist)
43+
}
44+
}

internal/job/remove_all_unix.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//go:build unix
2+
3+
package job
4+
5+
import (
6+
"errors"
7+
"os"
8+
"path/filepath"
9+
10+
"golang.org/x/sys/unix"
11+
)
12+
13+
// hardRemoveAll tries very hard to remove all items from the directory at path.
14+
// In addition to calling os.RemoveAll, it fixes missing +x bits on directories.
15+
func hardRemoveAll(path string) error {
16+
// Only try to fix the permissions on 10000 directories.
17+
// Any more and we could be here all day.
18+
for range 10000 {
19+
err := os.RemoveAll(path)
20+
if err == nil { // If os.RemoveAll worked, then exit early.
21+
return nil
22+
}
23+
// os.RemoveAll documents its only non-nil error as *os.PathError.
24+
pathErr, ok := err.(*os.PathError)
25+
if !ok {
26+
return err
27+
}
28+
29+
// Did we not have permission to open something within a directory?
30+
if pathErr.Err != unix.EACCES {
31+
return err
32+
}
33+
dir := filepath.Dir(pathErr.Path)
34+
35+
// Check that the EACCES was caused by mode on the directory.
36+
// (Note that this is a TOCTOU race, but we're not changing
37+
// owner uid/gid, and if something else is concurrently writing
38+
// files they can probably chmod +wx their files themselves)
39+
di, statErr := os.Lstat(dir)
40+
if statErr != nil {
41+
return statErr
42+
}
43+
if !di.IsDir() {
44+
return err
45+
}
46+
if unix.Faccessat(0, dir, unix.W_OK|unix.X_OK, unix.AT_EACCESS) != unix.EACCES {
47+
// Some other failure?
48+
return err
49+
}
50+
// Try to fix it with chmod +x dir
51+
if err := os.Chmod(dir, 0o777); err != nil {
52+
return err
53+
}
54+
// Now retry os.RemoveAll.
55+
}
56+
return errors.New("too many inaccessible directories")
57+
}

0 commit comments

Comments
 (0)