diff --git a/test/main_test.go b/test/main_test.go index 42f688c46..6b12b6529 100644 --- a/test/main_test.go +++ b/test/main_test.go @@ -96,6 +96,12 @@ func TestMain(m *testing.M) { cancel() }() + defer func() { + if err := testEnv.Close(); err != nil { + fmt.Fprintln(os.Stderr, "Error closing test environment:", err) + } + }() + if err := testEnv.Load(ctx, phonyRef, fixtures.PhonyFrontend); err != nil { panic(err) } diff --git a/test/testenv/buildx.go b/test/testenv/buildx.go index fdc205f44..57b22ee12 100644 --- a/test/testenv/buildx.go +++ b/test/testenv/buildx.go @@ -11,10 +11,8 @@ import ( "net" "os" "os/exec" - "runtime" "strings" "sync" - "syscall" "testing" "time" @@ -56,6 +54,20 @@ func (b *BuildxEnv) WithBuilder(builder string) *BuildxEnv { return b } +// Close closes the underlying buildkit client connection, which triggers +// cleanup of the dial-stdio process. +func (b *BuildxEnv) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + if b.client != nil { + err := b.client.Close() + b.client = nil + return err + } + return nil +} + // Load loads the output of the specified [gwclient.BuildFunc] into the buildkit instance. func (b *BuildxEnv) Load(ctx context.Context, id string, f gwclient.BuildFunc) error { if b.refs == nil { @@ -87,10 +99,7 @@ func (c *connCloseWrapper) Close() error { if c.close != nil { c.close() } - if err := c.Conn.Close(); err != nil { - return err - } - return nil + return c.Conn.Close() } func (b *BuildxEnv) dialStdio(ctx context.Context) error { @@ -106,19 +115,10 @@ func (b *BuildxEnv) dialStdio(ctx context.Context) error { // the buildx dial-stdio process from cleaning up its resources properly. cmd := exec.Command("docker", args...) cmd.Env = os.Environ() - cmd.SysProcAttr = &syscall.SysProcAttr{ - // Put the child in its own process group so we can kill the entire - // group (docker + docker-buildx plugin) during cleanup. - Setpgid: true, - // Send SIGTERM to the child process when the parent (test process) dies. - // This prevents dial-stdio processes from being orphaned when the test - // suite is interrupted or crashes. - Pdeathsig: syscall.SIGTERM, - } - c1, c2 := net.Pipe() - cmd.Stdin = c1 - cmd.Stdout = c1 + dialStdioConn, clientConn := net.Pipe() + cmd.Stdin = dialStdioConn + cmd.Stdout = dialStdioConn // Use a pipe to check when the connection is actually complete // Also write all of stderr to an error buffer so we can have more details @@ -128,27 +128,17 @@ func (b *BuildxEnv) dialStdio(ctx context.Context) error { ww := io.MultiWriter(w, errBuf) cmd.Stderr = ww - // processDone is closed when cmd.Wait() returns, signaling the cleanup + if err := cmd.Start(); err != nil { + return nil, err + } + + // chWait is closed when cmd.Wait() returns, signaling the cleanup // function that the process has exited. - processDone := make(chan struct{}) + chWait := make(chan struct{}) go func() { - // Lock this goroutine to its OS thread for the lifetime of the child process. - // Pdeathsig delivers the signal when the *thread* that forked the child exits, - // not when the process exits. Without locking, the Go runtime may destroy the - // thread that called cmd.Start(), prematurely delivering SIGTERM to the child. - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - if err := cmd.Start(); err != nil { - // Propagate the start error through the stderr pipe so the - // scanner below will surface it via scanner.Err(). - w.CloseWithError(err) - return - } - err := cmd.Wait() - close(processDone) - c1.Close() + close(chWait) + dialStdioConn.Close() // pkgerrors.Wrap will return nil if err is nil, otherwise it will give // us a wrapped error with the buffered stderr from the command. w.CloseWithError(pkgerrors.Wrapf(err, "%s", errBuf)) @@ -173,28 +163,26 @@ func (b *BuildxEnv) dialStdio(ctx context.Context) error { } out := &connCloseWrapper{ - Conn: c2, + Conn: clientConn, close: sync.OnceFunc(func() { - // Send 2 interrupt signals to the process to ensure it exits gracefully - // This is how buildx/docker plugins handle termination - - cmd.Process.Signal(os.Interrupt) //nolint:errcheck // We don't care about this error, we are going to send another one anyway - if err := cmd.Process.Signal(os.Interrupt); err != nil { - cmd.Process.Kill() //nolint:errcheck // Force kill if interrupt fails - } + // Close the stdin/stdout pipe to the process. + // This causes stdin EOF in buildx's dial-stdio, which triggers + // closeWrite(conn) on the buildkit connection and should start + // the chain reaction for docker CLI process to exit. + dialStdioConn.Close() select { - case <-processDone: + case <-chWait: case <-time.After(10 * time.Second): - // If it still doesn't exit, force kill - cmd.Process.Kill() //nolint:errcheck // Force kill if it doesn't exit after interrupt + // Safety net: force kill if still running. + cmd.Process.Kill() //nolint:errcheck + <-chWait } }), } return out, nil })) - if err != nil { return err } @@ -355,9 +343,7 @@ func (b *BuildxEnv) RunTest(ctx context.Context, t *testing.T, f TestFunc, opts t.Fatalf("%+v", err) } - var ( - ch chan *client.SolveStatus - ) + var ch chan *client.SolveStatus if cfg.SolveStatusFn != nil { ch = make(chan *client.SolveStatus, 1)