Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions test/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"os"
"os/signal"
"sync"
"syscall"
"testing"
"time"
Expand Down Expand Up @@ -74,8 +75,18 @@ func TestMain(m *testing.M) {
ctx, done := signal.NotifyContext(baseCtx, os.Interrupt)
baseCtx = ctx

closeTestEnv := sync.OnceFunc(func() {
if err := testEnv.Close(); err != nil {
fmt.Fprintln(os.Stderr, "Error closing test environment:", err)
}
})

go func() {
<-ctx.Done()

// Cleanup test env before restoring default signal handling, so we give a chance for a proper cleanup.
closeTestEnv()

// The context was cancelled due to interrupt
// This _should_ trigger builds to cancel naturally and exit the program,
// but in some cases it may not (due to timing, bugs in buildkit, uninteruptable operations, etc.).
Expand All @@ -96,11 +107,7 @@ func TestMain(m *testing.M) {
cancel()
}()

defer func() {
if err := testEnv.Close(); err != nil {
fmt.Fprintln(os.Stderr, "Error closing test environment:", err)
}
}()
defer closeTestEnv()

if err := testEnv.Load(ctx, phonyRef, fixtures.PhonyFrontend); err != nil {
panic(err)
Expand Down
126 changes: 79 additions & 47 deletions test/testenv/buildx.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package testenv

import (
"bufio"
"bytes"
"context"
"encoding/json"
Expand Down Expand Up @@ -91,12 +90,32 @@ func (b *BuildxEnv) supportsDialStdio(ctx context.Context) (bool, error) {

var errDialStdioNotSupported = errors.New("buildx dial-stdio not supported")

type connCloseWrapper struct {
// cmdConn wraps a net.Conn and replaces generic pipe errors with the
// actual error from the underlying command when it has exited.
type cmdConn struct {
net.Conn
close func()
close func()
cmdWait <-chan error
}

func (c *connCloseWrapper) Close() error {
func (c *cmdConn) Read(b []byte) (int, error) {
n, err := c.Conn.Read(b)
if err != nil {
// If the command has exited with an error, surface that instead
// of the generic pipe closed error.
select {
case cmdErr := <-c.cmdWait:
if cmdErr != nil {
return n, fmt.Errorf("%v: %w", cmdErr, err)
}
default:
Comment thread
invidian marked this conversation as resolved.
}
}

return n, err
}

func (c *cmdConn) Close() error {
if c.close != nil {
c.close()
}
Expand Down Expand Up @@ -126,68 +145,81 @@ func (b *BuildxEnv) dialStdio(ctx context.Context) error {
// the buildx dial-stdio process from cleaning up its resources properly.
cmd := exec.Command("docker", args...)
cmd.Env = os.Environ()
setSysProcAttr(cmd)

dialStdioConn, clientConn := net.Pipe()
cmd.Stdin = dialStdioConn
cmd.Stdout = dialStdioConn

// Use a pipe to check when the connection is actually complete
// Also write all of stderr to an error buffer so we can have more details
// in the error message when the command fails.
r, w := io.Pipe()
errBuf := bytes.NewBuffer(nil)
ww := io.MultiWriter(w, errBuf)
cmd.Stderr = ww
// Capture stderr so we can include it in error messages
// when the command fails.
var stderr bytes.Buffer
cmd.Stderr = &stderr
Comment thread
invidian marked this conversation as resolved.

// Use StdinPipe instead of setting cmd.Stdin directly.
// When cmd.Stdin is set to a non-*os.File (like net.Conn),
// exec creates an internal goroutine to copy data into the
// process's stdin pipe, and cmd.Wait blocks until that
// goroutine finishes. If the process exits immediately (e.g.
// bad arguments), the goroutine is stuck reading from
// dialStdioConn (nobody is writing yet), so cmd.Wait hangs.
// StdinPipe avoids the internal goroutine entirely.
stdinPipe, err := cmd.StdinPipe()
if err != nil {
_, _ = dialStdioConn.Close(), clientConn.Close()

return nil, fmt.Errorf("creating stdin pipe: %w", err)
Comment thread
invidian marked this conversation as resolved.
}

if err := cmd.Start(); err != nil {
Comment thread
invidian marked this conversation as resolved.
return nil, err
_, _, _ = stdinPipe.Close(), dialStdioConn.Close(), clientConn.Close()

if s := strings.TrimSpace(stderr.String()); s != "" {
return nil, fmt.Errorf("starting buildx dial-stdio: %s: %w", strings.TrimSpace(stderr.String()), err)
}

return nil, fmt.Errorf("starting buildx dial-stdio: %w", err)
}

// chWait is closed when cmd.Wait() returns, signaling the cleanup
// Copy client writes to the process's stdin.
// This goroutine stops when dialStdioConn is closed (read
// returns error) or stdinPipe is closed (write returns error).
go func() {
io.Copy(stdinPipe, dialStdioConn) //nolint:errcheck
stdinPipe.Close()
}()

// cmdWait is closed when cmd.Wait() returns, signaling the cleanup
// function that the process has exited.
chWait := make(chan struct{})
// waitErr is written before cmdWait is closed, so it is safe to
// read after receiving from cmdWait.
cmdWait := make(chan error, 1)
cmdDone := make(chan struct{})
go func() {
err := cmd.Wait()
close(chWait)
if stderr.Len() > 0 && err != nil {
err = fmt.Errorf("%v: %w", strings.TrimSpace(stderr.String()), err)
}
cmdWait <- err
close(cmdWait)
dialStdioConn.Close()
// pkgerrors.Wrap will return nil if err is nil, otherwise it will give
// us a wrapped error with the buffered stderr from the command.
w.CloseWithError(pkgerrors.Wrapf(err, "%s", errBuf))
close(cmdDone)
}()

defer r.Close()

scanner := bufio.NewScanner(r)
for scanner.Scan() {
txt := strings.ToLower(scanner.Text())

if strings.HasPrefix(txt, "#1 dialing builder") && strings.HasSuffix(txt, "done") {
go func() {
// Continue draining stderr so the process does not get blocked
_, _ = io.Copy(io.Discard, r)
}()
break
}
}
if err := scanner.Err(); err != nil {
return nil, err
}

out := &connCloseWrapper{
Conn: clientConn,
out := &cmdConn{
Conn: clientConn,
cmdWait: cmdWait,
close: sync.OnceFunc(func() {
// Close the stdin/stdout pipe to the process.
// This causes stdin EOF in buildx's dial-stdio, which triggers
// closeWrite(conn) on the buildkit connection and should start
// the chain reaction for docker CLI process to exit.
// Close the pipe to the process. This sends EOF on stdin
// (like Ctrl+D), which triggers closeWrite(conn) on the
// buildkit connection and starts the chain reaction for the
// docker CLI process to exit.
dialStdioConn.Close()

Comment thread
invidian marked this conversation as resolved.
select {
case <-chWait:
case <-cmdDone:
case <-time.After(10 * time.Second):
// Safety net: force kill if still running.
cmd.Process.Kill() //nolint:errcheck
<-chWait
killProcessGroup(cmd)
<-cmdWait
}
}),
}
Expand Down
20 changes: 20 additions & 0 deletions test/testenv/process_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//go:build !windows

package testenv

import (
"os/exec"
"syscall"
)

// setSysProcAttr puts the child process into its own process group so it does
// not receive SIGINT from the terminal on Ctrl+C.
func setSysProcAttr(cmd *exec.Cmd) {
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
}

// killProcessGroup force-kills the entire process group (negative PID) since
// Setpgid puts the child and any subprocesses it spawns into their own group.
func killProcessGroup(cmd *exec.Cmd) {
syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) //nolint:errcheck
}
12 changes: 12 additions & 0 deletions test/testenv/process_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package testenv

import "os/exec"

// setSysProcAttr is a no-op on Windows; process-group isolation is not
// needed because Windows does not deliver console signals the same way.
func setSysProcAttr(cmd *exec.Cmd) {}

// killProcessGroup forcibly terminates the process on Windows.
func killProcessGroup(cmd *exec.Cmd) {
cmd.Process.Kill() //nolint:errcheck
}