-
Notifications
You must be signed in to change notification settings - Fork 52
test/testenv: let Docker CLI shut down dial-stdio gracefully #982
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -11,10 +11,8 @@ import ( | |||||||||||||||
| "net" | ||||||||||||||||
| "os" | ||||||||||||||||
| "os/exec" | ||||||||||||||||
| "runtime" | ||||||||||||||||
| "strings" | ||||||||||||||||
| "sync" | ||||||||||||||||
| "syscall" | ||||||||||||||||
| "testing" | ||||||||||||||||
| "time" | ||||||||||||||||
|
|
||||||||||||||||
|
|
@@ -56,6 +54,20 @@ func (b *BuildxEnv) WithBuilder(builder string) *BuildxEnv { | |||||||||||||||
| return b | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // Close closes the underlying buildkit client connection, which triggers | ||||||||||||||||
| // cleanup of the dial-stdio process. | ||||||||||||||||
| func (b *BuildxEnv) Close() error { | ||||||||||||||||
| b.mu.Lock() | ||||||||||||||||
| defer b.mu.Unlock() | ||||||||||||||||
|
|
||||||||||||||||
| if b.client != nil { | ||||||||||||||||
| err := b.client.Close() | ||||||||||||||||
| b.client = nil | ||||||||||||||||
| return err | ||||||||||||||||
| } | ||||||||||||||||
| return nil | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // Load loads the output of the specified [gwclient.BuildFunc] into the buildkit instance. | ||||||||||||||||
| func (b *BuildxEnv) Load(ctx context.Context, id string, f gwclient.BuildFunc) error { | ||||||||||||||||
| if b.refs == nil { | ||||||||||||||||
|
|
@@ -87,10 +99,7 @@ func (c *connCloseWrapper) Close() error { | |||||||||||||||
| if c.close != nil { | ||||||||||||||||
| c.close() | ||||||||||||||||
| } | ||||||||||||||||
| if err := c.Conn.Close(); err != nil { | ||||||||||||||||
| return err | ||||||||||||||||
| } | ||||||||||||||||
| return nil | ||||||||||||||||
| return c.Conn.Close() | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| func (b *BuildxEnv) dialStdio(ctx context.Context) error { | ||||||||||||||||
|
|
@@ -106,19 +115,10 @@ func (b *BuildxEnv) dialStdio(ctx context.Context) error { | |||||||||||||||
| // the buildx dial-stdio process from cleaning up its resources properly. | ||||||||||||||||
| cmd := exec.Command("docker", args...) | ||||||||||||||||
| cmd.Env = os.Environ() | ||||||||||||||||
| cmd.SysProcAttr = &syscall.SysProcAttr{ | ||||||||||||||||
| // Put the child in its own process group so we can kill the entire | ||||||||||||||||
| // group (docker + docker-buildx plugin) during cleanup. | ||||||||||||||||
| Setpgid: true, | ||||||||||||||||
| // Send SIGTERM to the child process when the parent (test process) dies. | ||||||||||||||||
| // This prevents dial-stdio processes from being orphaned when the test | ||||||||||||||||
| // suite is interrupted or crashes. | ||||||||||||||||
| Pdeathsig: syscall.SIGTERM, | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| c1, c2 := net.Pipe() | ||||||||||||||||
| cmd.Stdin = c1 | ||||||||||||||||
| cmd.Stdout = c1 | ||||||||||||||||
| dialStdioConn, clientConn := net.Pipe() | ||||||||||||||||
| cmd.Stdin = dialStdioConn | ||||||||||||||||
| cmd.Stdout = dialStdioConn | ||||||||||||||||
|
|
||||||||||||||||
| // Use a pipe to check when the connection is actually complete | ||||||||||||||||
| // Also write all of stderr to an error buffer so we can have more details | ||||||||||||||||
|
|
@@ -128,27 +128,17 @@ func (b *BuildxEnv) dialStdio(ctx context.Context) error { | |||||||||||||||
| ww := io.MultiWriter(w, errBuf) | ||||||||||||||||
| cmd.Stderr = ww | ||||||||||||||||
|
|
||||||||||||||||
| // processDone is closed when cmd.Wait() returns, signaling the cleanup | ||||||||||||||||
| if err := cmd.Start(); err != nil { | ||||||||||||||||
|
||||||||||||||||
| if err := cmd.Start(); err != nil { | |
| if err := cmd.Start(); err != nil { | |
| // Clean up all pipes if the command fails to start. | |
| _ = dialStdioConn.Close() | |
| _ = clientConn.Close() | |
| _ = w.Close() | |
| _ = r.Close() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot code review[agent] those are in-memory pipes and should get deallocated once there is no reference to them, so the explicit cleanup shouldn't be necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should do the cleanup here since this is not in func main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Those are all in-memory and will be deallocated on error, there is no need for explicit cleanup. It would be different if we would use e.g. os.Pipe().
Copilot
AI
Mar 3, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After cmd.Start() succeeds, the dialer no longer has any context-based cancellation path (since it intentionally avoids exec.CommandContext). If ctx is cancelled while waiting for the dial-stdio connection handshake (later in this function), the docker/buildx process can keep running and the dial can hang. Consider wiring ctx.Done() into the startup/handshake flow and ensuring the process is signaled/killed and cmd.Wait() is unblocked on cancellation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm investigating that.
Copilot
AI
Mar 3, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The stderr-drain logic can be accidentally defeated if the io.PipeReader (r) is closed as soon as the dialer returns (e.g. via a defer r.Close()), because that makes the exec stderr copy goroutine stop early and can let the child block once its stderr pipe buffer fills. Consider keeping r open for the lifetime of the process and only closing it after cmd.Wait() completes (e.g., close r from this cmd.Wait() goroutine after w.CloseWithError).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This
defer testEnv.Close()won’t run on the timeout path in the interrupt watcher goroutine, because that goroutine callsos.Exit(...)(which skips defers). If the suite hits that 30s timeout, dial-stdio can still be orphaned. Consider explicitly callingtestEnv.Close()(and possiblytp.Shutdown) just before theos.Exitin the timeout path.