Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions agent/run_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/buildkite/agent/v3/api"
"github.com/buildkite/agent/v3/core"
"github.com/buildkite/agent/v3/internal/experiments"
"github.com/buildkite/agent/v3/internal/job"
"github.com/buildkite/agent/v3/internal/job/hook"
"github.com/buildkite/agent/v3/kubernetes"
"github.com/buildkite/agent/v3/logger"
Expand Down Expand Up @@ -357,6 +358,17 @@ One or more containers connected to the agent, but then stopped communicating wi
// Collect the finished process' exit status
exit.Status = r.process.WaitStatus().ExitStatus()

// The executor (bootstrap subprocess) returns ExitCodeSetupFailure
// (125) when it fails during setUp — e.g. DNS errors fetching
// secrets, shell creation failures, or Job API init errors — before
// the user's command ever runs. Map this to -1 so it is consistent
// with other agent-level "command never ran" failures and can be
// caught with automatic_retry on exit_status -1.
if exit.Status == job.ExitCodeSetupFailure {
exit.Status = -1
exit.SignalReason = SignalReasonProcessRunError
}

if ws := r.process.WaitStatus(); ws.Signaled() {
exit.Signal = process.SignalString(ws.Signal())
}
Expand Down
22 changes: 20 additions & 2 deletions internal/job/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ func New(conf ExecutorConfig) *Executor {
}
}

// ExitCodeSetupFailure is the exit code the executor subprocess reserves for
// a setUp failure that happened before the user's command ran (for example a
// DNS error while fetching secrets, or a failure to create the shell). It is
// an internal signal: the parent job runner rewrites it to exit_status -1 so
// the job matches the other agent-level "command never ran" failures.
//
// NOTE(review): the parent compares on the numeric value only, so any other
// process exit of 125 is presumably rewritten too — confirm this is intended.
const ExitCodeSetupFailure = 125

// Run the job and return the exit code
func (e *Executor) Run(ctx context.Context) (exitCode int) {
// Create a context to use for cancelation of the job
Expand Down Expand Up @@ -188,10 +197,19 @@ func (e *Executor) Run(ctx context.Context) (exitCode int) {
}
}

// Initialize the environment, a failure here will still call the tearDown
// Initialize the environment, a failure here will still call the tearDown.
// setUp can fail due to infrastructure errors (secret fetch, env init)
// or due to a user hook (environment hook) returning non-zero.
if err = e.setUp(ctx); err != nil {
e.shell.Errorf("Error setting up job executor: %v", err)
return shell.ExitCode(err)

// If the error is a typed ExitError (e.g. from a hook), preserve
// the hook's exit code. Otherwise it's an infra error — return
// ExitCodeSetupFailure so the parent can map it to -1.
if exitErr := new(shell.ExitError); errors.As(err, &exitErr) {
return exitErr.Code
}
return ExitCodeSetupFailure
}

// Execute the job phases in order
Expand Down