Skip to content

Commit c974f84

Browse files
authored
Merge pull request #3398 from buildkite/ming/ps-906
PS-906: configurable kubernetes-bootstrap connection timeout
2 parents 3830805 + 47568cd commit c974f84

1 file changed

Lines changed: 20 additions & 3 deletions

File tree

clicommand/kubernetes_bootstrap.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ This command is used internally by Buildkite Kubernetes jobs. It is not
2828
intended to be used directly.`
2929

3030
type KubernetesBootstrapConfig struct {
31-
KubernetesContainerID int `cli:"kubernetes-container-id"`
31+
KubernetesContainerID int `cli:"kubernetes-container-id"`
32+
KubernetesBootstrapConnectionTimeout time.Duration `cli:"kubernetes-bootstrap-connection-timeout"`
3233

3334
// Global flags for debugging, etc
3435
LogLevel string `cli:"log-level"`
@@ -44,6 +45,13 @@ var KubernetesBootstrapCommand = cli.Command{
4445
Description: kubernetesBootstrapHelpDescription,
4546
Flags: []cli.Flag{
4647
KubernetesContainerIDFlag,
48+
cli.DurationFlag{
49+
Name: "kubernetes-bootstrap-connection-timeout",
50+
Usage: "This is intended to be used only by the Buildkite k8s stack " +
51+
"(github.com/buildkite/agent-stack-k8s); it set the max time a container will wait " +
52+
"to connect Agent.",
53+
EnvVar: "BUILDKITE_KUBERNETES_BOOTSTRAP_CONNECTION_TIMEOUT",
54+
},
4755

4856
// Global flags for debugging, etc
4957
DebugFlag,
@@ -52,6 +60,9 @@ var KubernetesBootstrapCommand = cli.Command{
5260
ProfileFlag,
5361
},
5462
Action: func(c *cli.Context) error {
63+
// kubernetes-bootstrap first registers with the agent server container (the container that runs `buildkite-agent start`).
64+
// As part of the process, it will gain a bunch of env vars.
65+
// After registration, it will run `buildkite-agent bootstrap`.
5566
ctx := context.Background()
5667
ctx, cfg, l, _, done := setupLoggerAndConfig[KubernetesBootstrapConfig](ctx, c)
5768
defer done()
@@ -65,9 +76,15 @@ var KubernetesBootstrapCommand = cli.Command{
6576
// Registration passes down the env vars the agent normally sets on the
6677
// subprocess, but in this case the bootstrap is in a separate
6778
// container.
79+
timeoutDuration := 120 * time.Second
80+
if cfg.KubernetesBootstrapConnectionTimeout > 0 {
81+
timeoutDuration = cfg.KubernetesBootstrapConnectionTimeout
82+
}
83+
interval := 3 * time.Second
84+
maxAttempt := max(int(timeoutDuration.Seconds())/int(interval.Seconds()), 1)
6885
rtr := roko.NewRetrier(
69-
roko.WithMaxAttempts(7),
70-
roko.WithStrategy(roko.Exponential(2*time.Second, 0)),
86+
roko.WithMaxAttempts(maxAttempt),
87+
roko.WithStrategy(roko.Constant(interval)),
7188
)
7289
regResp, err := roko.DoFunc(ctx, rtr, func(rtr *roko.Retrier) (*kubernetes.RegisterResponse, error) {
7390
return socket.Connect(ctx)

0 commit comments

Comments
 (0)