@@ -28,7 +28,8 @@ This command is used internally by Buildkite Kubernetes jobs. It is not
2828intended to be used directly.`
2929
3030type KubernetesBootstrapConfig struct {
31- KubernetesContainerID int `cli:"kubernetes-container-id"`
31+ KubernetesContainerID int `cli:"kubernetes-container-id"`
32+ KubernetesBootstrapConnectionTimeout time.Duration `cli:"kubernetes-bootstrap-connection-timeout"`
3233
3334 // Global flags for debugging, etc
3435 LogLevel string `cli:"log-level"`
@@ -44,6 +45,13 @@ var KubernetesBootstrapCommand = cli.Command{
4445 Description : kubernetesBootstrapHelpDescription ,
4546 Flags : []cli.Flag {
4647 KubernetesContainerIDFlag ,
48+ cli.DurationFlag {
49+ Name : "kubernetes-bootstrap-connection-timeout" ,
50+ Usage : "This is intended to be used only by the Buildkite k8s stack " +
51+ "(github.com/buildkite/agent-stack-k8s); it set the max time a container will wait " +
52+ "to connect Agent." ,
53+ EnvVar : "BUILDKITE_KUBERNETES_BOOTSTRAP_CONNECTION_TIMEOUT" ,
54+ },
4755
4856 // Global flags for debugging, etc
4957 DebugFlag ,
@@ -52,6 +60,9 @@ var KubernetesBootstrapCommand = cli.Command{
5260 ProfileFlag ,
5361 },
5462 Action : func (c * cli.Context ) error {
63+ // kubernetes-bootstrap first registers with the agent server container (the container that runs `buildkite-agent start`).
64+ // As part of the process, it will gain a bunch of env vars.
65+ // After registration, it will run `buildkite-agent bootstrap`.
5566 ctx := context .Background ()
5667 ctx , cfg , l , _ , done := setupLoggerAndConfig [KubernetesBootstrapConfig ](ctx , c )
5768 defer done ()
@@ -65,9 +76,15 @@ var KubernetesBootstrapCommand = cli.Command{
6576 // Registration passes down the env vars the agent normally sets on the
6677 // subprocess, but in this case the bootstrap is in a separate
6778 // container.
79+ timeoutDuration := 120 * time .Second
80+ if cfg .KubernetesBootstrapConnectionTimeout > 0 {
81+ timeoutDuration = cfg .KubernetesBootstrapConnectionTimeout
82+ }
83+ interval := 3 * time .Second
84+ maxAttempt := max (int (timeoutDuration .Seconds ())/ int (interval .Seconds ()), 1 )
6885 rtr := roko .NewRetrier (
69- roko .WithMaxAttempts (7 ),
70- roko .WithStrategy (roko .Exponential ( 2 * time . Second , 0 )),
86+ roko .WithMaxAttempts (maxAttempt ),
87+ roko .WithStrategy (roko .Constant ( interval )),
7188 )
7289 regResp , err := roko .DoFunc (ctx , rtr , func (rtr * roko.Retrier ) (* kubernetes.RegisterResponse , error ) {
7390 return socket .Connect (ctx )
0 commit comments