@@ -240,15 +240,30 @@ func (g *Gateway) Run(ctx context.Context) error {
240240
241241 mgmt := initStartupMgmt (tlsConfig , mgmtHostPort , config .Username , config .Password )
242242
243+ // Use a startup-scoped context for the ping loop that is cancelled when
244+ // the gateway is asked to shut down. This ensures we don't hang during
245+ // startup if a SIGTERM arrives before the cluster is reachable, while
246+ // keeping the main ctx alive for cbauth, agent manager, and graceful
247+ // shutdown paths.
248+ startupCtx , startupCancel := context .WithCancel (ctx )
249+ go func () {
250+ select {
251+ case <- g .shutdownSig :
252+ startupCancel ()
253+ case <- startupCtx .Done ():
254+ }
255+ }()
256+
243257 var clusterUUID string
244258 var bootstrapNodeAddr string
245259 for {
246- currentUUID , nodeAddr , err := pingCouchbaseCluster (ctx , mgmt , config .Logger )
260+ currentUUID , nodeAddr , err := pingCouchbaseCluster (startupCtx , mgmt , config .Logger )
247261 if err != nil {
248262 config .Logger .Warn ("failed to ping cluster" , zap .Error (err ))
249263
250264 // if we are not in daemon mode, we just immediately return the error to the user
251265 if ! config .Daemon {
266+ startupCancel ()
252267 return err
253268 }
254269
@@ -257,8 +272,8 @@ func (g *Gateway) Run(ctx context.Context) error {
257272 config .Logger .Info ("sleeping before trying to ping cluster again" , zap .Duration ("period" , waitTime ))
258273 select {
259274 case <- time .After (waitTime ):
260- case <- ctx .Done ():
261- return ctx .Err ()
275+ case <- startupCtx .Done ():
276+ return startupCtx .Err ()
262277 }
263278
264279 continue
@@ -270,6 +285,9 @@ func (g *Gateway) Run(ctx context.Context) error {
270285 break
271286 }
272287
288+ // Startup ping is done, clean up the scoped context.
289+ startupCancel ()
290+
273291 authHostPort , err := mgmtHostPortToAuthHostPort (mgmtHostPort )
274292 if err != nil {
275293 config .Logger .Error ("failed to form auth host port" , zap .Error (err ))
0 commit comments