Skip to content

Commit d87490a

Browse files
committed
ING-780: Handle SIGINT only being received by PID 0 with --auto-restart.
1 parent cea7753 commit d87490a

1 file changed

Lines changed: 64 additions & 13 deletions

File tree

cmd/gateway/main.go

Lines changed: 64 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -798,17 +798,19 @@ func startGateway() {
798798
gw.Shutdown()
799799
}
800800

801-
hasReceivedSigInt := false
801+
firstSigIntTime := time.Time{}
802802
for sig := range sigCh {
803803
switch sig {
804804
case syscall.SIGINT:
805-
if hasReceivedSigInt {
805+
if !firstSigIntTime.IsZero() && time.Since(firstSigIntTime) >= 500*time.Millisecond {
806806
logger.Info("Received SIGINT a second time, terminating...")
807807
os.Exit(1)
808808
return
809809
} else {
810810
logger.Info("Received SIGINT, attempting graceful shutdown...")
811-
hasReceivedSigInt = true
811+
if firstSigIntTime.IsZero() {
812+
firstSigIntTime = time.Now()
813+
}
812814
beginGracefulShutdown()
813815
}
814816
case syscall.SIGTERM:
@@ -835,27 +837,49 @@ func startGatewayWatchdog() {
835837
_, logger := getLogger()
836838
logger = logger.Named("watchdog")
837839

840+
logger.Info("watchdog starting", zap.Int("pid", os.Getpid()))
841+
838842
execProc := os.Args[0]
839843
execArgs := append([]string{"--auto-restart-proc"}, os.Args[1:]...)
840844

841-
hasReceivedSigInt := false
845+
interruptLock := sync.Mutex{}
846+
shouldShutdown := false
847+
interruptCh := make(chan os.Signal)
848+
842849
go func() {
843850
sigCh := make(chan os.Signal, 10)
844851
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
845852

853+
firstSigIntTime := time.Time{}
846854
for sig := range sigCh {
847855
switch sig {
848856
case syscall.SIGINT:
849-
if hasReceivedSigInt {
850-
logger.Info("received sigint a second time, terminating...")
857+
if !firstSigIntTime.IsZero() && time.Since(firstSigIntTime) >= 500*time.Millisecond {
858+
logger.Info("watchdog received sigint a second time, terminating...")
851859
os.Exit(1)
852860
return
853861
} else {
854-
logger.Info("received sigint, waiting for graceful shutdown...")
855-
hasReceivedSigInt = true
862+
logger.Info("watchdog received sigint, waiting for graceful shutdown...")
863+
if firstSigIntTime.IsZero() {
864+
firstSigIntTime = time.Now()
865+
}
866+
867+
interruptLock.Lock()
868+
if !shouldShutdown {
869+
shouldShutdown = true
870+
interruptLock.Unlock()
871+
close(interruptCh)
872+
} else {
873+
interruptLock.Unlock()
874+
}
856875
}
857876
case syscall.SIGTERM:
858-
logger.Info("received sigterm, waiting for graceful shutdown...")
877+
logger.Info("watchdog received sigterm, waiting for graceful shutdown...")
878+
interruptLock.Lock()
879+
if !shouldShutdown {
880+
interruptCh <- sig
881+
}
882+
interruptLock.Unlock()
859883
}
860884
}
861885
}()
@@ -870,21 +894,48 @@ func startGatewayWatchdog() {
870894
err := cmd.Start()
871895
if err != nil {
872896
logger.Info("failed to start sub-process", zap.Error(err))
897+
time.Sleep(5 * time.Second)
898+
continue
873899
}
874900

875-
err = cmd.Wait()
876-
if err != nil {
877-
logger.Info("sub-process exited with error", zap.Error(err))
901+
closedCh := make(chan error)
902+
go func() {
903+
closedCh <- cmd.Wait()
904+
}()
905+
906+
select {
907+
case err := <-closedCh:
908+
if err != nil {
909+
logger.Info("sub-process exited with error", zap.Error(err))
910+
}
911+
case sig := <-interruptCh:
912+
// if the channel is closed, we are shutting down which is SIGINT
913+
if sig == nil {
914+
sig = os.Interrupt
915+
}
916+
917+
logger.Info("forwarding signal to sub-process", zap.String("signal", sig.String()))
918+
_ = cmd.Process.Signal(sig)
919+
920+
err := <-closedCh
921+
if err != nil {
922+
logger.Info("sub-process exited after signal with error", zap.Error(err))
923+
}
878924
}
879925

880-
if hasReceivedSigInt {
926+
interruptLock.Lock()
927+
if shouldShutdown {
928+
interruptLock.Unlock()
881929
break
882930
}
931+
interruptLock.Unlock()
883932

884933
delayTime := 1 * time.Second
885934
logger.Info("crash detected, restarting", zap.Duration("delay", delayTime))
886935
time.Sleep(delayTime)
887936
}
937+
938+
logger.Info("watchdog exiting")
888939
}
889940

890941
func main() {

0 commit comments

Comments
 (0)