@@ -529,9 +529,13 @@ func managedRollbackRestartTest(ctx context.Context, t *testing.T, info *define.
529529 // we expect ErrSkipGrace at this point, meaning that we finished installing but didn't wait for agent to become healthy
530530 require .ErrorIs (t , err , ErrSkipGrace , "managed upgrade failed with unexpected error" )
531531
532- // A few seconds after the upgrade, deliberately restart upgraded Agent a
533- // couple of times to simulate Agent crashing.
534- restartAgentNTimes (t , 3 , 10 * time .Second )
532+ installedAgentClient := from .NewClient ()
533+ targetVersion , err := to .ExecVersion (ctx )
534+ require .NoError (t , err , "failed to get target version" )
535+ restartContext , cancel := context .WithTimeout (t .Context (), 1 * time .Minute )
536+ defer cancel ()
537+ // restart the agent only if it matches the (upgraded) target version
538+ restartAgentVersion (restartContext , t , installedAgentClient , targetVersion .Binary , 10 * time .Second )
535539
536540 // wait for the agent to be healthy and correct version
537541 err = upgradetest .WaitHealthyAndVersion (ctx , from , startVersionInfo .Binary , 2 * time .Minute , 10 * time .Second , t )
@@ -657,42 +661,95 @@ func restartAgentNTimes(t *testing.T, noOfRestarts int, sleepBetweenIterations t
657661
658662 for restartIdx := 0 ; restartIdx < noOfRestarts ; restartIdx ++ {
659663 time .Sleep (sleepBetweenIterations )
664+ restartAgent (t , topPath , 5 * time .Minute )
665+ }
666+ }
660667
661- t .Logf ("Stopping agent via service to simulate crashing" )
662- err := install .StopService (topPath , install .DefaultStopTimeout , install .DefaultStopInterval )
663- if err != nil && runtime .GOOS == define .Windows && strings .Contains (err .Error (), "The service has not been started." ) {
664- // Due to the quick restarts every 10 seconds its possible that this is faster than Windows
665- // can handle. Decrementing restartIdx means that the loop will occur again.
666- t .Logf ("Got an allowed error on Windows: %s" , err )
667- err = nil
668+ func restartAgent (t * testing.T , topPath string , operationTimeout time.Duration ) {
669+ t .Logf ("Stopping agent via service to simulate crashing" )
670+ stopRequested := time .Now ()
671+ err := install .StopService (topPath , install .DefaultStopTimeout , install .DefaultStopInterval )
672+ if err != nil && runtime .GOOS == define .Windows && strings .Contains (err .Error (), "The service has not been started." ) {
673+ // Due to the quick restarts every sleepBetweenIterations its possible that this is faster than Windows
674+ // can handle. Decrementing restartIdx means that the loop will occur again.
675+ t .Logf ("Got an allowed error on Windows: %s" , err )
676+ err = nil
677+ }
678+ require .NoError (t , err )
679+
680+ // ensure that it's stopped before starting it again
681+ var status service.Status
682+ var statusErr error
683+ require .Eventuallyf (t , func () bool {
684+ status , statusErr = install .StatusService (topPath )
685+ if statusErr != nil {
686+ return false
668687 }
669- require .NoError (t , err )
688+ return status != service .StatusRunning
689+ }, operationTimeout , 500 * time .Millisecond , "service never fully stopped (status: %v): %s" , status , statusErr )
690+ t .Logf ("Stopped agent via service. Took roughly %s" , time .Since (stopRequested ))
691+
692+ // start it again
693+ t .Logf ("Starting agent via service to simulate crashing" )
694+ startRequested := time .Now ()
695+ err = install .StartService (topPath )
696+ require .NoError (t , err )
670697
671- // ensure that it's stopped before starting it again
672- var status service.Status
673- var statusErr error
674- require .Eventuallyf (t , func () bool {
675- status , statusErr = install .StatusService (topPath )
676- if statusErr != nil {
677- return false
678- }
679- return status != service .StatusRunning
680- }, 5 * time .Minute , 1 * time .Second , "service never fully stopped (status: %v): %s" , status , statusErr )
681- t .Logf ("Stopped agent via service to simulate crashing" )
698+ // ensure that it's started before next loop
699+ require .Eventuallyf (t , func () bool {
700+ status , statusErr = install .StatusService (topPath )
701+ if statusErr != nil {
702+ return false
703+ }
704+ return status == service .StatusRunning
705+ }, operationTimeout , 500 * time .Millisecond , "service never fully started (status: %v): %s" , status , statusErr )
706+ t .Logf ("Started agent after stopping to simulate crashing. Took roughly %s" , time .Since (startRequested ))
707+ }
682708
683- // start it again
684- t .Logf ("Starting agent via service to simulate crashing" )
685- err = install .StartService (topPath )
686- require .NoError (t , err )
709+ func restartAgentVersion (ctx context.Context , t * testing.T , client client.Client , targetVersion atesting.AgentBinaryVersion , restartInterval time.Duration ) {
710+ topPath := paths .Top ()
711+
712+ ticker := time .NewTicker (restartInterval )
713+ defer ticker .Stop ()
687714
688- // ensure that it's started before next loop
689- require .Eventuallyf (t , func () bool {
690- status , statusErr = install .StatusService (topPath )
691- if statusErr != nil {
692- return false
715+ for {
716+ select {
717+ case <- ctx .Done ():
718+ t .Log ("restart context is done, returning" )
719+ return
720+
721+ case <- ticker .C :
722+ if ! versionMatch (ctx , t , client , targetVersion ) {
723+ // version of running agent does not match the target, continue to the next iteration
724+ continue
693725 }
694- return status == service .StatusRunning
695- }, 5 * time .Minute , 1 * time .Second , "service never fully started (status: %v): %s" , status , statusErr )
696- t .Logf ("Started agent via service to simulate crashing" )
726+
727+ restartAgent (t , topPath , restartInterval )
728+ }
729+
730+ }
731+ }
732+
733+ func versionMatch (ctx context.Context , t * testing.T , c client.Client , targetVersion atesting.AgentBinaryVersion ) bool {
734+ err := c .Connect (ctx )
735+ if err != nil {
736+ t .Logf ("failed to connect to agent: %v" , err )
737+ return false
738+ }
739+ defer c .Disconnect ()
740+
741+ actualVersion , err := c .Version (ctx )
742+ if err != nil {
743+ t .Logf ("failed to detect agent version: %v" , err )
744+ return false
745+ }
746+
747+ if actualVersion .Version != targetVersion .Version ||
748+ actualVersion .Snapshot != targetVersion .Snapshot ||
749+ actualVersion .Commit != targetVersion .Commit ||
750+ actualVersion .Fips != targetVersion .Fips {
751+ t .Logf ("actual agent version %+v does not match target agent version %+v, skipping restart" , actualVersion , targetVersion )
752+ return false
697753 }
754+ return true
698755}
0 commit comments