@@ -452,9 +452,13 @@ func managedRollbackRestartTest(ctx context.Context, t *testing.T, info *define.
452452 // we expect ErrSkipGrace at this point, meaning that we finished installing but didn't wait for agent to become healthy
453453 require .ErrorIs (t , err , ErrSkipGrace , "managed upgrade failed with unexpected error" )
454454
455- // A few seconds after the upgrade, deliberately restart upgraded Agent a
456- // couple of times to simulate Agent crashing.
457- restartAgentNTimes (t , 3 , 10 * time .Second )
455+ installedAgentClient := from .NewClient ()
456+ targetVersion , err := to .ExecVersion (ctx )
457+ require .NoError (t , err , "failed to get target version" )
458+ restartContext , cancel := context .WithTimeout (t .Context (), 1 * time .Minute )
459+ defer cancel ()
460+ // restart the agent only if it matches the (upgraded) target version
461+ restartAgentVersion (restartContext , t , installedAgentClient , targetVersion .Binary , 10 * time .Second )
458462
459463 // wait for the agent to be healthy and correct version
460464 err = upgradetest .WaitHealthyAndVersion (ctx , from , startVersionInfo .Binary , 2 * time .Minute , 10 * time .Second , t )
@@ -591,42 +595,95 @@ func restartAgentNTimes(t *testing.T, noOfRestarts int, sleepBetweenIterations t
591595
592596 for restartIdx := 0 ; restartIdx < noOfRestarts ; restartIdx ++ {
593597 time .Sleep (sleepBetweenIterations )
598+ restartAgent (t , topPath , 5 * time .Minute )
599+ }
600+ }
594601
595- t .Logf ("Stopping agent via service to simulate crashing" )
596- err := install .StopService (topPath , install .DefaultStopTimeout , install .DefaultStopInterval )
597- if err != nil && runtime .GOOS == define .Windows && strings .Contains (err .Error (), "The service has not been started." ) {
598- // Due to the quick restarts every 10 seconds its possible that this is faster than Windows
599- // can handle. Decrementing restartIdx means that the loop will occur again.
600- t .Logf ("Got an allowed error on Windows: %s" , err )
601- err = nil
602+ func restartAgent (t * testing.T , topPath string , operationTimeout time.Duration ) {
603+ t .Logf ("Stopping agent via service to simulate crashing" )
604+ stopRequested := time .Now ()
605+ err := install .StopService (topPath , install .DefaultStopTimeout , install .DefaultStopInterval )
606+ if err != nil && runtime .GOOS == define .Windows && strings .Contains (err .Error (), "The service has not been started." ) {
607+ // Due to the quick restarts every sleepBetweenIterations its possible that this is faster than Windows
608+ // can handle. Decrementing restartIdx means that the loop will occur again.
609+ t .Logf ("Got an allowed error on Windows: %s" , err )
610+ err = nil
611+ }
612+ require .NoError (t , err )
613+
614+ // ensure that it's stopped before starting it again
615+ var status service.Status
616+ var statusErr error
617+ require .Eventuallyf (t , func () bool {
618+ status , statusErr = install .StatusService (topPath )
619+ if statusErr != nil {
620+ return false
602621 }
603- require .NoError (t , err )
622+ return status != service .StatusRunning
623+ }, operationTimeout , 500 * time .Millisecond , "service never fully stopped (status: %v): %s" , status , statusErr )
624+ t .Logf ("Stopped agent via service. Took roughly %s" , time .Since (stopRequested ))
625+
626+ // start it again
627+ t .Logf ("Starting agent via service to simulate crashing" )
628+ startRequested := time .Now ()
629+ err = install .StartService (topPath )
630+ require .NoError (t , err )
604631
605- // ensure that it's stopped before starting it again
606- var status service.Status
607- var statusErr error
608- require .Eventuallyf (t , func () bool {
609- status , statusErr = install .StatusService (topPath )
610- if statusErr != nil {
611- return false
612- }
613- return status != service .StatusRunning
614- }, 5 * time .Minute , 1 * time .Second , "service never fully stopped (status: %v): %s" , status , statusErr )
615- t .Logf ("Stopped agent via service to simulate crashing" )
632+ // ensure that it's started before next loop
633+ require .Eventuallyf (t , func () bool {
634+ status , statusErr = install .StatusService (topPath )
635+ if statusErr != nil {
636+ return false
637+ }
638+ return status == service .StatusRunning
639+ }, operationTimeout , 500 * time .Millisecond , "service never fully started (status: %v): %s" , status , statusErr )
640+ t .Logf ("Started agent after stopping to simulate crashing. Took roughly %s" , time .Since (startRequested ))
641+ }
616642
617- // start it again
618- t .Logf ("Starting agent via service to simulate crashing" )
619- err = install .StartService (topPath )
620- require .NoError (t , err )
643+ func restartAgentVersion (ctx context.Context , t * testing.T , client client.Client , targetVersion atesting.AgentBinaryVersion , restartInterval time.Duration ) {
644+ topPath := paths .Top ()
645+
646+ ticker := time .NewTicker (restartInterval )
647+ defer ticker .Stop ()
621648
622- // ensure that it's started before next loop
623- require .Eventuallyf (t , func () bool {
624- status , statusErr = install .StatusService (topPath )
625- if statusErr != nil {
626- return false
649+ for {
650+ select {
651+ case <- ctx .Done ():
652+ t .Log ("restart context is done, returning" )
653+ return
654+
655+ case <- ticker .C :
656+ if ! versionMatch (ctx , t , client , targetVersion ) {
657+ // version of running agent does not match the target, continue to the next iteration
658+ continue
627659 }
628- return status == service .StatusRunning
629- }, 5 * time .Minute , 1 * time .Second , "service never fully started (status: %v): %s" , status , statusErr )
630- t .Logf ("Started agent via service to simulate crashing" )
660+
661+ restartAgent (t , topPath , restartInterval )
662+ }
663+
664+ }
665+ }
666+
667+ func versionMatch (ctx context.Context , t * testing.T , c client.Client , targetVersion atesting.AgentBinaryVersion ) bool {
668+ err := c .Connect (ctx )
669+ if err != nil {
670+ t .Logf ("failed to connect to agent: %v" , err )
671+ return false
672+ }
673+ defer c .Disconnect ()
674+
675+ actualVersion , err := c .Version (ctx )
676+ if err != nil {
677+ t .Logf ("failed to detect agent version: %v" , err )
678+ return false
679+ }
680+
681+ if actualVersion .Version != targetVersion .Version ||
682+ actualVersion .Snapshot != targetVersion .Snapshot ||
683+ actualVersion .Commit != targetVersion .Commit ||
684+ actualVersion .Fips != targetVersion .Fips {
685+ t .Logf ("actual agent version %+v does not match target agent version %+v, skipping restart" , actualVersion , targetVersion )
686+ return false
631687 }
688+ return true
632689}
0 commit comments