Skip to content

Commit 40b4937

Browse files
committed
Fix upgrade pause issue
The upgrade run resource can activate the pause_after_each_upgrade_unit attribute, which pauses after each unit within the upgrade group. The upgrade should also pause before moving on to the next component type, since a user might need to perform some manual operation. The UI has this capability. Also, set the delay parameter in waitUpgradeForStatus only when needed: during upgrade execution, there are long stretches of time where polling NSX has no value, as Edges are restarted, hosts are being configured, or the Manager is being restarted. However, in other cases polling can start immediately. Signed-off-by: Kobi Samoray <[email protected]>
1 parent 809876e commit 40b4937

File tree

2 files changed

+26
-16
lines changed

2 files changed

+26
-16
lines changed

Diff for: nsxt/resource_nsxt_upgrade_precheck_acknowledge.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,7 @@ func getAcknowledgedPrecheckIDs(m interface{}) ([]string, error) {
137137
return result, err
138138
}
139139
for _, warning := range precheckWarnings {
140-
acked := *warning.Acked
141-
if acked {
140+
if warning.Acked != nil && *warning.Acked {
142141
result = append(result, *warning.Id)
143142
}
144143
}

Diff for: nsxt/resource_nsxt_upgrade_run.go

+25-14
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ var (
5757
// Default waiting setup in seconds
5858
defaultUpgradeStatusCheckInterval = 30
5959
defaultUpgradeStatusCheckTimeout = 3600
60-
defaultUpgradeStatusCheckDelay = 300
61-
defaultUpgradeStatusCheckMaxRetries = 100
60+
defaultUpgradeStatusCheckDelay = 30
61+
defaultUpgradeStatusCheckMaxRetries = 150
6262
)
6363

6464
var staticComponentUpgradeStatus = []string{
@@ -480,15 +480,11 @@ func prepareUpgrade(upgradeClientSet *upgradeClientSet, d *schema.ResourceData,
480480
upgradeClientSet.PlanClient.Pause()
481481
}
482482
//#nosec G601 Ignore implicit memory aliasing in for loop temporarily
483-
err = waitUpgradeForStatus(upgradeClientSet, &component, inFlightComponentUpgradeStatus, staticComponentUpgradeStatus)
483+
curStatus, err := waitUpgradeForStatus(upgradeClientSet, &component, inFlightComponentUpgradeStatus, staticComponentUpgradeStatus, false)
484484
if err != nil {
485485
return err
486486
}
487-
status, err = getUpgradeStatus(upgradeClientSet.StatusClient, &component)
488-
if err != nil {
489-
return err
490-
}
491-
if status.Status == model.ComponentUpgradeStatus_STATUS_SUCCESS {
487+
if curStatus == model.ComponentUpgradeStatus_STATUS_SUCCESS {
492488
return fmt.Errorf("unexpected status 'SUCCESS' for component '%s. Possibly there is a concurrent upgrade run'", component)
493489
}
494490

@@ -574,7 +570,11 @@ func getUpgradeStatus(statusClient upgrade.StatusSummaryClient, component *strin
574570
}
575571

576572
// Wait for the component upgrade status to reach one of the target statuses. Use a nil component for the overall upgrade status.
577-
func waitUpgradeForStatus(upgradeClientSet *upgradeClientSet, component *string, pending, target []string) error {
573+
func waitUpgradeForStatus(upgradeClientSet *upgradeClientSet, component *string, pending, target []string, doDelay bool) (string, error) {
574+
delay := time.Duration(0)
575+
if doDelay {
576+
delay = time.Duration(upgradeClientSet.Delay) * time.Second
577+
}
578578
stateConf := &resource.StateChangeConf{
579579
Pending: pending,
580580
Target: target,
@@ -597,7 +597,7 @@ func waitUpgradeForStatus(upgradeClientSet *upgradeClientSet, component *string,
597597
},
598598
Timeout: time.Duration(upgradeClientSet.Timeout) * time.Second,
599599
PollInterval: time.Duration(upgradeClientSet.Interval) * time.Second,
600-
Delay: time.Duration(upgradeClientSet.Delay) * time.Second,
600+
Delay: delay,
601601
NotFoundChecks: upgradeClientSet.MaxRetries,
602602
}
603603
statusI, err := stateConf.WaitForState()
@@ -608,9 +608,13 @@ func waitUpgradeForStatus(upgradeClientSet *upgradeClientSet, component *string,
608608
status := statusI.(*upgradeStatusAndDetail)
609609
statusDetail = fmt.Sprintf(" Current status: %s. Details: %s", status.Status, status.Detail)
610610
}
611-
return fmt.Errorf("failed to wait Upgrade to be %s: %v. %s", target, err, statusDetail)
611+
return "", fmt.Errorf("failed to wait Upgrade to be %s: %v. %s", target, err, statusDetail)
612612
}
613-
return nil
613+
status := ""
614+
if statusI != nil {
615+
status = statusI.(*upgradeStatusAndDetail).Status
616+
}
617+
return status, nil
614618
}
615619

616620
func updateUpgradeUnitGroups(upgradeClientSet *upgradeClientSet, d *schema.ResourceData, component string, preResetGroupList model.UpgradeUnitGroupListResult, hasVLCM *bool) error {
@@ -825,10 +829,12 @@ func runUpgrade(upgradeClientSet *upgradeClientSet, partialUpgradeMap map[string
825829
// there is a period during which the overall status is still IN_PROGRESS, which prevents us from starting the upgrade of the next component.
826830
// Wait here for the overall status to become stable. Because an upgrade may have been triggered earlier, we also wait here
827831
// for the first component for safety.
828-
err = waitUpgradeForStatus(upgradeClientSet, nil, inFlightComponentUpgradeStatus, staticComponentUpgradeStatus)
832+
_, err = waitUpgradeForStatus(upgradeClientSet, nil, inFlightComponentUpgradeStatus, staticComponentUpgradeStatus, false)
829833
if err != nil {
830834
return err
831835
}
836+
837+
// Retrieve the component status - this is different from the general status which is fetched above
832838
status, err = getUpgradeStatus(upgradeClientSet.StatusClient, &component)
833839
if err != nil {
834840
return err
@@ -880,10 +886,15 @@ func runUpgrade(upgradeClientSet *upgradeClientSet, partialUpgradeMap map[string
880886
if err != nil {
881887
return err
882888
}
883-
err = waitUpgradeForStatus(upgradeClientSet, &component, pendingStatus, targetStatus)
889+
curStatus, err := waitUpgradeForStatus(upgradeClientSet, &component, pendingStatus, targetStatus, true)
884890
if err != nil {
885891
return err
886892
}
893+
894+
if partialUpgradeMap[component] && curStatus == model.ComponentUpgradeStatus_STATUS_SUCCESS {
895+
log.Printf("[INFO] Upgrade of component %s is complete, pausing", component)
896+
return nil
897+
}
887898
log.Print(completeLog)
888899
}
889900
return nil

0 commit comments

Comments
 (0)