Skip to content

Commit 3ae2e77

Browse files
committed
OCPBUGS-56913: Retry registration steps on service restart
Fix the bug where installConfig overrides and extra manifests are not applied when the service restarts after finding an existing cluster. Previously, the registerCluster() function would immediately return if a cluster already existed, skipping the steps to apply installConfig overrides and register extra manifests. This meant that if the service crashed or was restarted after cluster registration but before these steps completed, the configuration would be incomplete. Now, registerCluster() unconditionally calls both ApplyInstallConfigOverrides() and RegisterExtraManifests() after obtaining the cluster (whether newly created or existing). Since both functions are idempotent, this is safe to retry and ensures all configuration steps complete successfully. Add subsystem tests to verify: - Retry of installConfig overrides on restart (idempotent) - Application of missing overrides to existing cluster - Retry of extra manifest registration (idempotent) Assisted-by: Claude Code
1 parent 5249718 commit 3ae2e77

File tree

2 files changed

+83
-10
lines changed

2 files changed

+83
-10
lines changed

cmd/agentbasedinstaller/client/main.go

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535

3636
"github.com/kelseyhightower/envconfig"
3737
"github.com/openshift/assisted-service/client"
38+
"github.com/openshift/assisted-service/models"
3839
"github.com/openshift/assisted-service/pkg/auth"
3940
log "github.com/sirupsen/logrus"
4041
)
@@ -121,23 +122,37 @@ func registerCluster(ctx context.Context, log *log.Logger, bmInventory *client.A
121122
if err != nil {
122123
log.Fatal(err.Error())
123124
}
125+
126+
var modelsCluster *models.Cluster
124127
if existingCluster != nil {
125-
log.Infof("Skipping cluster registration. Found existing cluster with id: %s", existingCluster.ID.String())
126-
return existingCluster.ID.String()
127-
}
128+
log.Infof("Found existing cluster with id: %s", existingCluster.ID.String())
129+
modelsCluster = existingCluster
130+
} else {
131+
pullSecret, err := agentbasedinstaller.GetPullSecret(RegisterOptions.PullSecretFile)
132+
if err != nil {
133+
log.Fatal("Failed to get pull secret: ", err.Error())
134+
}
128135

129-
pullSecret, err := agentbasedinstaller.GetPullSecret(RegisterOptions.PullSecretFile)
130-
if err != nil {
131-
log.Fatal("Failed to get pull secret: ", err.Error())
136+
modelsCluster, err = agentbasedinstaller.RegisterCluster(ctx, log, bmInventory, pullSecret,
137+
RegisterOptions.ClusterDeploymentFile, RegisterOptions.AgentClusterInstallFile, RegisterOptions.ClusterImageSetFile,
138+
RegisterOptions.ReleaseImageMirror, RegisterOptions.OperatorInstallFile, false)
139+
if err != nil {
140+
log.Fatal("Failed to register cluster with assisted-service: ", err)
141+
}
132142
}
133143

134-
modelsCluster, err := agentbasedinstaller.RegisterCluster(ctx, log, bmInventory, pullSecret,
135-
RegisterOptions.ClusterDeploymentFile, RegisterOptions.AgentClusterInstallFile, RegisterOptions.ClusterImageSetFile,
136-
RegisterOptions.ReleaseImageMirror, RegisterOptions.OperatorInstallFile, false)
144+
// Apply installConfig overrides if present (idempotent)
145+
log.Info("Applying installConfig overrides...")
146+
updatedCluster, err := agentbasedinstaller.ApplyInstallConfigOverrides(ctx, log, bmInventory, modelsCluster, RegisterOptions.AgentClusterInstallFile)
137147
if err != nil {
138-
log.Fatal("Failed to register cluster with assisted-service: ", err)
148+
log.Fatal("Failed to apply installConfig overrides: ", err)
149+
}
150+
if updatedCluster != nil {
151+
modelsCluster = updatedCluster
139152
}
140153

154+
// Register extra manifests (idempotent)
155+
log.Info("Registering extra manifests...")
141156
err = agentbasedinstaller.RegisterExtraManifests(os.DirFS(RegisterOptions.ExtraManifests), ctx, log, bmInventory.Manifests, modelsCluster)
142157
if err != nil {
143158
log.Fatal("Failed to register extra manifests with assisted-service: ", err)

subsystem/agent_based_installer_client_test.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package subsystem
22

33
import (
44
"context"
5+
"os"
56

67
. "github.com/onsi/ginkgo"
78
. "github.com/onsi/gomega"
@@ -72,4 +73,61 @@ var _ = Describe("RegisterClusterAndInfraEnv", func() {
7273
Expect(registerClusterErr).To(HaveOccurred())
7374
Expect(modelCluster).To(BeNil())
7475
})
76+
77+
It("retry installConfig overrides on restart scenario", func() {
78+
// First registration with overrides
79+
modelCluster, registerClusterErr := agentbasedinstaller.RegisterCluster(ctx, log, utils_test.TestContext.UserBMClient, pullSecret,
80+
"../docs/hive-integration/crds/clusterDeployment.yaml",
81+
"../docs/hive-integration/crds/agentClusterInstall-with-installconfig-overrides.yaml",
82+
"../docs/hive-integration/crds/clusterImageSet.yaml", "", "", false)
83+
Expect(registerClusterErr).NotTo(HaveOccurred())
84+
Expect(modelCluster.InstallConfigOverrides).To(Equal(`{"fips": true}`))
85+
86+
// Simulate restart: Apply overrides again to existing cluster
87+
// This should be idempotent and not fail
88+
updatedCluster, err := agentbasedinstaller.ApplyInstallConfigOverrides(ctx, log, utils_test.TestContext.UserBMClient,
89+
modelCluster, "../docs/hive-integration/crds/agentClusterInstall-with-installconfig-overrides.yaml")
90+
91+
Expect(err).NotTo(HaveOccurred())
92+
// Should return nil because overrides are already correctly applied
93+
Expect(updatedCluster).To(BeNil())
94+
})
95+
96+
It("apply overrides when missing on existing cluster", func() {
97+
// Register cluster without overrides first
98+
modelCluster, registerClusterErr := agentbasedinstaller.RegisterCluster(ctx, log, utils_test.TestContext.UserBMClient, pullSecret,
99+
"../docs/hive-integration/crds/clusterDeployment.yaml",
100+
"../docs/hive-integration/crds/agentClusterInstall.yaml",
101+
"../docs/hive-integration/crds/clusterImageSet.yaml", "", "", false)
102+
Expect(registerClusterErr).NotTo(HaveOccurred())
103+
Expect(modelCluster.InstallConfigOverrides).To(BeEmpty())
104+
105+
// Now apply overrides (simulating a restart where overrides should be applied)
106+
updatedCluster, err := agentbasedinstaller.ApplyInstallConfigOverrides(ctx, log, utils_test.TestContext.UserBMClient,
107+
modelCluster, "../docs/hive-integration/crds/agentClusterInstall-with-installconfig-overrides.yaml")
108+
109+
Expect(err).NotTo(HaveOccurred())
110+
Expect(updatedCluster).NotTo(BeNil())
111+
Expect(updatedCluster.InstallConfigOverrides).To(Equal(`{"fips": true}`))
112+
})
113+
114+
It("retry extra manifests registration on restart", func() {
115+
// Register cluster first
116+
modelCluster, registerClusterErr := agentbasedinstaller.RegisterCluster(ctx, log, utils_test.TestContext.UserBMClient, pullSecret,
117+
"../docs/hive-integration/crds/clusterDeployment.yaml",
118+
"../docs/hive-integration/crds/agentClusterInstall.yaml",
119+
"../docs/hive-integration/crds/clusterImageSet.yaml", "", "", false)
120+
Expect(registerClusterErr).NotTo(HaveOccurred())
121+
122+
// Register extra manifests - this should be idempotent
123+
// First registration
124+
err := agentbasedinstaller.RegisterExtraManifests(os.DirFS("../docs/hive-integration/crds/extra-manifests"),
125+
ctx, log, utils_test.TestContext.UserBMClient.Manifests, modelCluster)
126+
Expect(err).NotTo(HaveOccurred())
127+
128+
// Retry registration (simulating restart) - should not fail
129+
err = agentbasedinstaller.RegisterExtraManifests(os.DirFS("../docs/hive-integration/crds/extra-manifests"),
130+
ctx, log, utils_test.TestContext.UserBMClient.Manifests, modelCluster)
131+
Expect(err).NotTo(HaveOccurred())
132+
})
75133
})

0 commit comments

Comments
 (0)