Skip to content

Commit d56306a

Browse files
carlydfclaude
andauthored
Include cluster UID in CONTROLLER_IDENTITY to prevent cross-cluster conflicts (#309)
Completes the two-release migration for including the cluster namespace UID in the controller identity string. Previously, the new `{identity}/{namespaceUID}` format was prepared behind a "next release" comment while the bare `{identity}` string remained active. This PR flips that: `getControllerIdentity()` now returns the full `{identity}/{suffix}` format, and `getDeprecatedControllerIdentity()` handles reclamation of Worker Deployments previously claimed under the old format. **Changes:** - `getControllerIdentity()` now returns `{CONTROLLER_IDENTITY}/{CONTROLLER_IDENTITY_SUFFIX}`; returns empty string if either is unset - `getDeprecatedControllerIdentity()` (renamed from `getControllerIdentityWithNamespaceUID`) is used only for backward-compatible reclamation - Startup guard in `main()` fails fast if `CONTROLLER_IDENTITY` is unset - Fallback guard in `Reconcile()` for library users who bypass `main()` requires that `getControllerIdentity() != ""` - Safety check in `claimManagerIdentity()` refuses to call `SetManagerIdentity` with an empty string to prevent accidental field clearing - Renamed `ToBeDeprecatedDefaultControllerIdentity` → `DeprecatedDefaultControllerIdentity` - Updated tests to set both `IdentityEnvKey` and `IdentitySuffixEnvKey` --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 02e0483 commit d56306a

7 files changed

Lines changed: 48 additions & 18 deletions

File tree

cmd/main.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ func main() {
121121
setupLog.Error(nil, "POD_NAMESPACE environment variable must be set")
122122
os.Exit(1)
123123
}
124+
125+
if os.Getenv(controller.IdentityEnvKey) == "" {
126+
setupLog.Error(nil, "CONTROLLER_IDENTITY environment variable must be set")
127+
os.Exit(1)
128+
}
129+
124130
var ns corev1.Namespace
125131
if err := mgr.GetAPIReader().Get(context.Background(), types.NamespacedName{Name: podNamespace}, &ns); err != nil {
126132
setupLog.Error(err, "unable to fetch namespace UID for controller identity suffix")

internal/controller/execplan.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,10 @@ func (r *TemporalWorkerDeploymentReconciler) shouldClaimManagerIdentity(vcfg *pl
188188
if existing == "" {
189189
return true // unclaimed
190190
}
191-
// In the next release, the namespace UID will be included in the controller identity.
192-
// To support smooth rollback, in this release, we will detect the future format and
193-
// treat that as a reclaimable claim.
194-
if existing == getControllerIdentityWithNamespaceUID() {
191+
192+
// Handle Worker Deployments that were controller-managed before we
193+
// started recording the cluster-UID in the manager identity
194+
if existing == getDeprecatedControllerIdentity() {
195195
return true
196196
}
197197
return false
@@ -205,6 +205,14 @@ func (r *TemporalWorkerDeploymentReconciler) claimManagerIdentity(
205205
vcfg *planner.VersionConfig,
206206
) error {
207207
identity := getControllerIdentity()
208+
if identity == "" {
209+
// Passing an empty identity to SetManagerIdentity clears the field on the
210+
// Worker Deployment, leaving it ownerless. Refuse rather than cause that.
211+
// This should never happen, but this is the extra fallback in case somehow
212+
// the check in main() and Reconcile() are not sufficient.
213+
return errors.New(fmt.Sprintf("%s and %s are not set; refusing to call SetManagerIdentity to avoid clearing the manager identity field",
214+
IdentityEnvKey, IdentitySuffixEnvKey))
215+
}
208216
resp, err := deploymentHandler.SetManagerIdentity(ctx, sdkclient.WorkerDeploymentSetManagerIdentityOptions{
209217
Self: true,
210218
ConflictToken: vcfg.ConflictToken,

internal/controller/suite_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ var testEnv *envtest.Environment
2929

3030
func TestMain(m *testing.M) {
3131
_ = os.Setenv(IdentityEnvKey, "test-controller-identity")
32+
_ = os.Setenv(IdentitySuffixEnvKey, "123")
3233
os.Exit(m.Run())
3334
}
3435

internal/controller/util.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,21 +58,24 @@ func getControllerVersion() string {
5858
return "unknown"
5959
}
6060

61-
// getControllerIdentity returns the identity from environment variable (set by Helm).
62-
// Returns empty string if unset. main() enforces this at startup, but that check is
63-
// bypassed if the reconciler is used as a library (e.g. embedded in another controller
64-
// manager or in tests). An empty return means the env var was not set before starting.
65-
func getControllerIdentity() string {
61+
// getDeprecatedControllerIdentity is used for smooth identity reclamation when rolling
62+
// forward to the new identity format.
63+
func getDeprecatedControllerIdentity() string {
6664
if identity := os.Getenv(IdentityEnvKey); identity != "" {
6765
return identity
6866
}
69-
return defaults.ToBeDeprecatedDefaultControllerIdentity
67+
return defaults.DeprecatedDefaultControllerIdentity
7068
}
7169

72-
// getControllerIdentityWithNamespaceUID returns the identity which will be used in the
73-
// next release. Used in this release for smooth rollback identity reclamation.
74-
func getControllerIdentityWithNamespaceUID() string {
75-
return getControllerIdentity() + "/" + os.Getenv(IdentitySuffixEnvKey)
70+
// getControllerIdentity returns the identity with controller env var and namespace uid suffix.
71+
// Presence of both are ensured by main() and in Reconcile() for users of the controller as a library.
72+
func getControllerIdentity() string {
73+
id := os.Getenv(IdentityEnvKey)
74+
suffix := os.Getenv(IdentitySuffixEnvKey)
75+
if id != "" && suffix != "" {
76+
return id + "/" + suffix
77+
}
78+
return ""
7679
}
7780

7881
func GetControllerMaxDeploymentVersionsIneligibleForDeletion() int32 {

internal/controller/worker_controller.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,14 @@ func (r *TemporalWorkerDeploymentReconciler) Reconcile(ctx context.Context, req
135135

136136
l := log.FromContext(ctx)
137137

138+
// Fallback identity check for when the reconciler is used as a library and
139+
// main() is not in the call path. main() is kept as the primary check for
140+
// faster feedback in normal Helm-based deployments.
141+
if getControllerIdentity() == "" {
142+
return ctrl.Result{}, errors.New(fmt.Sprintf("%s and %s are not set",
143+
IdentityEnvKey, IdentitySuffixEnvKey))
144+
}
145+
138146
l.V(1).Info("Running Reconcile loop")
139147

140148
// Fetch the worker deployment

internal/defaults/defaults.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ const (
1212
ServerMaxVersions = 100
1313
MaxVersionsIneligibleForDeletion = int32(ServerMaxVersions * 0.75)
1414

15-
// ToBeDeprecatedDefaultControllerIdentity will stop being used in the next release.
16-
ToBeDeprecatedDefaultControllerIdentity = "temporal-worker-controller"
15+
// DeprecatedDefaultControllerIdentity is no longer used, except to detect deployments previously
16+
// managed by this identity for clean reclamation with the new identity format.
17+
DeprecatedDefaultControllerIdentity = "temporal-worker-controller"
1718
)

internal/tests/internal/env_helpers.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ const (
4141
testDrainageRefreshInterval = time.Second
4242
testMaxVersionsIneligibleForDeletion = 5
4343
testMaxVersionsInDeployment = 6
44-
testControllerIdentity = "test-controller-identity"
44+
testControllerIdentityPrefix = "test-controller-identity"
45+
testControllerIdentitySuffix = "123"
46+
testControllerIdentity = testControllerIdentityPrefix + "/" + testControllerIdentitySuffix
4547
)
4648

4749
// setupKubebuilderAssets sets up the KUBEBUILDER_ASSETS environment variable if not already set
@@ -96,7 +98,8 @@ func getRepoRoot(t *testing.T) string {
9698
func setupTestEnvironment(t *testing.T) (*rest.Config, client.Client, manager.Manager, *clientpool.ClientPool, func()) {
9799
// Set faster reconcile interval for testing
98100
t.Setenv("RECONCILE_INTERVAL", "1s")
99-
t.Setenv(controller.IdentityEnvKey, testControllerIdentity)
101+
t.Setenv(controller.IdentityEnvKey, testControllerIdentityPrefix)
102+
t.Setenv(controller.IdentitySuffixEnvKey, testControllerIdentitySuffix)
100103
if kubeAssets := os.Getenv("KUBEBUILDER_ASSETS"); kubeAssets == "" {
101104
t.Skip("Skipping because KUBEBUILDER_ASSETS not set")
102105
}

0 commit comments

Comments
 (0)