temporalio
diff --git a/‎README.md‎
Lines changed: 3 additions & 0 deletions b/‎README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎api/v1alpha1/worker_types.go‎
Lines changed: 47 additions & 22 deletions b/‎api/v1alpha1/worker_types.go‎
Lines changed: 47 additions & 22 deletions
diff --git a/‎docs/README.md‎
Lines changed: 3 additions & 0 deletions b/‎docs/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/cd-rollouts.md‎
Lines changed: 209 additions & 0 deletions b/‎docs/cd-rollouts.md‎
Lines changed: 209 additions & 0 deletions
diff --git a/‎internal/controller/execplan.go‎
Lines changed: 13 additions & 2 deletions b/‎internal/controller/execplan.go‎
Lines changed: 13 additions & 2 deletions
@@ -104,6 +104,8 @@ See [docs/crd-management.md](docs/crd-management.md) for upgrade, rollback, and
 
 **New to deploying workers with this controller?** → Start with our [Migration Guide](docs/migration-to-versioned.md) to learn how to safely transition from traditional deployments.
 
+**Setting up CI/CD for steady-state rollouts?** → See the [CD Rollouts Guide](docs/cd-rollouts.md) for Helm, kubectl, ArgoCD, and Flux integration patterns.
+
 **Ready to dive deeper?** → Check out the [Architecture Guide](docs/architecture.md) to understand how the controller works, or the [Temporal Worker Versioning docs](https://docs.temporal.io/production-deployment/worker-deployments/worker-versioning) to learn about the underlying Temporal feature.
 
 **Need configuration help?** → See the [Configuration Reference](docs/configuration.md) for all available options.
@@ -145,6 +147,7 @@ The Temporal Worker Controller eliminates this operational overhead by automatin
 |----------|-------------|
 | [Migration Guide](docs/migration-to-versioned.md) | Step-by-step guide for migrating from traditional deployments |
 | [Reversion Guide](docs/migration-to-unversioned.md) | Step-by-step guide for migrating back to unversioned deployment |
+| [CD Rollouts](docs/cd-rollouts.md) | Helm, kubectl, ArgoCD, and Flux integration for steady-state rollouts |
 | [Architecture](docs/architecture.md) | Technical deep-dive into how the controller works |
 | [Configuration](docs/configuration.md) | Complete configuration reference |
 | [Concepts](docs/concepts.md) | Key concepts and terminology |
 
@@ -88,12 +88,24 @@ type TemporalWorkerDeploymentSpec struct {
 
 // Condition type constants for TemporalWorkerDeployment.
 const (
-	// ConditionTemporalConnectionHealthy indicates whether the referenced TemporalConnection
-	// resource exists and is properly configured.
+	// ConditionReady is True when the Temporal connection is reachable and the
+	// target version is the current version in Temporal. CD systems such as
+	// ArgoCD and Flux use this condition to gate deployment success.
+	ConditionReady = "Ready"
+
+	// ConditionProgressing is True while a rollout is actively in-flight —
+	// i.e., the target version has not yet been promoted to current.
+	ConditionProgressing = "Progressing"
+)
+
+// Deprecated condition type constants. Maintained for backward compatibility with
+// monitoring and automation built against v1.3.x. Use Ready and Progressing
+// instead. These will be removed in the next major version of the CRD.
+const (
+	// Deprecated: Use ConditionReady and ConditionProgressing instead.
 	ConditionTemporalConnectionHealthy = "TemporalConnectionHealthy"
 
-	// ConditionRolloutComplete indicates whether the target version has been successfully
-	// registered as the current version, completing the rollout.
+	// Deprecated: Use ConditionReady instead.
 	ConditionRolloutComplete = "RolloutComplete"
 )
 
@@ -104,33 +116,46 @@ const (
 // They should be treated as stable within an API version and renamed only with
 // a corresponding version bump.
 const (
-	// ReasonTemporalConnectionNotFound is set on ConditionTemporalConnectionHealthy
-	// when the referenced TemporalConnection resource cannot be found.
+	// ReasonRolloutComplete is set on ConditionReady=True and ConditionProgressing=False
+	// when the target version has been successfully registered as the current version.
+	ReasonRolloutComplete = "RolloutComplete"
+
+	// ReasonWaitingForPollers is set on ConditionProgressing=True when the target
+	// version's Kubernetes Deployment has been created but the version is not yet
+	// registered with Temporal (workers have not started polling yet).
+	ReasonWaitingForPollers = "WaitingForPollers"
+
+	// ReasonWaitingForPromotion is set on ConditionProgressing=True when the target
+	// version is registered with Temporal (Inactive) but has not yet been promoted
+	// to current or ramping.
+	ReasonWaitingForPromotion = "WaitingForPromotion"
+
+	// ReasonRamping is set on ConditionProgressing=True when the target version is
+	// the ramping version and is receiving a configured percentage of new workflows.
+	ReasonRamping = "Ramping"
+
+	// ReasonTemporalConnectionNotFound is set on ConditionProgressing=False when the
+	// referenced TemporalConnection resource cannot be found.
 	ReasonTemporalConnectionNotFound = "TemporalConnectionNotFound"
 
-	// ReasonAuthSecretInvalid is set on ConditionTemporalConnectionHealthy when the
-	// credential secret referenced by the TemporalConnection is misconfigured. This
-	// covers: (1) the secret reference has an empty name, (2) the named Kubernetes
-	// Secret cannot be fetched or has an unexpected type, and (3) the mTLS certificate
-	// in the secret is expired or about to expire.
+	// ReasonAuthSecretInvalid is set on ConditionProgressing=False when the credential
+	// secret referenced by the TemporalConnection is misconfigured. This covers:
+	// (1) the secret reference has an empty name, (2) the named Kubernetes Secret
+	// cannot be fetched or has an unexpected type, and (3) the mTLS certificate in
+	// the secret is expired or about to expire.
 	ReasonAuthSecretInvalid = "AuthSecretInvalid"
 
-	// ReasonTemporalClientCreationFailed is set on ConditionTemporalConnectionHealthy
-	// when the Temporal SDK client cannot connect to the server (dial failure or failed
-	// health check). The credentials were valid; the server itself is unreachable.
+	// ReasonTemporalClientCreationFailed is set on ConditionProgressing=False when the
+	// Temporal SDK client cannot connect to the server (dial failure or failed health
+	// check). The credentials were valid; the server itself is unreachable.
 	ReasonTemporalClientCreationFailed = "TemporalClientCreationFailed"
 
-	// ReasonTemporalStateFetchFailed is set on ConditionTemporalConnectionHealthy
-	// when the controller cannot query the current worker deployment state from Temporal.
+	// ReasonTemporalStateFetchFailed is set on ConditionProgressing=False when the
+	// controller cannot query the current worker deployment state from Temporal.
 	ReasonTemporalStateFetchFailed = "TemporalStateFetchFailed"
 
-	// ReasonTemporalConnectionHealthy is set on ConditionTemporalConnectionHealthy
-	// when the connection is reachable and the auth secret is resolved.
+	// Deprecated: Use ReasonRolloutComplete on ConditionReady instead.
 	ReasonTemporalConnectionHealthy = "TemporalConnectionHealthy"
-
-	// ReasonRolloutComplete is set on ConditionRolloutComplete when the target
-	// version has been successfully registered as the current version.
-	ReasonRolloutComplete = "RolloutComplete"
 )
 
 // VersionStatus indicates the status of a version.
 
@@ -11,6 +11,9 @@ This documentation structure is designed to support various types of technical d
 
 ## Index
 
+### [CD Rollouts](cd-rollouts.md)
+How to integrate the controller into Helm, kubectl, ArgoCD, and Flux pipelines for steady-state rollouts once you are already using Worker Versioning.
+
 ### [Architecture](architecture.md)
 High-level overview of the Temporal Worker Controller architecture.
 
 
@@ -0,0 +1,209 @@
+# CD Rollouts with the Temporal Worker Controller
+
+This guide describes patterns for integrating the Temporal Worker Controller into a CD pipeline. It is intended for teams that are already using Worker Versioning in steady state.
+
+> **Note:** The examples below illustrate common integration patterns but are not guaranteed to work verbatim with every version of each tool. API fields, configuration keys, and default behaviors change between releases. Always verify against the documentation for the specific tool you are using.
+
+For migration help, see [migration-to-versioned.md](migration-to-versioned.md).
+
+## Understanding the conditions
+
+The `TemporalWorkerDeployment` resource exposes two standard conditions on `status.conditions` that CD tools and scripts can consume.
+
+### `Ready`
+
+`Ready=True` means the controller successfully reached Temporal **and** the target version is the current version in Temporal. This is the primary signal that a rollout has finished and the worker is fully operational.
+
+When the rollout has finished, `Ready=True` is set with reason `RolloutComplete`.
+
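+`Ready` is `False` whenever either of those two requirements is not met (Temporal is unreachable, or the target version is not yet the current version). The `reason` field tells you why: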
+
+| Reason | Meaning |
+|---|---|
+| `WaitingForPollers` | Target version's Deployment exists but workers haven't registered with Temporal yet |
+| `WaitingForPromotion` | Workers are registered (Inactive) but not yet promoted to Current |
+| `Ramping` | Progressive strategy is ramping traffic to the new version |
+| Error reasons (see Progressing below) | A blocking error is preventing progress |
+
+### `Progressing`
+
+`Progressing=True` means a rollout is actively in-flight and the controller is making forward progress. `Progressing=False` means either the rollout is done (`Ready=True`) or a blocking error is preventing progress.
+
+When `Progressing=False`, the `reason` field identifies why — either the rollout completed successfully or a specific error is blocking it:
+
+| Reason | Meaning |
+|---|---|
+| `RolloutComplete` | Not an error — the rollout finished successfully |
+| `TemporalConnectionNotFound` | The referenced `TemporalConnection` resource doesn't exist |
+| `AuthSecretInvalid` | The credential secret is missing, malformed, or has an expired certificate |
+| `TemporalClientCreationFailed` | The controller can't reach the Temporal server (dial/health-check failure) |
+| `TemporalStateFetchFailed` | The controller reached the server but can't read the worker deployment state |
+| `PlanGenerationFailed` | Internal error generating the reconciliation plan |
+| `PlanExecutionFailed` | Internal error executing the plan (e.g., a Kubernetes API call failed) |
+
+Once the underlying problem is fixed, the next successful reconcile will restore `Progressing` and `Ready` to the correct state.
+
+## Triggering a rollout
+
+A rollout starts when you change the pod template in your `TemporalWorkerDeployment` spec — a changed pod spec produces a new Build ID, which the controller treats as a new version to roll out.
+
+With Helm (image tag update):
+
+```yaml
+# values.yaml
+image:
+  repository: my-registry/my-worker
+  tag: "v2.3.0"
+```
+
+```bash
+helm upgrade my-worker ./chart --values values.yaml
+```
+
+With a plain manifest:
+
+```yaml
+# twd.yaml
+spec:
+  template:
+    spec:
+      containers:
+        - name: worker
+          image: my-registry/my-worker:v2.3.0
+```
+
+```bash
+kubectl apply -f twd.yaml
+```
+
+The controller picks up the change on the next reconcile loop (within seconds) and begins the rollout.
+
+## kubectl
+
+`kubectl wait` can block a pipeline script until `Ready=True`:
+
+```bash
+kubectl apply -f twd.yaml
+kubectl wait temporalworkerdeployment/my-worker \
+  --for=condition=Ready \
+  --timeout=10m
+```
+
+Set `--timeout` to exceed the longest expected rollout time — for progressive strategies this is the sum of all `pauseDuration` values plus the time for workers to start and register. `kubectl wait` exits non-zero on timeout, which you can use to fail the pipeline.
+
+## Helm
+
+### Helm 4
+
+Helm 4 uses [kstatus](https://github.com/kubernetes-sigs/cli-utils/tree/master/pkg/kstatus) for its `--wait` implementation ([HIP-0022](https://helm.sh/community/hips/hip-0022/)). kstatus understands the standard Kubernetes conditions contract and should block until `Ready=True` on your `TemporalWorkerDeployment`:
+
+```bash
+helm upgrade my-worker ./chart --values values.yaml --wait --timeout 10m
+```
+
+> **Verify:** Check your Helm 4 release notes — kstatus behavior and the `--wait` flag semantics have evolved across point releases.
+
+### Helm 3
+
+Helm 3's `--wait` only covers a hardcoded set of native resource types (Deployments, StatefulSets, DaemonSets, Jobs, Pods) and does not inspect conditions on custom resources. A separate `kubectl wait` step is one approach:
+
+```bash
+helm upgrade my-worker ./chart --values values.yaml
+kubectl wait temporalworkerdeployment/my-worker \
+  --for=condition=Ready \
+  --timeout=10m \
+  --namespace my-namespace
+```
+
+## ArgoCD
+
+ArgoCD does not have a generic fallback that automatically checks `status.conditions` on unknown CRD types. For any resource whose group (`temporal.io`) is not in ArgoCD's built-in health check registry, ArgoCD silently skips that resource when computing application health. A [custom Lua health check](https://argo-cd.readthedocs.io/en/stable/operator-manual/health/) is the standard mechanism for teaching ArgoCD how to assess a CRD's health.
+
+The two standard conditions (`Ready`, `Progressing`) keep the Lua simple — it only needs to read the condition type and status, not any controller-specific status fields. The following script is a starting point; adapt it to your ArgoCD version and any site-specific requirements:
+
+```yaml
+# In your argocd-cm ConfigMap
+data:
+  resource.customizations.health.temporal.io_TemporalWorkerDeployment: |
+    local ready = nil
+    local progressing = nil
+    if obj.status ~= nil and obj.status.conditions ~= nil then
+      for _, c in ipairs(obj.status.conditions) do
+        if c.type == "Ready" then ready = c end
+        if c.type == "Progressing" then progressing = c end
+      end
+    end
+    if ready ~= nil and ready.status == "True" then
+      return {status = "Healthy", message = ready.message}
+    end
+    if progressing ~= nil then
+      if progressing.status == "True" then
+        return {status = "Progressing", message = progressing.message}
+      else
+        return {status = "Degraded", message = progressing.message}
+      end
+    end
+    return {status = "Progressing", message = "Waiting for conditions"}
+```
+
+With a health check like this in place:
+
+- ArgoCD shows **Healthy** once `Ready=True`.
+- ArgoCD shows **Progressing** while a rollout is in-flight (`Progressing=True`).
+- ArgoCD shows **Degraded** when progress is blocked (`Progressing=False` with an error reason).
+
+If you use [sync waves](https://argo-cd.readthedocs.io/en/stable/user-guide/sync-waves/) and workers must be fully rolled out before a dependent service is updated, place the `TemporalWorkerDeployment` in an earlier wave.
+
+> **Verify:** ArgoCD's health customization API and Lua runtime have changed across versions. Test your health check script in a non-production environment before relying on it to gate sync waves.
+
+## Flux
+
+### Kustomization
+
+Flux's `Kustomization` controller uses kstatus to assess resource health. Because `TemporalWorkerDeployment` emits a standard `Ready` condition, Flux should treat it as healthy when `Ready=True`. Adding an explicit `healthChecks` entry makes the dependency visible and ensures Flux waits on the `TemporalWorkerDeployment` before marking the Kustomization as ready:
+
+```yaml
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: my-workers
+  namespace: flux-system
+spec:
+  interval: 5m
+  path: ./workers
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: my-repo
+  healthChecks:
+    - apiVersion: temporal.io/v1alpha1
+      kind: TemporalWorkerDeployment
+      name: my-worker
+      namespace: my-namespace
+  timeout: 10m
+```
+
+Set `timeout` to exceed the longest expected rollout duration.
+
+### HelmRelease
+
+Flux's `helm-controller` uses kstatus by default for post-install/post-upgrade health assessment, so a `HelmRelease` deploying your worker chart should automatically wait for `Ready=True` on any `TemporalWorkerDeployment` resources in the release:
+
+```yaml
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: my-worker
+  namespace: flux-system
+spec:
+  interval: 5m
+  timeout: 10m   # should exceed the longest expected rollout
+  chart:
+    spec:
+      chart: ./chart
+      sourceRef:
+        kind: GitRepository
+        name: my-repo
+```
+
+> **Verify:** kstatus integration details and the `healthChecks` API have evolved across Flux releases. Check the Flux documentation for your version.
@@ -202,8 +202,11 @@ func (r *TemporalWorkerDeploymentReconciler) updateVersionConfig(ctx context.Con
 				"Failed to set buildID %q as current version: %v", vcfg.BuildID, err)
 			return fmt.Errorf("unable to set current deployment version: %w", err)
 		}
-		r.setCondition(workerDeploy, temporaliov1alpha1.ConditionRolloutComplete, metav1.ConditionTrue,
-			temporaliov1alpha1.ReasonRolloutComplete, fmt.Sprintf("Rollout complete for buildID %s", vcfg.BuildID))
+		// Update the in-memory status to reflect the promotion. The status was mapped
+		// from Temporal state before plan execution, so it is stale at this point.
+		// syncConditions (called at end of reconcile) derives Ready/Progressing from
+		// TargetVersion.Status, so it must be current to avoid a one-cycle lag.
+		workerDeploy.Status.TargetVersion.Status = temporaliov1alpha1.VersionStatusCurrent
 	} else {
 		if vcfg.RampPercentage > 0 {
 			l.Info("applying ramp", "buildID", vcfg.BuildID, "percentage", vcfg.RampPercentage)
@@ -222,6 +225,14 @@ func (r *TemporalWorkerDeploymentReconciler) updateVersionConfig(ctx context.Con
 				"Failed to set buildID %q as ramping version (%d%%): %v", vcfg.BuildID, vcfg.RampPercentage, err)
 			return fmt.Errorf("unable to set ramping deployment version: %w", err)
 		}
+		// Same reasoning as the SetCurrent path above: update the in-memory status
+		// so syncConditions sees the correct state on this reconcile cycle.
+		if vcfg.RampPercentage > 0 {
+			workerDeploy.Status.TargetVersion.Status = temporaliov1alpha1.VersionStatusRamping
+		}
+		// When RampPercentage == 0 we are clearing a stale ramp on a different build ID
+		// (see planner: "Reset ramp if needed"). The target version is already Current,
+		// so no in-memory status update is needed here.
 	}
 
 	if _, err := deploymentHandler.UpdateVersionMetadata(ctx, sdkclient.WorkerDeploymentUpdateVersionMetadataOptions{