Skip to content

Commit 5650e6a

Browse files
simplify keeper version conditions
1 parent 8e1a008 commit 5650e6a

8 files changed

Lines changed: 114 additions & 103 deletions

File tree

api/v1alpha1/conditions.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ const (
4040
ConditionReasonConfigReloadFailed ConditionReason = "ConfigReloadFailed"
4141
ConditionReasonConfigReloadPending ConditionReason = "ConfigReloadPending"
4242

43-
// ConditionTypeVersionInSync indicates that all replicas report the same version as the image.
43+
// ConditionTypeVersionInSync indicates that all replicas report the same version.
4444
ConditionTypeVersionInSync ConditionType = "VersionInSync"
4545
ConditionReasonVersionMatch ConditionReason = "VersionMatch"
4646
ConditionReasonVersionMismatch ConditionReason = "VersionMismatch"

docs/guides/configuration.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,7 @@ Two conditions surface the result of the probe and the upgrade check:
627627

628628
| Condition | Reason | Meaning |
629629
|-------------------|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
630-
| `VersionInSync` | `VersionMatch` | All replicas report the same version as the image |
630+
| `VersionInSync` | `VersionMatch` | All replicas report the same version |
631631
| `VersionInSync` | `VersionMismatch` | Replicas are running different versions. This reason is suppressed during a planned rolling upgrade. It typically surfaces when a mutable image tag has been pinned (for example `latest` or a bare major like `26.3`) and the underlying registry has shifted between pulls, so different replicas ended up on different patches of the same tag. |
632632
| `VersionInSync` | `VersionPending` | Version probe Job has not finished yet, or no Keeper replica version has been observed yet |
633633
| `VersionInSync` | `VersionProbeFailed` | ClickHouse probe Job failed; the operator cannot determine the running version |

internal/controller/clickhouse/sync.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,29 @@ func (r *clickhouseReconciler) reconcileVersionProbe(ctx context.Context, log ct
417417
if probeResult.Completed() {
418418
r.Cluster.Status.Version = probeResult.Version
419419
r.Cluster.Status.VersionProbeRevision = probeResult.Revision
420+
} else {
421+
reason := v1.ConditionReasonVersionPending
422+
message := "Version probe has not completed yet"
423+
424+
var event []chctrl.EventSpec
425+
if r.versionProbe.Err != nil {
426+
reason = v1.ConditionReasonVersionProbeFailed
427+
message = fmt.Sprintf("Version probe failed: %v", r.versionProbe.Err)
428+
event = []chctrl.EventSpec{{
429+
Type: corev1.EventTypeWarning,
430+
Reason: v1.EventReasonVersionProbeFailed,
431+
Action: v1.EventActionVersionCheck,
432+
Message: message,
433+
}}
434+
}
435+
436+
r.SetCondition(metav1.Condition{
437+
Type: v1.ConditionTypeVersionInSync,
438+
Status: metav1.ConditionUnknown,
439+
Reason: reason,
440+
Message: message,
441+
}, event...)
442+
meta.RemoveStatusCondition(r.Cluster.GetStatus().GetConditions(), v1.ConditionTypeVersionUpgraded)
420443
}
421444

422445
return chctrl.StepContinue(), nil
@@ -664,12 +687,12 @@ func (r *clickhouseReconciler) reconcileClusterRevisions(ctx context.Context, lo
664687
}
665688

666689
{
667-
cond, event := chctrl.GetVersionSyncCondition(r.versionProbe, replicaVersions, len(notUpdated) > 0)
690+
cond, event := chctrl.GetVersionSyncCondition(r.versionProbe.Version, replicaVersions, len(notUpdated) > 0)
668691
r.SetCondition(cond, event...)
669692
}
670693

671694
if r.Checker != nil {
672-
cond, event := chctrl.GetUpgradeCondition(*r.Checker, r.versionProbe, r.Cluster.Spec.UpgradeChannel)
695+
cond, event := chctrl.GetUpgradeCondition(*r.Checker, r.versionProbe.Version, r.Cluster.Spec.UpgradeChannel)
673696
r.SetCondition(cond, event...)
674697
} else {
675698
meta.RemoveStatusCondition(r.Cluster.GetStatus().GetConditions(), v1.ConditionTypeVersionUpgraded)

internal/controller/keeper/sync.go

Lines changed: 33 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"math"
88
"slices"
99
"strconv"
10-
"strings"
1110
"time"
1211

1312
"gopkg.in/yaml.v2"
@@ -560,7 +559,6 @@ func (r *keeperReconciler) evaluateReplicaConditions() {
560559
var errorIDs, notReadyIDs, notUpdatedIDs []string
561560

562561
replicasByMode := map[string][]v1.KeeperReplicaID{}
563-
replicaVersions := map[string]string{}
564562

565563
r.Cluster.Status.ReadyReplicas = 0
566564
for id, replica := range r.ReplicaState {
@@ -580,18 +578,12 @@ func (r *keeperReconciler) evaluateReplicaConditions() {
580578
if replica.HasDiff(r.revs) || !replica.Updated() {
581579
notUpdatedIDs = append(notUpdatedIDs, idStr)
582580
}
583-
584-
if replica.Status.Version != "" {
585-
replicaVersions[idStr] = replica.Status.Version
586-
}
587581
}
588582

589583
r.SetCondition(chctrl.ReplicaStartupCondition(errorIDs))
590584
r.SetCondition(chctrl.HealthyCondition(notReadyIDs))
591585
r.SetCondition(chctrl.ConfigSyncCondition(nil, notUpdatedIDs, nil))
592-
versionCond, versionEvents := keeperVersionSyncCondition(replicaVersions, len(notUpdatedIDs) > 0)
593-
r.SetCondition(versionCond, versionEvents...)
594-
r.evaluateUpgradeCondition(replicaVersions)
586+
r.evaluateVersionConditions(len(notUpdatedIDs) > 0)
595587

596588
// Ready condition — keeper-specific logic.
597589
exists := len(r.ReplicaState)
@@ -670,75 +662,51 @@ func (r *keeperReconciler) evaluateReplicaConditions() {
670662
)
671663
}
672664

673-
func (r *keeperReconciler) evaluateUpgradeCondition(replicaVersions map[string]string) {
674-
version, ok := keeperObservedVersion(replicaVersions)
675-
if ok {
676-
r.Cluster.Status.Version = version
677-
}
678-
679-
if r.Checker == nil || !ok {
680-
meta.RemoveStatusCondition(r.Cluster.GetStatus().GetConditions(), v1.ConditionTypeVersionUpgraded)
681-
return
682-
}
683-
684-
cond, event := chctrl.GetUpgradeCondition(*r.Checker, chctrl.VersionProbeResult{Version: version}, r.Cluster.Spec.UpgradeChannel)
685-
r.SetCondition(cond, event...)
686-
}
665+
func (r *keeperReconciler) evaluateVersionConditions(isUpdating bool) {
666+
versionByReplica := map[string]string{}
667+
distinct := map[string]struct{}{}
668+
observedVersion := ""
687669

688-
func keeperVersionSyncCondition(replicaVersions map[string]string, isUpdating bool) (metav1.Condition, []chctrl.EventSpec) {
689-
newCond := func(status metav1.ConditionStatus, reason v1.ConditionReason, message string) metav1.Condition {
690-
return metav1.Condition{
691-
Type: v1.ConditionTypeVersionInSync,
692-
Status: status,
693-
Reason: reason,
694-
Message: message,
670+
for id, s := range r.ReplicaState {
671+
if s.Status.Version != "" {
672+
versionByReplica[strconv.FormatInt(int64(id), 10)] = s.Status.Version
673+
distinct[s.Status.Version] = struct{}{}
674+
observedVersion = s.Status.Version
695675
}
696676
}
697677

698-
if len(replicaVersions) == 0 {
699-
return newCond(metav1.ConditionUnknown, v1.ConditionReasonVersionPending, "No Keeper replica version has been observed yet"), nil
678+
// Record the version only when all observed replicas agree, otherwise keep the last known one.
679+
if len(distinct) == 1 {
680+
r.Cluster.Status.Version = observedVersion
700681
}
701682

702-
var observed []string
703-
for id, version := range replicaVersions {
704-
observed = append(observed, fmt.Sprintf("%s: %s", id, version))
705-
}
683+
// No running replica has reported a version yet — report pending and skip evaluation.
684+
if len(versionByReplica) == 0 {
685+
r.SetCondition(metav1.Condition{
686+
Type: v1.ConditionTypeVersionInSync,
687+
Status: metav1.ConditionUnknown,
688+
Reason: v1.ConditionReasonVersionPending,
689+
Message: "No Keeper replica has reported a version yet",
690+
})
691+
meta.RemoveStatusCondition(r.Cluster.GetStatus().GetConditions(), v1.ConditionTypeVersionUpgraded)
706692

707-
if version, ok := keeperObservedVersion(replicaVersions); ok {
708-
return newCond(metav1.ConditionTrue, v1.ConditionReasonVersionMatch,
709-
"All observed Keeper replicas report version "+version), nil
693+
return
710694
}
711695

712-
slices.Sort(observed)
713-
cond := newCond(metav1.ConditionFalse, v1.ConditionReasonVersionMismatch,
714-
"Keeper replica versions differ: "+strings.Join(observed, ", "))
715-
716-
if isUpdating {
717-
return cond, nil
696+
version := r.Cluster.Status.Version
697+
if version == "" {
698+
version = observedVersion
718699
}
719700

720-
return cond, []chctrl.EventSpec{{
721-
Type: corev1.EventTypeWarning,
722-
Reason: v1.EventReasonVersionDiverge,
723-
Action: v1.EventActionVersionCheck,
724-
Message: cond.Message,
725-
}}
726-
}
727-
728-
func keeperObservedVersion(replicaVersions map[string]string) (string, bool) {
729-
var result string
730-
for _, version := range replicaVersions {
731-
if result == "" {
732-
result = version
733-
continue
734-
}
701+
cond, event := chctrl.GetVersionSyncCondition(version, versionByReplica, isUpdating)
702+
r.SetCondition(cond, event...)
735703

736-
if version != result {
737-
return "", false
738-
}
704+
if r.Checker != nil {
705+
cond, event = chctrl.GetUpgradeCondition(*r.Checker, version, r.Cluster.Spec.UpgradeChannel)
706+
r.SetCondition(cond, event...)
707+
} else {
708+
meta.RemoveStatusCondition(r.Cluster.GetStatus().GetConditions(), v1.ConditionTypeVersionUpgraded)
739709
}
740-
741-
return result, result != ""
742710
}
743711

744712
func (r *keeperReconciler) updateReplica(ctx context.Context, log ctrlutil.Logger, replicaID v1.KeeperReplicaID) (*ctrl.Result, error) {

internal/controller/keeper/sync_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,48 @@ var _ = Describe("Keeper version status", func() {
269269
Expect(cond.Status).To(Equal(metav1.ConditionFalse))
270270
Expect(cond.Reason).To(Equal(v1.ConditionReasonMinorUpdateAvailable))
271271
})
272+
273+
It("should check upgrades against the last known version while replicas differ", func(ctx context.Context) {
274+
log, rec, cancelEvents := setupReconciler()
275+
defer cancelEvents()
276+
277+
updater := upgrade.NewReleaseUpdater(&upgrade.StaticFetcher{Releases: map[string][]upgrade.ClickHouseVersion{
278+
upgrade.ChannelStable: {
279+
{Major: 25, Minor: 8, Patch: 2, Build: 1},
280+
{Major: 25, Minor: 8, Patch: 3, Build: 1},
281+
},
282+
}}, time.Hour, log)
283+
rec.Checker = upgrade.NewChecker(updater)
284+
rec.Cluster.Status.Version = "25.8.2.1"
285+
rec.ReplicaState = map[v1.KeeperReplicaID]replicaState{
286+
0: {Status: serverStatus{Version: "25.8.2.1"}},
287+
1: {Status: serverStatus{Version: "25.8.3.1"}},
288+
}
289+
290+
updCtx, cancel := context.WithCancel(ctx)
291+
defer cancel()
292+
293+
go func() {
294+
defer GinkgoRecover()
295+
296+
Expect(updater.Start(updCtx)).To(Succeed())
297+
}()
298+
299+
Eventually(updater.GetReleasesData).ShouldNot(BeNil())
300+
301+
rec.evaluateReplicaConditions()
302+
303+
Expect(rec.Cluster.Status.Version).To(Equal("25.8.2.1"))
304+
sync := meta.FindStatusCondition(rec.Cluster.Status.Conditions, v1.ConditionTypeVersionInSync)
305+
Expect(sync).NotTo(BeNil())
306+
Expect(sync.Status).To(Equal(metav1.ConditionFalse))
307+
Expect(sync.Reason).To(Equal(v1.ConditionReasonVersionMismatch))
308+
309+
upg := meta.FindStatusCondition(rec.Cluster.Status.Conditions, v1.ConditionTypeVersionUpgraded)
310+
Expect(upg).NotTo(BeNil())
311+
Expect(upg.Status).To(Equal(metav1.ConditionFalse))
312+
Expect(upg.Reason).To(Equal(v1.ConditionReasonMinorUpdateAvailable))
313+
})
272314
})
273315

274316
func mustGet[T client.Object](ctx context.Context, c client.Client, key types.NamespacedName) T {

internal/controller/upgradecheck.go

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
// Returns current condition and optional EventSpec that should be recorded if condition Status changed.
1616
func GetUpgradeCondition(
1717
checker upgrade.Checker,
18-
probe VersionProbeResult,
18+
version string,
1919
upgradeChannel string,
2020
) (metav1.Condition, []EventSpec) {
2121
newCond := func(status metav1.ConditionStatus, reason v1.ConditionReason, message string) metav1.Condition {
@@ -27,15 +27,7 @@ func GetUpgradeCondition(
2727
}
2828
}
2929

30-
if probe.Pending {
31-
return newCond(metav1.ConditionUnknown, v1.ConditionReasonVersionPending, "Version probe has not completed yet"), nil
32-
}
33-
34-
if probe.Err != nil {
35-
return newCond(metav1.ConditionUnknown, v1.ConditionReasonVersionProbeFailed, "Version probe failed"), nil
36-
}
37-
38-
result, err := checker.CheckUpdates(probe.Version, upgradeChannel)
30+
result, err := checker.CheckUpdates(version, upgradeChannel)
3931
if err != nil {
4032
return newCond(metav1.ConditionUnknown, v1.ConditionReasonUpgradeCheckFailed, fmt.Sprintf("Upgrade check failed: %v", err)), nil
4133
}
@@ -63,7 +55,7 @@ func GetUpgradeCondition(
6355

6456
case result.Outdated:
6557
reason = v1.ConditionReasonVersionOutdated
66-
message = "Current version " + probe.Version + " is out of support"
58+
message = "Current version " + version + " is out of support"
6759
default:
6860
return newCond(metav1.ConditionTrue, v1.ConditionReasonUpToDate, ""), nil
6961
}

internal/controller/versionprobe.go

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,11 @@ func (rm *ResourceManager) VersionProbe(
148148
return VersionProbeResult{Version: version, Revision: revision}, nil
149149
}
150150

151-
// GetVersionSyncCondition evaluates the VersionInSync condition based on the probe result and replica versions.
152-
// Returns current condition and optional EventSpec that should be recorded if condition Status changed.
151+
// GetVersionSyncCondition evaluates the VersionInSync condition by comparing the known
152+
// cluster version against the versions reported by replicas. The version must be known;
153+
// the "version unavailable" states are reported by the caller.
153154
func GetVersionSyncCondition(
154-
probe VersionProbeResult,
155+
version string,
155156
replicaVersions map[string]string,
156157
isUpdating bool,
157158
) (metav1.Condition, []EventSpec) {
@@ -164,25 +165,10 @@ func GetVersionSyncCondition(
164165
}
165166
}
166167

167-
if probe.Err != nil {
168-
message := fmt.Sprintf("Version probe failed: %v", probe.Err)
169-
170-
return newCond(metav1.ConditionUnknown, v1.ConditionReasonVersionProbeFailed, message), []EventSpec{{
171-
Type: corev1.EventTypeWarning,
172-
Reason: v1.EventReasonVersionProbeFailed,
173-
Action: v1.EventActionVersionCheck,
174-
Message: message,
175-
}}
176-
}
177-
178-
if probe.Pending {
179-
return newCond(metav1.ConditionUnknown, v1.ConditionReasonVersionPending, "Version probe has not completed yet"), nil
180-
}
181-
182168
var mismatched []string
183-
for id, version := range replicaVersions {
184-
if version != "" && version != probe.Version {
185-
mismatched = append(mismatched, fmt.Sprintf("%s: %s", id, version))
169+
for id, replicaVersion := range replicaVersions {
170+
if replicaVersion != "" && replicaVersion != version {
171+
mismatched = append(mismatched, fmt.Sprintf("%s: %s", id, replicaVersion))
186172
}
187173
}
188174

@@ -192,7 +178,7 @@ func GetVersionSyncCondition(
192178

193179
slices.Sort(mismatched)
194180
cond := newCond(metav1.ConditionFalse, v1.ConditionReasonVersionMismatch,
195-
fmt.Sprintf("Replica version doesn't match version probe %s: %s", probe.Version, strings.Join(mismatched, ", ")))
181+
fmt.Sprintf("Replica version doesn't match %s: %s", version, strings.Join(mismatched, ", ")))
196182

197183
if isUpdating {
198184
return cond, nil

test/e2e/keeper_e2e_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ var _ = Describe("Keeper controller", Label("keeper"), func() {
6060

6161
WaitKeeperUpdatedAndReady(ctx, &cr, 3*time.Minute, true)
6262
ExpectWithOffset(1, k8sClient.Get(ctx, cr.NamespacedName(), &cr)).To(Succeed())
63-
63+
Expect(cr.Status.Version).To(HavePrefix(cr.Spec.ContainerTemplate.Image.Tag))
6464
KeeperRWChecks(ctx, &cr, &checks)
6565
},
6666
Entry("update log level", v1.KeeperClusterSpec{Settings: v1.KeeperSettings{

0 commit comments

Comments
 (0)