
Commit 78268be

Merge pull request #111 from kerthcet/fix/updatedReplicas

Change the semantics of leaderWorkerSet Replicas

2 parents: 3ad6d2d + 83af7f7

File tree: 5 files changed (+59, -71)

api/leaderworkerset/v1/leaderworkerset_types.go (+3, -3)

@@ -175,13 +175,13 @@ type LeaderWorkerSetStatus struct {
 	// Conditions track the condition of the leaderworkerset.
 	Conditions []metav1.Condition `json:"conditions,omitempty"`

-	// ReadyReplicas track the number of groups that are in ready state.
+	// ReadyReplicas track the number of groups that are in ready state (updated or not).
 	ReadyReplicas int32 `json:"readyReplicas,omitempty"`

-	// UpdatedReplicas track the number of groups that have been updated.
+	// UpdatedReplicas track the number of groups that have been updated (ready or not).
 	UpdatedReplicas int32 `json:"updatedReplicas,omitempty"`

-	// Replicas track the active total number of groups.
+	// Replicas track the total number of groups that have been created (updated or not, ready or not)
 	Replicas int32 `json:"replicas,omitempty"`

 	// HPAPodSelector for pods that belong to the LeaderWorkerSet object, this is
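With the clarified comments, the three counters are independent: ReadyReplicas counts ready groups whether or not they run the new template, UpdatedReplicas counts updated groups whether or not they are ready, and Replicas counts every group that has been created. A minimal sketch of how a consumer might combine them (not part of this commit; it assumes the module path sigs.k8s.io/lws, and rolloutComplete is a hypothetical helper name):

package main

import (
	"fmt"

	leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
)

// rolloutComplete is a hypothetical helper: a rollout is done only when
// every created group is both updated and ready. Because ReadyReplicas and
// UpdatedReplicas are counted independently, all three fields must equal
// the desired count.
func rolloutComplete(desired int32, st leaderworkerset.LeaderWorkerSetStatus) bool {
	return st.Replicas == desired &&
		st.UpdatedReplicas == desired &&
		st.ReadyReplicas == desired
}

func main() {
	st := leaderworkerset.LeaderWorkerSetStatus{
		Replicas:        3, // groups created
		ReadyReplicas:   3, // ready, on the old or new template
		UpdatedReplicas: 2, // on the new template, ready or not
	}
	fmt.Println(rolloutComplete(3, st)) // false: one group still runs the old template
}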

config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml (+4, -3)

@@ -15392,16 +15392,17 @@ spec:
               type: string
             readyReplicas:
               description: ReadyReplicas track the number of groups that are in
-                ready state.
+                ready state (updated or not).
               format: int32
               type: integer
             replicas:
-              description: Replicas track the active total number of groups.
+              description: Replicas track the total number of groups that have been
+                created (updated or not, ready or not)
               format: int32
               type: integer
             updatedReplicas:
               description: UpdatedReplicas track the number of groups that have
-                been updated.
+                been updated (ready or not).
               format: int32
               type: integer
           type: object

pkg/controllers/leaderworkerset_controller.go (+17, -19)

@@ -354,8 +354,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l
 	}

 	updateStatus := false
-	readyCount := 0
-	updatedCount := 0
+	readyCount, updatedCount, updatedAndReadyCount := 0, 0, 0
 	templateHash := utils.LeaderWorkerTemplateHash(lws)

 	// Iterate through all statefulsets.
@@ -364,22 +363,21 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l
 			continue
 		}

-		// this is the worker statefulset.
-		if statefulsetutils.StatefulsetReady(sts) {
-
-			// the worker pods are OK.
-			// need to check leader pod for this group.
-			var leaderPod corev1.Pod
-			if err := r.Get(ctx, client.ObjectKey{Namespace: lws.Namespace, Name: sts.Name}, &leaderPod); err != nil {
-				log.Error(err, "Fetching leader pod")
-				return false, err
-			}
-			if podutils.PodRunningAndReady(leaderPod) {
-				readyCount++
+		var leaderPod corev1.Pod
+		if err := r.Get(ctx, client.ObjectKey{Namespace: lws.Namespace, Name: sts.Name}, &leaderPod); err != nil {
+			log.Error(err, "Fetching leader pod")
+			return false, err
+		}

-				if sts.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash && leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash {
-					updatedCount++
-				}
+		var ready bool
+		if statefulsetutils.StatefulsetReady(sts) && podutils.PodRunningAndReady(leaderPod) {
+			ready = true
+			readyCount++
+		}
+		if sts.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash && leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash {
+			updatedCount++
+			if ready {
+				updatedAndReadyCount++
 			}
 		}
 	}
@@ -394,7 +392,7 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l
 		updateStatus = true
 	}

-	condition := makeCondition(updatedCount == int(*lws.Spec.Replicas))
+	condition := makeCondition(updatedAndReadyCount == int(*lws.Spec.Replicas))
 	updateCondition := setCondition(lws, condition)
 	// if condition changed, record events
 	if updateCondition {
@@ -416,7 +414,7 @@ func (r *LeaderWorkerSetReconciler) updateStatus(ctx context.Context, lws *leade
 	}

 	// retrieve the current number of replicas -- the number of leaders
-	replicas := int(*sts.Spec.Replicas)
+	replicas := int(sts.Status.Replicas)
 	if lws.Status.Replicas != int32(replicas) {
 		lws.Status.Replicas = int32(replicas)
 		updateStatus = true
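
The last hunk carries the semantic change named in the commit title: status.Replicas is now read from the leader StatefulSet's status, i.e. the groups actually created, rather than from its spec, the groups requested. A minimal sketch of the distinction using only upstream types (illustrative, not from this commit):

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/utils/ptr"
)

func main() {
	// Mid scale-up from 2 to 3 leaders: spec and status diverge until the
	// StatefulSet controller catches up.
	sts := appsv1.StatefulSet{
		Spec:   appsv1.StatefulSetSpec{Replicas: ptr.To[int32](3)},
		Status: appsv1.StatefulSetStatus{Replicas: 2},
	}
	fmt.Println(*sts.Spec.Replicas)  // 3: desired leader count
	fmt.Println(sts.Status.Replicas) // 2: leaders actually created
}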

test/integration/controllers/leaderworkerset_test.go (+22, -42)

@@ -22,6 +22,7 @@ import (
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	appsv1 "k8s.io/api/apps/v1"
+	autoscalingv1 "k8s.io/api/autoscaling/v1"
 	v1 "k8s.io/api/autoscaling/v1"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -91,12 +92,10 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 			updates: []*update{
 				{
 					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
-						var leaderworkerset leaderworkerset.LeaderWorkerSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderworkerset)).To(gomega.Succeed())
-						testing.UpdateReplicaCount(ctx, k8sClient, &leaderworkerset, int32(3))
+						testing.UpdateReplicaCount(ctx, k8sClient, lws, int32(3))
 						var leaderSts appsv1.StatefulSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: leaderworkerset.Name, Namespace: leaderworkerset.Namespace}, &leaderSts)).To(gomega.Succeed())
-						gomega.Expect(testing.CreateLeaderPods(ctx, leaderSts, k8sClient, &leaderworkerset, 2, 3)).To(gomega.Succeed())
+						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderSts)).To(gomega.Succeed())
+						gomega.Expect(testing.CreateLeaderPods(ctx, leaderSts, k8sClient, lws, 2, 3)).To(gomega.Succeed())
 					},
 					checkLWSState: func(deployment *leaderworkerset.LeaderWorkerSet) {
 						testing.ExpectValidReplicasCount(ctx, deployment, 3, k8sClient)
@@ -113,12 +112,10 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 			updates: []*update{
 				{
 					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
-						var leaderworkerset leaderworkerset.LeaderWorkerSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderworkerset)).To(gomega.Succeed())
-						testing.UpdateReplicaCount(ctx, k8sClient, &leaderworkerset, int32(3))
+						testing.UpdateReplicaCount(ctx, k8sClient, lws, int32(3))
 						var leaderSts appsv1.StatefulSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: leaderworkerset.Name, Namespace: leaderworkerset.Namespace}, &leaderSts)).To(gomega.Succeed())
-						testing.DeleteLeaderPods(ctx, k8sClient, leaderworkerset)
+						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderSts)).To(gomega.Succeed())
+						testing.DeleteLeaderPods(ctx, k8sClient, lws)
 					},
 					checkLWSState: func(deployment *leaderworkerset.LeaderWorkerSet) {
 						testing.ExpectValidReplicasCount(ctx, deployment, 3, k8sClient)
@@ -135,12 +132,10 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 			updates: []*update{
 				{
 					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
-						var leaderworkerset leaderworkerset.LeaderWorkerSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderworkerset)).To(gomega.Succeed())
-						testing.UpdateReplicaCount(ctx, k8sClient, &leaderworkerset, int32(0))
+						testing.UpdateReplicaCount(ctx, k8sClient, lws, int32(0))
 						var leaderSts appsv1.StatefulSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: leaderworkerset.Name, Namespace: leaderworkerset.Namespace}, &leaderSts)).To(gomega.Succeed())
-						testing.DeleteLeaderPods(ctx, k8sClient, leaderworkerset)
+						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderSts)).To(gomega.Succeed())
+						testing.DeleteLeaderPods(ctx, k8sClient, lws)
 					},
 					checkLWSState: func(deployment *leaderworkerset.LeaderWorkerSet) {
 						testing.ExpectValidReplicasCount(ctx, deployment, 0, k8sClient)
@@ -157,12 +152,10 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 			updates: []*update{
 				{
 					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
-						var leaderworkerset leaderworkerset.LeaderWorkerSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderworkerset)).To(gomega.Succeed())
-						testing.UpdateReplicaCount(ctx, k8sClient, &leaderworkerset, int32(3))
+						testing.UpdateReplicaCount(ctx, k8sClient, lws, int32(3))
 						var leaderSts appsv1.StatefulSet
-						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: leaderworkerset.Name, Namespace: leaderworkerset.Namespace}, &leaderSts)).To(gomega.Succeed())
-						gomega.Expect(testing.CreateLeaderPods(ctx, leaderSts, k8sClient, &leaderworkerset, 0, 3)).To(gomega.Succeed())
+						gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderSts)).To(gomega.Succeed())
+						gomega.Expect(testing.CreateLeaderPods(ctx, leaderSts, k8sClient, lws, 0, 3)).To(gomega.Succeed())
 					},
 					checkLWSState: func(deployment *leaderworkerset.LeaderWorkerSet) {
 						testing.ExpectValidReplicasCount(ctx, deployment, 3, k8sClient)
@@ -298,7 +291,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 			updates: []*update{
 				{
 					checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
-						var scale v1.Scale
+						var scale autoscalingv1.Scale
 						gomega.Expect(k8sClient.SubResource("scale").Get(ctx, lws, &scale)).To(gomega.Succeed())
 						gomega.Expect(int32(scale.Spec.Replicas)).To(gomega.Equal(*lws.Spec.Replicas))
 						gomega.Expect(int32(scale.Status.Replicas)).To(gomega.Equal(lws.Status.Replicas))
@@ -307,24 +300,17 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 				},
 				{
 					lwsUpdateFn: func(lws *leaderworkerset.LeaderWorkerSet) {
-						var scale v1.Scale
-						gomega.Expect(k8sClient.SubResource("scale").Get(ctx, lws, &scale)).To(gomega.Succeed())
-						scale.Spec.Replicas = 3
-						lwsUnstructed, _ := ToUnstructured(lws)
-						lwsUnstructed.SetAPIVersion("leaderworkerset.x-k8s.io/v1")
-						lwsUnstructed.SetKind("LeaderWorkerSet")
-						scaleUnstructed, _ := ToUnstructured(scale.DeepCopy())
-						scaleUnstructed.SetAPIVersion("autoscaling/v1")
-						scaleUnstructed.SetKind("Scale")
-						gomega.Expect(k8sClient.SubResource("scale").Update(ctx, lwsUnstructed, client.WithSubResourceBody(scaleUnstructed))).To(gomega.Succeed())
+						dep := &leaderworkerset.LeaderWorkerSet{ObjectMeta: metav1.ObjectMeta{Namespace: lws.Namespace, Name: lws.Name}}
+						scale := &autoscalingv1.Scale{Spec: autoscalingv1.ScaleSpec{Replicas: 3}}
+						gomega.Expect(k8sClient.SubResource("scale").Update(ctx, dep, client.WithSubResourceBody(scale))).To(gomega.Succeed())
 					},
 					checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
 						gomega.Eventually(func() (int32, error) {
 							var leaderWorkerSet leaderworkerset.LeaderWorkerSet
 							if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderWorkerSet); err != nil {
-								return -1, err
+								return 0, err
 							}
-							return leaderWorkerSet.Status.Replicas, nil
+							return *leaderWorkerSet.Spec.Replicas, nil
 						}, testing.Timeout, testing.Interval).Should(gomega.Equal(int32(3)))
 					},
 				},
@@ -451,13 +437,6 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 			updates: []*update{
 				{
 					checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
-						gomega.Eventually(func() (int32, error) {
-							var leaderWorkerSet leaderworkerset.LeaderWorkerSet
-							if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderWorkerSet); err != nil {
-								return -1, err
-							}
-							return leaderWorkerSet.Status.Replicas, nil
-						}, testing.Timeout, testing.Interval).Should(gomega.Equal(int32(2)))
 						testing.ExpectValidLeaderStatefulSet(ctx, lws, k8sClient)
 						testing.ExpectValidWorkerStatefulSets(ctx, lws, k8sClient, true)
 						testing.ExpectLeaderWorkerSetProgressing(ctx, k8sClient, lws, "Replicas are progressing")
@@ -561,7 +540,8 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 					testing.ExpectLeaderWorkerSetUnavailable(ctx, k8sClient, lws, "All replicas are ready")
 					testing.ExpectStatefulsetPartitionEqualTo(ctx, k8sClient, lws, 2)
 					testing.ExpectValidLeaderStatefulSet(ctx, lws, k8sClient)
-					testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 1)
+					// 3-index status is unready but template already updated.
+					testing.ExpectLeaderWorkerSetStatusReplicas(ctx, k8sClient, lws, 3, 2)
 				},
 			},
 			{
@@ -884,7 +864,7 @@ var _ = ginkgo.Describe("LeaderWorkerSet controller", func() {
 						return k8sClient.Update(ctx, &leaderworkerset)
 					}, testing.Timeout, testing.Interval).Should(gomega.Succeed())
 					// Manually delete leader pods here because we have no statefulset controller.
-					testing.DeleteLeaderPods(ctx, k8sClient, leaderworkerset)
+					testing.DeleteLeaderPods(ctx, k8sClient, &leaderworkerset)
 				},
 				checkLWSState: func(lws *leaderworkerset.LeaderWorkerSet) {
 					testing.ExpectValidLeaderStatefulSet(ctx, lws, k8sClient)

test/testutils/util.go (+13, -4)

@@ -76,14 +76,18 @@ func CreateWorkerPodsForLeaderPod(ctx context.Context, leaderPod corev1.Pod, k8s
 	}).Should(gomega.Succeed())
 }

-func DeleteLeaderPods(ctx context.Context, k8sClient client.Client, lws leaderworkerset.LeaderWorkerSet) {
+func DeleteLeaderPods(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet) {
 	// delete pods with the highest indexes
 	var leaders corev1.PodList
 	gomega.Expect(k8sClient.List(ctx, &leaders, client.InNamespace(lws.Namespace), &client.MatchingLabels{leaderworkerset.WorkerIndexLabelKey: "0"})).To(gomega.Succeed())
+
+	var leaderWorkerSet leaderworkerset.LeaderWorkerSet
+	gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderWorkerSet)).To(gomega.Succeed())
+
 	// we don't have "slice" package before go1.21, could only manually delete pods with largest index
 	for i := range leaders.Items {
 		index, _ := strconv.Atoi(leaders.Items[i].Name[len(leaders.Items[i].Name)-1:])
-		if index >= int(*lws.Spec.Replicas) {
+		if index >= int(*leaderWorkerSet.Spec.Replicas) {
 			gomega.Expect(k8sClient.Delete(ctx, &leaders.Items[i])).To(gomega.Succeed())
 			// delete worker statefulset on behalf of kube-controller-manager
 			var sts appsv1.StatefulSet
@@ -360,8 +364,13 @@ func ValidatePodExclusivePlacementTerms(pod corev1.Pod) bool {

 func UpdateReplicaCount(ctx context.Context, k8sClient client.Client, lws *leaderworkerset.LeaderWorkerSet, count int32) {
 	gomega.Eventually(func() error {
-		lws.Spec.Replicas = ptr.To[int32](count)
-		return k8sClient.Update(ctx, lws)
+		var leaderworkerset leaderworkerset.LeaderWorkerSet
+		if err := k8sClient.Get(ctx, types.NamespacedName{Name: lws.Name, Namespace: lws.Namespace}, &leaderworkerset); err != nil {
+			return err
+		}
+
+		leaderworkerset.Spec.Replicas = ptr.To[int32](count)
+		return k8sClient.Update(ctx, &leaderworkerset)
 	}, Timeout, Interval).Should(gomega.Succeed())
 }