
Commit 22f6a12

Merge pull request #4488 from twz123/backport-4463-to-release-1.29

[Backport release-1.29] Use dedicated leasepool for worker config component

2 parents: bff9835 + a1d7798

File tree: 5 files changed, +227 -15

cmd/controller/controller.go (+9, -2)

@@ -259,7 +259,9 @@ func (c *command) start(ctx context.Context) error {
 
 	// One leader elector per controller
 	if !c.SingleNode {
-		leaderElector = leaderelector.NewLeasePool(adminClientFactory)
+		// The name used to be hardcoded in the component itself
+		// At some point we need to rename this.
+		leaderElector = leaderelector.NewLeasePool(adminClientFactory, "k0s-endpoint-reconciler")
 	} else {
 		leaderElector = &leaderelector.Dummy{Leader: true}
 	}
@@ -468,7 +470,12 @@ func (c *command) start(ctx context.Context) error {
 	}
 
 	if !slices.Contains(c.DisableComponents, constant.WorkerConfigComponentName) {
-		reconciler, err := workerconfig.NewReconciler(c.K0sVars, nodeConfig.Spec, adminClientFactory, leaderElector, enableKonnectivity)
+		// Create new dedicated leasepool for worker config reconciler
+		leaseName := fmt.Sprintf("k0s-%s-%s", constant.WorkerConfigComponentName, constant.KubernetesMajorMinorVersion)
+		workerConfigLeasePool := leaderelector.NewLeasePool(adminClientFactory, leaseName)
+		clusterComponents.Add(ctx, workerConfigLeasePool)
+
+		reconciler, err := workerconfig.NewReconciler(c.K0sVars, nodeConfig.Spec, adminClientFactory, workerConfigLeasePool, enableKonnectivity)
 		if err != nil {
 			return err
 		}
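For reference, the dedicated lease name is composed from the worker config component name and the Kubernetes minor version, so each minor version gets its own Lease object. A minimal sketch of that composition, assuming the constants resolve to "worker-config" and "1.29" on this branch (the literal values are an assumption for illustration, not part of this diff):

package main

import "fmt"

func main() {
	// Assumed values mirroring constant.WorkerConfigComponentName and
	// constant.KubernetesMajorMinorVersion on the release-1.29 branch.
	const workerConfigComponentName = "worker-config"
	const kubernetesMajorMinorVersion = "1.29"

	// Same composition as in the hunk above.
	leaseName := fmt.Sprintf("k0s-%s-%s", workerConfigComponentName, kubernetesMajorMinorVersion)
	fmt.Println(leaseName) // prints: k0s-worker-config-1.29
}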

inttest/Makefile (+4, -11)

@@ -66,18 +66,9 @@ check-conformance: bin/sonobuoy
 get-conformance-results: bin/sonobuoy
 	$(realpath bin/sonobuoy) retrieve
 
-TIMEOUT ?= 4m
+TIMEOUT ?= 6m
 
 check-ctr: TIMEOUT=10m
-check-byocri: TIMEOUT=5m
-# readiness check for metric tests takes between around 5 and 6 minutes.
-check-metrics: TIMEOUT=6m
-check-metricsscraper: TIMEOUT=6m
-
-check-calico: TIMEOUT=6m
-
-# Establishing konnectivity tunnels with the LB in place takes a while, thus a bit longer timeout for the smoke
-check-customports: TIMEOUT=6m
 
 # Config change smoke runs actually many cases hence a bit longer timeout
 check-configchange: TIMEOUT=8m
@@ -88,7 +79,9 @@ check-backup: TIMEOUT=10m
 # Autopilot 3x3 HA test can take a while to run
 check-ap-ha3x3: K0S_UPDATE_FROM_BIN ?= ../k0s
 check-ap-ha3x3: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN))
-check-ap-ha3x3: TIMEOUT=6m
+
+check-ap-controllerworker: K0S_UPDATE_FROM_BIN ?= ../k0s
+check-ap-controllerworker: K0S_UPDATE_FROM_PATH ?= $(realpath $(K0S_UPDATE_FROM_BIN))
 
 check-customports-dynamicconfig: export K0S_ENABLE_DYNAMIC_CONFIG=true
 check-customports-dynamicconfig: TEST_PACKAGE=customports

inttest/Makefile.variables (+1)

@@ -3,6 +3,7 @@ smoketests := \
 	check-addons \
 	check-airgap \
 	check-ap-airgap \
+	check-ap-controllerworker \
 	check-ap-ha3x3 \
 	check-ap-platformselect \
 	check-ap-quorum \

New test file (package controllerworker) (+209)

@@ -0,0 +1,209 @@
+// Copyright 2024 k0s authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package controllerworker
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/k0sproject/k0s/inttest/common"
+	aptest "github.com/k0sproject/k0s/inttest/common/autopilot"
+
+	apconst "github.com/k0sproject/k0s/pkg/autopilot/constant"
+	appc "github.com/k0sproject/k0s/pkg/autopilot/controller/plans/core"
+	"github.com/k0sproject/k0s/pkg/constant"
+	"github.com/k0sproject/k0s/pkg/kubernetes/watch"
+
+	"github.com/stretchr/testify/suite"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+type controllerworkerSuite struct {
+	common.BootlooseSuite
+}
+
+const k0sConfigWithMultiController = `
+spec:
+  api:
+    address: %s
+  storage:
+    etcd:
+      peerAddress: %s
+`
+
+const oldVersion = "v1.29.4+k0s.0"
+
+// SetupTest prepares the controller and filesystem, getting it into a consistent
+// state which we can run tests against.
+func (s *controllerworkerSuite) SetupTest() {
+	ctx := s.Context()
+	// ipAddress := s.GetControllerIPAddress(0)
+	var joinToken string
+
+	for idx := 0; idx < s.BootlooseSuite.ControllerCount; idx++ {
+		nodeName, require := s.ControllerNode(idx), s.Require()
+		address := s.GetControllerIPAddress(idx)
+
+		s.Require().NoError(s.WaitForSSH(nodeName, 2*time.Minute, 1*time.Second))
+		ssh, err := s.SSH(ctx, nodeName)
+		require.NoError(err)
+		defer ssh.Disconnect()
+		s.PutFile(nodeName, "/tmp/k0s.yaml", fmt.Sprintf(k0sConfigWithMultiController, address, address))
+		// Install older version of k0s
+		downloadCmd := fmt.Sprintf("curl -sSfL get.k0s.sh | K0S_VERSION=%s sh", oldVersion)
+		out, err := ssh.ExecWithOutput(ctx, downloadCmd)
+		if err != nil {
+			s.T().Logf("error getting k0s: %s", out)
+		}
+		require.NoError(err)
+		s.T().Logf("downloaded successfully: %s", out)
+		// Note that the token is intentionally empty for the first controller
+		args := []string{
+			"--debug",
+			"--disable-components=metrics-server,helm,konnectivity-server",
+			"--enable-worker",
+			"--config=/tmp/k0s.yaml",
+		}
+		if joinToken != "" {
+			s.PutFile(nodeName, "/tmp/token", joinToken)
+			args = append(args, "--token-file=/tmp/token")
+		}
+		out, err = ssh.ExecWithOutput(ctx, "k0s install controller "+strings.Join(args, " "))
+		if err != nil {
+			s.T().Logf("error installing k0s: %s", out)
+		}
+		require.NoError(err)
+		_, err = ssh.ExecWithOutput(ctx, "k0s start")
+		require.NoError(err)
+		// s.Require().NoError(s.InitController(idx, "--config=/tmp/k0s.yaml", "--disable-components=metrics-server", "--enable-worker", joinToken))
+		s.Require().NoError(s.WaitJoinAPI(nodeName))
+		kc, err := s.KubeClient(nodeName)
+		require.NoError(err)
+		require.NoError(s.WaitForNodeReady(nodeName, kc))
+
+		node, err := kc.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
+		require.NoError(err)
+		require.Equal("v1.29.4+k0s", node.Status.NodeInfo.KubeletVersion)
+
+		client, err := s.ExtensionsClient(s.ControllerNode(0))
+		s.Require().NoError(err)
+
+		s.Require().NoError(aptest.WaitForCRDByName(ctx, client, "plans"))
+		s.Require().NoError(aptest.WaitForCRDByName(ctx, client, "controlnodes"))
+
+		// With the primary controller running, create the join token for subsequent controllers.
+		if idx == 0 {
+			token, err := s.GetJoinToken("controller")
+			s.Require().NoError(err)
+			joinToken = token
+		}
+	}
+
+	// Final sanity -- ensure all nodes see each other according to etcd
+	for idx := 0; idx < s.BootlooseSuite.ControllerCount; idx++ {
+		s.Require().Len(s.GetMembers(idx), s.BootlooseSuite.ControllerCount)
+	}
+}
+
+// TestApply applies a well-formed `plan` yaml, and asserts that
+// all of the correct values across different objects + controllers are correct.
+func (s *controllerworkerSuite) TestApply() {
+
+	planTemplate := `
+apiVersion: autopilot.k0sproject.io/v1beta2
+kind: Plan
+metadata:
+  name: autopilot
+spec:
+  id: id123
+  timestamp: now
+  commands:
+    - k0supdate:
+        version: v0.0.0
+        forceupdate: true
+        platforms:
+          linux-amd64:
+            url: http://localhost/dist/k0s-new
+          linux-arm64:
+            url: http://localhost/dist/k0s-new
+        targets:
+          controllers:
+            discovery:
+              static:
+                nodes:
+                  - controller1
+                  - controller2
+                  - controller0
+`
+	ctx := s.Context()
+	manifestFile := "/tmp/happy.yaml"
+	s.PutFileTemplate(s.ControllerNode(0), manifestFile, planTemplate, nil)
+
+	out, err := s.RunCommandController(0, fmt.Sprintf("/usr/local/bin/k0s kubectl apply -f %s", manifestFile))
+	s.T().Logf("kubectl apply output: '%s'", out)
+	s.Require().NoError(err)
+
+	client, err := s.AutopilotClient(s.ControllerNode(0))
+	s.Require().NoError(err)
+	s.NotEmpty(client)
+
+	// The plan has enough information to perform a successful update of k0s, so wait for it.
+	plan, err := aptest.WaitForPlanState(s.Context(), client, apconst.AutopilotName, appc.PlanCompleted)
+	s.Require().NoError(err)
+
+	s.Equal(1, len(plan.Status.Commands))
+	cmd := plan.Status.Commands[0]
+
+	s.Equal(appc.PlanCompleted, cmd.State)
+	s.NotNil(cmd.K0sUpdate)
+	s.NotNil(cmd.K0sUpdate.Controllers)
+	s.Empty(cmd.K0sUpdate.Workers)
+
+	for _, node := range cmd.K0sUpdate.Controllers {
+		s.Equal(appc.SignalCompleted, node.State)
+	}
+
+	kc, err := s.KubeClient(s.ControllerNode(0))
+	s.NoError(err)
+
+	for idx := 0; idx < s.BootlooseSuite.ControllerCount; idx++ {
+		nodeName, require := s.ControllerNode(idx), s.Require()
+		require.NoError(s.WaitForNodeReady(nodeName, kc))
+		// Wait till we see kubelet reporting the expected version
+		err := watch.Nodes(kc.CoreV1().Nodes()).
+			WithObjectName(nodeName).
+			WithErrorCallback(common.RetryWatchErrors(s.T().Logf)).
+			Until(ctx, func(node *corev1.Node) (bool, error) {
+				return strings.Contains(node.Status.NodeInfo.KubeletVersion, fmt.Sprintf("v%s.", constant.KubernetesMajorMinorVersion)), nil
+			})
+		require.NoError(err)
+	}
+}
+
+// TestQuorumSuite sets up a suite using 3 controllers for quorum, and runs various
+// autopilot upgrade scenarios against them.
+func TestQuorumSuite(t *testing.T) {
+	suite.Run(t, &controllerworkerSuite{
+		common.BootlooseSuite{
+			ControllerCount: 3,
+			WorkerCount:     0,
+			LaunchMode:      common.LaunchModeOpenRC,
+		},
+	})
+}

pkg/component/controller/leaderelector/leasepool.go (+4, -2)

@@ -37,20 +37,22 @@ type LeasePool struct {
 
 	acquiredLeaseCallbacks []func()
 	lostLeaseCallbacks     []func()
+	name                   string
 }
 
 var _ Interface = (*LeasePool)(nil)
 var _ manager.Component = (*LeasePool)(nil)
 
 // NewLeasePool creates a new leader elector using a Kubernetes lease pool.
-func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface) *LeasePool {
+func NewLeasePool(kubeClientFactory kubeutil.ClientFactoryInterface, name string) *LeasePool {
 	d := atomic.Value{}
 	d.Store(false)
 	return &LeasePool{
 		stopCh:            make(chan struct{}),
 		kubeClientFactory: kubeClientFactory,
 		log:               logrus.WithFields(logrus.Fields{"component": "poolleaderelector"}),
 		leaderStatus:      d,
+		name:              name,
 	}
 }
 
@@ -63,7 +65,7 @@ func (l *LeasePool) Start(ctx context.Context) error {
 	if err != nil {
 		return fmt.Errorf("can't create kubernetes rest client for lease pool: %v", err)
 	}
-	leasePool, err := leaderelection.NewLeasePool(ctx, client, "k0s-endpoint-reconciler",
+	leasePool, err := leaderelection.NewLeasePool(ctx, client, l.name,
 		leaderelection.WithLogger(l.log),
 		leaderelection.WithContext(ctx))
 	if err != nil {