Skip to content

Commit 5689dcd

Browse files
committed
Merge branch 'KUBE-1216--go-mod' into KUBE-1163-delete-with-provider-id
2 parents 2cf323b + 0cbda03 commit 5689dcd

File tree

9 files changed

+821
-63
lines changed

9 files changed

+821
-63
lines changed

e2e/client/api.gen.go

Lines changed: 286 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

e2e/client/client.gen.go

Lines changed: 362 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/actions/csr/svc.go

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,15 @@ import (
2424
)
2525

2626
const (
27-
approveCSRTimeout = 4 * time.Minute
28-
groupSystemNodesName = "system:nodes"
27+
approveCSRTimeout = 4 * time.Minute
28+
groupSystemNodesName = "system:nodes"
29+
// kubeletBootstrapRequestingUser is the observed requestor for node's CSRs from real GKE clusters.
2930
kubeletBootstrapRequestingUser = "kubelet-bootstrap"
30-
clusterControllerSAName = "system:serviceaccount:castai-agent:castai-cluster-controller"
31-
approvedMessage = "This CSR was approved by CAST AI"
32-
csrOutdatedAfter = time.Hour
31+
// kubeletNodepoolBootstrapRequestingUser is the observed requestor for node's CSRs for GKE clusters 1.33+.
32+
kubeletNodepoolBootstrapRequestingUser = "kubelet-nodepool-bootstrap"
33+
clusterControllerSAName = "system:serviceaccount:castai-agent:castai-cluster-controller"
34+
approvedMessage = "This CSR was approved by CAST AI"
35+
csrOutdatedAfter = time.Hour
3336
)
3437

3538
func NewApprovalManager(log logrus.FieldLogger, clientset kubernetes.Interface) *ApprovalManager {
@@ -148,8 +151,8 @@ func (m *ApprovalManager) runAutoApproveForCastAINodes(ctx context.Context, c <-
148151
"signer": csr.SignerName(),
149152
"csr": csr.Name(),
150153
})
151-
if shouldSkip(log, csr) {
152-
log.Debug("skipping csr")
154+
if skip, reason := shouldSkip(csr); skip {
155+
log.Infof("skipping csr due to reason: %s", reason)
153156
return
154157
}
155158
log.Info("auto approving csr")
@@ -221,32 +224,26 @@ func (m *ApprovalManager) handle(ctx context.Context, log logrus.FieldLogger, cs
221224
return errors.New("certificate signing request was not approved")
222225
}
223226

224-
func shouldSkip(log logrus.FieldLogger, csr *wrapper.CSR) bool {
227+
func shouldSkip(csr *wrapper.CSR) (bool, string) {
225228
if csr.Approved() {
226-
log.Debug("csr already approved")
227-
return true
229+
return true, "csr already approved"
228230
}
229231
if time.Since(csr.CreatedAt()) > csrOutdatedAfter {
230-
log.Debug("csr is outdated")
231-
return true
232+
return true, fmt.Sprintf("csr is outdated, %v is larger than %v", time.Since(csr.CreatedAt()), csrOutdatedAfter)
232233
}
233234
if !managedSigner(csr.SignerName()) {
234-
log.Debug("csr unknown signer")
235-
return true
235+
return true, fmt.Sprintf("csr unknown signer: %s", csr.SignerName())
236236
}
237237
if !managedCSRNamePrefix(csr.Name()) {
238-
log.Debug("csr name not managed by CAST AI: ", csr.Name())
239-
return true
238+
return true, fmt.Sprintf("csr name not managed by CAST AI: %s", csr.Name())
240239
}
241240
if !managedCSRRequestingUser(csr.RequestingUser()) {
242-
log.Debug("csr requesting user is not managed by CAST AI")
243-
return true
241+
return true, fmt.Sprintf("csr requesting user is not managed by CAST AI: %s", csr.RequestingUser())
244242
}
245243
if !managerSubjectCommonName(csr.ParsedCertificateRequest().Subject.CommonName) {
246-
log.Debug("csr common name is not managed by CAST AI")
247-
return true
244+
return true, fmt.Sprintf("csr common name is not managed by CAST AI %s", csr.ParsedCertificateRequest().Subject.CommonName)
248245
}
249-
return false
246+
return false, ""
250247
}
251248

252249
func managedSigner(signerName string) bool {
@@ -259,7 +256,10 @@ func managedCSRNamePrefix(n string) bool {
259256
}
260257

261258
func managedCSRRequestingUser(s string) bool {
262-
return s == kubeletBootstrapRequestingUser || s == clusterControllerSAName || strings.HasPrefix(s, "system:node:")
259+
return s == kubeletBootstrapRequestingUser || // kubelet bootstrap user variation
260+
s == kubeletNodepoolBootstrapRequestingUser || // kubelet bootstrap user variation (observed initially on GKE 1.33+)
261+
s == clusterControllerSAName || // if created by CC
262+
strings.HasPrefix(s, "system:node:") // Post-bootstrap node user; usually for serving certificate
263263
}
264264

265265
func managerSubjectCommonName(commonName string) bool {

internal/actions/csr/svc_test.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ func TestCSRApprove(t *testing.T) {
7878

7979
csrResult, err := client.CertificatesV1().CertificateSigningRequests().Get(ctx, csrName, metav1.GetOptions{})
8080
r.NoError(err)
81+
r.GreaterOrEqual(len(csrResult.Status.Conditions), 1)
8182

8283
r.Equal(csrResult.Status.Conditions[0].Type, certv1.CertificateApproved)
8384
})
@@ -115,6 +116,112 @@ func TestCSRApprove(t *testing.T) {
115116
r.NoError(err)
116117
r.Len(csrResult.Status.Conditions, 0)
117118
})
119+
120+
t.Run("approves for kubelet-bootstrap user", func(t *testing.T) {
121+
r := require.New(t)
122+
t.Parallel()
123+
124+
csrName := "node-csr-123"
125+
userName := "kubelet-bootstrap"
126+
client := fake.NewClientset(getCSRv1(csrName, userName))
127+
s := NewApprovalManager(log, client)
128+
watcher := watch.NewFake()
129+
client.PrependWatchReactor("certificatesigningrequests", ktest.DefaultWatchReactor(watcher, nil))
130+
131+
ctx := context.Background()
132+
var wg sync.WaitGroup
133+
wg.Add(2)
134+
go func() {
135+
defer wg.Done()
136+
if err := s.Start(ctx); err != nil {
137+
t.Logf("failed to start approval manager: %s", err.Error())
138+
}
139+
}()
140+
go func() {
141+
defer wg.Done()
142+
watcher.Add(getCSRv1(csrName, userName))
143+
time.Sleep(100 * time.Millisecond)
144+
s.Stop()
145+
}()
146+
147+
wg.Wait()
148+
149+
csrResult, err := client.CertificatesV1().CertificateSigningRequests().Get(ctx, csrName, metav1.GetOptions{})
150+
r.NoError(err)
151+
r.GreaterOrEqual(len(csrResult.Status.Conditions), 1)
152+
153+
r.Equal(csrResult.Status.Conditions[0].Type, certv1.CertificateApproved)
154+
})
155+
156+
t.Run("approves for kubelet-nodepool-bootstrap user", func(t *testing.T) {
157+
r := require.New(t)
158+
t.Parallel()
159+
160+
csrName := "node-csr-123"
161+
userName := "kubelet-nodepool-bootstrap"
162+
client := fake.NewClientset(getCSRv1(csrName, userName))
163+
s := NewApprovalManager(log, client)
164+
watcher := watch.NewFake()
165+
client.PrependWatchReactor("certificatesigningrequests", ktest.DefaultWatchReactor(watcher, nil))
166+
167+
ctx := context.Background()
168+
var wg sync.WaitGroup
169+
wg.Add(2)
170+
go func() {
171+
defer wg.Done()
172+
if err := s.Start(ctx); err != nil {
173+
t.Logf("failed to start approval manager: %s", err.Error())
174+
}
175+
}()
176+
go func() {
177+
defer wg.Done()
178+
watcher.Add(getCSRv1(csrName, userName))
179+
time.Sleep(100 * time.Millisecond)
180+
s.Stop()
181+
}()
182+
183+
wg.Wait()
184+
185+
csrResult, err := client.CertificatesV1().CertificateSigningRequests().Get(ctx, csrName, metav1.GetOptions{})
186+
r.NoError(err)
187+
r.GreaterOrEqual(len(csrResult.Status.Conditions), 1)
188+
189+
r.Equal(csrResult.Status.Conditions[0].Type, certv1.CertificateApproved)
190+
})
191+
192+
t.Run("skips for unknown user", func(t *testing.T) {
193+
r := require.New(t)
194+
t.Parallel()
195+
196+
csrName := "node-csr-123"
197+
userName := "some-unknown-user"
198+
client := fake.NewClientset(getCSRv1(csrName, userName))
199+
s := NewApprovalManager(log, client)
200+
watcher := watch.NewFake()
201+
client.PrependWatchReactor("certificatesigningrequests", ktest.DefaultWatchReactor(watcher, nil))
202+
203+
ctx := context.Background()
204+
var wg sync.WaitGroup
205+
wg.Add(2)
206+
go func() {
207+
defer wg.Done()
208+
if err := s.Start(ctx); err != nil {
209+
t.Logf("failed to start approval manager: %s", err.Error())
210+
}
211+
}()
212+
go func() {
213+
defer wg.Done()
214+
watcher.Add(getCSRv1(csrName, userName))
215+
time.Sleep(100 * time.Millisecond)
216+
s.Stop()
217+
}()
218+
219+
wg.Wait()
220+
221+
csrResult, err := client.CertificatesV1().CertificateSigningRequests().Get(ctx, csrName, metav1.GetOptions{})
222+
r.NoError(err)
223+
r.Len(csrResult.Status.Conditions, 0)
224+
})
118225
}
119226

120227
func TestApproveCSRExponentialBackoff(t *testing.T) {

internal/actions/evict_pod_handler.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"reflect"
8+
"time"
89

910
"github.com/sirupsen/logrus"
1011
v1 "k8s.io/api/core/v1"
@@ -22,14 +23,19 @@ import (
2223

2324
func NewEvictPodHandler(log logrus.FieldLogger, clientset kubernetes.Interface) ActionHandler {
2425
return &EvictPodHandler{
25-
log: log,
26-
clientset: clientset,
26+
log: log,
27+
clientset: clientset,
28+
podEvictRetryDelay: 5 * time.Second,
29+
podsTerminationWaitRetryDelay: 10 * time.Second,
2730
}
2831
}
2932

3033
type EvictPodHandler struct {
3134
log logrus.FieldLogger
3235
clientset kubernetes.Interface
36+
37+
podEvictRetryDelay time.Duration
38+
podsTerminationWaitRetryDelay time.Duration
3339
}
3440

3541
func (h *EvictPodHandler) Handle(ctx context.Context, action *castai.ClusterAction) error {
@@ -93,9 +99,10 @@ func (h *EvictPodHandler) evictPod(ctx context.Context, log logrus.FieldLogger,
9399
return fmt.Errorf("unsupported eviction version: %s", groupVersion.String())
94100
}
95101

102+
backoff := waitext.NewConstantBackoff(h.podEvictRetryDelay)
96103
return waitext.Retry(
97104
ctx,
98-
defaultBackoff(),
105+
backoff,
99106
waitext.Forever,
100107
func(ctx context.Context) (bool, error) {
101108
err := submit(ctx)
@@ -119,9 +126,10 @@ func (h *EvictPodHandler) evictPod(ctx context.Context, log logrus.FieldLogger,
119126
}
120127

121128
func (h *EvictPodHandler) waitForPodToBeDeleted(ctx context.Context, log logrus.FieldLogger, namespace, name string) error {
129+
backoff := waitext.NewConstantBackoff(h.podsTerminationWaitRetryDelay)
122130
return waitext.Retry(
123131
ctx, // controls how long we might wait at most.
124-
defaultBackoff(),
132+
backoff,
125133
waitext.Forever,
126134
func(ctx context.Context) (bool, error) {
127135
deleted, phase, err := h.isPodDeleted(ctx, namespace, name)

internal/actions/mock/kubernetes.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/helm/mock/chart_loader.go

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/helm/mock/client.go

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/k8sversion/mock/version.go

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)