Skip to content

Commit 7f8e4a6

Browse files
authored
KUBE-996: approve CSR from "kubernetes.io/kubelet-serving" for Cast nodes (#173)
* KUBE-996: add informer for CSR from "kubernetes.io/kubelet-serving"
1 parent f8321b7 commit 7f8e4a6

File tree

9 files changed

+641
-587
lines changed

9 files changed

+641
-587
lines changed

e2e/suites/gke.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,14 @@ func (ts *gkeTestSuite) Run(ctx context.Context, t *testing.T) {
110110
},
111111
})
112112

113+
r.NoError(err, "failed to add node", err)
114+
113115
t.Cleanup(func() {
114116
if err := cleanupNode(); err != nil {
115117
ts.t.Logf("failed to cleanup node %s: %v", *node.Id, err)
116118
}
117119
})
118120

119-
r.NoError(err)
120-
121121
ts.t.Logf("node %s ready", *node.Id)
122122

123123
r.NoError(backoff.Retry(func() error {

internal/actions/approve_csr_handler.go

Lines changed: 7 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -2,168 +2,20 @@ package actions
22

33
import (
44
"context"
5-
"errors"
6-
"fmt"
7-
"reflect"
8-
"time"
95

10-
"github.com/sirupsen/logrus"
11-
"k8s.io/apimachinery/pkg/util/wait"
12-
"k8s.io/client-go/kubernetes"
13-
14-
"github.com/castai/cluster-controller/internal/actions/csr"
156
"github.com/castai/cluster-controller/internal/castai"
16-
"github.com/castai/cluster-controller/internal/waitext"
17-
)
18-
19-
const (
20-
approveCSRTimeout = 4 * time.Minute
217
)
228

23-
var _ ActionHandler = &ApproveCSRHandler{}
24-
25-
func NewApproveCSRHandler(log logrus.FieldLogger, clientset kubernetes.Interface) *ApproveCSRHandler {
26-
return &ApproveCSRHandler{
27-
log: log,
28-
clientset: clientset,
29-
initialCSRFetchTimeout: 5 * time.Minute,
30-
csrFetchInterval: 5 * time.Second,
31-
}
32-
}
33-
34-
type ApproveCSRHandler struct {
35-
log logrus.FieldLogger
36-
clientset kubernetes.Interface
37-
initialCSRFetchTimeout time.Duration
38-
csrFetchInterval time.Duration
39-
}
40-
41-
func (h *ApproveCSRHandler) Handle(ctx context.Context, action *castai.ClusterAction) error {
42-
req, ok := action.Data().(*castai.ActionApproveCSR)
43-
if !ok {
44-
return newUnexpectedTypeErr(action.Data(), req)
45-
}
46-
log := h.log.WithFields(logrus.Fields{
47-
"node_name": req.NodeName,
48-
"node_id": req.NodeID,
49-
"type": reflect.TypeOf(action.Data().(*castai.ActionApproveCSR)).String(),
50-
ActionIDLogField: action.ID,
51-
})
52-
53-
if req.AllowAutoApprove != nil {
54-
// CSR action may be used only to instruct whether to start / stop watcher responsible for auto-approving; in
55-
// this case, there is nothing more to do.
56-
if req.NodeName == "" {
57-
return nil
58-
}
59-
}
60-
61-
cert, err := h.getInitialNodeCSR(ctx, log, req.NodeName)
62-
if err != nil {
63-
return fmt.Errorf("getting initial csr: %w", err)
64-
}
65-
66-
if cert.Approved() {
67-
log.Debug("csr is already approved")
68-
return nil
69-
}
70-
71-
return h.handleWithRetry(ctx, log, cert)
72-
}
73-
74-
func (h *ApproveCSRHandler) handleWithRetry(ctx context.Context, log *logrus.Entry, cert *csr.Certificate) error {
75-
ctx, cancel := context.WithTimeout(ctx, approveCSRTimeout)
76-
defer cancel()
77-
78-
b := newApproveCSRExponentialBackoff()
79-
return waitext.Retry(
80-
ctx,
81-
b,
82-
waitext.Forever,
83-
func(ctx context.Context) (bool, error) {
84-
return true, h.handle(ctx, log, cert)
85-
},
86-
func(err error) {
87-
log.Warnf("csr approval failed, will retry: %v", err)
88-
},
89-
)
90-
}
91-
92-
func (h *ApproveCSRHandler) handle(ctx context.Context, log logrus.FieldLogger, cert *csr.Certificate) (reterr error) {
93-
// Since this new csr may be denied we need to delete it.
94-
log.Debug("deleting old csr")
95-
if err := cert.DeleteCSR(ctx, h.clientset); err != nil {
96-
return fmt.Errorf("deleting csr: %w", err)
97-
}
98-
99-
// Create a new CSR with the same request data as the original one.
100-
log.Debug("requesting new csr")
101-
newCert, err := cert.NewCSR(ctx, h.clientset)
102-
if err != nil {
103-
return fmt.Errorf("requesting new csr: %w", err)
104-
}
9+
// TODO: clean up after properly handling unknown actions, see https://castai.atlassian.net/browse/KUBE-1036.
10510

106-
// Approve new csr.
107-
log.Debug("approving new csr")
108-
resp, err := newCert.ApproveCSRCertificate(ctx, h.clientset)
109-
if err != nil {
110-
return fmt.Errorf("approving csr: %w", err)
111-
}
112-
if resp.Approved() {
113-
return nil
114-
}
11+
var _ ActionHandler = &ApproveCSRHandlerDeprecated{}
11512

116-
return errors.New("certificate signing request was not approved")
13+
func NewApproveCSRHandler() *ApproveCSRHandlerDeprecated {
14+
return &ApproveCSRHandlerDeprecated{}
11715
}
11816

119-
func (h *ApproveCSRHandler) getInitialNodeCSR(ctx context.Context, log logrus.FieldLogger, nodeName string) (*csr.Certificate, error) {
120-
log.Debug("getting initial csr")
121-
122-
ctx, cancel := context.WithTimeout(ctx, h.initialCSRFetchTimeout)
123-
defer cancel()
124-
125-
poll := func() (*csr.Certificate, error) {
126-
for {
127-
select {
128-
case <-ctx.Done():
129-
return nil, ctx.Err()
130-
case <-time.After(h.csrFetchInterval):
131-
cert, err := csr.GetCertificateByNodeName(ctx, h.clientset, nodeName)
132-
if err != nil && !errors.Is(err, csr.ErrNodeCertificateNotFound) {
133-
return nil, err
134-
}
135-
if cert != nil {
136-
return cert, nil
137-
}
138-
}
139-
}
140-
}
141-
142-
var cert *csr.Certificate
143-
var err error
144-
145-
b := waitext.DefaultExponentialBackoff()
146-
err = waitext.Retry(
147-
ctx,
148-
b,
149-
3,
150-
func(ctx context.Context) (bool, error) {
151-
cert, err = poll()
152-
if errors.Is(err, context.DeadlineExceeded) {
153-
return false, err
154-
}
155-
return true, err
156-
},
157-
func(err error) {
158-
log.Warnf("getting initial csr, will retry: %v", err)
159-
},
160-
)
161-
162-
return cert, err
163-
}
17+
type ApproveCSRHandlerDeprecated struct{}
16418

165-
func newApproveCSRExponentialBackoff() wait.Backoff {
166-
b := waitext.DefaultExponentialBackoff()
167-
b.Factor = 2
168-
return b
19+
func (h *ApproveCSRHandlerDeprecated) Handle(_ context.Context, _ *castai.ClusterAction) error {
20+
return nil
16921
}

0 commit comments

Comments
 (0)