From b36bd00674b544071b8c99f53d330604500fa499 Mon Sep 17 00:00:00 2001 From: Xavi Garcia Date: Tue, 19 May 2026 16:14:22 +0200 Subject: [PATCH] This PR updates the ClusterRegistrationToken controller to reject tokens with no TTL or TTL set to zero. It also updates how access to secrets in the cluster registration namespace is granted. Instead to grant access to *all* secrets in the cluster registration namespace when the registration is initiated it grants access to *only* the namespace intended for the cluster registering and only when then registration process is finishing. For manager-initiated registration access to the specific secret name is granted when the registration process finishes. It is similar in agent-initiated registration but in order to be able to identify the token used it now needs to receive the token name as a new helm value when deploying the agent. We need this because otherwise we can't identify which token belongs to the actual ClusterRegistration request. Refers to: https://github.com/rancher/fleet/issues/5157 Signed-off-by: Xavi Garcia --- .github/workflows/e2e-multicluster-ci.yml | 3 +- charts/fleet-agent/templates/secret.yaml | 1 + charts/fleet-agent/templates/validate.yaml | 4 + charts/fleet-agent/values.yaml | 3 + charts/fleet/templates/deployment.yaml | 2 + charts/fleet/values.yaml | 2 + internal/cmd/agent/register/register.go | 6 + .../controllers/cluster/import.go | 4 +- .../clusterregistration/controller.go | 130 +++++++++++- .../clusterregistration/controller_test.go | 191 +++++++++++++++++- .../clusterregistrationtoken/handler.go | 59 +++--- .../clusterregistrationtoken/handler_test.go | 176 ++++++++++++++++ .../clusterregistrationtoken/suite_test.go | 13 ++ .../controllers/controllers.go | 6 +- .../cmd/controller/agentmanagement/root.go | 4 +- .../cmd/controller/agentmanagement/start.go | 4 +- internal/config/config.go | 3 + .../fleet.cattle.io/v1alpha1/cluster_types.go | 5 + 18 files changed, 564 insertions(+), 52 deletions(-) create mode 100644 internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler_test.go create mode 100644 internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/suite_test.go diff --git a/.github/workflows/e2e-multicluster-ci.yml b/.github/workflows/e2e-multicluster-ci.yml index ae8f1084eb..ada104faf8 100644 --- a/.github/workflows/e2e-multicluster-ci.yml +++ b/.github/workflows/e2e-multicluster-ci.yml @@ -144,7 +144,8 @@ jobs: --set apiServerCA="$ca" \ --set apiServerURL="https://$apiServerIP:6443" \ --set clusterNamespace="fleet-default" \ - --set token="$token" + --set token="$token" \ + --set tokenName="second-token" echo "waiting for downstream cluster to be registered..." { grep -q -m 1 "1/1"; kill $!; } < <(kubectl get cluster -n fleet-default -w) diff --git a/charts/fleet-agent/templates/secret.yaml b/charts/fleet-agent/templates/secret.yaml index 4715882047..cf40ba87e9 100644 --- a/charts/fleet-agent/templates/secret.yaml +++ b/charts/fleet-agent/templates/secret.yaml @@ -3,6 +3,7 @@ data: systemRegistrationNamespace: "{{b64enc .Values.systemRegistrationNamespace}}" clusterNamespace: "{{b64enc .Values.clusterNamespace}}" token: "{{b64enc .Values.token}}" + tokenName: "{{b64enc .Values.tokenName}}" apiServerURL: "{{b64enc .Values.apiServerURL}}" apiServerCA: "{{b64enc .Values.apiServerCA}}" kind: Secret diff --git a/charts/fleet-agent/templates/validate.yaml b/charts/fleet-agent/templates/validate.yaml index 5333818183..534f7b4788 100644 --- a/charts/fleet-agent/templates/validate.yaml +++ b/charts/fleet-agent/templates/validate.yaml @@ -9,3 +9,7 @@ {{if not .Values.apiServerURL }} {{ fail "apiServerURL is required to be set, and most likely also apiServerCA" }} {{end}} + +{{if not .Values.tokenName }} +{{ fail "tokenName is required to be set to the name of the ClusterRegistrationToken used for this registration" }} +{{end}} diff --git a/charts/fleet-agent/values.yaml b/charts/fleet-agent/values.yaml index d25b5d9ad9..569a80cd12 100644 --- a/charts/fleet-agent/values.yaml +++ b/charts/fleet-agent/values.yaml @@ -28,6 +28,9 @@ noProxy: 127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.cluster.local # The cluster registration value token: "" +# The name of the ClusterRegistrationToken used for this registration +tokenName: "" + # Labels to add to the cluster upon registration only. They are not added after the fact. # labels: # foo: bar diff --git a/charts/fleet/templates/deployment.yaml b/charts/fleet/templates/deployment.yaml index 724d70d43e..dee30c316d 100644 --- a/charts/fleet/templates/deployment.yaml +++ b/charts/fleet/templates/deployment.yaml @@ -227,6 +227,8 @@ spec: - name: FLEET_AGENT_ELECTION_RENEW_DEADLINE value: {{$.Values.agent.leaderElection.renewDeadline}} {{- end }} + - name: FLEET_REGISTRATION_TOKEN_TTL_REQUIRED + value: {{ quote $.Values.controller.registrationTokenTTL.required }} {{- if $.Values.extraEnv }} {{ toYaml $.Values.extraEnv | indent 8}} {{- end }} diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml index ca2d807476..554dde084d 100644 --- a/charts/fleet/values.yaml +++ b/charts/fleet/values.yaml @@ -148,6 +148,8 @@ leaderElection: ## Fleet controller configuration controller: replicas: 1 + registrationTokenTTL: + required: false reconciler: # The number of workers that are allowed to each type of reconciler workers: diff --git a/internal/cmd/agent/register/register.go b/internal/cmd/agent/register/register.go index 5ae2e23f59..13382e4b23 100644 --- a/internal/cmd/agent/register/register.go +++ b/internal/cmd/agent/register/register.go @@ -33,6 +33,7 @@ const ( CredName = "fleet-agent" // same as AgentConfigName Kubeconfig = "kubeconfig" Token = "token" + TokenName = "tokenName" Values = "values" DeploymentNamespace = "deploymentNamespace" ClusterNamespace = "clusterNamespace" @@ -187,11 +188,16 @@ func runRegistration(ctx context.Context, k8s coreInterface, namespace string) ( } cfg.Labels["fleet.cattle.io/created-by-agent-pod"] = os.Getenv("HOSTNAME") + tokenName := string(values(secret.Data)[TokenName]) + logrus.Infof("Creating clusterregistration with id '%s' for new token", clientID) request, err := fc.Fleet().V1alpha1().ClusterRegistration().Create(&fleet.ClusterRegistration{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "request-", Namespace: ns, + Labels: map[string]string{ + fleet.RegistrationTokenLabel: tokenName, + }, }, Spec: fleet.ClusterRegistrationSpec{ ClientID: clientID, diff --git a/internal/cmd/controller/agentmanagement/controllers/cluster/import.go b/internal/cmd/controller/agentmanagement/controllers/cluster/import.go index bc5a579cdf..6720edf3cb 100644 --- a/internal/cmd/controller/agentmanagement/controllers/cluster/import.go +++ b/internal/cmd/controller/agentmanagement/controllers/cluster/import.go @@ -48,8 +48,6 @@ const ( ) var ( - ImportTokenPrefix = "import-token-" - errUnavailableAPIServerURL = errors.New("missing apiServerURL in fleet config for cluster auto registration") ) @@ -356,7 +354,7 @@ func (i *importHandler) importCluster(cluster *fleet.Cluster, status fleet.Clust setID := desiredset.GetSetID(config.AgentBootstrapConfigName, "", cluster.Spec.AgentNamespace) apply = apply.WithDynamicLookup().WithSetID(setID).WithNoDeleteGVK(fleetns.GVK()) - tokenName := names.SafeConcatName(ImportTokenPrefix + cluster.Name) + tokenName := names.SafeConcatName(config.ImportTokenPrefix + cluster.Name) token, err := i.tokens.Get(cluster.Namespace, tokenName) if err != nil { // If token doesn't exist, try to create it diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go index d14d229439..bb554c44d5 100644 --- a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go @@ -50,6 +50,7 @@ type handler struct { serviceAccountCache corecontrollers.ServiceAccountCache secretsCache corecontrollers.SecretCache secrets corecontrollers.SecretController + tokenCache fleetcontrollers.ClusterRegistrationTokenCache } func Register(ctx context.Context, @@ -61,7 +62,8 @@ func Register(ctx context.Context, role rbaccontrollers.RoleController, roleBinding rbaccontrollers.RoleBindingController, clusterRegistration fleetcontrollers.ClusterRegistrationController, - clusters fleetcontrollers.ClusterController) { + clusters fleetcontrollers.ClusterController, + clusterRegistrationToken fleetcontrollers.ClusterRegistrationTokenController) { h := &handler{ systemNamespace: systemNamespace, systemRegistrationNamespace: systemRegistrationNamespace, @@ -71,6 +73,7 @@ func Register(ctx context.Context, serviceAccountCache: serviceAccount.Cache(), secrets: secret, secretsCache: secret.Cache(), + tokenCache: clusterRegistrationToken.Cache(), } fleetcontrollers.RegisterClusterRegistrationGeneratingHandler(ctx, @@ -252,15 +255,14 @@ func (h *handler) OnChange(request *fleet.ClusterRegistration, status fleet.Clus logrus.Infof("Cluster registration request '%s/%s' granted, creating cluster, request service account, registration secret", request.Namespace, request.Name) - return []runtime.Object{ + objs := []runtime.Object{ // the registration secret c-clientID-clientRandom secret, // Update the existing service account 'request-UID' in the // cluster namespace, e.g. 'cluster-fleet-default-NAME-ID' requestSA(saName, cluster, request), // Add role bindings to manage bundledeployments and contents, - // the agent could previously only access secrets in - // 'cattle-fleet-clusters-system' and clusterregistrations in + // the agent could previously only access clusterregistrations in // the cluster registration namespace (e.g. 'fleet-default'). See // clusterregistrationtoken controller for details. &rbacv1.Role{ @@ -346,7 +348,125 @@ func (h *handler) OnChange(request *fleet.ClusterRegistration, status fleet.Clus Name: resources.ContentClusterRole, }, }, - }, status, nil + } + + // For manager-initiated imports, grant the import service account access + // to the specific credential secret it needs to read. The import token + // only exists in manager-initiated flows; agent-initiated flows skip this. + importTokenName := names.SafeConcatName(config.ImportTokenPrefix + cluster.Name) + importToken, err := h.tokenCache.Get(request.Namespace, importTokenName) + if err != nil && !apierrors.IsNotFound(err) { + return nil, status, fmt.Errorf("failed to look up import token %s/%s: %w", request.Namespace, importTokenName, err) + } + if err == nil { + // Grant access scoped to the single credential secret for this cluster. + importSAName := names.SafeConcatName(importTokenName, string(importToken.UID)) + credRoleName := names.SafeConcatName(importSAName, "creds") + objs = append(objs, + &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Rules: []rbacv1.PolicyRule{ + { + Verbs: []string{"get"}, + APIGroups: []string{""}, + Resources: []string{"secrets"}, + ResourceNames: []string{secret.Name}, + }, + }, + }, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: importSAName, + Namespace: request.Namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: credRoleName, + }, + }, + ) + } else { + // Agent-initiated: no import-token- exists because the cluster + // is auto-created at registration time. The ClusterRegistration carries + // a label identifying the token that was used, so we can grant only + // that token's SA scoped read access to the credential secret. + tokenName := request.Labels[fleet.RegistrationTokenLabel] + if tokenName == "" { + logrus.Warnf("ClusterRegistration '%s/%s' has no %s label; cannot grant credential secret access for agent-initiated registration", + request.Namespace, request.Name, fleet.RegistrationTokenLabel) + } else { + agentToken, tokenErr := h.tokenCache.Get(request.Namespace, tokenName) + if tokenErr != nil && !apierrors.IsNotFound(tokenErr) { + return nil, status, fmt.Errorf("looking up registration token %s/%s: %w", request.Namespace, tokenName, tokenErr) + } + if apierrors.IsNotFound(tokenErr) { + logrus.Warnf("ClusterRegistration '%s/%s': registration token %s/%s not found (may have expired), skipping credential secret access grant", + request.Namespace, request.Name, request.Namespace, tokenName) + return objs, status, nil + } + agentSAName := names.SafeConcatName(tokenName, string(agentToken.UID)) + credRoleName := names.SafeConcatName(request.Name, "creds") + objs = append(objs, + &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Rules: []rbacv1.PolicyRule{ + { + Verbs: []string{"get"}, + APIGroups: []string{""}, + Resources: []string{"secrets"}, + ResourceNames: []string{secret.Name}, + }, + }, + }, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: agentSAName, + Namespace: request.Namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: credRoleName, + }, + }, + ) + } + } + + return objs, status, nil } // shouldDelete returns true for any other cluster registration with the same clientID, but different random and older creation timestamp diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go index b3dc0bac1c..2499f94f04 100644 --- a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go @@ -7,10 +7,13 @@ import ( "github.com/rancher/wrangler/v3/pkg/generic/fake" "go.uber.org/mock/gomock" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "github.com/rancher/fleet/internal/names" fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" . "github.com/onsi/ginkgo/v2" @@ -30,6 +33,7 @@ var _ = Describe("ClusterRegistration OnChange", func() { clusterClient *fake.MockClientInterface[*fleet.Cluster, *fleet.ClusterList] clusterRegistrationController *fake.MockControllerInterface[*fleet.ClusterRegistration, *fleet.ClusterRegistrationList] clusterCache *fake.MockCacheInterface[*fleet.Cluster] + tokenCache *fake.MockCacheInterface[*fleet.ClusterRegistrationToken] h *handler notFound = apierrors.NewNotFound(schema.GroupResource{}, "") anError = errors.New("an error occurred") @@ -43,6 +47,7 @@ var _ = Describe("ClusterRegistration OnChange", func() { clusterClient = fake.NewMockClientInterface[*fleet.Cluster, *fleet.ClusterList](ctrl) clusterRegistrationController = fake.NewMockControllerInterface[*fleet.ClusterRegistration, *fleet.ClusterRegistrationList](ctrl) clusterCache = fake.NewMockCacheInterface[*fleet.Cluster](ctrl) + tokenCache = fake.NewMockCacheInterface[*fleet.ClusterRegistrationToken](ctrl) h = &handler{ systemNamespace: "fleet-system", @@ -53,6 +58,7 @@ var _ = Describe("ClusterRegistration OnChange", func() { secretsCache: secretCache, secrets: secretController, serviceAccountCache: saCache, + tokenCache: tokenCache, } }) @@ -232,17 +238,198 @@ var _ = Describe("ClusterRegistration OnChange", func() { clusterRegistrationController.EXPECT().Update(gomock.Any()).Return(&fleet.ClusterRegistration{}, nil) }) - Context("grants registration, cleans up and creates objects", func() { + Context("agent-initiated flow: no import token found, RegistrationTokenLabel is empty", func() { BeforeEach(func() { + tokenCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, notFound) }) - It("creates a new secret", func() { + It("grants registration and creates 6 objects without credential RBAC", func() { objs, newStatus, err := h.OnChange(request, status) Expect(err).ToNot(HaveOccurred()) Expect(objs).To(HaveLen(6)) Expect(newStatus.Granted).To(BeTrue()) }) }) + + Context("manager-initiated flow: import token exists", func() { + BeforeEach(func() { + importToken := &fleet.ClusterRegistrationToken{ + ObjectMeta: metav1.ObjectMeta{ + Name: "import-token-cluster", + Namespace: "fleet-default", + UID: types.UID("test-token-uid"), + }, + } + tokenCache.EXPECT().Get(gomock.Any(), "import-token-cluster").Return(importToken, nil) + }) + + It("grants registration and creates 8 objects including scoped credential RBAC", func() { + objs, newStatus, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(8)) + Expect(newStatus.Granted).To(BeTrue()) + + // Locate the Role created in systemRegistrationNamespace + // to verify access is scoped to a single secret, not wildcard. + var credRole *rbacv1.Role + for _, obj := range objs { + if role, ok := obj.(*rbacv1.Role); ok && role.Namespace == h.systemRegistrationNamespace { + credRole = role + break + } + } + Expect(credRole).NotTo(BeNil(), "expected a credential Role in %s", h.systemRegistrationNamespace) + Expect(credRole.Rules).To(HaveLen(1)) + rule := credRole.Rules[0] + Expect(rule.Resources).To(ConsistOf("secrets")) + // ResourceNames MUST be non-empty + Expect(rule.ResourceNames).NotTo(BeEmpty(), "credential Role must restrict access to a specific secret, not all secrets") + Expect(rule.Verbs).To(ConsistOf("get")) + }) + + It("creates a scoped RoleBinding in systemRegistrationNamespace", func() { + objs, _, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + + var credRoleBinding *rbacv1.RoleBinding + for _, obj := range objs { + if rb, ok := obj.(*rbacv1.RoleBinding); ok && rb.Namespace == h.systemRegistrationNamespace { + credRoleBinding = rb + break + } + } + Expect(credRoleBinding).NotTo(BeNil(), "expected a credential RoleBinding in %s", h.systemRegistrationNamespace) + Expect(credRoleBinding.Subjects).To(HaveLen(1)) + Expect(credRoleBinding.Subjects[0].Kind).To(Equal("ServiceAccount")) + }) + }) + + Context("tokenCache returns an unexpected error", func() { + BeforeEach(func() { + tokenCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, anError) + }) + + It("propagates the error", func() { + objs, _, err := h.OnChange(request, status) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to look up import token")) + Expect(objs).To(BeNil()) + }) + }) + }) + + When("agent-initiated: RegistrationTokenLabel is set and token exists", func() { + var agentToken *fleet.ClusterRegistrationToken + + BeforeEach(func() { + cluster.Status = fleet.ClusterStatus{Namespace: "fleet-default"} + request.Labels = map[string]string{ + fleet.RegistrationTokenLabel: "my-token", + } + + sa = &corev1.ServiceAccount{} + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(sa, nil) + + secret := &corev1.Secret{Data: map[string][]byte{"token": []byte("secrettoken")}} + secretController.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(secret, nil) + + clusterRegistrationController.EXPECT().List(gomock.Any(), gomock.Any()).Return(&fleet.ClusterRegistrationList{}, nil) + clusterRegistrationController.EXPECT().Update(gomock.Any()).DoAndReturn( + func(cr *fleet.ClusterRegistration) (*fleet.ClusterRegistration, error) { return cr, nil }, + ) + + agentToken = &fleet.ClusterRegistrationToken{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-token", + Namespace: "fleet-default", + UID: types.UID("agent-token-uid"), + }, + } + tokenCache.EXPECT().Get(gomock.Any(), "import-token-cluster").Return(nil, notFound) + tokenCache.EXPECT().Get(gomock.Any(), "my-token").Return(agentToken, nil) + }) + + It("grants registration and creates 8 objects including scoped credential RBAC", func() { + objs, newStatus, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(8)) + Expect(newStatus.Granted).To(BeTrue()) + + var credRole *rbacv1.Role + for _, obj := range objs { + if role, ok := obj.(*rbacv1.Role); ok && role.Namespace == h.systemRegistrationNamespace { + credRole = role + break + } + } + Expect(credRole).NotTo(BeNil(), "expected a credential Role in %s", h.systemRegistrationNamespace) + Expect(credRole.Rules).To(HaveLen(1)) + Expect(credRole.Rules[0].Resources).To(ConsistOf("secrets")) + Expect(credRole.Rules[0].ResourceNames).NotTo(BeEmpty(), "credential Role must restrict access to a specific secret") + Expect(credRole.Rules[0].Verbs).To(ConsistOf("get")) + }) + + It("binds the credential RoleBinding to the agent token's service account", func() { + objs, _, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + + var credRoleBinding *rbacv1.RoleBinding + for _, obj := range objs { + if rb, ok := obj.(*rbacv1.RoleBinding); ok && rb.Namespace == h.systemRegistrationNamespace { + credRoleBinding = rb + break + } + } + Expect(credRoleBinding).NotTo(BeNil(), "expected a credential RoleBinding in %s", h.systemRegistrationNamespace) + Expect(credRoleBinding.Subjects).To(HaveLen(1)) + subject := credRoleBinding.Subjects[0] + Expect(subject.Kind).To(Equal("ServiceAccount")) + Expect(subject.Name).To(Equal(names.SafeConcatName("my-token", string(agentToken.UID)))) + Expect(subject.Namespace).To(Equal("fleet-default")) + }) + }) + + When("agent-initiated: RegistrationTokenLabel is set but token has expired", func() { + BeforeEach(func() { + cluster.Status = fleet.ClusterStatus{Namespace: "fleet-default"} + request.Labels = map[string]string{ + fleet.RegistrationTokenLabel: "my-token", + } + + sa = &corev1.ServiceAccount{} + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(sa, nil) + + secret := &corev1.Secret{Data: map[string][]byte{"token": []byte("secrettoken")}} + secretController.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(secret, nil) + + clusterRegistrationController.EXPECT().List(gomock.Any(), gomock.Any()).Return(&fleet.ClusterRegistrationList{}, nil) + clusterRegistrationController.EXPECT().Update(gomock.Any()).DoAndReturn( + func(cr *fleet.ClusterRegistration) (*fleet.ClusterRegistration, error) { return cr, nil }, + ) + + tokenCache.EXPECT().Get(gomock.Any(), "import-token-cluster").Return(nil, notFound) + tokenCache.EXPECT().Get(gomock.Any(), "my-token").Return(nil, notFound) + }) + + It("grants registration with base objects only and no credential RBAC, without error", func() { + objs, newStatus, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + Expect(newStatus.Granted).To(BeTrue()) + Expect(objs).To(HaveLen(6)) + + for _, obj := range objs { + if m, ok := obj.(metav1.Object); ok { + if _, isRole := obj.(*rbacv1.Role); isRole { + Expect(m.GetNamespace()).NotTo(Equal(h.systemRegistrationNamespace), + "no credential Role should be created when the registration token has expired") + } + if _, isRB := obj.(*rbacv1.RoleBinding); isRB { + Expect(m.GetNamespace()).NotTo(Equal(h.systemRegistrationNamespace), + "no credential RoleBinding should be created when the registration token has expired") + } + } + } + }) }) }) }) diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go index 344f0db02a..cd221de5c0 100644 --- a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go @@ -5,6 +5,7 @@ package clusterregistrationtoken import ( "context" + "fmt" "time" "github.com/sirupsen/logrus" @@ -30,6 +31,7 @@ import ( type handler struct { systemNamespace string systemRegistrationNamespace string + enforceTTL bool clusterRegistrationTokens fleetcontrollers.ClusterRegistrationTokenController serviceAccountCache corecontrollers.ServiceAccountCache secretsCache corecontrollers.SecretCache @@ -39,6 +41,7 @@ type handler struct { func Register(ctx context.Context, systemNamespace string, systemRegistrationNamespace string, + enforceTTL bool, apply apply.Apply, clusterGroupToken fleetcontrollers.ClusterRegistrationTokenController, serviceAccounts corecontrollers.ServiceAccountController, @@ -48,6 +51,7 @@ func Register(ctx context.Context, h := &handler{ systemNamespace: systemNamespace, systemRegistrationNamespace: systemRegistrationNamespace, + enforceTTL: enforceTTL, clusterRegistrationTokens: clusterGroupToken, serviceAccountCache: serviceAccounts.Cache(), secretsCache: secretsCache, @@ -68,8 +72,20 @@ func Register(ctx context.Context, } func (h *handler) OnChange(token *fleet.ClusterRegistrationToken, status fleet.ClusterRegistrationTokenStatus) ([]runtime.Object, fleet.ClusterRegistrationTokenStatus, error) { - if gone, err := h.deleteExpired(token); err != nil || gone { - return nil, status, nil //nolint:nilerr // intentionally ignoring error to avoid retry loops on deletion failures + if h.enforceTTL && (token.Spec.TTL == nil || token.Spec.TTL.Duration <= 0) { + logrus.Warnf("ClusterRegistrationToken '%s/%s' has no TTL, deleting", token.Namespace, token.Name) + if err := h.clusterRegistrationTokens.Delete(token.Namespace, token.Name, nil); err != nil && !apierror.IsNotFound(err) { + return nil, status, fmt.Errorf("deleting ClusterRegistrationToken %s/%s with no TTL: %w", token.Namespace, token.Name, err) + } + logrus.Warnf("ClusterRegistrationToken %s/%s rejected: TTL is required", token.Namespace, token.Name) + + return nil, status, nil + } + + if token.Spec.TTL != nil && token.Spec.TTL.Duration > 0 { + if gone, err := h.deleteExpired(token); err != nil || gone { + return nil, status, nil //nolint:nilerr // intentionally ignoring error to avoid retry loops on deletion failures + } } logrus.Debugf("Cluster registration token '%s/%s', creating import service account, roles and secret", token.Namespace, token.Name) @@ -130,9 +146,10 @@ func (h *handler) OnChange(token *fleet.ClusterRegistrationToken, status fleet.C // Add service account, e.g.: import-token-local in the system // registration namespace. This account is used during registration to - // access secrets in the system registration namespace - // 'cattle-fleet-clusters-system' and clusterregistrations in the - // cluster registration namespace (e.g. 'fleet-default'). + // create clusterregistrations in the cluster registration namespace + // (e.g. 'fleet-default'). Access to secrets in the system registration + // namespace 'cattle-fleet-clusters-system' is granted separately by the + // clusterregistration controller, scoped to the specific credential secret. return append([]runtime.Object{ &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ @@ -180,37 +197,6 @@ func (h *handler) OnChange(token *fleet.ClusterRegistrationToken, status fleet.C Name: names.SafeConcatName(saName, "role"), }, }, - &rbacv1.Role{ - ObjectMeta: metav1.ObjectMeta{ - Name: names.SafeConcatName(saName, "creds"), - Namespace: h.systemRegistrationNamespace, - }, - Rules: []rbacv1.PolicyRule{ - { - Verbs: []string{"get"}, - APIGroups: []string{""}, - Resources: []string{"secrets"}, - }, - }, - }, - &rbacv1.RoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: names.SafeConcatName(saName, "creds"), - Namespace: h.systemRegistrationNamespace, - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Name: saName, - Namespace: token.Namespace, - }, - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: rbacv1.GroupName, - Kind: "Role", - Name: names.SafeConcatName(saName, "creds"), - }, - }, }, secrets...), status, nil } @@ -234,6 +220,7 @@ func (h *handler) clusterRegistrationSecret(token *fleet.ClusterRegistrationToke config.APIServerCAKey: string(config.Get().APIServerCA), "token": string(secret.Data["token"]), // from service account "systemRegistrationNamespace": h.systemRegistrationNamespace, + "tokenName": token.Name, } if h.systemNamespace != config.DefaultNamespace { diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler_test.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler_test.go new file mode 100644 index 0000000000..acced62997 --- /dev/null +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler_test.go @@ -0,0 +1,176 @@ +package clusterregistrationtoken + +import ( + "errors" + "time" + + "github.com/rancher/wrangler/v3/pkg/generic/fake" + "go.uber.org/mock/gomock" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ClusterRegistrationToken OnChange", func() { + const ( + systemNamespace = "fleet-system" + systemRegistrationNamespace = "cattle-fleet-clusters-system" + ) + + var ( + token *fleet.ClusterRegistrationToken + status fleet.ClusterRegistrationTokenStatus + saCache *fake.MockCacheInterface[*corev1.ServiceAccount] + tokenController *fake.MockControllerInterface[*fleet.ClusterRegistrationToken, *fleet.ClusterRegistrationTokenList] + h *handler + notFound = apierrors.NewNotFound(schema.GroupResource{}, "") + ) + + BeforeEach(func() { + ctrl := gomock.NewController(GinkgoT()) + saCache = fake.NewMockCacheInterface[*corev1.ServiceAccount](ctrl) + tokenController = fake.NewMockControllerInterface[*fleet.ClusterRegistrationToken, *fleet.ClusterRegistrationTokenList](ctrl) + + h = &handler{ + systemNamespace: systemNamespace, + systemRegistrationNamespace: systemRegistrationNamespace, + serviceAccountCache: saCache, + clusterRegistrationTokens: tokenController, + } + + token = &fleet.ClusterRegistrationToken{ + ObjectMeta: metav1.ObjectMeta{ + Name: "import-token-local", + Namespace: "fleet-default", + CreationTimestamp: metav1.Now(), + }, + Spec: fleet.ClusterRegistrationTokenSpec{ + TTL: &metav1.Duration{Duration: 24 * time.Hour}, + }, + } + status = fleet.ClusterRegistrationTokenStatus{} + }) + + Describe("no broad secret-read Role is granted", func() { + BeforeEach(func() { + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, notFound) + // deleteExpired enqueues a re-check when the TTL has not yet elapsed. + tokenController.EXPECT().EnqueueAfter(gomock.Any(), gomock.Any(), gomock.Any()) + }) + + It("does not return a Role that grants access to all secrets in systemRegistrationNamespace", func() { + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + + for _, obj := range objs { + role, ok := obj.(*rbacv1.Role) + if !ok || role.Namespace != systemRegistrationNamespace { + continue + } + for _, rule := range role.Rules { + for _, resource := range rule.Resources { + if resource == "secrets" { + Expect(rule.ResourceNames).NotTo(BeEmpty(), + "found a Role in %s granting access to all secrets", + systemRegistrationNamespace) + } + } + } + } + }) + + It("returns exactly 3 objects: ServiceAccount, Role for clusterregistrations, and RoleBinding", func() { + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(3)) + + kinds := map[string]int{} + for _, obj := range objs { + switch obj.(type) { + case *corev1.ServiceAccount: + kinds["ServiceAccount"]++ + case *rbacv1.Role: + kinds["Role"]++ + case *rbacv1.RoleBinding: + kinds["RoleBinding"]++ + } + } + Expect(kinds["ServiceAccount"]).To(Equal(1)) + Expect(kinds["Role"]).To(Equal(1)) + Expect(kinds["RoleBinding"]).To(Equal(1)) + }) + + It("does not create any object in systemRegistrationNamespace", func() { + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + + for _, obj := range objs { + if m, ok := obj.(metav1.Object); ok { + Expect(m.GetNamespace()).NotTo(Equal(systemRegistrationNamespace), + "handler must not create objects in %s; scoped RBAC is the clusterregistration controller's responsibility", + systemRegistrationNamespace) + } + } + }) + }) + + Describe("tokens with no TTL are rejected when enforceTTL is enabled", func() { + BeforeEach(func() { + h.enforceTTL = true + token.Spec.TTL = nil + }) + + It("deletes the token and returns no error (no retry)", func() { + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(nil) + + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(BeNil()) + }) + + It("returns no error even when deletion fails with NotFound (already gone)", func() { + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(notFound) + + _, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + }) + + It("returns a deletion error when the API call fails", func() { + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(errors.New("server unavailable")) + + _, _, err := h.OnChange(token, status) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("deleting ClusterRegistrationToken")) + }) + + It("also rejects a token whose TTL duration is zero, without returning an error", func() { + token.Spec.TTL = &metav1.Duration{Duration: 0} + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(nil) + + _, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + }) + }) + + Describe("tokens with no TTL are accepted when enforceTTL is disabled", func() { + BeforeEach(func() { + h.enforceTTL = false + token.Spec.TTL = nil + }) + + It("does not delete or reject the token", func() { + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, notFound) + + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(3)) + }) + }) +}) diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/suite_test.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/suite_test.go new file mode 100644 index 0000000000..866759d823 --- /dev/null +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/suite_test.go @@ -0,0 +1,13 @@ +package clusterregistrationtoken + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestFleet(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "ClusterRegistrationToken Handler Suite") +} diff --git a/internal/cmd/controller/agentmanagement/controllers/controllers.go b/internal/cmd/controller/agentmanagement/controllers/controllers.go index b930ab57eb..1500d0fb54 100644 --- a/internal/cmd/controller/agentmanagement/controllers/controllers.go +++ b/internal/cmd/controller/agentmanagement/controllers/controllers.go @@ -55,7 +55,7 @@ func (a *AppContext) Start(ctx context.Context) error { return start.All(ctx, 50, a.starters...) } -func Register(ctx context.Context, appCtx *AppContext, systemNamespace string, disableBootstrap bool) error { +func Register(ctx context.Context, appCtx *AppContext, systemNamespace string, disableBootstrap bool, enforceTTL bool) error { systemRegistrationNamespace := fleetns.SystemRegistrationNamespace(systemNamespace) // config should be registered first to ensure the global @@ -120,11 +120,13 @@ func Register(ctx context.Context, appCtx *AppContext, systemNamespace string, d appCtx.RBAC.Role(), appCtx.RBAC.RoleBinding(), appCtx.ClusterRegistration(), - appCtx.Cluster()) + appCtx.Cluster(), + appCtx.ClusterRegistrationToken()) clusterregistrationtoken.Register(ctx, systemNamespace, systemRegistrationNamespace, + enforceTTL, appCtx.Apply.WithCacheTypes( appCtx.Core.Secret(), appCtx.Core.ServiceAccount(), diff --git a/internal/cmd/controller/agentmanagement/root.go b/internal/cmd/controller/agentmanagement/root.go index 753059fa1c..49694d692d 100644 --- a/internal/cmd/controller/agentmanagement/root.go +++ b/internal/cmd/controller/agentmanagement/root.go @@ -45,7 +45,9 @@ func (a *AgentManagement) Run(cmd *cobra.Command, args []string) error { if a.Namespace == "" { return errors.New("--namespace or env NAMESPACE is required to be set") } - return start(cmd.Context(), a.Kubeconfig, a.Namespace, a.DisableBootstrap) + enforceTTL, _ := strconv.ParseBool(os.Getenv("FLEET_REGISTRATION_TOKEN_TTL_REQUIRED")) + + return start(cmd.Context(), a.Kubeconfig, a.Namespace, a.DisableBootstrap, enforceTTL) } func App() *cobra.Command { diff --git a/internal/cmd/controller/agentmanagement/start.go b/internal/cmd/controller/agentmanagement/start.go index 48eebbb12e..d66a1ea4b1 100644 --- a/internal/cmd/controller/agentmanagement/start.go +++ b/internal/cmd/controller/agentmanagement/start.go @@ -18,7 +18,7 @@ import ( "k8s.io/client-go/rest" ) -func start(ctx context.Context, kubeConfig, namespace string, disableBootstrap bool) error { +func start(ctx context.Context, kubeConfig, namespace string, disableBootstrap bool, enforceTTL bool) error { clientConfig := kubeconfig.GetNonInteractiveClientConfig(kubeConfig) kc, err := clientConfig.ClientConfig() if err != nil { @@ -51,7 +51,7 @@ func start(ctx context.Context, kubeConfig, namespace string, disableBootstrap b if err != nil { logrus.Fatal(err) } - if err := controllers.Register(ctx, appCtx, namespace, disableBootstrap); err != nil { + if err := controllers.Register(ctx, appCtx, namespace, disableBootstrap, enforceTTL); err != nil { logrus.Fatal(err) } if err := appCtx.Start(ctx); err != nil { diff --git a/internal/config/config.go b/internal/config/config.go index 7256bf536c..abad38a649 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -28,6 +28,9 @@ const ( // contains the controller and agent DefaultNamespace = "cattle-fleet-system" LegacyDefaultNamespace = "fleet-system" + // ImportTokenPrefix is the prefix for ClusterRegistrationToken names + // created during manager-initiated cluster imports. + ImportTokenPrefix = "import-token-" // ImportTokenSecretValuesKey is the key in the import token secret, // which contains the values for cluster registration. ImportTokenSecretValuesKey = "values" diff --git a/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go b/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go index 8160ea7d1e..5635243cb2 100644 --- a/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go +++ b/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go @@ -32,6 +32,11 @@ var ( // ClusterRegistrationTokenAnnotation is the namespace of the // clusterregistration, e.g. "fleet-local". ClusterRegistrationNamespaceAnnotation = "fleet.cattle.io/cluster-registration-namespace" + // RegistrationTokenLabel is set on a ClusterRegistration to record which + // ClusterRegistrationToken was used. The clusterregistration controller + // reads this label to grant only that token's SA scoped read access to + // the resulting credential secret. + RegistrationTokenLabel = "fleet.cattle.io/registration-token" // ManagedLabel is used for clean up. Cluster namespaces and other // resources with this label will be cleaned up. Used in Rancher to // identify fleet namespaces.