diff --git a/.github/workflows/e2e-multicluster-ci.yml b/.github/workflows/e2e-multicluster-ci.yml index ae8f1084eb..ada104faf8 100644 --- a/.github/workflows/e2e-multicluster-ci.yml +++ b/.github/workflows/e2e-multicluster-ci.yml @@ -144,7 +144,8 @@ jobs: --set apiServerCA="$ca" \ --set apiServerURL="https://$apiServerIP:6443" \ --set clusterNamespace="fleet-default" \ - --set token="$token" + --set token="$token" \ + --set tokenName="second-token" echo "waiting for downstream cluster to be registered..." { grep -q -m 1 "1/1"; kill $!; } < <(kubectl get cluster -n fleet-default -w) diff --git a/charts/fleet-agent/templates/secret.yaml b/charts/fleet-agent/templates/secret.yaml index 4715882047..cf40ba87e9 100644 --- a/charts/fleet-agent/templates/secret.yaml +++ b/charts/fleet-agent/templates/secret.yaml @@ -3,6 +3,7 @@ data: systemRegistrationNamespace: "{{b64enc .Values.systemRegistrationNamespace}}" clusterNamespace: "{{b64enc .Values.clusterNamespace}}" token: "{{b64enc .Values.token}}" + tokenName: "{{b64enc .Values.tokenName}}" apiServerURL: "{{b64enc .Values.apiServerURL}}" apiServerCA: "{{b64enc .Values.apiServerCA}}" kind: Secret diff --git a/charts/fleet-agent/templates/validate.yaml b/charts/fleet-agent/templates/validate.yaml index 5333818183..534f7b4788 100644 --- a/charts/fleet-agent/templates/validate.yaml +++ b/charts/fleet-agent/templates/validate.yaml @@ -9,3 +9,7 @@ {{if not .Values.apiServerURL }} {{ fail "apiServerURL is required to be set, and most likely also apiServerCA" }} {{end}} + +{{if not .Values.tokenName }} +{{ fail "tokenName is required to be set to the name of the ClusterRegistrationToken used for this registration" }} +{{end}} diff --git a/charts/fleet-agent/values.yaml b/charts/fleet-agent/values.yaml index d25b5d9ad9..569a80cd12 100644 --- a/charts/fleet-agent/values.yaml +++ b/charts/fleet-agent/values.yaml @@ -28,6 +28,9 @@ noProxy: 127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.cluster.local # The cluster registration value token: "" +# The name of the ClusterRegistrationToken used for this registration +tokenName: "" + # Labels to add to the cluster upon registration only. They are not added after the fact. # labels: # foo: bar diff --git a/charts/fleet/templates/deployment.yaml b/charts/fleet/templates/deployment.yaml index 724d70d43e..dee30c316d 100644 --- a/charts/fleet/templates/deployment.yaml +++ b/charts/fleet/templates/deployment.yaml @@ -227,6 +227,8 @@ spec: - name: FLEET_AGENT_ELECTION_RENEW_DEADLINE value: {{$.Values.agent.leaderElection.renewDeadline}} {{- end }} + - name: FLEET_REGISTRATION_TOKEN_TTL_REQUIRED + value: {{ quote $.Values.controller.registrationTokenTTL.required }} {{- if $.Values.extraEnv }} {{ toYaml $.Values.extraEnv | indent 8}} {{- end }} diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml index ca2d807476..554dde084d 100644 --- a/charts/fleet/values.yaml +++ b/charts/fleet/values.yaml @@ -148,6 +148,8 @@ leaderElection: ## Fleet controller configuration controller: replicas: 1 + registrationTokenTTL: + required: false reconciler: # The number of workers that are allowed to each type of reconciler workers: diff --git a/internal/cmd/agent/register/register.go b/internal/cmd/agent/register/register.go index 5ae2e23f59..13382e4b23 100644 --- a/internal/cmd/agent/register/register.go +++ b/internal/cmd/agent/register/register.go @@ -33,6 +33,7 @@ const ( CredName = "fleet-agent" // same as AgentConfigName Kubeconfig = "kubeconfig" Token = "token" + TokenName = "tokenName" Values = "values" DeploymentNamespace = "deploymentNamespace" ClusterNamespace = "clusterNamespace" @@ -187,11 +188,16 @@ func runRegistration(ctx context.Context, k8s coreInterface, namespace string) ( } cfg.Labels["fleet.cattle.io/created-by-agent-pod"] = os.Getenv("HOSTNAME") + tokenName := string(values(secret.Data)[TokenName]) + logrus.Infof("Creating clusterregistration with id '%s' for new token", clientID) request, err := fc.Fleet().V1alpha1().ClusterRegistration().Create(&fleet.ClusterRegistration{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "request-", Namespace: ns, + Labels: map[string]string{ + fleet.RegistrationTokenLabel: tokenName, + }, }, Spec: fleet.ClusterRegistrationSpec{ ClientID: clientID, diff --git a/internal/cmd/controller/agentmanagement/controllers/cluster/import.go b/internal/cmd/controller/agentmanagement/controllers/cluster/import.go index bc5a579cdf..6720edf3cb 100644 --- a/internal/cmd/controller/agentmanagement/controllers/cluster/import.go +++ b/internal/cmd/controller/agentmanagement/controllers/cluster/import.go @@ -48,8 +48,6 @@ const ( ) var ( - ImportTokenPrefix = "import-token-" - errUnavailableAPIServerURL = errors.New("missing apiServerURL in fleet config for cluster auto registration") ) @@ -356,7 +354,7 @@ func (i *importHandler) importCluster(cluster *fleet.Cluster, status fleet.Clust setID := desiredset.GetSetID(config.AgentBootstrapConfigName, "", cluster.Spec.AgentNamespace) apply = apply.WithDynamicLookup().WithSetID(setID).WithNoDeleteGVK(fleetns.GVK()) - tokenName := names.SafeConcatName(ImportTokenPrefix + cluster.Name) + tokenName := names.SafeConcatName(config.ImportTokenPrefix + cluster.Name) token, err := i.tokens.Get(cluster.Namespace, tokenName) if err != nil { // If token doesn't exist, try to create it diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go index d14d229439..bb554c44d5 100644 --- a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller.go @@ -50,6 +50,7 @@ type handler struct { serviceAccountCache corecontrollers.ServiceAccountCache secretsCache corecontrollers.SecretCache secrets corecontrollers.SecretController + tokenCache fleetcontrollers.ClusterRegistrationTokenCache } func Register(ctx context.Context, @@ -61,7 +62,8 @@ func Register(ctx context.Context, role rbaccontrollers.RoleController, roleBinding rbaccontrollers.RoleBindingController, clusterRegistration fleetcontrollers.ClusterRegistrationController, - clusters fleetcontrollers.ClusterController) { + clusters fleetcontrollers.ClusterController, + clusterRegistrationToken fleetcontrollers.ClusterRegistrationTokenController) { h := &handler{ systemNamespace: systemNamespace, systemRegistrationNamespace: systemRegistrationNamespace, @@ -71,6 +73,7 @@ func Register(ctx context.Context, serviceAccountCache: serviceAccount.Cache(), secrets: secret, secretsCache: secret.Cache(), + tokenCache: clusterRegistrationToken.Cache(), } fleetcontrollers.RegisterClusterRegistrationGeneratingHandler(ctx, @@ -252,15 +255,14 @@ func (h *handler) OnChange(request *fleet.ClusterRegistration, status fleet.Clus logrus.Infof("Cluster registration request '%s/%s' granted, creating cluster, request service account, registration secret", request.Namespace, request.Name) - return []runtime.Object{ + objs := []runtime.Object{ // the registration secret c-clientID-clientRandom secret, // Update the existing service account 'request-UID' in the // cluster namespace, e.g. 'cluster-fleet-default-NAME-ID' requestSA(saName, cluster, request), // Add role bindings to manage bundledeployments and contents, - // the agent could previously only access secrets in - // 'cattle-fleet-clusters-system' and clusterregistrations in + // the agent could previously only access clusterregistrations in // the cluster registration namespace (e.g. 'fleet-default'). See // clusterregistrationtoken controller for details. &rbacv1.Role{ @@ -346,7 +348,125 @@ func (h *handler) OnChange(request *fleet.ClusterRegistration, status fleet.Clus Name: resources.ContentClusterRole, }, }, - }, status, nil + } + + // For manager-initiated imports, grant the import service account access + // to the specific credential secret it needs to read. The import token + // only exists in manager-initiated flows; agent-initiated flows skip this. + importTokenName := names.SafeConcatName(config.ImportTokenPrefix + cluster.Name) + importToken, err := h.tokenCache.Get(request.Namespace, importTokenName) + if err != nil && !apierrors.IsNotFound(err) { + return nil, status, fmt.Errorf("failed to look up import token %s/%s: %w", request.Namespace, importTokenName, err) + } + if err == nil { + // Grant access scoped to the single credential secret for this cluster. + importSAName := names.SafeConcatName(importTokenName, string(importToken.UID)) + credRoleName := names.SafeConcatName(importSAName, "creds") + objs = append(objs, + &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Rules: []rbacv1.PolicyRule{ + { + Verbs: []string{"get"}, + APIGroups: []string{""}, + Resources: []string{"secrets"}, + ResourceNames: []string{secret.Name}, + }, + }, + }, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: importSAName, + Namespace: request.Namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: credRoleName, + }, + }, + ) + } else { + // Agent-initiated: no import-token- exists because the cluster + // is auto-created at registration time. The ClusterRegistration carries + // a label identifying the token that was used, so we can grant only + // that token's SA scoped read access to the credential secret. + tokenName := request.Labels[fleet.RegistrationTokenLabel] + if tokenName == "" { + logrus.Warnf("ClusterRegistration '%s/%s' has no %s label; cannot grant credential secret access for agent-initiated registration", + request.Namespace, request.Name, fleet.RegistrationTokenLabel) + } else { + agentToken, tokenErr := h.tokenCache.Get(request.Namespace, tokenName) + if tokenErr != nil && !apierrors.IsNotFound(tokenErr) { + return nil, status, fmt.Errorf("looking up registration token %s/%s: %w", request.Namespace, tokenName, tokenErr) + } + if apierrors.IsNotFound(tokenErr) { + logrus.Warnf("ClusterRegistration '%s/%s': registration token %s/%s not found (may have expired), skipping credential secret access grant", + request.Namespace, request.Name, request.Namespace, tokenName) + return objs, status, nil + } + agentSAName := names.SafeConcatName(tokenName, string(agentToken.UID)) + credRoleName := names.SafeConcatName(request.Name, "creds") + objs = append(objs, + &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Rules: []rbacv1.PolicyRule{ + { + Verbs: []string{"get"}, + APIGroups: []string{""}, + Resources: []string{"secrets"}, + ResourceNames: []string{secret.Name}, + }, + }, + }, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: credRoleName, + Namespace: h.systemRegistrationNamespace, + Labels: map[string]string{ + fleet.ManagedLabel: "true", + }, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: agentSAName, + Namespace: request.Namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: credRoleName, + }, + }, + ) + } + } + + return objs, status, nil } // shouldDelete returns true for any other cluster registration with the same clientID, but different random and older creation timestamp diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go index b3dc0bac1c..2499f94f04 100644 --- a/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistration/controller_test.go @@ -7,10 +7,13 @@ import ( "github.com/rancher/wrangler/v3/pkg/generic/fake" "go.uber.org/mock/gomock" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "github.com/rancher/fleet/internal/names" fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" . "github.com/onsi/ginkgo/v2" @@ -30,6 +33,7 @@ var _ = Describe("ClusterRegistration OnChange", func() { clusterClient *fake.MockClientInterface[*fleet.Cluster, *fleet.ClusterList] clusterRegistrationController *fake.MockControllerInterface[*fleet.ClusterRegistration, *fleet.ClusterRegistrationList] clusterCache *fake.MockCacheInterface[*fleet.Cluster] + tokenCache *fake.MockCacheInterface[*fleet.ClusterRegistrationToken] h *handler notFound = apierrors.NewNotFound(schema.GroupResource{}, "") anError = errors.New("an error occurred") @@ -43,6 +47,7 @@ var _ = Describe("ClusterRegistration OnChange", func() { clusterClient = fake.NewMockClientInterface[*fleet.Cluster, *fleet.ClusterList](ctrl) clusterRegistrationController = fake.NewMockControllerInterface[*fleet.ClusterRegistration, *fleet.ClusterRegistrationList](ctrl) clusterCache = fake.NewMockCacheInterface[*fleet.Cluster](ctrl) + tokenCache = fake.NewMockCacheInterface[*fleet.ClusterRegistrationToken](ctrl) h = &handler{ systemNamespace: "fleet-system", @@ -53,6 +58,7 @@ var _ = Describe("ClusterRegistration OnChange", func() { secretsCache: secretCache, secrets: secretController, serviceAccountCache: saCache, + tokenCache: tokenCache, } }) @@ -232,17 +238,198 @@ var _ = Describe("ClusterRegistration OnChange", func() { clusterRegistrationController.EXPECT().Update(gomock.Any()).Return(&fleet.ClusterRegistration{}, nil) }) - Context("grants registration, cleans up and creates objects", func() { + Context("agent-initiated flow: no import token found, RegistrationTokenLabel is empty", func() { BeforeEach(func() { + tokenCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, notFound) }) - It("creates a new secret", func() { + It("grants registration and creates 6 objects without credential RBAC", func() { objs, newStatus, err := h.OnChange(request, status) Expect(err).ToNot(HaveOccurred()) Expect(objs).To(HaveLen(6)) Expect(newStatus.Granted).To(BeTrue()) }) }) + + Context("manager-initiated flow: import token exists", func() { + BeforeEach(func() { + importToken := &fleet.ClusterRegistrationToken{ + ObjectMeta: metav1.ObjectMeta{ + Name: "import-token-cluster", + Namespace: "fleet-default", + UID: types.UID("test-token-uid"), + }, + } + tokenCache.EXPECT().Get(gomock.Any(), "import-token-cluster").Return(importToken, nil) + }) + + It("grants registration and creates 8 objects including scoped credential RBAC", func() { + objs, newStatus, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(8)) + Expect(newStatus.Granted).To(BeTrue()) + + // Locate the Role created in systemRegistrationNamespace + // to verify access is scoped to a single secret, not wildcard. + var credRole *rbacv1.Role + for _, obj := range objs { + if role, ok := obj.(*rbacv1.Role); ok && role.Namespace == h.systemRegistrationNamespace { + credRole = role + break + } + } + Expect(credRole).NotTo(BeNil(), "expected a credential Role in %s", h.systemRegistrationNamespace) + Expect(credRole.Rules).To(HaveLen(1)) + rule := credRole.Rules[0] + Expect(rule.Resources).To(ConsistOf("secrets")) + // ResourceNames MUST be non-empty + Expect(rule.ResourceNames).NotTo(BeEmpty(), "credential Role must restrict access to a specific secret, not all secrets") + Expect(rule.Verbs).To(ConsistOf("get")) + }) + + It("creates a scoped RoleBinding in systemRegistrationNamespace", func() { + objs, _, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + + var credRoleBinding *rbacv1.RoleBinding + for _, obj := range objs { + if rb, ok := obj.(*rbacv1.RoleBinding); ok && rb.Namespace == h.systemRegistrationNamespace { + credRoleBinding = rb + break + } + } + Expect(credRoleBinding).NotTo(BeNil(), "expected a credential RoleBinding in %s", h.systemRegistrationNamespace) + Expect(credRoleBinding.Subjects).To(HaveLen(1)) + Expect(credRoleBinding.Subjects[0].Kind).To(Equal("ServiceAccount")) + }) + }) + + Context("tokenCache returns an unexpected error", func() { + BeforeEach(func() { + tokenCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, anError) + }) + + It("propagates the error", func() { + objs, _, err := h.OnChange(request, status) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to look up import token")) + Expect(objs).To(BeNil()) + }) + }) + }) + + When("agent-initiated: RegistrationTokenLabel is set and token exists", func() { + var agentToken *fleet.ClusterRegistrationToken + + BeforeEach(func() { + cluster.Status = fleet.ClusterStatus{Namespace: "fleet-default"} + request.Labels = map[string]string{ + fleet.RegistrationTokenLabel: "my-token", + } + + sa = &corev1.ServiceAccount{} + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(sa, nil) + + secret := &corev1.Secret{Data: map[string][]byte{"token": []byte("secrettoken")}} + secretController.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(secret, nil) + + clusterRegistrationController.EXPECT().List(gomock.Any(), gomock.Any()).Return(&fleet.ClusterRegistrationList{}, nil) + clusterRegistrationController.EXPECT().Update(gomock.Any()).DoAndReturn( + func(cr *fleet.ClusterRegistration) (*fleet.ClusterRegistration, error) { return cr, nil }, + ) + + agentToken = &fleet.ClusterRegistrationToken{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-token", + Namespace: "fleet-default", + UID: types.UID("agent-token-uid"), + }, + } + tokenCache.EXPECT().Get(gomock.Any(), "import-token-cluster").Return(nil, notFound) + tokenCache.EXPECT().Get(gomock.Any(), "my-token").Return(agentToken, nil) + }) + + It("grants registration and creates 8 objects including scoped credential RBAC", func() { + objs, newStatus, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(8)) + Expect(newStatus.Granted).To(BeTrue()) + + var credRole *rbacv1.Role + for _, obj := range objs { + if role, ok := obj.(*rbacv1.Role); ok && role.Namespace == h.systemRegistrationNamespace { + credRole = role + break + } + } + Expect(credRole).NotTo(BeNil(), "expected a credential Role in %s", h.systemRegistrationNamespace) + Expect(credRole.Rules).To(HaveLen(1)) + Expect(credRole.Rules[0].Resources).To(ConsistOf("secrets")) + Expect(credRole.Rules[0].ResourceNames).NotTo(BeEmpty(), "credential Role must restrict access to a specific secret") + Expect(credRole.Rules[0].Verbs).To(ConsistOf("get")) + }) + + It("binds the credential RoleBinding to the agent token's service account", func() { + objs, _, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + + var credRoleBinding *rbacv1.RoleBinding + for _, obj := range objs { + if rb, ok := obj.(*rbacv1.RoleBinding); ok && rb.Namespace == h.systemRegistrationNamespace { + credRoleBinding = rb + break + } + } + Expect(credRoleBinding).NotTo(BeNil(), "expected a credential RoleBinding in %s", h.systemRegistrationNamespace) + Expect(credRoleBinding.Subjects).To(HaveLen(1)) + subject := credRoleBinding.Subjects[0] + Expect(subject.Kind).To(Equal("ServiceAccount")) + Expect(subject.Name).To(Equal(names.SafeConcatName("my-token", string(agentToken.UID)))) + Expect(subject.Namespace).To(Equal("fleet-default")) + }) + }) + + When("agent-initiated: RegistrationTokenLabel is set but token has expired", func() { + BeforeEach(func() { + cluster.Status = fleet.ClusterStatus{Namespace: "fleet-default"} + request.Labels = map[string]string{ + fleet.RegistrationTokenLabel: "my-token", + } + + sa = &corev1.ServiceAccount{} + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(sa, nil) + + secret := &corev1.Secret{Data: map[string][]byte{"token": []byte("secrettoken")}} + secretController.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(secret, nil) + + clusterRegistrationController.EXPECT().List(gomock.Any(), gomock.Any()).Return(&fleet.ClusterRegistrationList{}, nil) + clusterRegistrationController.EXPECT().Update(gomock.Any()).DoAndReturn( + func(cr *fleet.ClusterRegistration) (*fleet.ClusterRegistration, error) { return cr, nil }, + ) + + tokenCache.EXPECT().Get(gomock.Any(), "import-token-cluster").Return(nil, notFound) + tokenCache.EXPECT().Get(gomock.Any(), "my-token").Return(nil, notFound) + }) + + It("grants registration with base objects only and no credential RBAC, without error", func() { + objs, newStatus, err := h.OnChange(request, status) + Expect(err).ToNot(HaveOccurred()) + Expect(newStatus.Granted).To(BeTrue()) + Expect(objs).To(HaveLen(6)) + + for _, obj := range objs { + if m, ok := obj.(metav1.Object); ok { + if _, isRole := obj.(*rbacv1.Role); isRole { + Expect(m.GetNamespace()).NotTo(Equal(h.systemRegistrationNamespace), + "no credential Role should be created when the registration token has expired") + } + if _, isRB := obj.(*rbacv1.RoleBinding); isRB { + Expect(m.GetNamespace()).NotTo(Equal(h.systemRegistrationNamespace), + "no credential RoleBinding should be created when the registration token has expired") + } + } + } + }) }) }) }) diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go index 344f0db02a..cd221de5c0 100644 --- a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler.go @@ -5,6 +5,7 @@ package clusterregistrationtoken import ( "context" + "fmt" "time" "github.com/sirupsen/logrus" @@ -30,6 +31,7 @@ import ( type handler struct { systemNamespace string systemRegistrationNamespace string + enforceTTL bool clusterRegistrationTokens fleetcontrollers.ClusterRegistrationTokenController serviceAccountCache corecontrollers.ServiceAccountCache secretsCache corecontrollers.SecretCache @@ -39,6 +41,7 @@ type handler struct { func Register(ctx context.Context, systemNamespace string, systemRegistrationNamespace string, + enforceTTL bool, apply apply.Apply, clusterGroupToken fleetcontrollers.ClusterRegistrationTokenController, serviceAccounts corecontrollers.ServiceAccountController, @@ -48,6 +51,7 @@ func Register(ctx context.Context, h := &handler{ systemNamespace: systemNamespace, systemRegistrationNamespace: systemRegistrationNamespace, + enforceTTL: enforceTTL, clusterRegistrationTokens: clusterGroupToken, serviceAccountCache: serviceAccounts.Cache(), secretsCache: secretsCache, @@ -68,8 +72,20 @@ func Register(ctx context.Context, } func (h *handler) OnChange(token *fleet.ClusterRegistrationToken, status fleet.ClusterRegistrationTokenStatus) ([]runtime.Object, fleet.ClusterRegistrationTokenStatus, error) { - if gone, err := h.deleteExpired(token); err != nil || gone { - return nil, status, nil //nolint:nilerr // intentionally ignoring error to avoid retry loops on deletion failures + if h.enforceTTL && (token.Spec.TTL == nil || token.Spec.TTL.Duration <= 0) { + logrus.Warnf("ClusterRegistrationToken '%s/%s' has no TTL, deleting", token.Namespace, token.Name) + if err := h.clusterRegistrationTokens.Delete(token.Namespace, token.Name, nil); err != nil && !apierror.IsNotFound(err) { + return nil, status, fmt.Errorf("deleting ClusterRegistrationToken %s/%s with no TTL: %w", token.Namespace, token.Name, err) + } + logrus.Warnf("ClusterRegistrationToken %s/%s rejected: TTL is required", token.Namespace, token.Name) + + return nil, status, nil + } + + if token.Spec.TTL != nil && token.Spec.TTL.Duration > 0 { + if gone, err := h.deleteExpired(token); err != nil || gone { + return nil, status, nil //nolint:nilerr // intentionally ignoring error to avoid retry loops on deletion failures + } } logrus.Debugf("Cluster registration token '%s/%s', creating import service account, roles and secret", token.Namespace, token.Name) @@ -130,9 +146,10 @@ func (h *handler) OnChange(token *fleet.ClusterRegistrationToken, status fleet.C // Add service account, e.g.: import-token-local in the system // registration namespace. This account is used during registration to - // access secrets in the system registration namespace - // 'cattle-fleet-clusters-system' and clusterregistrations in the - // cluster registration namespace (e.g. 'fleet-default'). + // create clusterregistrations in the cluster registration namespace + // (e.g. 'fleet-default'). Access to secrets in the system registration + // namespace 'cattle-fleet-clusters-system' is granted separately by the + // clusterregistration controller, scoped to the specific credential secret. return append([]runtime.Object{ &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ @@ -180,37 +197,6 @@ func (h *handler) OnChange(token *fleet.ClusterRegistrationToken, status fleet.C Name: names.SafeConcatName(saName, "role"), }, }, - &rbacv1.Role{ - ObjectMeta: metav1.ObjectMeta{ - Name: names.SafeConcatName(saName, "creds"), - Namespace: h.systemRegistrationNamespace, - }, - Rules: []rbacv1.PolicyRule{ - { - Verbs: []string{"get"}, - APIGroups: []string{""}, - Resources: []string{"secrets"}, - }, - }, - }, - &rbacv1.RoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: names.SafeConcatName(saName, "creds"), - Namespace: h.systemRegistrationNamespace, - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Name: saName, - Namespace: token.Namespace, - }, - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: rbacv1.GroupName, - Kind: "Role", - Name: names.SafeConcatName(saName, "creds"), - }, - }, }, secrets...), status, nil } @@ -234,6 +220,7 @@ func (h *handler) clusterRegistrationSecret(token *fleet.ClusterRegistrationToke config.APIServerCAKey: string(config.Get().APIServerCA), "token": string(secret.Data["token"]), // from service account "systemRegistrationNamespace": h.systemRegistrationNamespace, + "tokenName": token.Name, } if h.systemNamespace != config.DefaultNamespace { diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler_test.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler_test.go new file mode 100644 index 0000000000..acced62997 --- /dev/null +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/handler_test.go @@ -0,0 +1,176 @@ +package clusterregistrationtoken + +import ( + "errors" + "time" + + "github.com/rancher/wrangler/v3/pkg/generic/fake" + "go.uber.org/mock/gomock" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + + fleet "github.com/rancher/fleet/pkg/apis/fleet.cattle.io/v1alpha1" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ClusterRegistrationToken OnChange", func() { + const ( + systemNamespace = "fleet-system" + systemRegistrationNamespace = "cattle-fleet-clusters-system" + ) + + var ( + token *fleet.ClusterRegistrationToken + status fleet.ClusterRegistrationTokenStatus + saCache *fake.MockCacheInterface[*corev1.ServiceAccount] + tokenController *fake.MockControllerInterface[*fleet.ClusterRegistrationToken, *fleet.ClusterRegistrationTokenList] + h *handler + notFound = apierrors.NewNotFound(schema.GroupResource{}, "") + ) + + BeforeEach(func() { + ctrl := gomock.NewController(GinkgoT()) + saCache = fake.NewMockCacheInterface[*corev1.ServiceAccount](ctrl) + tokenController = fake.NewMockControllerInterface[*fleet.ClusterRegistrationToken, *fleet.ClusterRegistrationTokenList](ctrl) + + h = &handler{ + systemNamespace: systemNamespace, + systemRegistrationNamespace: systemRegistrationNamespace, + serviceAccountCache: saCache, + clusterRegistrationTokens: tokenController, + } + + token = &fleet.ClusterRegistrationToken{ + ObjectMeta: metav1.ObjectMeta{ + Name: "import-token-local", + Namespace: "fleet-default", + CreationTimestamp: metav1.Now(), + }, + Spec: fleet.ClusterRegistrationTokenSpec{ + TTL: &metav1.Duration{Duration: 24 * time.Hour}, + }, + } + status = fleet.ClusterRegistrationTokenStatus{} + }) + + Describe("no broad secret-read Role is granted", func() { + BeforeEach(func() { + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, notFound) + // deleteExpired enqueues a re-check when the TTL has not yet elapsed. + tokenController.EXPECT().EnqueueAfter(gomock.Any(), gomock.Any(), gomock.Any()) + }) + + It("does not return a Role that grants access to all secrets in systemRegistrationNamespace", func() { + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + + for _, obj := range objs { + role, ok := obj.(*rbacv1.Role) + if !ok || role.Namespace != systemRegistrationNamespace { + continue + } + for _, rule := range role.Rules { + for _, resource := range rule.Resources { + if resource == "secrets" { + Expect(rule.ResourceNames).NotTo(BeEmpty(), + "found a Role in %s granting access to all secrets", + systemRegistrationNamespace) + } + } + } + } + }) + + It("returns exactly 3 objects: ServiceAccount, Role for clusterregistrations, and RoleBinding", func() { + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(3)) + + kinds := map[string]int{} + for _, obj := range objs { + switch obj.(type) { + case *corev1.ServiceAccount: + kinds["ServiceAccount"]++ + case *rbacv1.Role: + kinds["Role"]++ + case *rbacv1.RoleBinding: + kinds["RoleBinding"]++ + } + } + Expect(kinds["ServiceAccount"]).To(Equal(1)) + Expect(kinds["Role"]).To(Equal(1)) + Expect(kinds["RoleBinding"]).To(Equal(1)) + }) + + It("does not create any object in systemRegistrationNamespace", func() { + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + + for _, obj := range objs { + if m, ok := obj.(metav1.Object); ok { + Expect(m.GetNamespace()).NotTo(Equal(systemRegistrationNamespace), + "handler must not create objects in %s; scoped RBAC is the clusterregistration controller's responsibility", + systemRegistrationNamespace) + } + } + }) + }) + + Describe("tokens with no TTL are rejected when enforceTTL is enabled", func() { + BeforeEach(func() { + h.enforceTTL = true + token.Spec.TTL = nil + }) + + It("deletes the token and returns no error (no retry)", func() { + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(nil) + + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(BeNil()) + }) + + It("returns no error even when deletion fails with NotFound (already gone)", func() { + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(notFound) + + _, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + }) + + It("returns a deletion error when the API call fails", func() { + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(errors.New("server unavailable")) + + _, _, err := h.OnChange(token, status) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("deleting ClusterRegistrationToken")) + }) + + It("also rejects a token whose TTL duration is zero, without returning an error", func() { + token.Spec.TTL = &metav1.Duration{Duration: 0} + tokenController.EXPECT().Delete(token.Namespace, token.Name, nil).Return(nil) + + _, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + }) + }) + + Describe("tokens with no TTL are accepted when enforceTTL is disabled", func() { + BeforeEach(func() { + h.enforceTTL = false + token.Spec.TTL = nil + }) + + It("does not delete or reject the token", func() { + saCache.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, notFound) + + objs, _, err := h.OnChange(token, status) + Expect(err).ToNot(HaveOccurred()) + Expect(objs).To(HaveLen(3)) + }) + }) +}) diff --git a/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/suite_test.go b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/suite_test.go new file mode 100644 index 0000000000..866759d823 --- /dev/null +++ b/internal/cmd/controller/agentmanagement/controllers/clusterregistrationtoken/suite_test.go @@ -0,0 +1,13 @@ +package clusterregistrationtoken + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestFleet(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "ClusterRegistrationToken Handler Suite") +} diff --git a/internal/cmd/controller/agentmanagement/controllers/controllers.go b/internal/cmd/controller/agentmanagement/controllers/controllers.go index b930ab57eb..1500d0fb54 100644 --- a/internal/cmd/controller/agentmanagement/controllers/controllers.go +++ b/internal/cmd/controller/agentmanagement/controllers/controllers.go @@ -55,7 +55,7 @@ func (a *AppContext) Start(ctx context.Context) error { return start.All(ctx, 50, a.starters...) } -func Register(ctx context.Context, appCtx *AppContext, systemNamespace string, disableBootstrap bool) error { +func Register(ctx context.Context, appCtx *AppContext, systemNamespace string, disableBootstrap bool, enforceTTL bool) error { systemRegistrationNamespace := fleetns.SystemRegistrationNamespace(systemNamespace) // config should be registered first to ensure the global @@ -120,11 +120,13 @@ func Register(ctx context.Context, appCtx *AppContext, systemNamespace string, d appCtx.RBAC.Role(), appCtx.RBAC.RoleBinding(), appCtx.ClusterRegistration(), - appCtx.Cluster()) + appCtx.Cluster(), + appCtx.ClusterRegistrationToken()) clusterregistrationtoken.Register(ctx, systemNamespace, systemRegistrationNamespace, + enforceTTL, appCtx.Apply.WithCacheTypes( appCtx.Core.Secret(), appCtx.Core.ServiceAccount(), diff --git a/internal/cmd/controller/agentmanagement/root.go b/internal/cmd/controller/agentmanagement/root.go index 753059fa1c..49694d692d 100644 --- a/internal/cmd/controller/agentmanagement/root.go +++ b/internal/cmd/controller/agentmanagement/root.go @@ -45,7 +45,9 @@ func (a *AgentManagement) Run(cmd *cobra.Command, args []string) error { if a.Namespace == "" { return errors.New("--namespace or env NAMESPACE is required to be set") } - return start(cmd.Context(), a.Kubeconfig, a.Namespace, a.DisableBootstrap) + enforceTTL, _ := strconv.ParseBool(os.Getenv("FLEET_REGISTRATION_TOKEN_TTL_REQUIRED")) + + return start(cmd.Context(), a.Kubeconfig, a.Namespace, a.DisableBootstrap, enforceTTL) } func App() *cobra.Command { diff --git a/internal/cmd/controller/agentmanagement/start.go b/internal/cmd/controller/agentmanagement/start.go index 48eebbb12e..d66a1ea4b1 100644 --- a/internal/cmd/controller/agentmanagement/start.go +++ b/internal/cmd/controller/agentmanagement/start.go @@ -18,7 +18,7 @@ import ( "k8s.io/client-go/rest" ) -func start(ctx context.Context, kubeConfig, namespace string, disableBootstrap bool) error { +func start(ctx context.Context, kubeConfig, namespace string, disableBootstrap bool, enforceTTL bool) error { clientConfig := kubeconfig.GetNonInteractiveClientConfig(kubeConfig) kc, err := clientConfig.ClientConfig() if err != nil { @@ -51,7 +51,7 @@ func start(ctx context.Context, kubeConfig, namespace string, disableBootstrap b if err != nil { logrus.Fatal(err) } - if err := controllers.Register(ctx, appCtx, namespace, disableBootstrap); err != nil { + if err := controllers.Register(ctx, appCtx, namespace, disableBootstrap, enforceTTL); err != nil { logrus.Fatal(err) } if err := appCtx.Start(ctx); err != nil { diff --git a/internal/config/config.go b/internal/config/config.go index 7256bf536c..abad38a649 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -28,6 +28,9 @@ const ( // contains the controller and agent DefaultNamespace = "cattle-fleet-system" LegacyDefaultNamespace = "fleet-system" + // ImportTokenPrefix is the prefix for ClusterRegistrationToken names + // created during manager-initiated cluster imports. + ImportTokenPrefix = "import-token-" // ImportTokenSecretValuesKey is the key in the import token secret, // which contains the values for cluster registration. ImportTokenSecretValuesKey = "values" diff --git a/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go b/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go index 8160ea7d1e..5635243cb2 100644 --- a/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go +++ b/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go @@ -32,6 +32,11 @@ var ( // ClusterRegistrationTokenAnnotation is the namespace of the // clusterregistration, e.g. "fleet-local". ClusterRegistrationNamespaceAnnotation = "fleet.cattle.io/cluster-registration-namespace" + // RegistrationTokenLabel is set on a ClusterRegistration to record which + // ClusterRegistrationToken was used. The clusterregistration controller + // reads this label to grant only that token's SA scoped read access to + // the resulting credential secret. + RegistrationTokenLabel = "fleet.cattle.io/registration-token" // ManagedLabel is used for clean up. Cluster namespaces and other // resources with this label will be cleaned up. Used in Rancher to // identify fleet namespaces.