Skip to content

Commit 66e6b19

Browse files
committed
Feat: Add support for GKETenantControllerManager
1 parent 0c41597 commit 66e6b19

28 files changed

+1657
-319
lines changed

cmd/cloud-controller-manager/BUILD

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,36 @@ go_library(
2020
srcs = [
2121
"gkenetworkparamsetcontroller.go",
2222
"gkeservicecontroller.go",
23+
"gketenantcontrollermanager.go",
2324
"main.go",
2425
"nodeipamcontroller.go",
2526
],
2627
importpath = "k8s.io/cloud-provider-gcp/cmd/cloud-controller-manager",
2728
deps = [
2829
"//cmd/cloud-controller-manager/options",
2930
"//pkg/controller/gkenetworkparamset",
31+
"//pkg/controller/gketenantcontrollers",
3032
"//pkg/controller/nodeipam",
3133
"//pkg/controller/nodeipam/config",
3234
"//pkg/controller/nodeipam/ipam",
3335
"//pkg/controller/service",
3436
"//providers/gce",
37+
"//vendor/github.com/GoogleCloudPlatform/gke-enterprise-mt/apis/providerconfig/v1:providerconfig",
38+
"//vendor/github.com/GoogleCloudPlatform/gke-enterprise-mt/pkg/framework",
3539
"//vendor/github.com/GoogleCloudPlatform/gke-networking-api/client/network/clientset/versioned",
3640
"//vendor/github.com/GoogleCloudPlatform/gke-networking-api/client/network/informers/externalversions",
3741
"//vendor/github.com/GoogleCloudPlatform/gke-networking-api/client/nodetopology/clientset/versioned",
3842
"//vendor/github.com/spf13/pflag",
43+
"//vendor/k8s.io/apimachinery/pkg/runtime/schema",
3944
"//vendor/k8s.io/apimachinery/pkg/util/wait",
4045
"//vendor/k8s.io/apiserver/pkg/util/feature",
46+
"//vendor/k8s.io/client-go/dynamic",
47+
"//vendor/k8s.io/client-go/dynamic/dynamicinformer",
4148
"//vendor/k8s.io/cloud-provider",
4249
"//vendor/k8s.io/cloud-provider/app",
4350
"//vendor/k8s.io/cloud-provider/app/config",
51+
"//vendor/k8s.io/cloud-provider/controllers/node",
52+
"//vendor/k8s.io/cloud-provider/controllers/nodelifecycle",
4453
"//vendor/k8s.io/cloud-provider/names",
4554
"//vendor/k8s.io/cloud-provider/options",
4655
"//vendor/k8s.io/component-base/cli/flag",
@@ -51,7 +60,6 @@ go_library(
5160
"//vendor/k8s.io/controller-manager/controller",
5261
"//vendor/k8s.io/klog/v2:klog",
5362
"//vendor/k8s.io/kubernetes/cmd/kube-controller-manager/names",
54-
"//vendor/k8s.io/utils/net",
5563
],
5664
)
5765

@@ -70,9 +78,13 @@ go_test(
7078
"//pkg/controller/nodeipam/config",
7179
"//pkg/controller/service",
7280
"//vendor/k8s.io/api/core/v1:core",
81+
"//vendor/k8s.io/client-go/informers",
82+
"//vendor/k8s.io/client-go/kubernetes/fake",
83+
"//vendor/k8s.io/client-go/rest",
7384
"//vendor/k8s.io/cloud-provider",
7485
"//vendor/k8s.io/cloud-provider/app/config",
7586
"//vendor/k8s.io/cloud-provider/config",
7687
"//vendor/k8s.io/controller-manager/app",
88+
"//vendor/k8s.io/controller-manager/pkg/clientbuilder",
7789
],
7890
)

cmd/cloud-controller-manager/gkenetworkparamsetcontroller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
networkinformers "github.com/GoogleCloudPlatform/gke-networking-api/client/network/informers/externalversions"
1111
cloudprovider "k8s.io/cloud-provider"
1212
gkenetworkparamsetcontroller "k8s.io/cloud-provider-gcp/pkg/controller/gkenetworkparamset"
13+
nodeipam "k8s.io/cloud-provider-gcp/pkg/controller/nodeipam"
1314
"k8s.io/cloud-provider-gcp/pkg/controller/nodeipam/ipam"
1415
"k8s.io/cloud-provider-gcp/providers/gce"
1516
"k8s.io/cloud-provider/app"
@@ -73,7 +74,7 @@ func startGkeNetworkParamsController(ccmConfig *cloudcontrollerconfig.CompletedC
7374
// with stack type and returns a list of typed cidrs and error
7475
func validClusterCIDR(clusterCIDRFromFlag string) ([]*net.IPNet, error) {
7576
// failure: bad cidrs in config
76-
clusterCIDRs, dualStack, err := processCIDRs(clusterCIDRFromFlag)
77+
clusterCIDRs, dualStack, err := nodeipam.ProcessCIDRs(clusterCIDRFromFlag)
7778
if err != nil {
7879
return nil, err
7980
}
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
providerconfigv1 "github.com/GoogleCloudPlatform/gke-enterprise-mt/apis/providerconfig/v1"
8+
"github.com/GoogleCloudPlatform/gke-enterprise-mt/pkg/framework"
9+
networkclientset "github.com/GoogleCloudPlatform/gke-networking-api/client/network/clientset/versioned"
10+
networkinformers "github.com/GoogleCloudPlatform/gke-networking-api/client/network/informers/externalversions"
11+
topologyclientset "github.com/GoogleCloudPlatform/gke-networking-api/client/nodetopology/clientset/versioned"
12+
"k8s.io/apimachinery/pkg/runtime/schema"
13+
"k8s.io/client-go/dynamic"
14+
dynamicinformer "k8s.io/client-go/dynamic/dynamicinformer"
15+
cloudprovider "k8s.io/cloud-provider"
16+
nodeipamcontrolleroptions "k8s.io/cloud-provider-gcp/cmd/cloud-controller-manager/options"
17+
"k8s.io/cloud-provider-gcp/pkg/controller/gketenantcontrollers"
18+
nodeipamconfig "k8s.io/cloud-provider-gcp/pkg/controller/nodeipam/config"
19+
"k8s.io/cloud-provider/app"
20+
cloudcontrollerconfig "k8s.io/cloud-provider/app/config"
21+
controllermanagerapp "k8s.io/controller-manager/app"
22+
"k8s.io/controller-manager/controller"
23+
"k8s.io/klog/v2"
24+
25+
"k8s.io/cloud-provider-gcp/pkg/controller/nodeipam"
26+
"k8s.io/cloud-provider-gcp/pkg/controller/nodeipam/ipam"
27+
"k8s.io/cloud-provider/controllers/node"
28+
"k8s.io/cloud-provider/controllers/nodelifecycle"
29+
)
30+
31+
// startGKETenantControllerManagerWrapper is used to take cloud config as input and start the GKE TenantControllerManager controller
32+
func startGKETenantControllerManagerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface, nodeIPAMControllerOptions nodeipamcontrolleroptions.NodeIPAMControllerOptions) app.InitFunc {
33+
return func(ctx context.Context, controllerContext controllermanagerapp.ControllerContext) (controller.Interface, bool, error) {
34+
return startGKETenantControllerManager(ctx, initContext, controllerContext, completedConfig, cloud, *nodeIPAMControllerOptions.NodeIPAMControllerConfiguration)
35+
}
36+
}
37+
38+
func startGKETenantControllerManager(ctx context.Context, initContext app.ControllerInitContext, controlexContext controllermanagerapp.ControllerContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration) (controller.Interface, bool, error) {
39+
if !enableProviderConfigController {
40+
klog.Infof("GKE Tenant Controller Manager is disabled (enable with --enable-provider-config-controller)")
41+
return nil, false, nil
42+
}
43+
44+
clientConfig := completedConfig.Kubeconfig
45+
46+
// Create network clients and informers
47+
networkClient, err := networkclientset.NewForConfig(clientConfig)
48+
if err != nil {
49+
klog.Errorf("Failed to create network client: %v", err)
50+
return nil, false, err
51+
}
52+
networkInformerFactory := networkinformers.NewSharedInformerFactory(networkClient, 12*time.Hour)
53+
networkInformer := networkInformerFactory.Networking().V1().Networks()
54+
gnpInformer := networkInformerFactory.Networking().V1().GKENetworkParamSets()
55+
56+
// Create topology client
57+
nodeTopologyClient, err := topologyclientset.NewForConfig(clientConfig)
58+
if err != nil {
59+
klog.Errorf("Failed to create topology client: %v", err)
60+
return nil, false, err
61+
}
62+
63+
// Eagerly request the main Node informer so the SharedInformerFactory starts it.
64+
// If the tenant controller manager is the only enabled controller, the informer
65+
// factory won't start the node cache unless we explicitly ask for it here.
66+
_ = controlexContext.InformerFactory.Core().V1().Nodes().Informer()
67+
68+
// Create dynamic client for framework
69+
dynamicClient, err := dynamic.NewForConfig(clientConfig)
70+
if err != nil {
71+
klog.Errorf("Failed to create dynamic client: %v", err)
72+
return nil, false, err
73+
}
74+
75+
// Create dynamic informer factory for ProviderConfig
76+
gvr := schema.GroupVersionResource{
77+
Group: providerconfigv1.GroupName,
78+
Version: providerconfigv1.SchemeGroupVersion.Version,
79+
Resource: "providerconfigs",
80+
}
81+
dynamicInformerFactory := dynamicinformer.NewDynamicSharedInformerFactory(dynamicClient, 12*time.Hour)
82+
providerConfigInformer := dynamicInformerFactory.ForResource(gvr).Informer()
83+
84+
// Define controllers
85+
controllers := map[string]gketenantcontrollers.ControllerStartFunc{
86+
"node-controller": func(cfg *gketenantcontrollers.ControllerConfig) error {
87+
klog.Infof("Creating OSS Cloud Node Controller for %s...", cfg.ProviderConfig.Name)
88+
nodeController, err := node.NewCloudNodeController(
89+
cfg.NodeInformer,
90+
cfg.KubeClient,
91+
cfg.Cloud,
92+
completedConfig.ComponentConfig.NodeStatusUpdateFrequency.Duration,
93+
completedConfig.ComponentConfig.NodeController.ConcurrentNodeSyncs,
94+
)
95+
if err != nil {
96+
return err
97+
}
98+
klog.Infof("Starting OSS Cloud Node Controller for %s (blocking)", cfg.ProviderConfig.Name)
99+
nodeController.Run(cfg.Context.Done(), cfg.ControllerContext.ControllerManagerMetrics)
100+
return nil
101+
},
102+
"node-ipam-controller": func(cfg *gketenantcontrollers.ControllerConfig) error {
103+
klog.Infof("Starting Node IPAM Controller for %s...", cfg.ProviderConfig.Name)
104+
clusterCIDR, err := gketenantcontrollers.GetClusterCIDRsFromProviderConfig(cfg.ProviderConfig)
105+
if err != nil {
106+
klog.Errorf("Failed to get ClusterCIDRs from ProviderConfig: %v. Node IPAM Controller will be disabled.", err)
107+
return nil // Don't fail the whole start
108+
}
109+
110+
_, started, err := nodeipam.StartNodeIpamController(
111+
cfg.Context,
112+
cfg.NodeInformer,
113+
cfg.KubeClient,
114+
cfg.Cloud,
115+
clusterCIDR,
116+
completedConfig.ComponentConfig.KubeCloudShared.AllocateNodeCIDRs,
117+
nodeIPAMConfig.ServiceCIDR,
118+
nodeIPAMConfig.SecondaryServiceCIDR,
119+
nodeIPAMConfig,
120+
networkInformer,
121+
gnpInformer,
122+
nodeTopologyClient,
123+
ipam.CIDRAllocatorType(completedConfig.ComponentConfig.KubeCloudShared.CIDRAllocatorType),
124+
cfg.ControllerContext.ControllerManagerMetrics,
125+
)
126+
if err != nil {
127+
return err
128+
}
129+
if !started {
130+
klog.Infof("Node IPAM Controller not started (disabled in config) for %s", cfg.ProviderConfig.Name)
131+
} else {
132+
klog.Infof("Node IPAM Controller started with ClusterCIDR: %s for %s", clusterCIDR, cfg.ProviderConfig.Name)
133+
}
134+
// Block until context is canceled so starter doesn't exit early
135+
<-cfg.Context.Done()
136+
return nil
137+
},
138+
"node-lifecycle-controller": func(cfg *gketenantcontrollers.ControllerConfig) error {
139+
klog.Infof("Creating Node Lifecycle Controller for %s...", cfg.ProviderConfig.Name)
140+
nodeMonitorPeriod := completedConfig.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration
141+
lifecycleController, err := nodelifecycle.NewCloudNodeLifecycleController(
142+
cfg.NodeInformer,
143+
cfg.KubeClient,
144+
cfg.Cloud,
145+
nodeMonitorPeriod,
146+
)
147+
if err != nil {
148+
return err
149+
}
150+
klog.Infof("Starting Node Lifecycle Controller for %s...", cfg.ProviderConfig.Name)
151+
lifecycleController.Run(cfg.Context, cfg.ControllerContext.ControllerManagerMetrics)
152+
return nil
153+
},
154+
}
155+
156+
// Create the starter
157+
starter := gketenantcontrollers.NewNodeControllerStarter(
158+
completedConfig.ClientBuilder,
159+
completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName),
160+
controlexContext.InformerFactory,
161+
completedConfig,
162+
controlexContext,
163+
controllers,
164+
)
165+
166+
// Create the framework manager
167+
mgr := framework.New(
168+
dynamicClient,
169+
providerConfigInformer,
170+
gkeTenantControllerManagerName,
171+
starter,
172+
ctx.Done(),
173+
)
174+
175+
// Start network informers
176+
networkInformerFactory.Start(ctx.Done())
177+
// Start dynamic informers
178+
dynamicInformerFactory.Start(ctx.Done())
179+
180+
// Run the manager
181+
go mgr.Run()
182+
183+
return nil, true, nil
184+
}

cmd/cloud-controller-manager/main.go

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import (
2828
"k8s.io/apimachinery/pkg/util/wait"
2929
cloudprovider "k8s.io/cloud-provider"
3030
"k8s.io/cloud-provider/app"
31-
"k8s.io/cloud-provider/app/config"
31+
cloudcontrollerconfig "k8s.io/cloud-provider/app/config"
3232
"k8s.io/cloud-provider/names"
3333
"k8s.io/cloud-provider/options"
3434
cliflag "k8s.io/component-base/cli/flag"
@@ -43,9 +43,12 @@ import (
4343
)
4444

4545
const (
46-
gkeServiceLBControllerName = "gke-service-lb-controller"
47-
gkeServiceControllerClientName = "gke-service-controller"
48-
gkeServiceAlias = "gke-service"
46+
gkeServiceLBControllerName = "gke-service-lb-controller"
47+
gkeServiceControllerClientName = "gke-service-controller"
48+
gkeServiceAlias = "gke-service"
49+
gkeTenantControllerManagerName = "gke-tenant-controller-manager"
50+
gkeTenantControllerClientName = "gke-tenant-controller-manager"
51+
gkeTenantControllerManagerAlias = "gke-tenant-controller-manager"
4952
)
5053

5154
var (
@@ -77,6 +80,9 @@ var (
7780
// The reason for it not being enabled by default is the additional GCE API calls that are made
7881
// for checking if the deny firewalls exist/deletion which will eat up the quota unnecessarily.
7982
enableL4DenyFirewallRollbackCleanup bool
83+
84+
// enableProviderConfigController enables the gke-tenant-controller-manager.
85+
enableProviderConfigController bool
8086
)
8187

8288
func main() {
@@ -99,6 +105,7 @@ func main() {
99105
cloudProviderFS.BoolVar(&enableL4LBAnnotations, "enable-l4-lb-annotations", false, "Enables Annotations for GCE L4 LB Services")
100106
cloudProviderFS.BoolVar(&enableL4DenyFirewall, "enable-l4-deny-firewall", false, "Enable creation and updates of Deny VPC Firewall Rules for L4 external load balancers. Requires --enable-pinhole and --enable-l4-deny-firewall-rollback-cleanup to be true.")
101107
cloudProviderFS.BoolVar(&enableL4DenyFirewallRollbackCleanup, "enable-l4-deny-firewall-rollback-cleanup", false, "Enable cleanup codepath of the deny firewalls for rollback. The reason for it not being enabled by default is the additional GCE API calls that are made for checking if the deny firewalls exist/deletion which will eat up the quota unnecessarily.")
108+
cloudProviderFS.BoolVar(&enableProviderConfigController, "enable-provider-config-controller", false, "Enables the GKE Tenant Controller Manager for Multi-Tenancy.")
102109

103110
// add new controllers and initializers
104111
nodeIpamController := nodeIPAMController{}
@@ -119,12 +126,25 @@ func main() {
119126
Constructor: startGkeServiceControllerWrapper,
120127
}
121128

129+
controllerInitializers[gkeTenantControllerManagerName] = app.ControllerInitFuncConstructor{
130+
InitContext: app.ControllerInitContext{
131+
ClientName: gkeTenantControllerClientName,
132+
},
133+
Constructor: func(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc {
134+
return startGKETenantControllerManagerWrapper(initContext, completedConfig, cloud, nodeIpamController.nodeIPAMControllerOptions)
135+
},
136+
}
137+
122138
// add controllers disabled by default
123139
app.ControllersDisabledByDefault.Insert("gkenetworkparamset")
124140
app.ControllersDisabledByDefault.Insert(gkeServiceLBControllerName)
141+
app.ControllersDisabledByDefault.Insert(gkeTenantControllerManagerName)
142+
125143
aliasMap := names.CCMControllerAliases()
126144
aliasMap["nodeipam"] = kcmnames.NodeIpamController
127145
aliasMap[gkeServiceAlias] = gkeServiceLBControllerName
146+
aliasMap[gkeTenantControllerManagerAlias] = gkeTenantControllerManagerName
147+
128148
command := app.NewCloudControllerManagerCommand(ccmOptions, cloudInitializer, controllerInitializers, aliasMap, fss, wait.NeverStop)
129149

130150
logs.InitLogs()
@@ -135,7 +155,7 @@ func main() {
135155
}
136156
}
137157

138-
func cloudInitializer(config *config.CompletedConfig) cloudprovider.Interface {
158+
func cloudInitializer(config *cloudcontrollerconfig.CompletedConfig) cloudprovider.Interface {
139159
cloudConfig := config.ComponentConfig.KubeCloudShared.CloudProvider
140160

141161
// initialize cloud provider with the cloud provider name and config file provided
@@ -155,12 +175,9 @@ func cloudInitializer(config *config.CompletedConfig) cloudprovider.Interface {
155175
}
156176
}
157177

158-
if enableMultiProject {
178+
if !enableProviderConfigController && enableMultiProject {
159179
gceCloud, ok := (cloud).(*gce.Cloud)
160180
if !ok {
161-
// Fail-fast: If enableMultiProject is set, the cloud provider MUST
162-
// be GCE. A non-GCE provider indicates a misconfiguration. Ideally,
163-
// we never expect this to be executed.
164181
klog.Fatalf("multi-project mode requires GCE cloud provider, but got %T", cloud)
165182
}
166183
gceCloud.SetProjectFromNodeProviderID(true)

0 commit comments

Comments
 (0)