Skip to content

Commit d2e2b76

Browse files
committed
feat(occm): support multi region cluster
Currently, only a single auth section is supported. Set the regions in the config as follows: [Global] region=REGION1 regions=REGION1 regions=REGION2 regions=REGION3
1 parent d228854 commit d2e2b76

File tree

6 files changed

+178
-41
lines changed

6 files changed

+178
-41
lines changed

docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ The options in `Global` section are used for openstack-cloud-controller-manager
114114
Keystone user password. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required.
115115
* `region`
116116
Required. Keystone region name.
117+
* `regions`
118+
Optional. Keystone region names, used to specify the regions of the cloud provider in which instances are running. `region` is the default region name. Can be specified multiple times.
117119
* `domain-id`
118120
Keystone user domain ID. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required.
119121
* `domain-name`
@@ -317,7 +319,7 @@ Although the openstack-cloud-controller-manager was initially implemented with N
317319
call](https://docs.openstack.org/api-ref/load-balancer/v2/?expanded=create-a-load-balancer-detail#creating-a-fully-populated-load-balancer).
318320
Setting this option to true will create loadbalancers using serial API calls which first create an unpopulated
319321
loadbalancer, then populate its listeners, pools and members. This is a compatibility option at the expense of
320-
increased load on the OpenStack API. Default: false
322+
increased load on the OpenStack API. Default: false
321323
322324
NOTE:
323325

pkg/client/client.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ type AuthOpts struct {
5353
UserDomainID string `gcfg:"user-domain-id" mapstructure:"user-domain-id" name:"os-userDomainID" value:"optional"`
5454
UserDomainName string `gcfg:"user-domain-name" mapstructure:"user-domain-name" name:"os-userDomainName" value:"optional"`
5555
Region string `name:"os-region"`
56+
Regions []string `name:"os-regions" value:"optional"`
5657
EndpointType gophercloud.Availability `gcfg:"os-endpoint-type" mapstructure:"os-endpoint-type" name:"os-endpointType" value:"optional"`
5758
CAFile string `gcfg:"ca-file" mapstructure:"ca-file" name:"os-certAuthorityPath" value:"optional"`
5859
TLSInsecure string `gcfg:"tls-insecure" mapstructure:"tls-insecure" name:"os-TLSInsecure" value:"optional" matches:"^true|false$"`
@@ -87,6 +88,7 @@ func LogCfg(authOpts AuthOpts) {
8788
klog.V(5).Infof("UserDomainID: %s", authOpts.UserDomainID)
8889
klog.V(5).Infof("UserDomainName: %s", authOpts.UserDomainName)
8990
klog.V(5).Infof("Region: %s", authOpts.Region)
91+
klog.V(5).Infof("Regions: %s", authOpts.Regions)
9092
klog.V(5).Infof("EndpointType: %s", authOpts.EndpointType)
9193
klog.V(5).Infof("CAFile: %s", authOpts.CAFile)
9294
klog.V(5).Infof("CertFile: %s", authOpts.CertFile)
@@ -232,6 +234,20 @@ func ReadClouds(authOpts *AuthOpts) error {
232234
authOpts.ApplicationCredentialName = replaceEmpty(authOpts.ApplicationCredentialName, cloud.AuthInfo.ApplicationCredentialName)
233235
authOpts.ApplicationCredentialSecret = replaceEmpty(authOpts.ApplicationCredentialSecret, cloud.AuthInfo.ApplicationCredentialSecret)
234236

237+
regions := strings.Split(authOpts.Region, ",")
238+
if len(regions) > 1 {
239+
authOpts.Region = regions[0]
240+
}
241+
242+
for _, r := range cloud.Regions {
243+
// Support only single auth section in clouds.yaml
244+
if r.Values.AuthInfo == nil && r.Name != authOpts.Region {
245+
regions = append(regions, r.Name)
246+
}
247+
}
248+
249+
authOpts.Regions = regions
250+
235251
return nil
236252
}
237253

pkg/csi/cinder/openstack/openstack.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) {
126126
}
127127
}
128128

129-
for _, global := range cfg.Global {
129+
for idx, global := range cfg.Global {
130130
// Update the config with data from clouds.yaml if UseClouds is enabled
131131
if global.UseClouds {
132132
if global.CloudsFile != "" {
@@ -138,6 +138,10 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) {
138138
}
139139
klog.V(5).Infof("Credentials are loaded from %s:", global.CloudsFile)
140140
}
141+
142+
if len(global.Regions) == 0 {
143+
cfg.Global[idx].Regions = []string{global.Region}
144+
}
141145
}
142146

143147
return cfg, nil

pkg/csi/cinder/openstack/openstack_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ rescan-on-resize=true`
112112
CAFile: fakeCAfile,
113113
TenantID: fakeTenantID,
114114
Region: fakeRegion,
115+
Regions: []string{fakeRegion},
115116
}
116117
expectedOpts.Global["cloud2"] = &client.AuthOpts{
117118
Username: fakeUserName_cloud2,
@@ -121,6 +122,7 @@ rescan-on-resize=true`
121122
CAFile: fakeCAfile_cloud2,
122123
TenantID: fakeTenantID_cloud2,
123124
Region: fakeRegion_cloud2,
125+
Regions: []string{fakeRegion_cloud2},
124126
}
125127
expectedOpts.Global["cloud3"] = &client.AuthOpts{
126128
Username: fakeUserName_cloud3,
@@ -130,6 +132,7 @@ rescan-on-resize=true`
130132
CAFile: fakeCAfile_cloud3,
131133
TenantID: fakeTenantID_cloud3,
132134
Region: fakeRegion_cloud3,
135+
Regions: []string{fakeRegion_cloud3},
133136
}
134137

135138
expectedOpts.BlockStorage.RescanOnResize = true
@@ -224,6 +227,7 @@ rescan-on-resize=true`
224227
CAFile: fakeCAfile,
225228
TenantID: fakeTenantID,
226229
Region: fakeRegion,
230+
Regions: []string{fakeRegion},
227231
EndpointType: gophercloud.AvailabilityPublic,
228232
UseClouds: true,
229233
CloudsFile: wd + "/fixtures/clouds.yaml",
@@ -237,6 +241,7 @@ rescan-on-resize=true`
237241
CAFile: fakeCAfile_cloud2,
238242
TenantID: fakeTenantID_cloud2,
239243
Region: fakeRegion_cloud2,
244+
Regions: []string{fakeRegion_cloud2},
240245
EndpointType: gophercloud.AvailabilityPublic,
241246
UseClouds: true,
242247
CloudsFile: wd + "/fixtures/clouds.yaml",
@@ -250,6 +255,7 @@ rescan-on-resize=true`
250255
CAFile: fakeCAfile_cloud3,
251256
TenantID: fakeTenantID_cloud3,
252257
Region: fakeRegion_cloud3,
258+
Regions: []string{fakeRegion_cloud3},
253259
EndpointType: gophercloud.AvailabilityPublic,
254260
UseClouds: true,
255261
CloudsFile: wd + "/fixtures/clouds.yaml",

pkg/openstack/instances.go

Lines changed: 140 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
sysos "os"
2323
"regexp"
24+
"slices"
2425
"strings"
2526

2627
"github.com/gophercloud/gophercloud/v2"
@@ -46,9 +47,9 @@ const (
4647

4748
// InstancesV2 encapsulates an implementation of InstancesV2 for OpenStack.
4849
type InstancesV2 struct {
49-
compute *gophercloud.ServiceClient
50-
network *gophercloud.ServiceClient
51-
region string
50+
compute map[string]*gophercloud.ServiceClient
51+
network map[string]*gophercloud.ServiceClient
52+
regions []string
5253
regionProviderID bool
5354
networkingOpts NetworkingOpts
5455
}
@@ -57,16 +58,25 @@ type InstancesV2 struct {
5758
func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) {
5859
klog.V(4).Info("openstack.Instancesv2() called")
5960

60-
compute, err := client.NewComputeV2(os.provider, os.epOpts)
61-
if err != nil {
62-
klog.Errorf("unable to access compute v2 API : %v", err)
63-
return nil, false
64-
}
61+
var err error
62+
compute := make(map[string]*gophercloud.ServiceClient, len(os.regions))
63+
network := make(map[string]*gophercloud.ServiceClient, len(os.regions))
6564

66-
network, err := client.NewNetworkV2(os.provider, os.epOpts)
67-
if err != nil {
68-
klog.Errorf("unable to access network v2 API : %v", err)
69-
return nil, false
65+
for _, region := range os.regions {
66+
opt := os.epOpts
67+
opt.Region = region
68+
69+
compute[region], err = client.NewComputeV2(os.provider, opt)
70+
if err != nil {
71+
klog.Errorf("unable to access compute v2 API : %v", err)
72+
return nil, false
73+
}
74+
75+
network[region], err = client.NewNetworkV2(os.provider, opt)
76+
if err != nil {
77+
klog.Errorf("unable to access network v2 API : %v", err)
78+
return nil, false
79+
}
7080
}
7181

7282
regionalProviderID := false
@@ -77,17 +87,23 @@ func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) {
7787
return &InstancesV2{
7888
compute: compute,
7989
network: network,
80-
region: os.epOpts.Region,
90+
regions: os.regions,
8191
regionProviderID: regionalProviderID,
8292
networkingOpts: os.networkingOpts,
8393
}, true
8494
}
8595

8696
// InstanceExists indicates whether a given node exists according to the cloud provider
8797
func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) {
88-
_, err := i.getInstance(ctx, node)
98+
klog.V(4).InfoS("openstack.InstanceExists() called", "node", klog.KObj(node),
99+
"providerID", node.Spec.ProviderID,
100+
"region", node.Labels[v1.LabelTopologyRegion])
101+
102+
_, _, err := i.getInstance(ctx, node)
89103
if err == cloudprovider.InstanceNotFound {
90-
klog.V(6).Infof("instance not found for node: %s", node.Name)
104+
klog.V(6).InfoS("Node is not found in cloud provider", "node", klog.KObj(node),
105+
"providerID", node.Spec.ProviderID,
106+
"region", node.Labels[v1.LabelTopologyRegion])
91107
return false, nil
92108
}
93109

@@ -100,7 +116,11 @@ func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool,
100116

101117
// InstanceShutdown returns true if the instance is shutdown according to the cloud provider.
102118
func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) {
103-
server, err := i.getInstance(ctx, node)
119+
klog.V(4).InfoS("openstack.InstanceShutdown() called", "node", klog.KObj(node),
120+
"providerID", node.Spec.ProviderID,
121+
"region", node.Labels[v1.LabelTopologyRegion])
122+
123+
server, _, err := i.getInstance(ctx, node)
104124
if err != nil {
105125
return false, err
106126
}
@@ -115,7 +135,11 @@ func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool
115135

116136
// InstanceMetadata returns the instance's metadata.
117137
func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
118-
srv, err := i.getInstance(ctx, node)
138+
klog.V(4).InfoS("openstack.InstanceMetadata() called", "node", klog.KObj(node),
139+
"providerID", node.Spec.ProviderID,
140+
"region", node.Labels[v1.LabelTopologyRegion])
141+
142+
srv, region, err := i.getInstance(ctx, node)
119143
if err != nil {
120144
return nil, err
121145
}
@@ -124,62 +148,140 @@ func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*clo
124148
server = *srv
125149
}
126150

127-
instanceType, err := srvInstanceType(ctx, i.compute, &server)
151+
instanceType, err := srvInstanceType(ctx, i.compute[region], &server)
128152
if err != nil {
129153
return nil, err
130154
}
131155

132-
ports, err := getAttachedPorts(ctx, i.network, server.ID)
156+
ports, err := getAttachedPorts(ctx, i.network[region], server.ID)
133157
if err != nil {
134158
return nil, err
135159
}
136160

137-
addresses, err := nodeAddresses(ctx, &server, ports, i.network, i.networkingOpts)
161+
addresses, err := nodeAddresses(ctx, &server, ports, i.network[region], i.networkingOpts)
138162
if err != nil {
139163
return nil, err
140164
}
141165

142166
availabilityZone := util.SanitizeLabel(server.AvailabilityZone)
143167

144168
return &cloudprovider.InstanceMetadata{
145-
ProviderID: i.makeInstanceID(&server),
169+
ProviderID: i.makeInstanceID(&server, region),
146170
InstanceType: instanceType,
147171
NodeAddresses: addresses,
148172
Zone: availabilityZone,
149-
Region: i.region,
173+
Region: region,
150174
}, nil
151175
}
152176

153-
func (i *InstancesV2) makeInstanceID(srv *servers.Server) string {
177+
func (i *InstancesV2) makeInstanceID(srv *servers.Server, region string) string {
154178
if i.regionProviderID {
155-
return fmt.Sprintf("%s://%s/%s", ProviderName, i.region, srv.ID)
179+
return fmt.Sprintf("%s://%s/%s", ProviderName, region, srv.ID)
156180
}
157181
return fmt.Sprintf("%s:///%s", ProviderName, srv.ID)
158182
}
159183

160-
func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, error) {
161-
if node.Spec.ProviderID == "" {
162-
return getServerByName(ctx, i.compute, node.Name)
184+
func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, string, error) {
185+
var instanceID, instanceRegion string
186+
187+
if node.Spec.ProviderID != "" {
188+
var err error
189+
190+
instanceID, instanceRegion, err = instanceIDFromProviderID(node.Spec.ProviderID)
191+
if err != nil {
192+
return nil, "", err
193+
}
163194
}
164195

165-
instanceID, instanceRegion, err := instanceIDFromProviderID(node.Spec.ProviderID)
166-
if err != nil {
167-
return nil, err
196+
if instanceRegion != "" {
197+
if slices.Contains(i.regions, instanceRegion) {
198+
return i.getInstanceByID(ctx, instanceID, []string{instanceRegion})
199+
}
200+
201+
return nil, "", fmt.Errorf("getInstance: ProviderID \"%s\" didn't match supported regions \"%s\"", node.Spec.ProviderID, strings.Join(i.regions, ","))
202+
}
203+
204+
// At this point we know that ProviderID is not properly set or it doesn't contain region information
205+
// We need to search for the instance in all regions
206+
var searchRegions []string
207+
208+
// We cannot trust the region label, so we need to check the region
209+
instanceRegion = node.Labels[v1.LabelTopologyRegion]
210+
if slices.Contains(i.regions, instanceRegion) {
211+
searchRegions = []string{instanceRegion}
168212
}
169213

170-
if instanceRegion != "" && instanceRegion != i.region {
171-
return nil, fmt.Errorf("ProviderID \"%s\" didn't match supported region \"%s\"", node.Spec.ProviderID, i.region)
214+
for _, r := range i.regions {
215+
if r != instanceRegion {
216+
searchRegions = append(searchRegions, r)
217+
}
172218
}
173219

220+
klog.V(4).InfoS("openstack.getInstance() trying to find the instance in regions", "node", klog.KObj(node),
221+
"instanceID", instanceID,
222+
"regions", strings.Join(searchRegions, ","))
223+
224+
if instanceID == "" {
225+
return i.getInstanceByName(ctx, node.Name, searchRegions)
226+
}
227+
228+
return i.getInstanceByID(ctx, instanceID, searchRegions)
229+
}
230+
231+
func (i *InstancesV2) getInstanceByID(ctx context.Context, instanceID string, searchRegions []string) (*servers.Server, string, error) {
232+
server := servers.Server{}
233+
174234
mc := metrics.NewMetricContext("server", "get")
175-
server, err := servers.Get(ctx, i.compute, instanceID).Extract()
176-
if mc.ObserveRequest(err) != nil {
177-
if errors.IsNotFound(err) {
178-
return nil, cloudprovider.InstanceNotFound
235+
for _, r := range searchRegions {
236+
err := servers.Get(ctx, i.compute[r], instanceID).ExtractInto(&server)
237+
if mc.ObserveRequest(err) != nil {
238+
if errors.IsNotFound(err) {
239+
continue
240+
}
241+
242+
return nil, "", err
179243
}
180-
return nil, err
244+
245+
return &server, r, nil
181246
}
182-
return server, nil
247+
248+
return nil, "", cloudprovider.InstanceNotFound
249+
}
250+
251+
func (i *InstancesV2) getInstanceByName(ctx context.Context, name string, searchRegions []string) (*servers.Server, string, error) {
252+
opts := servers.ListOpts{
253+
Name: fmt.Sprintf("^%s$", regexp.QuoteMeta(name)),
254+
}
255+
256+
serverList := make([]servers.Server, 0, 1)
257+
mc := metrics.NewMetricContext("server", "list")
258+
259+
for _, r := range searchRegions {
260+
pager := servers.List(i.compute[r], opts)
261+
262+
err := pager.EachPage(ctx, func(_ context.Context, page pagination.Page) (bool, error) {
263+
s, err := servers.ExtractServers(page)
264+
if err != nil {
265+
return false, err
266+
}
267+
serverList = append(serverList, s...)
268+
if len(serverList) > 1 {
269+
return false, errors.ErrMultipleResults
270+
}
271+
return true, nil
272+
})
273+
if mc.ObserveRequest(err) != nil {
274+
return nil, "", err
275+
}
276+
277+
if len(serverList) == 0 {
278+
continue
279+
}
280+
281+
return &serverList[0], r, nil
282+
}
283+
284+
return nil, "", cloudprovider.InstanceNotFound
183285
}
184286

185287
func getServerByName(ctx context.Context, client *gophercloud.ServiceClient, name string) (*servers.Server, error) {

0 commit comments

Comments
 (0)