Skip to content

Commit 2ea04e3

Browse files
committed
support multi region cluster
1 parent 75b1fbb commit 2ea04e3

File tree

6 files changed

+159
-55
lines changed

6 files changed

+159
-55
lines changed

docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ The options in `Global` section are used for openstack-cloud-controller-manager
114114
Keystone user password. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required.
115115
* `region`
116116
Required. Keystone region name.
117+
* `regions`
118+
Optional. Keystone region name, which is used to specify regions for the cloud provider where the instance is running. Region is default region name. Can be specified multiple times.
117119
* `domain-id`
118120
Keystone user domain ID. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required.
119121
* `domain-name`
@@ -317,7 +319,7 @@ Although the openstack-cloud-controller-manager was initially implemented with N
317319
call](https://docs.openstack.org/api-ref/load-balancer/v2/?expanded=create-a-load-balancer-detail#creating-a-fully-populated-load-balancer).
318320
Setting this option to true will create loadbalancers using serial API calls which first create an unpopulated
319321
loadbalancer, then populate its listeners, pools and members. This is a compatibility option at the expense of
320-
increased load on the OpenStack API. Default: false
322+
increased load on the OpenStack API. Default: false
321323
322324
NOTE:
323325

pkg/client/client.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ type AuthOpts struct {
5353
UserDomainID string `gcfg:"user-domain-id" mapstructure:"user-domain-id" name:"os-userDomainID" value:"optional"`
5454
UserDomainName string `gcfg:"user-domain-name" mapstructure:"user-domain-name" name:"os-userDomainName" value:"optional"`
5555
Region string `name:"os-region"`
56+
Regions []string `name:"os-regions" value:"optional"`
5657
EndpointType gophercloud.Availability `gcfg:"os-endpoint-type" mapstructure:"os-endpoint-type" name:"os-endpointType" value:"optional"`
5758
CAFile string `gcfg:"ca-file" mapstructure:"ca-file" name:"os-certAuthorityPath" value:"optional"`
5859
TLSInsecure string `gcfg:"tls-insecure" mapstructure:"tls-insecure" name:"os-TLSInsecure" value:"optional" matches:"^true|false$"`
@@ -87,6 +88,7 @@ func LogCfg(authOpts AuthOpts) {
8788
klog.V(5).Infof("UserDomainID: %s", authOpts.UserDomainID)
8889
klog.V(5).Infof("UserDomainName: %s", authOpts.UserDomainName)
8990
klog.V(5).Infof("Region: %s", authOpts.Region)
91+
klog.V(5).Infof("Regions: %s", authOpts.Regions)
9092
klog.V(5).Infof("EndpointType: %s", authOpts.EndpointType)
9193
klog.V(5).Infof("CAFile: %s", authOpts.CAFile)
9294
klog.V(5).Infof("CertFile: %s", authOpts.CertFile)
@@ -232,6 +234,20 @@ func ReadClouds(authOpts *AuthOpts) error {
232234
authOpts.ApplicationCredentialName = replaceEmpty(authOpts.ApplicationCredentialName, cloud.AuthInfo.ApplicationCredentialName)
233235
authOpts.ApplicationCredentialSecret = replaceEmpty(authOpts.ApplicationCredentialSecret, cloud.AuthInfo.ApplicationCredentialSecret)
234236

237+
regions := strings.Split(authOpts.Region, ",")
238+
if len(regions) > 1 {
239+
authOpts.Region = regions[0]
240+
}
241+
242+
for _, r := range cloud.Regions {
243+
// Support only single auth section in clouds.yaml
244+
if r.Values.AuthInfo == nil && r.Name != authOpts.Region {
245+
regions = append(regions, r.Name)
246+
}
247+
}
248+
249+
authOpts.Regions = regions
250+
235251
return nil
236252
}
237253

pkg/csi/cinder/openstack/openstack.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) {
124124
}
125125
}
126126

127-
for _, global := range cfg.Global {
127+
for idx, global := range cfg.Global {
128128
// Update the config with data from clouds.yaml if UseClouds is enabled
129129
if global.UseClouds {
130130
if global.CloudsFile != "" {
@@ -136,6 +136,10 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) {
136136
}
137137
klog.V(5).Infof("Credentials are loaded from %s:", global.CloudsFile)
138138
}
139+
140+
if len(global.Regions) == 0 {
141+
cfg.Global[idx].Regions = []string{global.Region}
142+
}
139143
}
140144

141145
return cfg, nil

pkg/csi/cinder/openstack/openstack_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ rescan-on-resize=true`
112112
CAFile: fakeCAfile,
113113
TenantID: fakeTenantID,
114114
Region: fakeRegion,
115+
Regions: []string{fakeRegion},
115116
}
116117
expectedOpts.Global["cloud2"] = &client.AuthOpts{
117118
Username: fakeUserName_cloud2,
@@ -121,6 +122,7 @@ rescan-on-resize=true`
121122
CAFile: fakeCAfile_cloud2,
122123
TenantID: fakeTenantID_cloud2,
123124
Region: fakeRegion_cloud2,
125+
Regions: []string{fakeRegion_cloud2},
124126
}
125127
expectedOpts.Global["cloud3"] = &client.AuthOpts{
126128
Username: fakeUserName_cloud3,
@@ -130,6 +132,7 @@ rescan-on-resize=true`
130132
CAFile: fakeCAfile_cloud3,
131133
TenantID: fakeTenantID_cloud3,
132134
Region: fakeRegion_cloud3,
135+
Regions: []string{fakeRegion_cloud3},
133136
}
134137

135138
expectedOpts.BlockStorage.RescanOnResize = true
@@ -224,6 +227,7 @@ rescan-on-resize=true`
224227
CAFile: fakeCAfile,
225228
TenantID: fakeTenantID,
226229
Region: fakeRegion,
230+
Regions: []string{fakeRegion},
227231
EndpointType: gophercloud.AvailabilityPublic,
228232
UseClouds: true,
229233
CloudsFile: wd + "/fixtures/clouds.yaml",
@@ -237,6 +241,7 @@ rescan-on-resize=true`
237241
CAFile: fakeCAfile_cloud2,
238242
TenantID: fakeTenantID_cloud2,
239243
Region: fakeRegion_cloud2,
244+
Regions: []string{fakeRegion_cloud2},
240245
EndpointType: gophercloud.AvailabilityPublic,
241246
UseClouds: true,
242247
CloudsFile: wd + "/fixtures/clouds.yaml",
@@ -250,6 +255,7 @@ rescan-on-resize=true`
250255
CAFile: fakeCAfile_cloud3,
251256
TenantID: fakeTenantID_cloud3,
252257
Region: fakeRegion_cloud3,
258+
Regions: []string{fakeRegion_cloud3},
253259
EndpointType: gophercloud.AvailabilityPublic,
254260
UseClouds: true,
255261
CloudsFile: wd + "/fixtures/clouds.yaml",

pkg/openstack/instancesv2.go

Lines changed: 122 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020
"context"
2121
"fmt"
2222
sysos "os"
23+
"slices"
24+
"strings"
2325

2426
"github.com/gophercloud/gophercloud/v2"
2527
"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/servers"
@@ -33,9 +35,9 @@ import (
3335

3436
// InstancesV2 encapsulates an implementation of InstancesV2 for OpenStack.
3537
type InstancesV2 struct {
36-
compute *gophercloud.ServiceClient
37-
network *gophercloud.ServiceClient
38-
region string
38+
compute map[string]*gophercloud.ServiceClient
39+
network map[string]*gophercloud.ServiceClient
40+
regions []string
3941
regionProviderID bool
4042
networkingOpts NetworkingOpts
4143
}
@@ -51,16 +53,25 @@ func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) {
5153
func (os *OpenStack) instancesv2() (*InstancesV2, bool) {
5254
klog.V(4).Info("openstack.Instancesv2() called")
5355

54-
compute, err := client.NewComputeV2(os.provider, os.epOpts)
55-
if err != nil {
56-
klog.Errorf("unable to access compute v2 API : %v", err)
57-
return nil, false
58-
}
56+
var err error
57+
compute := make(map[string]*gophercloud.ServiceClient, len(os.regions))
58+
network := make(map[string]*gophercloud.ServiceClient, len(os.regions))
5959

60-
network, err := client.NewNetworkV2(os.provider, os.epOpts)
61-
if err != nil {
62-
klog.Errorf("unable to access network v2 API : %v", err)
63-
return nil, false
60+
for _, region := range os.regions {
61+
opt := os.epOpts
62+
opt.Region = region
63+
64+
compute[region], err = client.NewComputeV2(os.provider, opt)
65+
if err != nil {
66+
klog.Errorf("unable to access compute v2 API : %v", err)
67+
return nil, false
68+
}
69+
70+
network[region], err = client.NewNetworkV2(os.provider, opt)
71+
if err != nil {
72+
klog.Errorf("unable to access network v2 API : %v", err)
73+
return nil, false
74+
}
6475
}
6576

6677
regionalProviderID := false
@@ -71,17 +82,23 @@ func (os *OpenStack) instancesv2() (*InstancesV2, bool) {
7182
return &InstancesV2{
7283
compute: compute,
7384
network: network,
74-
region: os.epOpts.Region,
85+
regions: os.regions,
7586
regionProviderID: regionalProviderID,
7687
networkingOpts: os.networkingOpts,
7788
}, true
7889
}
7990

8091
// InstanceExists indicates whether a given node exists according to the cloud provider
8192
func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) {
82-
_, err := i.getInstance(ctx, node)
93+
klog.V(4).InfoS("openstack.InstanceExists() called", "node", klog.KObj(node),
94+
"providerID", node.Spec.ProviderID,
95+
"region", node.Labels[v1.LabelTopologyRegion])
96+
97+
_, _, err := i.getInstance(ctx, node)
8398
if err == cloudprovider.InstanceNotFound {
84-
klog.V(6).Infof("instance not found for node: %s", node.Name)
99+
klog.V(6).InfoS("Node is not found in cloud provider", "node", klog.KObj(node),
100+
"providerID", node.Spec.ProviderID,
101+
"region", node.Labels[v1.LabelTopologyRegion])
85102
return false, nil
86103
}
87104

@@ -94,7 +111,11 @@ func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool,
94111

95112
// InstanceShutdown returns true if the instance is shutdown according to the cloud provider.
96113
func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) {
97-
server, err := i.getInstance(ctx, node)
114+
klog.V(4).InfoS("openstack.InstanceShutdown() called", "node", klog.KObj(node),
115+
"providerID", node.Spec.ProviderID,
116+
"region", node.Labels[v1.LabelTopologyRegion])
117+
118+
server, _, err := i.getInstance(ctx, node)
98119
if err != nil {
99120
return false, err
100121
}
@@ -109,7 +130,7 @@ func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool
109130

110131
// InstanceMetadata returns the instance's metadata.
111132
func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
112-
srv, err := i.getInstance(ctx, node)
133+
srv, region, err := i.getInstance(ctx, node)
113134
if err != nil {
114135
return nil, err
115136
}
@@ -118,77 +139,125 @@ func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*clo
118139
server = *srv
119140
}
120141

121-
instanceType, err := srvInstanceType(i.compute, &server)
142+
instanceType, err := srvInstanceType(i.compute[region], &server)
122143
if err != nil {
123144
return nil, err
124145
}
125146

126-
ports, err := getAttachedPorts(i.network, server.ID)
147+
ports, err := getAttachedPorts(i.network[region], server.ID)
127148
if err != nil {
128149
return nil, err
129150
}
130151

131-
addresses, err := nodeAddresses(&server, ports, i.network, i.networkingOpts)
152+
addresses, err := nodeAddresses(&server, ports, i.network[region], i.networkingOpts)
132153
if err != nil {
133154
return nil, err
134155
}
135156

136157
return &cloudprovider.InstanceMetadata{
137-
ProviderID: i.makeInstanceID(&server),
158+
ProviderID: i.makeInstanceID(&server, region),
138159
InstanceType: instanceType,
139160
NodeAddresses: addresses,
140161
Zone: server.AvailabilityZone,
141-
Region: i.region,
162+
Region: region,
142163
}, nil
143164
}
144165

145-
func (i *InstancesV2) makeInstanceID(srv *servers.Server) string {
166+
func (i *InstancesV2) makeInstanceID(srv *servers.Server, region string) string {
146167
if i.regionProviderID {
147-
return fmt.Sprintf("%s://%s/%s", ProviderName, i.region, srv.ID)
168+
return fmt.Sprintf("%s://%s/%s", ProviderName, region, srv.ID)
148169
}
149170
return fmt.Sprintf("%s:///%s", ProviderName, srv.ID)
150171
}
151172

152-
func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, error) {
153-
if node.Spec.ProviderID == "" {
154-
opt := servers.ListOpts{
155-
Name: fmt.Sprintf("^%s$", node.Name),
173+
func (i *InstancesV2) getInstance(_ context.Context, node *v1.Node) (*servers.Server, string, error) {
174+
klog.V(4).InfoS("openstack.getInstance() called", "node", klog.KObj(node),
175+
"providerID", node.Spec.ProviderID,
176+
"region", node.Labels[v1.LabelTopologyRegion])
177+
178+
instanceID, instanceRegion, err := instanceIDFromProviderID(node.Spec.ProviderID)
179+
if err == nil && instanceRegion != "" {
180+
if slices.Contains(i.regions, instanceRegion) {
181+
return i.getInstanceByID(instanceID, []string{instanceRegion})
182+
}
183+
184+
return nil, "", fmt.Errorf("getInstance: ProviderID \"%s\" didn't match supported regions \"%s\"", node.Spec.ProviderID, strings.Join(i.regions, ","))
185+
}
186+
187+
// At this point we know that ProviderID is not properly set or it doesn't contain region information
188+
// We need to search for the instance in all regions
189+
var searchRegions []string
190+
191+
// We cannot trust the region label, so we need to check the region
192+
instanceRegion = node.Labels[v1.LabelTopologyRegion]
193+
if slices.Contains(i.regions, instanceRegion) {
194+
searchRegions = []string{instanceRegion}
195+
}
196+
197+
for _, r := range i.regions {
198+
if r != instanceRegion {
199+
searchRegions = append(searchRegions, r)
200+
}
201+
}
202+
203+
klog.V(6).InfoS("openstack.getInstance() trying to find the instance in regions", "node", klog.KObj(node),
204+
"instanceID", instanceID,
205+
"regions", strings.Join(searchRegions, ","))
206+
207+
if instanceID == "" {
208+
return i.getInstanceByName(node, searchRegions)
209+
}
210+
211+
return i.getInstanceByID(instanceID, searchRegions)
212+
}
213+
214+
func (i *InstancesV2) getInstanceByID(instanceID string, searchRegions []string) (*servers.Server, string, error) {
215+
server := servers.Server{}
216+
217+
mc := metrics.NewMetricContext("server", "get")
218+
for _, r := range searchRegions {
219+
err := servers.Get(context.TODO(), i.compute[r], instanceID).ExtractInto(&server)
220+
if mc.ObserveRequest(err) != nil {
221+
if errors.IsNotFound(err) {
222+
continue
223+
}
224+
225+
return nil, "", err
156226
}
157-
mc := metrics.NewMetricContext("server", "list")
158-
allPages, err := servers.List(i.compute, opt).AllPages(context.TODO())
227+
228+
return &server, r, nil
229+
}
230+
231+
return nil, "", cloudprovider.InstanceNotFound
232+
}
233+
234+
func (i *InstancesV2) getInstanceByName(node *v1.Node, searchRegions []string) (*servers.Server, string, error) {
235+
opt := servers.ListOpts{
236+
Name: fmt.Sprintf("^%s$", node.Name),
237+
}
238+
239+
serverList := make([]servers.Server, 0, 1)
240+
mc := metrics.NewMetricContext("server", "list")
241+
242+
for _, r := range searchRegions {
243+
allPages, err := servers.List(i.compute[r], opt).AllPages(context.TODO())
159244
if mc.ObserveRequest(err) != nil {
160-
return nil, fmt.Errorf("error listing servers %v: %v", opt, err)
245+
return nil, "", fmt.Errorf("error listing servers %v: %v", opt, err)
161246
}
162247

163-
serverList, err := servers.ExtractServers(allPages)
248+
err = servers.ExtractServersInto(allPages, &serverList)
164249
if err != nil {
165-
return nil, fmt.Errorf("error extracting servers from pages: %v", err)
250+
return nil, "", fmt.Errorf("error extracting servers from pages: %v", err)
166251
}
167252
if len(serverList) == 0 {
168-
return nil, cloudprovider.InstanceNotFound
253+
continue
169254
}
170255
if len(serverList) > 1 {
171-
return nil, fmt.Errorf("getInstance: multiple instances found")
256+
return nil, "", fmt.Errorf("getInstanceByName: multiple instances found")
172257
}
173-
return &serverList[0], nil
174-
}
175-
176-
instanceID, instanceRegion, err := instanceIDFromProviderID(node.Spec.ProviderID)
177-
if err != nil {
178-
return nil, err
179-
}
180258

181-
if instanceRegion != "" && instanceRegion != i.region {
182-
return nil, fmt.Errorf("ProviderID \"%s\" didn't match supported region \"%s\"", node.Spec.ProviderID, i.region)
259+
return &serverList[0], r, nil
183260
}
184261

185-
mc := metrics.NewMetricContext("server", "get")
186-
server, err := servers.Get(context.TODO(), i.compute, instanceID).Extract()
187-
if mc.ObserveRequest(err) != nil {
188-
if errors.IsNotFound(err) {
189-
return nil, cloudprovider.InstanceNotFound
190-
}
191-
return nil, err
192-
}
193-
return server, nil
262+
return nil, "", cloudprovider.InstanceNotFound
194263
}

0 commit comments

Comments
 (0)