Skip to content

Commit f29e427

Browse files
committed
feat(occm): support multi region cluster
Currently, it supports only single auth section. Set the regions in config as: [Global] region=REGION1 regions=REGION1 regions=REGION2 regions=REGION3
1 parent d228854 commit f29e427

File tree

6 files changed

+171
-40
lines changed

6 files changed

+171
-40
lines changed

docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ The options in `Global` section are used for openstack-cloud-controller-manager
114114
Keystone user password. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required.
115115
* `region`
116116
Required. Keystone region name.
117+
* `regions`
118+
Optional. Keystone region name, which is used to specify regions for the cloud provider where the instance is running. Region is default region name. Can be specified multiple times.
117119
* `domain-id`
118120
Keystone user domain ID. If you are using [Keystone application credential](https://docs.openstack.org/keystone/latest/user/application_credentials.html), this option is not required.
119121
* `domain-name`
@@ -317,7 +319,7 @@ Although the openstack-cloud-controller-manager was initially implemented with N
317319
call](https://docs.openstack.org/api-ref/load-balancer/v2/?expanded=create-a-load-balancer-detail#creating-a-fully-populated-load-balancer).
318320
Setting this option to true will create loadbalancers using serial API calls which first create an unpopulated
319321
loadbalancer, then populate its listeners, pools and members. This is a compatibility option at the expense of
320-
increased load on the OpenStack API. Default: false
322+
increased load on the OpenStack API. Default: false
321323
322324
NOTE:
323325

pkg/client/client.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ type AuthOpts struct {
5353
UserDomainID string `gcfg:"user-domain-id" mapstructure:"user-domain-id" name:"os-userDomainID" value:"optional"`
5454
UserDomainName string `gcfg:"user-domain-name" mapstructure:"user-domain-name" name:"os-userDomainName" value:"optional"`
5555
Region string `name:"os-region"`
56+
Regions []string `name:"os-regions" value:"optional"`
5657
EndpointType gophercloud.Availability `gcfg:"os-endpoint-type" mapstructure:"os-endpoint-type" name:"os-endpointType" value:"optional"`
5758
CAFile string `gcfg:"ca-file" mapstructure:"ca-file" name:"os-certAuthorityPath" value:"optional"`
5859
TLSInsecure string `gcfg:"tls-insecure" mapstructure:"tls-insecure" name:"os-TLSInsecure" value:"optional" matches:"^true|false$"`
@@ -87,6 +88,7 @@ func LogCfg(authOpts AuthOpts) {
8788
klog.V(5).Infof("UserDomainID: %s", authOpts.UserDomainID)
8889
klog.V(5).Infof("UserDomainName: %s", authOpts.UserDomainName)
8990
klog.V(5).Infof("Region: %s", authOpts.Region)
91+
klog.V(5).Infof("Regions: %s", authOpts.Regions)
9092
klog.V(5).Infof("EndpointType: %s", authOpts.EndpointType)
9193
klog.V(5).Infof("CAFile: %s", authOpts.CAFile)
9294
klog.V(5).Infof("CertFile: %s", authOpts.CertFile)
@@ -232,6 +234,20 @@ func ReadClouds(authOpts *AuthOpts) error {
232234
authOpts.ApplicationCredentialName = replaceEmpty(authOpts.ApplicationCredentialName, cloud.AuthInfo.ApplicationCredentialName)
233235
authOpts.ApplicationCredentialSecret = replaceEmpty(authOpts.ApplicationCredentialSecret, cloud.AuthInfo.ApplicationCredentialSecret)
234236

237+
regions := strings.Split(authOpts.Region, ",")
238+
if len(regions) > 1 {
239+
authOpts.Region = regions[0]
240+
}
241+
242+
for _, r := range cloud.Regions {
243+
// Support only single auth section in clouds.yaml
244+
if r.Values.AuthInfo == nil && r.Name != authOpts.Region {
245+
regions = append(regions, r.Name)
246+
}
247+
}
248+
249+
authOpts.Regions = regions
250+
235251
return nil
236252
}
237253

pkg/csi/cinder/openstack/openstack.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) {
126126
}
127127
}
128128

129-
for _, global := range cfg.Global {
129+
for idx, global := range cfg.Global {
130130
// Update the config with data from clouds.yaml if UseClouds is enabled
131131
if global.UseClouds {
132132
if global.CloudsFile != "" {
@@ -138,6 +138,10 @@ func GetConfigFromFiles(configFilePaths []string) (Config, error) {
138138
}
139139
klog.V(5).Infof("Credentials are loaded from %s:", global.CloudsFile)
140140
}
141+
142+
if len(global.Regions) == 0 {
143+
cfg.Global[idx].Regions = []string{global.Region}
144+
}
141145
}
142146

143147
return cfg, nil

pkg/csi/cinder/openstack/openstack_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ rescan-on-resize=true`
112112
CAFile: fakeCAfile,
113113
TenantID: fakeTenantID,
114114
Region: fakeRegion,
115+
Regions: []string{fakeRegion},
115116
}
116117
expectedOpts.Global["cloud2"] = &client.AuthOpts{
117118
Username: fakeUserName_cloud2,
@@ -121,6 +122,7 @@ rescan-on-resize=true`
121122
CAFile: fakeCAfile_cloud2,
122123
TenantID: fakeTenantID_cloud2,
123124
Region: fakeRegion_cloud2,
125+
Regions: []string{fakeRegion_cloud2},
124126
}
125127
expectedOpts.Global["cloud3"] = &client.AuthOpts{
126128
Username: fakeUserName_cloud3,
@@ -130,6 +132,7 @@ rescan-on-resize=true`
130132
CAFile: fakeCAfile_cloud3,
131133
TenantID: fakeTenantID_cloud3,
132134
Region: fakeRegion_cloud3,
135+
Regions: []string{fakeRegion_cloud3},
133136
}
134137

135138
expectedOpts.BlockStorage.RescanOnResize = true
@@ -224,6 +227,7 @@ rescan-on-resize=true`
224227
CAFile: fakeCAfile,
225228
TenantID: fakeTenantID,
226229
Region: fakeRegion,
230+
Regions: []string{fakeRegion},
227231
EndpointType: gophercloud.AvailabilityPublic,
228232
UseClouds: true,
229233
CloudsFile: wd + "/fixtures/clouds.yaml",
@@ -237,6 +241,7 @@ rescan-on-resize=true`
237241
CAFile: fakeCAfile_cloud2,
238242
TenantID: fakeTenantID_cloud2,
239243
Region: fakeRegion_cloud2,
244+
Regions: []string{fakeRegion_cloud2},
240245
EndpointType: gophercloud.AvailabilityPublic,
241246
UseClouds: true,
242247
CloudsFile: wd + "/fixtures/clouds.yaml",
@@ -250,6 +255,7 @@ rescan-on-resize=true`
250255
CAFile: fakeCAfile_cloud3,
251256
TenantID: fakeTenantID_cloud3,
252257
Region: fakeRegion_cloud3,
258+
Regions: []string{fakeRegion_cloud3},
253259
EndpointType: gophercloud.AvailabilityPublic,
254260
UseClouds: true,
255261
CloudsFile: wd + "/fixtures/clouds.yaml",

pkg/openstack/instances.go

Lines changed: 133 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
sysos "os"
2323
"regexp"
24+
"slices"
2425
"strings"
2526

2627
"github.com/gophercloud/gophercloud/v2"
@@ -46,9 +47,9 @@ const (
4647

4748
// InstancesV2 encapsulates an implementation of InstancesV2 for OpenStack.
4849
type InstancesV2 struct {
49-
compute *gophercloud.ServiceClient
50-
network *gophercloud.ServiceClient
51-
region string
50+
compute map[string]*gophercloud.ServiceClient
51+
network map[string]*gophercloud.ServiceClient
52+
regions []string
5253
regionProviderID bool
5354
networkingOpts NetworkingOpts
5455
}
@@ -57,16 +58,25 @@ type InstancesV2 struct {
5758
func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) {
5859
klog.V(4).Info("openstack.Instancesv2() called")
5960

60-
compute, err := client.NewComputeV2(os.provider, os.epOpts)
61-
if err != nil {
62-
klog.Errorf("unable to access compute v2 API : %v", err)
63-
return nil, false
64-
}
61+
var err error
62+
compute := make(map[string]*gophercloud.ServiceClient, len(os.regions))
63+
network := make(map[string]*gophercloud.ServiceClient, len(os.regions))
6564

66-
network, err := client.NewNetworkV2(os.provider, os.epOpts)
67-
if err != nil {
68-
klog.Errorf("unable to access network v2 API : %v", err)
69-
return nil, false
65+
for _, region := range os.regions {
66+
opt := os.epOpts
67+
opt.Region = region
68+
69+
compute[region], err = client.NewComputeV2(os.provider, opt)
70+
if err != nil {
71+
klog.Errorf("unable to access compute v2 API : %v", err)
72+
return nil, false
73+
}
74+
75+
network[region], err = client.NewNetworkV2(os.provider, opt)
76+
if err != nil {
77+
klog.Errorf("unable to access network v2 API : %v", err)
78+
return nil, false
79+
}
7080
}
7181

7282
regionalProviderID := false
@@ -77,17 +87,23 @@ func (os *OpenStack) InstancesV2() (cloudprovider.InstancesV2, bool) {
7787
return &InstancesV2{
7888
compute: compute,
7989
network: network,
80-
region: os.epOpts.Region,
90+
regions: os.regions,
8191
regionProviderID: regionalProviderID,
8292
networkingOpts: os.networkingOpts,
8393
}, true
8494
}
8595

8696
// InstanceExists indicates whether a given node exists according to the cloud provider
8797
func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) {
88-
_, err := i.getInstance(ctx, node)
98+
klog.V(4).InfoS("openstack.InstanceExists() called", "node", klog.KObj(node),
99+
"providerID", node.Spec.ProviderID,
100+
"region", node.Labels[v1.LabelTopologyRegion])
101+
102+
_, _, err := i.getInstance(ctx, node)
89103
if err == cloudprovider.InstanceNotFound {
90-
klog.V(6).Infof("instance not found for node: %s", node.Name)
104+
klog.V(6).InfoS("Node is not found in cloud provider", "node", klog.KObj(node),
105+
"providerID", node.Spec.ProviderID,
106+
"region", node.Labels[v1.LabelTopologyRegion])
91107
return false, nil
92108
}
93109

@@ -100,7 +116,11 @@ func (i *InstancesV2) InstanceExists(ctx context.Context, node *v1.Node) (bool,
100116

101117
// InstanceShutdown returns true if the instance is shutdown according to the cloud provider.
102118
func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) {
103-
server, err := i.getInstance(ctx, node)
119+
klog.V(4).InfoS("openstack.InstanceShutdown() called", "node", klog.KObj(node),
120+
"providerID", node.Spec.ProviderID,
121+
"region", node.Labels[v1.LabelTopologyRegion])
122+
123+
server, _, err := i.getInstance(ctx, node)
104124
if err != nil {
105125
return false, err
106126
}
@@ -115,7 +135,7 @@ func (i *InstancesV2) InstanceShutdown(ctx context.Context, node *v1.Node) (bool
115135

116136
// InstanceMetadata returns the instance's metadata.
117137
func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
118-
srv, err := i.getInstance(ctx, node)
138+
srv, region, err := i.getInstance(ctx, node)
119139
if err != nil {
120140
return nil, err
121141
}
@@ -124,62 +144,138 @@ func (i *InstancesV2) InstanceMetadata(ctx context.Context, node *v1.Node) (*clo
124144
server = *srv
125145
}
126146

127-
instanceType, err := srvInstanceType(ctx, i.compute, &server)
147+
instanceType, err := srvInstanceType(ctx, i.compute[region], &server)
128148
if err != nil {
129149
return nil, err
130150
}
131151

132-
ports, err := getAttachedPorts(ctx, i.network, server.ID)
152+
ports, err := getAttachedPorts(ctx, i.network[region], server.ID)
133153
if err != nil {
134154
return nil, err
135155
}
136156

137-
addresses, err := nodeAddresses(ctx, &server, ports, i.network, i.networkingOpts)
157+
addresses, err := nodeAddresses(ctx, &server, ports, i.network[region], i.networkingOpts)
138158
if err != nil {
139159
return nil, err
140160
}
141161

142162
availabilityZone := util.SanitizeLabel(server.AvailabilityZone)
143163

144164
return &cloudprovider.InstanceMetadata{
145-
ProviderID: i.makeInstanceID(&server),
165+
ProviderID: i.makeInstanceID(&server, region),
146166
InstanceType: instanceType,
147167
NodeAddresses: addresses,
148168
Zone: availabilityZone,
149-
Region: i.region,
169+
Region: region,
150170
}, nil
151171
}
152172

153-
func (i *InstancesV2) makeInstanceID(srv *servers.Server) string {
173+
func (i *InstancesV2) makeInstanceID(srv *servers.Server, region string) string {
154174
if i.regionProviderID {
155-
return fmt.Sprintf("%s://%s/%s", ProviderName, i.region, srv.ID)
175+
return fmt.Sprintf("%s://%s/%s", ProviderName, region, srv.ID)
156176
}
157177
return fmt.Sprintf("%s:///%s", ProviderName, srv.ID)
158178
}
159179

160-
func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, error) {
161-
if node.Spec.ProviderID == "" {
162-
return getServerByName(ctx, i.compute, node.Name)
163-
}
180+
func (i *InstancesV2) getInstance(ctx context.Context, node *v1.Node) (*servers.Server, string, error) {
181+
klog.V(4).InfoS("openstack.getInstance() called", "node", klog.KObj(node),
182+
"providerID", node.Spec.ProviderID,
183+
"region", node.Labels[v1.LabelTopologyRegion])
164184

165185
instanceID, instanceRegion, err := instanceIDFromProviderID(node.Spec.ProviderID)
166186
if err != nil {
167-
return nil, err
187+
return nil, "", err
168188
}
169189

170-
if instanceRegion != "" && instanceRegion != i.region {
171-
return nil, fmt.Errorf("ProviderID \"%s\" didn't match supported region \"%s\"", node.Spec.ProviderID, i.region)
190+
if instanceRegion != "" {
191+
if slices.Contains(i.regions, instanceRegion) {
192+
return i.getInstanceByID(ctx, instanceID, []string{instanceRegion})
193+
}
194+
195+
return nil, "", fmt.Errorf("getInstance: ProviderID \"%s\" didn't match supported regions \"%s\"", node.Spec.ProviderID, strings.Join(i.regions, ","))
172196
}
173197

198+
// At this point we know that ProviderID is not properly set or it doesn't contain region information
199+
// We need to search for the instance in all regions
200+
var searchRegions []string
201+
202+
// We cannot trust the region label, so we need to check the region
203+
instanceRegion = node.Labels[v1.LabelTopologyRegion]
204+
if slices.Contains(i.regions, instanceRegion) {
205+
searchRegions = []string{instanceRegion}
206+
}
207+
208+
for _, r := range i.regions {
209+
if r != instanceRegion {
210+
searchRegions = append(searchRegions, r)
211+
}
212+
}
213+
214+
klog.V(6).InfoS("openstack.getInstance() trying to find the instance in regions", "node", klog.KObj(node),
215+
"instanceID", instanceID,
216+
"regions", strings.Join(searchRegions, ","))
217+
218+
if instanceID == "" {
219+
return i.getInstanceByName(ctx, node.Name, searchRegions)
220+
}
221+
222+
return i.getInstanceByID(ctx, instanceID, searchRegions)
223+
}
224+
225+
func (i *InstancesV2) getInstanceByID(ctx context.Context, instanceID string, searchRegions []string) (*servers.Server, string, error) {
226+
server := servers.Server{}
227+
174228
mc := metrics.NewMetricContext("server", "get")
175-
server, err := servers.Get(ctx, i.compute, instanceID).Extract()
176-
if mc.ObserveRequest(err) != nil {
177-
if errors.IsNotFound(err) {
178-
return nil, cloudprovider.InstanceNotFound
229+
for _, r := range searchRegions {
230+
err := servers.Get(ctx, i.compute[r], instanceID).ExtractInto(&server)
231+
if mc.ObserveRequest(err) != nil {
232+
if errors.IsNotFound(err) {
233+
continue
234+
}
235+
236+
return nil, "", err
179237
}
180-
return nil, err
238+
239+
return &server, r, nil
240+
}
241+
242+
return nil, "", cloudprovider.InstanceNotFound
243+
}
244+
245+
func (i *InstancesV2) getInstanceByName(ctx context.Context, name string, searchRegions []string) (*servers.Server, string, error) {
246+
opts := servers.ListOpts{
247+
Name: fmt.Sprintf("^%s$", regexp.QuoteMeta(name)),
248+
}
249+
250+
serverList := make([]servers.Server, 0, 1)
251+
mc := metrics.NewMetricContext("server", "list")
252+
253+
for _, r := range searchRegions {
254+
pager := servers.List(i.compute[r], opts)
255+
256+
err := pager.EachPage(ctx, func(_ context.Context, page pagination.Page) (bool, error) {
257+
s, err := servers.ExtractServers(page)
258+
if err != nil {
259+
return false, err
260+
}
261+
serverList = append(serverList, s...)
262+
if len(serverList) > 1 {
263+
return false, errors.ErrMultipleResults
264+
}
265+
return true, nil
266+
})
267+
if mc.ObserveRequest(err) != nil {
268+
return nil, "", err
269+
}
270+
271+
if len(serverList) == 0 {
272+
continue
273+
}
274+
275+
return &serverList[0], r, nil
181276
}
182-
return server, nil
277+
278+
return nil, "", cloudprovider.InstanceNotFound
183279
}
184280

185281
func getServerByName(ctx context.Context, client *gophercloud.ServiceClient, name string) (*servers.Server, error) {

0 commit comments

Comments
 (0)