Skip to content

Commit 6987094

Browse files
authored
perf(ec2): add parallel pagination by availability zone (#819)
1 parent d63df0a commit 6987094

4 files changed

Lines changed: 179 additions & 30 deletions

File tree

pkg/aws/client/compute.go

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,19 @@ package client
22

33
import (
44
"context"
5+
"sync"
56

67
"github.com/aws/aws-sdk-go-v2/aws"
78
awsEc2 "github.com/aws/aws-sdk-go-v2/service/ec2"
89
"github.com/aws/aws-sdk-go-v2/service/ec2/types"
10+
"golang.org/x/sync/errgroup"
911

1012
"github.com/grafana/cloudcost-exporter/pkg/aws/services/ec2"
1113
)
1214

1315
const maxResults = 1000
1416
const eksPVTagName = "kubernetes.io/created-for/pv/name"
17+
const maxConcurrentAZFetches = 5
1518

1619
var clusterTags = []string{"cluster", "eks:cluster-name", "aws:eks:cluster-name"}
1720

@@ -33,10 +36,58 @@ func (e *compute) describeRegions(ctx context.Context, allRegions bool) ([]types
3336
}
3437

3538
func (e *compute) listComputeInstances(ctx context.Context) ([]types.Reservation, error) {
39+
azs, err := e.getAvailabilityZones(ctx)
40+
if err != nil {
41+
return e.listComputeInstancesSequential(ctx, nil)
42+
}
43+
44+
if len(azs) <= 1 {
45+
return e.listComputeInstancesSequential(ctx, nil)
46+
}
47+
48+
// Fetch instances from each AZ in parallel to improve performance
49+
var mu sync.Mutex
50+
var allInstances []types.Reservation
51+
52+
eg, egCtx := errgroup.WithContext(ctx)
53+
eg.SetLimit(maxConcurrentAZFetches)
54+
55+
for _, az := range azs {
56+
az := az
57+
eg.Go(func() error {
58+
filter := []types.Filter{
59+
{
60+
Name: aws.String("availability-zone"),
61+
Values: []string{az},
62+
},
63+
}
64+
instances, err := e.listComputeInstancesSequential(egCtx, filter)
65+
if err != nil {
66+
return err
67+
}
68+
69+
mu.Lock()
70+
allInstances = append(allInstances, instances...)
71+
mu.Unlock()
72+
return nil
73+
})
74+
}
75+
76+
if err := eg.Wait(); err != nil {
77+
return nil, err
78+
}
79+
80+
return allInstances, nil
81+
}
82+
83+
func (e *compute) listComputeInstancesSequential(ctx context.Context, filters []types.Filter) ([]types.Reservation, error) {
3684
dii := &awsEc2.DescribeInstancesInput{
37-
// 1000 max results was decided arbitrarily. This can likely be tuned.
3885
MaxResults: aws.Int32(maxResults),
3986
}
87+
if len(filters) > 0 {
88+
dii.Filters = filters
89+
}
90+
4091
var instances []types.Reservation
4192
for {
4293
resp, err := e.client.DescribeInstances(ctx, dii)
@@ -53,6 +104,29 @@ func (e *compute) listComputeInstances(ctx context.Context) ([]types.Reservation
53104
return instances, nil
54105
}
55106

107+
func (e *compute) getAvailabilityZones(ctx context.Context) ([]string, error) {
108+
resp, err := e.client.DescribeAvailabilityZones(ctx, &awsEc2.DescribeAvailabilityZonesInput{
109+
Filters: []types.Filter{
110+
{
111+
Name: aws.String("state"),
112+
Values: []string{"available"},
113+
},
114+
},
115+
})
116+
if err != nil {
117+
return nil, err
118+
}
119+
120+
azs := make([]string, 0, len(resp.AvailabilityZones))
121+
for _, az := range resp.AvailabilityZones {
122+
if az.ZoneName != nil {
123+
azs = append(azs, *az.ZoneName)
124+
}
125+
}
126+
127+
return azs, nil
128+
}
129+
56130
// DISK
57131

58132
func (e *compute) listEBSVolumes(ctx context.Context) ([]types.Volume, error) {

pkg/aws/client/compute_test.go

Lines changed: 83 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,56 @@ import (
1414

1515
func TestListComputeInstances(t *testing.T) {
1616
tests := map[string]struct {
17-
ctx context.Context
18-
DescribeInstances func(ctx context.Context, e *ec2.DescribeInstancesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error)
19-
err error
20-
want []types.Reservation
21-
expectedCalls int
17+
ctx context.Context
18+
DescribeInstances func(ctx context.Context, e *ec2.DescribeInstancesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error)
19+
DescribeAvailabilityZones func(ctx context.Context, input *ec2.DescribeAvailabilityZonesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeAvailabilityZonesOutput, error)
20+
err error
21+
want []types.Reservation
22+
expectedCalls int
2223
}{
2324
"No instance should return nothing": {
2425
ctx: t.Context(),
2526
DescribeInstances: func(ctx context.Context, e *ec2.DescribeInstancesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) {
2627
return &ec2.DescribeInstancesOutput{}, nil
2728
},
29+
DescribeAvailabilityZones: func(ctx context.Context, input *ec2.DescribeAvailabilityZonesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeAvailabilityZonesOutput, error) {
30+
return &ec2.DescribeAvailabilityZonesOutput{
31+
AvailabilityZones: []types.AvailabilityZone{
32+
{ZoneName: aws.String("us-east-1a")},
33+
{ZoneName: aws.String("us-east-1b")},
34+
},
35+
}, nil
36+
},
2837
err: nil,
2938
want: nil,
30-
expectedCalls: 1,
39+
expectedCalls: 2,
3140
},
3241
"Single instance should return a single instance": {
3342
ctx: t.Context(),
3443
DescribeInstances: func(ctx context.Context, e *ec2.DescribeInstancesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) {
35-
return &ec2.DescribeInstancesOutput{
36-
Reservations: []types.Reservation{
37-
{
38-
Instances: []types.Instance{
39-
{
40-
InstanceId: aws.String("i-1234567890abcdef0"),
41-
InstanceType: types.InstanceTypeA1Xlarge,
44+
// Check which AZ filter is applied and return appropriate instances
45+
if len(e.Filters) > 0 && e.Filters[0].Values[0] == "us-east-1a" {
46+
return &ec2.DescribeInstancesOutput{
47+
Reservations: []types.Reservation{
48+
{
49+
Instances: []types.Instance{
50+
{
51+
InstanceId: aws.String("i-1234567890abcdef0"),
52+
InstanceType: types.InstanceTypeA1Xlarge,
53+
},
4254
},
4355
},
4456
},
57+
}, nil
58+
}
59+
// Return empty for us-east-1b
60+
return &ec2.DescribeInstancesOutput{}, nil
61+
},
62+
DescribeAvailabilityZones: func(ctx context.Context, input *ec2.DescribeAvailabilityZonesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeAvailabilityZonesOutput, error) {
63+
return &ec2.DescribeAvailabilityZonesOutput{
64+
AvailabilityZones: []types.AvailabilityZone{
65+
{ZoneName: aws.String("us-east-1a")},
66+
{ZoneName: aws.String("us-east-1b")},
4567
},
4668
}, nil
4769
},
@@ -56,45 +78,67 @@ func TestListComputeInstances(t *testing.T) {
5678
},
5779
},
5880
},
59-
expectedCalls: 1,
81+
expectedCalls: 2,
6082
},
6183
"Ensure errors propagate": {
6284
ctx: t.Context(),
6385
DescribeInstances: func(ctx context.Context, e *ec2.DescribeInstancesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) {
86+
// Return error for any AZ
6487
return nil, assert.AnError
6588
},
89+
DescribeAvailabilityZones: func(ctx context.Context, input *ec2.DescribeAvailabilityZonesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeAvailabilityZonesOutput, error) {
90+
return &ec2.DescribeAvailabilityZonesOutput{
91+
AvailabilityZones: []types.AvailabilityZone{
92+
{ZoneName: aws.String("us-east-1a")},
93+
},
94+
}, nil
95+
},
6696
err: assert.AnError,
6797
want: nil,
6898
expectedCalls: 1,
6999
},
70100
"NextToken should return multiple instances": {
71101
ctx: t.Context(),
72102
DescribeInstances: func(ctx context.Context, e *ec2.DescribeInstancesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeInstancesOutput, error) {
73-
if e.NextToken == nil {
103+
// Check which AZ filter is applied
104+
if len(e.Filters) > 0 && e.Filters[0].Values[0] == "us-east-1a" {
105+
// For us-east-1a, return instances with pagination
106+
if e.NextToken == nil {
107+
return &ec2.DescribeInstancesOutput{
108+
NextToken: aws.String("token"),
109+
Reservations: []types.Reservation{
110+
{
111+
Instances: []types.Instance{
112+
{
113+
InstanceId: aws.String("i-1234567890abcdef0"),
114+
InstanceType: types.InstanceTypeA1Xlarge,
115+
},
116+
},
117+
},
118+
},
119+
}, nil
120+
}
74121
return &ec2.DescribeInstancesOutput{
75-
NextToken: aws.String("token"),
76122
Reservations: []types.Reservation{
77123
{
78124
Instances: []types.Instance{
79125
{
80-
InstanceId: aws.String("i-1234567890abcdef0"),
126+
InstanceId: aws.String("i-1234567890abcdef1"),
81127
InstanceType: types.InstanceTypeA1Xlarge,
82128
},
83129
},
84130
},
85131
},
86132
}, nil
87133
}
88-
return &ec2.DescribeInstancesOutput{
89-
Reservations: []types.Reservation{
90-
{
91-
Instances: []types.Instance{
92-
{
93-
InstanceId: aws.String("i-1234567890abcdef0"),
94-
InstanceType: types.InstanceTypeA1Xlarge,
95-
},
96-
},
97-
},
134+
// Return empty for us-east-1b
135+
return &ec2.DescribeInstancesOutput{}, nil
136+
},
137+
DescribeAvailabilityZones: func(ctx context.Context, input *ec2.DescribeAvailabilityZonesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeAvailabilityZonesOutput, error) {
138+
return &ec2.DescribeAvailabilityZonesOutput{
139+
AvailabilityZones: []types.AvailabilityZone{
140+
{ZoneName: aws.String("us-east-1a")},
141+
{ZoneName: aws.String("us-east-1b")},
98142
},
99143
}, nil
100144
},
@@ -112,19 +156,29 @@ func TestListComputeInstances(t *testing.T) {
112156
{
113157
Instances: []types.Instance{
114158
{
115-
InstanceId: aws.String("i-1234567890abcdef0"),
159+
InstanceId: aws.String("i-1234567890abcdef1"),
116160
InstanceType: types.InstanceTypeA1Xlarge,
117161
},
118162
},
119163
},
120164
},
121-
expectedCalls: 2,
165+
expectedCalls: 3,
122166
},
123167
}
124168
for name, tt := range tests {
125169
t.Run(name, func(t *testing.T) {
126170
ctrl := gomock.NewController(t)
127171
client := mocks.NewMockEC2(ctrl)
172+
173+
// Mock DescribeAvailabilityZones call
174+
if tt.DescribeAvailabilityZones != nil {
175+
client.EXPECT().
176+
DescribeAvailabilityZones(gomock.Any(), gomock.Any(), gomock.Any()).
177+
DoAndReturn(tt.DescribeAvailabilityZones).
178+
Times(1)
179+
}
180+
181+
// Mock DescribeInstances calls
128182
client.EXPECT().
129183
DescribeInstances(gomock.Any(), gomock.Any(), gomock.Any()).
130184
DoAndReturn(tt.DescribeInstances).

pkg/aws/services/ec2/ec2.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ type EC2 interface {
1313
DescribeRegions(ctx context.Context, e *ec2.DescribeRegionsInput, optFns ...func(*ec2.Options)) (*ec2.DescribeRegionsOutput, error)
1414
DescribeSpotPriceHistory(ctx context.Context, input *ec2.DescribeSpotPriceHistoryInput, optFns ...func(*ec2.Options)) (*ec2.DescribeSpotPriceHistoryOutput, error)
1515
DescribeVolumes(context.Context, *ec2.DescribeVolumesInput, ...func(*ec2.Options)) (*ec2.DescribeVolumesOutput, error)
16+
DescribeAvailabilityZones(ctx context.Context, input *ec2.DescribeAvailabilityZonesInput, optFns ...func(*ec2.Options)) (*ec2.DescribeAvailabilityZonesOutput, error)
1617
}

pkg/aws/services/mocks/ec2.go

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)