diff --git a/pkg/cloudprovider/ibm/catalog.go b/pkg/cloudprovider/ibm/catalog.go index f8e912f..b156bdf 100644 --- a/pkg/cloudprovider/ibm/catalog.go +++ b/pkg/cloudprovider/ibm/catalog.go @@ -147,7 +147,9 @@ func (c *GlobalCatalogClient) GetPricing(ctx context.Context, catalogEntryID str ID: &catalogEntryID, } - pricingData, _, err := sdkClient.GetPricing(pricingOptions) + pricingData, err := DoWithRateLimitRetry(ctx, func() (*globalcatalogv1.PricingGet, *core.DetailedResponse, error) { + return sdkClient.GetPricing(pricingOptions) + }) if err != nil { return nil, fmt.Errorf("calling GetPricing API: %w", err) } diff --git a/pkg/cloudprovider/ibm/ratelimit_retry.go b/pkg/cloudprovider/ibm/ratelimit_retry.go new file mode 100644 index 0000000..92e5747 --- /dev/null +++ b/pkg/cloudprovider/ibm/ratelimit_retry.go @@ -0,0 +1,81 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package ibm + +import ( + "context" + "fmt" + "net/http" + "strconv" + "time" + + "github.com/IBM/go-sdk-core/v5/core" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +const ( + initialBackoff = 100 * time.Millisecond + maxBackoff = 30 * time.Second + maxAttempts = 5 + retryAfterKey = "Retry-After" +) + +// DoWithRateLimitRetry handles HTTP 429 rate limiting with exponential backoff. +// It performs up to maxAttempts (1 initial + 4 retries), respecting the +// Retry-After header when present (seconds or HTTP-date per RFC 7231). +func DoWithRateLimitRetry[T any](ctx context.Context, fn func() (T, *core.DetailedResponse, error)) (T, error) { + var zero T + backoff := initialBackoff + + for attempt := 0; attempt < maxAttempts; attempt++ { + result, response, err := fn() + + // Success or non-rate-limit error + if response == nil || response.StatusCode != http.StatusTooManyRequests { + return result, err + } + + // Use Retry-After for this sleep only; keep exponential backoff progression separate. + // Retry-After can be delay-seconds (integer) or HTTP-date + delay := backoff + if response.Headers != nil { + if ra := response.Headers.Get(retryAfterKey); ra != "" { + if secs, parseErr := strconv.Atoi(ra); parseErr == nil && secs > 0 { + delay = time.Duration(secs) * time.Second + } else if t, parseErr := http.ParseTime(ra); parseErr == nil { + if d := time.Until(t); d > 0 { + delay = d + } + } + } + } + + log.FromContext(ctx).V(1).Info("rate limited (429), sleeping before retry", "delay", delay, "attempt", attempt+1, "maxAttempts", maxAttempts) + + timer := time.NewTimer(delay) + select { + case <-ctx.Done(): + if !timer.Stop() { + <-timer.C + } + return zero, ctx.Err() + case <-timer.C: + backoff = min(backoff*2, maxBackoff) + } + } + + return zero, fmt.Errorf("rate limited after %d attempts", maxAttempts) +} diff --git a/pkg/cloudprovider/ibm/ratelimit_retry_test.go b/pkg/cloudprovider/ibm/ratelimit_retry_test.go new file mode 100644 index 0000000..71f7ae1 --- /dev/null +++ b/pkg/cloudprovider/ibm/ratelimit_retry_test.go @@ -0,0 +1,167 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package ibm + +import ( + "context" + "net/http" + "testing" + "time" + + "github.com/IBM/go-sdk-core/v5/core" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDoWithRateLimitRetry_SuccessFirstTry(t *testing.T) { + ctx := context.Background() + calls := 0 + result, err := DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + return "ok", &core.DetailedResponse{StatusCode: 200}, nil + }) + require.NoError(t, err) + assert.Equal(t, "ok", result) + assert.Equal(t, 1, calls) +} + +func TestDoWithRateLimitRetry_Non429ErrorReturnsImmediately(t *testing.T) { + ctx := context.Background() + calls := 0 + apiErr := assert.AnError + result, err := DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + return "", &core.DetailedResponse{StatusCode: 500}, apiErr + }) + require.Error(t, err) + assert.ErrorIs(t, err, apiErr) + assert.Equal(t, "", result) + assert.Equal(t, 1, calls) +} + +func TestDoWithRateLimitRetry_NilResponseReturnsImmediately(t *testing.T) { + ctx := context.Background() + calls := 0 + result, err := DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + return "ok", nil, nil + }) + require.NoError(t, err) + assert.Equal(t, "ok", result) + assert.Equal(t, 1, calls) +} + +func TestDoWithRateLimitRetry_429ThenSuccess(t *testing.T) { + ctx := context.Background() + calls := 0 + result, err := DoWithRateLimitRetry(ctx, func() (int, *core.DetailedResponse, error) { + calls++ + if calls == 1 { + return 0, &core.DetailedResponse{StatusCode: 429}, nil + } + return 42, &core.DetailedResponse{StatusCode: 200}, nil + }) + require.NoError(t, err) + assert.Equal(t, 42, result) + assert.Equal(t, 2, calls) +} + +func TestDoWithRateLimitRetry_ExhaustedRetries(t *testing.T) { + ctx := context.Background() + calls := 0 + result, err := DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + return "", &core.DetailedResponse{StatusCode: 429}, nil + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "rate limited after 5 attempts") + assert.Equal(t, "", result) + assert.Equal(t, 5, calls) +} + +func TestDoWithRateLimitRetry_ContextCanceledDuringBackoff(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + calls := 0 + done := make(chan struct{}) + var result string + var err error + go func() { + result, err = DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + return "", &core.DetailedResponse{StatusCode: 429}, nil + }) + close(done) + }() + cancel() + <-done + require.Error(t, err) + assert.ErrorIs(t, err, context.Canceled) + assert.Equal(t, "", result) + assert.Equal(t, 1, calls) +} + +func TestDoWithRateLimitRetry_RespectsRetryAfterHeaderSeconds(t *testing.T) { + ctx := context.Background() + calls := 0 + headers := http.Header{} + headers.Set("Retry-After", "1") + result, err := DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + if calls == 1 { + return "", &core.DetailedResponse{StatusCode: 429, Headers: headers}, nil + } + return "ok", &core.DetailedResponse{StatusCode: 200}, nil + }) + require.NoError(t, err) + assert.Equal(t, "ok", result) + assert.Equal(t, 2, calls) +} + +func TestDoWithRateLimitRetry_RespectsRetryAfterHeaderHTTPDate(t *testing.T) { + ctx := context.Background() + calls := 0 + headers := http.Header{} + // HTTP-date format (RFC 1123): 1 second from now so delay is short + headers.Set("Retry-After", time.Now().Add(1*time.Second).UTC().Format(time.RFC1123)) + result, err := DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + if calls == 1 { + return "", &core.DetailedResponse{StatusCode: 429, Headers: headers}, nil + } + return "ok", &core.DetailedResponse{StatusCode: 200}, nil + }) + require.NoError(t, err) + assert.Equal(t, "ok", result) + assert.Equal(t, 2, calls) +} + +func TestDoWithRateLimitRetry_InvalidRetryAfterFallsBackToBackoff(t *testing.T) { + ctx := context.Background() + calls := 0 + headers := http.Header{} + // Invalid Retry-After value (not a number, not a valid HTTP-date) -> should use exponential backoff + headers.Set("Retry-After", "not-a-number-or-date") + result, err := DoWithRateLimitRetry(ctx, func() (string, *core.DetailedResponse, error) { + calls++ + if calls == 1 { + return "", &core.DetailedResponse{StatusCode: 429, Headers: headers}, nil + } + return "ok", &core.DetailedResponse{StatusCode: 200}, nil + }) + require.NoError(t, err) + assert.Equal(t, "ok", result) + assert.Equal(t, 2, calls) +}