Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3a3a6eb
fix: add go/audit package to Copybara sync for CLI and terraform-prov…
DavidS-ovm Apr 7, 2026
4aa5ee9
Create Azure adapter: MaintenanceMaintenanceConfiguration (#4537)
Lionel-Wilson Apr 7, 2026
0815f54
Create Azure adapter: NetworkVirtualNetworkGatewayConnection (#4543)
Lionel-Wilson Apr 7, 2026
de41225
feat(customermcp): Phase 3 — Infrastructure Context Resources and Inv…
DavidS-ovm Apr 8, 2026
0c5fa62
fix(deps): update google.golang.org/genproto/googleapis/rpc digest to…
renovate[bot] Apr 9, 2026
ff48715
fix(deps): update go (#4628)
renovate[bot] Apr 9, 2026
37e04b7
fix(deps): update go (#4643)
renovate[bot] Apr 10, 2026
34e18ca
chore(deps): update terraform (#4632)
renovate[bot] Apr 10, 2026
73242b5
discovery: fold first QueryError per Execute span (Honeycomb cost) (#…
DavidS-ovm Apr 10, 2026
262be70
[ENG-3671] Remove outage tracker service (#4651)
DavidS-ovm Apr 13, 2026
03ba78a
chore(deps): update github actions (major) (#4660)
DavidS-ovm Apr 13, 2026
43f4333
refactor: consolidate MCP OAuth handlers and add Area51 DCR support (…
DavidS-ovm Apr 14, 2026
d0b385d
[ENG-3665] Fix AWS source dying when a single region times out (#4670)
DavidS-ovm Apr 15, 2026
6dad670
[ENG-3715] Fix ExecuteQuery responses channel close race (#4684)
DavidS-ovm Apr 15, 2026
ed9e084
Instrument brent-be LLM calls with OTel GenAI semantic conventions (#…
DavidS-ovm Apr 15, 2026
5508cfa
Force upgrade of github.com/go-git/go-git to v5.17.1 for security fix…
DavidS-ovm Apr 15, 2026
4e843a2
[ENG-3750] Pin Docker image tags and modernise Renovate config (#4701)
DavidS-ovm Apr 16, 2026
4f9178b
(feat) new icons for PR markdown (#4710)
tphoney Apr 16, 2026
e4fa148
Run go mod tidy
actions-user Apr 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 29 additions & 29 deletions .terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added assets/view_in_overmind-dark.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/view_in_overmind-light.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion aws-source/build/package/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the source binary
FROM golang:1.26-alpine AS builder
FROM golang:1.26.2-alpine3.23 AS builder
ARG TARGETOS
ARG TARGETARCH
ARG BUILD_VERSION
Expand Down
4 changes: 2 additions & 2 deletions aws-source/module/provider/.github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ jobs:
fetch-depth: 0

- name: Install 1Password CLI
uses: 1password/install-cli-action@v2
uses: 1password/install-cli-action@v3.0.0

- name: Load GPG secrets from 1Password
uses: 1password/load-secrets-action@v3
uses: 1password/load-secrets-action@v4.0.0
with:
export-env: true
env:
Expand Down
54 changes: 41 additions & 13 deletions aws-source/proc/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ import (
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)

// This package contains a few functions needed by the CLI to load this in-proc.
Expand Down Expand Up @@ -82,20 +84,23 @@ func ConfigFromViper() ([]aws.Config, error) {
return CreateAWSConfigs(authConfig)
}

// isOptInRegionError checks if an error indicates an opt-in region that is not enabled.
// This typically occurs when trying to authenticate with IRSA in a region that hasn't
// been enabled in the AWS account. These errors should not cause source initialization
// to fail - the region should simply be skipped.
// isTimeoutError reports whether err, or any error it wraps, is
// context.DeadlineExceeded. Other context errors such as context.Canceled do
// not match. Timeouts are singled out so that a single unresponsive region
// (e.g. me-south-1 being decommissioned) does not take down the whole
// source — see ENG-3665.
func isTimeoutError(err error) bool {
	if err == nil {
		return false
	}

	return errors.Is(err, context.DeadlineExceeded)
}

// isOptInRegionError checks if an error indicates an opt-in region that is not
// enabled in the AWS account (InvalidIdentityToken + OIDC).
func isOptInRegionError(err error) bool {
if err == nil {
return false
}

// Check for the InvalidIdentityToken error code from STS AssumeRoleWithWebIdentity
var apiErr smithy.APIError
if errors.As(err, &apiErr) {
if apiErr.ErrorCode() == "InvalidIdentityToken" {
// Additional validation: check if it's specifically about OIDC provider
errMsg := err.Error()
if strings.Contains(errMsg, "No OpenIDConnect provider found") {
return true
Expand All @@ -106,13 +111,22 @@ func isOptInRegionError(err error) bool {
return false
}

// isSkippableRegionError reports whether err means the region should be
// skipped rather than failing the entire source. An error qualifies when
// isTimeoutError or isOptInRegionError accepts it.
func isSkippableRegionError(err error) bool {
	if isTimeoutError(err) {
		return true
	}

	return isOptInRegionError(err)
}

// wrapRegionError wraps misleading AWS errors with more helpful context
func wrapRegionError(err error, region string) error {
if err == nil {
return nil
}

// Check for opt-in region errors and provide helpful context
if isTimeoutError(err) {
return fmt.Errorf("%w. Region '%s' is unreachable (timeout); it may be decommissioned or experiencing an outage", err, region)
}

if isOptInRegionError(err) {
return fmt.Errorf("%w. This error often occurs when region '%s' is not enabled in the target AWS account", err, region)
}
Expand Down Expand Up @@ -322,18 +336,32 @@ func InitializeAwsSourceAdapters(ctx context.Context, e *discovery.Engine, confi
"region": cfg.Region,
}

// Check if this is an opt-in region error
if isOptInRegionError(err) {
// This region is not enabled in the account - skip it but don't fail
// Check if this is a skippable region error (timeout or opt-in)
if isSkippableRegionError(err) {
wrappedErr := wrapRegionError(err, cfg.Region)
skippedRegionsMu.Lock()
skippedRegions = append(skippedRegions, skippedRegion{
region: cfg.Region,
err: wrappedErr,
})
skippedRegionsMu.Unlock()
log.WithError(wrappedErr).WithFields(lf).Warn("Skipping region - not enabled in account")
return nil // Don't fail the pool for opt-in regions

reason := "opt-in region not enabled"
if isTimeoutError(err) {
reason = "timeout"
log.WithError(wrappedErr).WithFields(lf).Warn("Skipping region - unreachable (timeout)")
} else {
log.WithError(wrappedErr).WithFields(lf).Warn("Skipping region - not enabled in account")
}

span := trace.SpanFromContext(ctx)
span.AddEvent("ovm.adapter.regionSkipped", trace.WithAttributes(
attribute.String("ovm.adapter.region", cfg.Region),
attribute.String("ovm.adapter.skipReason", reason),
attribute.String("ovm.adapter.error", wrappedErr.Error()),
))

return nil // Don't fail the pool for skippable regions
}

// Wrap misleading OIDC errors with helpful region enablement context
Expand Down Expand Up @@ -645,7 +673,7 @@ func InitializeAwsSourceAdapters(ctx context.Context, e *discovery.Engine, confi
log.WithFields(log.Fields{
"skipped_regions": skippedRegionNames,
"count": len(skippedRegions),
}).Warn("Some regions were skipped because they are not enabled in the AWS account. The source will operate normally with the remaining regions.")
}).Warn("Some regions were skipped because they are unreachable or not enabled in the AWS account. The source will operate normally with the remaining regions.")
}

log.Debug("Sources initialized")
Expand Down
130 changes: 130 additions & 0 deletions aws-source/proc/proc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ func TestIsOptInRegionError(t *testing.T) {
err: errors.New("No OpenIDConnect provider found"),
expectedResult: false,
},
{
name: "context.DeadlineExceeded returns false",
err: context.DeadlineExceeded,
expectedResult: false,
},
{
name: "context.Canceled returns false",
err: context.Canceled,
expectedResult: false,
},
}

for _, tt := range tests {
Expand All @@ -188,6 +198,105 @@ func TestIsOptInRegionError(t *testing.T) {
}
}

// TestIsTimeoutError verifies that isTimeoutError matches
// context.DeadlineExceeded (bare or wrapped) and rejects nil,
// context.Canceled (bare or wrapped) and unrelated errors.
func TestIsTimeoutError(t *testing.T) {
	// wrapSTS embeds inner in an STS-style operation error message via %w.
	wrapSTS := func(inner error) error {
		return fmt.Errorf("operation error STS: GetCallerIdentity: %w", inner)
	}

	cases := []struct {
		name string
		err  error
		want bool
	}{
		{"nil error returns false", nil, false},
		{"context.DeadlineExceeded returns true", context.DeadlineExceeded, true},
		{"wrapped context.DeadlineExceeded returns true", wrapSTS(context.DeadlineExceeded), true},
		{"context.Canceled returns false", context.Canceled, false},
		{"wrapped context.Canceled returns false", wrapSTS(context.Canceled), false},
		{"non-timeout error returns false", errors.New("some random error"), false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := isTimeoutError(tc.err); got != tc.want {
				t.Errorf("isTimeoutError() = %v, want %v for error: %v", got, tc.want, tc.err)
			}
		})
	}
}

// TestIsSkippableRegionError verifies that isSkippableRegionError accepts
// deadline-exceeded errors (ENG-3665) and opt-in region errors, and rejects
// nil, context.Canceled and unrelated errors.
func TestIsSkippableRegionError(t *testing.T) {
	// DeadlineExceeded wrapped in an STS-style operation error message.
	wrappedDeadline := fmt.Errorf("operation error STS: GetCallerIdentity: %w", context.DeadlineExceeded)

	// API error carrying the InvalidIdentityToken code together with the
	// missing-OIDC-provider message.
	optInErr := &mockAPIError{
		code:    "InvalidIdentityToken",
		message: "No OpenIDConnect provider found in your account",
	}

	cases := []struct {
		name string
		err  error
		want bool
	}{
		{"nil error returns false", nil, false},
		{"context.DeadlineExceeded returns true (ENG-3665)", context.DeadlineExceeded, true},
		{"wrapped context.DeadlineExceeded returns true (ENG-3665)", wrappedDeadline, true},
		{"context.Canceled returns false (parent cancellation, not region timeout)", context.Canceled, false},
		{"opt-in region error returns true", optInErr, true},
		{"non-skippable error returns false", errors.New("some random error"), false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := isSkippableRegionError(tc.err); got != tc.want {
				t.Errorf("isSkippableRegionError() = %v, want %v for error: %v", got, tc.want, tc.err)
			}
		})
	}
}

func TestWrapRegionError(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -240,6 +349,27 @@ func TestWrapRegionError(t *testing.T) {
shouldWrap: false,
expectedText: "",
},
{
name: "timeout error gets timeout-specific message",
err: context.DeadlineExceeded,
region: "me-south-1",
shouldWrap: true,
expectedText: "unreachable (timeout)",
},
{
name: "wrapped timeout error gets timeout-specific message",
err: fmt.Errorf("operation error STS: GetCallerIdentity: %w", context.DeadlineExceeded),
region: "me-south-1",
shouldWrap: true,
expectedText: "unreachable (timeout)",
},
{
name: "canceled error is not wrapped (parent cancellation, not region timeout)",
err: context.Canceled,
region: "me-south-1",
shouldWrap: false,
expectedText: "",
},
}

for _, tt := range tests {
Expand Down
Loading