Skip to content

Commit 38ecfd8

Browse files
vjeffrey and claude authored
🐛 Expose recovered provider panics via Runtime.CriticalErrors() (#7021)
* 🐛 Expose recovered provider panics via Runtime.CriticalErrors()

  Provider panics recovered by PR #6939 are handled gracefully (execution continues), but the errors are downgraded to field-level data errors and never propagate as real Go errors. This means consumers like cnspec cannot report them to Sentry.

  Add a CriticalErrors() method on Runtime that collects serious errors (recovered panics) in a thread-safe slice. Consumers can drain this after execution to report them to error trackers.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* lint

* 🐛 Cap criticalErrors slice to prevent unbounded growth

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 69ea273 commit 38ecfd8

File tree

2 files changed

+94
-1
lines changed

2 files changed

+94
-1
lines changed

providers/runtime.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ type Runtime struct {
4545
close sync.Once
4646
shutdownTimeout time.Duration
4747

48+
// criticalErrors collects serious errors (e.g. recovered provider panics)
49+
// that should be reported to an error tracker even though execution continues.
50+
criticalErrors []error
51+
4852
// used to lock unsafe tasks
4953
mu sync.Mutex
5054
}
@@ -108,6 +112,26 @@ func (r *Runtime) Close() {
108112
})
109113
}
110114

115+
// CriticalErrors returns serious errors (e.g. recovered provider panics) that
116+
// occurred during execution. These errors were handled gracefully (execution
117+
// continued), but they should still be reported to an error tracker like Sentry.
118+
func (r *Runtime) CriticalErrors() []error {
119+
r.mu.Lock()
120+
defer r.mu.Unlock()
121+
out := make([]error, len(r.criticalErrors))
122+
copy(out, r.criticalErrors)
123+
return out
124+
}
125+
126+
func (r *Runtime) addCriticalError(err error) {
127+
r.mu.Lock()
128+
defer r.mu.Unlock()
129+
const maxCriticalErrors = 100
130+
if len(r.criticalErrors) < maxCriticalErrors {
131+
r.criticalErrors = append(r.criticalErrors, err)
132+
}
133+
}
134+
111135
// Recording returns the value of the runtime's recording field.
func (r *Runtime) Recording() llx.Recording {
112136
return r.recording
113137
}
@@ -500,7 +524,9 @@ func (r *Runtime) handlePluginError(err error, provider *ConnectedProvider) (boo
500524
// this prefix is present; other Internal errors fall through.
501525
if strings.HasPrefix(st.Message(), "panic in provider ") {
502526
log.Error().Str("provider", provider.Instance.Name).Msg(st.Message())
503-
return true, errors.New("the '" + provider.Instance.Name + "' provider panicked: " + st.Message())
527+
panicErr := errors.New("the '" + provider.Instance.Name + "' provider panicked: " + st.Message())
528+
r.addCriticalError(panicErr)
529+
return true, panicErr
504530
}
505531

506532
case codes.Unavailable:

providers/runtime_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"go.mondoo.com/mql/v13/providers-sdk/v1/recording"
1212
"go.mondoo.com/mql/v13/providers-sdk/v1/resources"
1313
"go.uber.org/mock/gomock"
14+
"google.golang.org/grpc/codes"
15+
"google.golang.org/grpc/status"
1416
)
1517

1618
func TestRuntimeClose(t *testing.T) {
@@ -380,3 +382,68 @@ func TestRuntime_LookupFieldProvider_ProviderOverridesOthers_ResourceInfo(t *tes
380382
assert.Equal(t, fieldName, field.Name)
381383
assert.Equal(t, "test", field.Provider)
382384
}
385+
386+
func TestRuntime_CriticalErrors_Empty(t *testing.T) {
387+
r := &Runtime{}
388+
assert.Empty(t, r.CriticalErrors())
389+
}
390+
391+
func TestRuntime_HandlePluginError_PanicRecordsCriticalError(t *testing.T) {
392+
r := &Runtime{}
393+
provider := &ConnectedProvider{
394+
Instance: &RunningProvider{Name: "aws"},
395+
}
396+
397+
panicErr := status.Error(codes.Internal, "panic in provider aws: runtime error: nil pointer")
398+
handled, err := r.handlePluginError(panicErr, provider)
399+
400+
assert.True(t, handled)
401+
require.Error(t, err)
402+
assert.Contains(t, err.Error(), "provider panicked")
403+
404+
critErrs := r.CriticalErrors()
405+
require.Len(t, critErrs, 1)
406+
assert.Contains(t, critErrs[0].Error(), "provider panicked")
407+
}
408+
409+
func TestRuntime_HandlePluginError_CrashDoesNotRecordCriticalError(t *testing.T) {
410+
r := &Runtime{}
411+
provider := &ConnectedProvider{
412+
Instance: &RunningProvider{Name: "aws"},
413+
}
414+
415+
crashErr := status.Error(codes.Unavailable, "connection lost")
416+
handled, err := r.handlePluginError(crashErr, provider)
417+
418+
assert.False(t, handled)
419+
require.Error(t, err)
420+
assert.Empty(t, r.CriticalErrors())
421+
}
422+
423+
func TestRuntime_HandlePluginError_NonPanicInternalDoesNotRecordCriticalError(t *testing.T) {
424+
r := &Runtime{}
425+
provider := &ConnectedProvider{
426+
Instance: &RunningProvider{Name: "aws"},
427+
}
428+
429+
internalErr := status.Error(codes.Internal, "some other internal error")
430+
handled, err := r.handlePluginError(internalErr, provider)
431+
432+
assert.False(t, handled)
433+
require.Error(t, err)
434+
assert.Empty(t, r.CriticalErrors())
435+
}
436+
437+
func TestRuntime_CriticalErrors_MultiplePanics(t *testing.T) {
438+
r := &Runtime{}
439+
provider := &ConnectedProvider{
440+
Instance: &RunningProvider{Name: "aws"},
441+
}
442+
443+
for i := 0; i < 3; i++ {
444+
panicErr := status.Error(codes.Internal, "panic in provider aws: error")
445+
r.handlePluginError(panicErr, provider) // nolint:errcheck
446+
}
447+
448+
assert.Len(t, r.CriticalErrors(), 3)
449+
}

0 commit comments

Comments
 (0)