Skip to content

Commit aa921d4

Browse files
authored
🐛 Recover and report panics in provider subprocesses (#6939)
1 parent e338049 commit aa921d4

File tree

3 files changed

+99
-10
lines changed

3 files changed

+99
-10
lines changed

providers-sdk/v1/plugin/grpc.go

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,16 @@ package plugin
66
import (
77
"bytes"
88
"context"
9+
"runtime/debug"
910
"unicode/utf8"
1011

1112
plugin "github.com/hashicorp/go-plugin"
13+
"github.com/rs/zerolog/log"
1214
"google.golang.org/grpc"
15+
"google.golang.org/grpc/codes"
1316
"google.golang.org/grpc/encoding"
1417
_ "google.golang.org/grpc/encoding/proto"
18+
"google.golang.org/grpc/status"
1519
)
1620

1721
func init() {
@@ -83,6 +87,18 @@ func (m *GRPCClient) StoreData(req *StoreReq) (*StoreRes, error) {
8387
return m.client.StoreData(context.Background(), req)
8488
}
8589

90+
// recoverPanic converts a panic into a gRPC Internal error. The full stack
91+
// trace is logged locally; only a short message is sent over the wire.
92+
// The message is prefixed with "panic in provider " so the caller can
93+
// distinguish recovered panics from other Internal errors.
94+
func recoverPanic(method string, retErr *error) {
95+
if r := recover(); r != nil {
96+
stack := debug.Stack()
97+
log.Error().Str("method", method).Interface("panic", r).Str("stack", string(stack)).Msg("recovered panic in provider")
98+
*retErr = status.Errorf(codes.Internal, "panic in provider %s: %v", method, r)
99+
}
100+
}
101+
86102
// Here is the gRPC server that GRPCClient talks to.
87103
type GRPCServer struct {
88104
// This is the real implementation
@@ -95,11 +111,13 @@ func (m *GRPCServer) Heartbeat(ctx context.Context, req *HeartbeatReq) (*Heartbe
95111
return m.Impl.Heartbeat(req)
96112
}
97113

98-
func (m *GRPCServer) ParseCLI(ctx context.Context, req *ParseCLIReq) (*ParseCLIRes, error) {
114+
func (m *GRPCServer) ParseCLI(ctx context.Context, req *ParseCLIReq) (resp *ParseCLIRes, err error) {
115+
defer recoverPanic("ParseCLI", &err)
99116
return m.Impl.ParseCLI(req)
100117
}
101118

102-
func (m *GRPCServer) Connect(ctx context.Context, req *ConnectReq) (*ConnectRes, error) {
119+
func (m *GRPCServer) Connect(ctx context.Context, req *ConnectReq) (resp *ConnectRes, err error) {
120+
defer recoverPanic("Connect", &err)
103121
conn, err := m.broker.Dial(req.CallbackServer)
104122
if err != nil {
105123
return nil, err
@@ -112,11 +130,13 @@ func (m *GRPCServer) Connect(ctx context.Context, req *ConnectReq) (*ConnectRes,
112130
return m.Impl.Connect(req, a)
113131
}
114132

115-
func (m *GRPCServer) Disconnect(ctx context.Context, req *DisconnectReq) (*DisconnectRes, error) {
133+
func (m *GRPCServer) Disconnect(ctx context.Context, req *DisconnectReq) (resp *DisconnectRes, err error) {
134+
defer recoverPanic("Disconnect", &err)
116135
return m.Impl.Disconnect(req)
117136
}
118137

119-
func (m *GRPCServer) MockConnect(ctx context.Context, req *ConnectReq) (*ConnectRes, error) {
138+
func (m *GRPCServer) MockConnect(ctx context.Context, req *ConnectReq) (resp *ConnectRes, err error) {
139+
defer recoverPanic("MockConnect", &err)
120140
conn, err := m.broker.Dial(req.CallbackServer)
121141
if err != nil {
122142
return nil, err
@@ -129,20 +149,23 @@ func (m *GRPCServer) MockConnect(ctx context.Context, req *ConnectReq) (*Connect
129149
return m.Impl.MockConnect(req, a)
130150
}
131151

132-
func (m *GRPCServer) Shutdown(ctx context.Context, req *ShutdownReq) (*ShutdownRes, error) {
152+
func (m *GRPCServer) Shutdown(ctx context.Context, req *ShutdownReq) (resp *ShutdownRes, err error) {
153+
defer recoverPanic("Shutdown", &err)
133154
return m.Impl.Shutdown(req)
134155
}
135156

136-
func (m *GRPCServer) GetData(ctx context.Context, req *DataReq) (*DataRes, error) {
137-
resp, err := m.Impl.GetData(req)
157+
func (m *GRPCServer) GetData(ctx context.Context, req *DataReq) (resp *DataRes, err error) {
158+
defer recoverPanic("GetData", &err)
159+
resp, err = m.Impl.GetData(req)
138160
if err != nil {
139161
return nil, err
140162
}
141163
sanitizeDataRes(resp)
142164
return resp, nil
143165
}
144166

145-
func (m *GRPCServer) StoreData(ctx context.Context, req *StoreReq) (*StoreRes, error) {
167+
func (m *GRPCServer) StoreData(ctx context.Context, req *StoreReq) (resp *StoreRes, err error) {
168+
defer recoverPanic("StoreData", &err)
146169
return m.Impl.StoreData(req)
147170
}
148171

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright (c) Mondoo, Inc.
2+
// SPDX-License-Identifier: BUSL-1.1
3+
4+
package plugin
5+
6+
import (
7+
"testing"
8+
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
"google.golang.org/grpc/codes"
12+
"google.golang.org/grpc/status"
13+
)
14+
15+
func TestRecoverPanic(t *testing.T) {
16+
t.Run("recovers string panic", func(t *testing.T) {
17+
var err error
18+
func() {
19+
defer recoverPanic("TestMethod", &err)
20+
panic("something went wrong")
21+
}()
22+
23+
require.Error(t, err)
24+
st, ok := status.FromError(err)
25+
require.True(t, ok)
26+
assert.Equal(t, codes.Internal, st.Code())
27+
assert.Equal(t, "panic in provider TestMethod: something went wrong", st.Message())
28+
assert.NotContains(t, st.Message(), "goroutine") // stack trace stays in log, not on the wire
29+
})
30+
31+
t.Run("recovers nil pointer panic", func(t *testing.T) {
32+
var err error
33+
func() {
34+
defer recoverPanic("GetData", &err)
35+
var s *string
36+
_ = *s // nil pointer dereference
37+
}()
38+
39+
require.Error(t, err)
40+
st, ok := status.FromError(err)
41+
require.True(t, ok)
42+
assert.Equal(t, codes.Internal, st.Code())
43+
assert.Contains(t, st.Message(), "panic in provider GetData")
44+
})
45+
46+
t.Run("no panic leaves error nil", func(t *testing.T) {
47+
var err error
48+
func() {
49+
defer recoverPanic("TestMethod", &err)
50+
// no panic
51+
}()
52+
53+
assert.NoError(t, err)
54+
})
55+
}

providers/runtime.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package providers
55

66
import (
77
"errors"
8+
"strings"
89
"sync"
910
"time"
1011

@@ -18,6 +19,7 @@ import (
1819
"go.mondoo.com/mql/v13/types"
1920
"go.mondoo.com/mql/v13/utils/multierr"
2021
"go.mondoo.com/mql/v13/utils/stringx"
22+
"google.golang.org/grpc/codes"
2123
"google.golang.org/grpc/status"
2224
)
2325

@@ -492,8 +494,17 @@ func (r *Runtime) handlePluginError(err error, provider *ConnectedProvider) (boo
492494
}
493495

494496
switch st.Code() {
495-
case 14:
496-
// Error: Unavailable. Happens when the plugin crashes.
497+
case codes.Internal:
498+
// A recovered panic in the provider sends an Internal error prefixed
499+
// with "panic in provider ". Only apply panic-specific handling when
500+
// this prefix is present; other Internal errors fall through.
501+
if strings.HasPrefix(st.Message(), "panic in provider ") {
502+
log.Error().Str("provider", provider.Instance.Name).Msg(st.Message())
503+
return true, errors.New("the '" + provider.Instance.Name + "' provider panicked: " + st.Message())
504+
}
505+
506+
case codes.Unavailable:
507+
// Happens when the plugin crashes.
497508
// TODO: try to restart the plugin and reset its connections
498509
provider.Instance.isClosed = true
499510
provider.Instance.err = errors.New("the '" + provider.Instance.Name + "' provider crashed: " + err.Error())

0 commit comments

Comments
 (0)