Skip to content

Commit ac3c71e

Browse files
committed
pkg/rpcserver: fix race in local server shutdown
Currently we stop both executor binary and the RPC server concurrently due to use of errgroup.WithContext. As the result, executor may SYZFAIL on the closed network connection before it's killed. This race leads to very high percent (25%) of failed repro attempts for my local syz-manager runs. When we run syz-execprog with Repeat=false, the race triggers frequently. May have something to do with heavily instrumented kernel where some operations may take longer (e.g. killing syz-executor and stopping all of its threads). This should also fix #6091
1 parent 9fce9e4 commit ac3c71e

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

pkg/rpcserver/local.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717
"github.com/google/syzkaller/pkg/signal"
1818
"github.com/google/syzkaller/pkg/vminfo"
1919
"github.com/google/syzkaller/prog"
20-
"golang.org/x/sync/errgroup"
2120
)
2221

2322
type LocalConfig struct {
@@ -43,15 +42,27 @@ func RunLocal(ctx context.Context, cfg *LocalConfig) error {
4342
return err
4443
}
4544
defer localCtx.serv.Close()
46-
// groupCtx will be cancelled once any goroutine returns an error.
47-
eg, groupCtx := errgroup.WithContext(ctx)
48-
eg.Go(func() error {
49-
return localCtx.RunInstance(groupCtx, 0)
50-
})
51-
eg.Go(func() error {
52-
return localCtx.serv.Serve(groupCtx)
53-
})
54-
return eg.Wait()
45+
46+
// Note: we must not stop the RPC server before we finish RunInstance.
47+
// Otherwise, RPC server will close the connection, and executor may SYZFAIL
48+
// on the closed network connection.
49+
// We first need to wait for the executor binary to finish, and only then stop the RPC server.
50+
// However, we want to stop both if the other one errors out.
51+
instCtx, instCancel := context.WithCancel(ctx)
52+
defer instCancel()
53+
servCtx, servCancel := context.WithCancel(context.Background())
54+
defer servCancel()
55+
servErr := make(chan error, 1)
56+
go func() {
57+
servErr <- localCtx.serv.Serve(servCtx)
58+
instCancel()
59+
}()
60+
instErr := localCtx.RunInstance(instCtx, 0)
61+
servCancel()
62+
if err := <-servErr; err != nil {
63+
return err
64+
}
65+
return instErr
5566
}
5667

5768
func setupLocal(ctx context.Context, cfg *LocalConfig) (*local, context.Context, error) {

0 commit comments

Comments
 (0)