From 90c618365f23a2e8ca12419b9a321696a228ae9f Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 7 May 2025 08:53:04 +0000 Subject: [PATCH] kvserver: skip test on failed re-listen A test can't listen() on a random port, then close the listener, and expect the same port to be available for a subsequent listen(). Skip the test if the port is not available the second time around; this is very rare but can happen. See https://github.com/cockroachdb/cockroach/issues/146175. Epic: none --- pkg/kv/kvserver/raft_transport_test.go | 14 +++++++++++++- pkg/util/netutil/net.go | 15 +++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/pkg/kv/kvserver/raft_transport_test.go b/pkg/kv/kvserver/raft_transport_test.go index cb0390df2a89..30a041e7b65a 100644 --- a/pkg/kv/kvserver/raft_transport_test.go +++ b/pkg/kv/kvserver/raft_transport_test.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -108,6 +109,8 @@ type raftTransportTestContext struct { nodeRPCContext *rpc.Context gossip *gossip.Gossip st *cluster.Settings + + skipOnListenErr bool // if true, calls Skip on error from net.Listen } func newRaftTransportTestContext(t testing.TB, st *cluster.Settings) *raftTransportTestContext { @@ -184,8 +187,12 @@ func (rttc *raftTransportTestContext) AddNodeWithoutGossip( knobs, ) rttc.transports[nodeID] = transport - ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr) + ln, err := net.Listen(addr.Network(), addr.String()) + if err != nil && rttc.skipOnListenErr { + skip.IgnoreLintf(rttc.t, "skipping test due to listen error: %s", err) + } require.NoError(rttc.t, err) + require.NoError(rttc.t, netutil.ServeGRPC(stopper, grpcServer, ln)) return transport, ln.Addr() } @@ -558,6 +565,11 @@ func TestReopenConnection(t *testing.T) { StoreID: 2, ReplicaID: 2, } + + // We're re-listening on an old address here, but the port may be + // in use. In the very rare case of this happening, skip the test. + // See: https://github.com/cockroachdb/cockroach/issues/146175. + rttc.skipOnListenErr = true serverTransport, serverAddr := rttc.AddNodeWithoutGossip( serverReplica.NodeID, diff --git a/pkg/util/netutil/net.go b/pkg/util/netutil/net.go index bc921bbd1e70..da914fe9d49d 100644 --- a/pkg/util/netutil/net.go +++ b/pkg/util/netutil/net.go @@ -34,7 +34,13 @@ func ListenAndServeGRPC( if err != nil { return ln, err } + if err := ServeGRPC(stopper, server, ln); err != nil { + return nil, err + } + return ln, nil +} +func ServeGRPC(stopper *stop.Stopper, server *grpc.Server, ln net.Listener) error { ctx := context.TODO() stopper.AddCloser(stop.CloserFn(server.Stop)) @@ -44,15 +50,12 @@ func ListenAndServeGRPC( } if err := stopper.RunAsyncTask(ctx, "listen-quiesce", waitQuiesce); err != nil { waitQuiesce(ctx) - return nil, err + return err } - if err := stopper.RunAsyncTask(ctx, "serve", func(context.Context) { + return stopper.RunAsyncTask(ctx, "serve", func(context.Context) { FatalIfUnexpected(server.Serve(ln)) - }); err != nil { - return nil, err - } - return ln, nil + }) } var httpLogger = log.NewStdLogger(severity.ERROR, "net/http")