Skip to content

Commit c56431f

Browse files
committed
Add test for nil member in learnExistingMembers after concurrent purge
Signed-off-by: Ady0333 <adityashinde1525@gmail.com>
1 parent 3923f32 commit c56431f

5 files changed

Lines changed: 215 additions & 0 deletions

File tree

diff.txt

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
diff --git a/gossip/discovery/discovery_test.go b/gossip/discovery/discovery_test.go
2+
index 01b80fc4f..d200cc83b 100644
3+
--- a/gossip/discovery/discovery_test.go
4+
+++ b/gossip/discovery/discovery_test.go
5+
@@ -1975,3 +1975,55 @@ func TestHandleAliveMessage_RelearnsMemberAfterConcurrentPurge(t *testing.T) {
6+
require.True(t, inID2Member, "member should be present in id2Member after re-learning")
7+
require.True(t, inAliveLastTS, "member should be present in aliveLastTS after re-learning")
8+
}
9+
+
10+
+func TestLearnExistingMembersNilMemberPanic(t *testing.T) {
11+
+ // 1) Initialize a discovery instance (use existing helpers like createDiscoveryInstanceWithNoGossip).
12+
+ inst := createDiscoveryInstanceWithNoGossip(10000, "testInst", nil)
13+
+ defer inst.Stop()
14+
+
15+
+ // Access the underlying implementation
16+
+ discImpl := inst.discoveryImpl()
17+
+
18+
+ // 2) Prepare a PKIid and endpoint.
19+
+ pkiID := common.PKIidType("test-pki-id")
20+
+ endpoint := "localhost:1234"
21+
+
22+
+ // 3) Under lock, insert an entry into aliveLastTS for that PKIid.
23+
+ // 4) Do NOT insert the corresponding entry into id2Member (simulate it being purged).
24+
+ discImpl.lock.Lock()
25+
+ discImpl.aliveLastTS[string(pkiID)] = &timestamp{
26+
+ incTime: time.Now(),
27+
+ seqNum: 1,
28+
+ lastSeen: time.Now(),
29+
+ }
30+
+ discImpl.lock.Unlock()
31+
+
32+
+ // 5) Build a valid AliveMessage and wrap it with protoext.NoopSign.
33+
+ aliveMsg := &proto.GossipMessage{
34+
+ Tag: proto.GossipMessage_EMPTY,
35+
+ Content: &proto.GossipMessage_AliveMsg{
36+
+ AliveMsg: &proto.AliveMessage{
37+
+ Membership: &proto.Member{
38+
+ PkiId: pkiID,
39+
+ Endpoint: endpoint,
40+
+ },
41+
+ Timestamp: &proto.PeerTime{
42+
+ IncNum: uint64(time.Now().UnixNano()),
43+
+ SeqNum: 2,
44+
+ },
45+
+ },
46+
+ },
47+
+ }
48+
+ signedMsg, err := protoext.NoopSign(aliveMsg)
49+
+ require.NoError(t, err)
50+
+
51+
+ // 6) Invoke the relevant flow directly.
52+
+ // We call learnExistingMembers() instead of handleAliveMessage() because handleAliveMessage
53+
+ // checks if the member is in id2Member under a read-lock and drops the message if not.
54+
+ // In the exact concurrency flaw, the member is valid during handleAliveMessage's read lock,
55+
+ // but gets purged before learnExistingMembers acquires its write lock. Calling learnExistingMembers
56+
+ // directly avoids flaky timing races while perfectly recreating the problematic inconsistent state.
57+
+ require.NotPanics(t, func() {
58+
+ discImpl.learnExistingMembers([]*protoext.SignedGossipMessage{signedMsg})
59+
+ }, "learnExistingMembers should not panic when member is nil in id2Member")
60+
+}

diff_v2.txt

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
diff --git a/gossip/discovery/discovery_test.go b/gossip/discovery/discovery_test.go
2+
index 01b80fc4f..34d0f968b 100644
3+
--- a/gossip/discovery/discovery_test.go
4+
+++ b/gossip/discovery/discovery_test.go
5+
@@ -1975,3 +1975,60 @@ func TestHandleAliveMessage_RelearnsMemberAfterConcurrentPurge(t *testing.T) {
6+
require.True(t, inID2Member, "member should be present in id2Member after re-learning")
7+
require.True(t, inAliveLastTS, "member should be present in aliveLastTS after re-learning")
8+
}
9+
+
10+
+func TestLearnExistingMembers_NilMemberAfterConcurrentPurge(t *testing.T) {
11+
+ // 1) Initialize a discovery instance (use existing helpers like createDiscoveryInstanceWithNoGossip).
12+
+ inst := createDiscoveryInstanceWithNoGossip(10000, "testInst", nil)
13+
+ defer inst.Stop()
14+
+
15+
+ // Access the underlying implementation
16+
+ d := inst.discoveryImpl()
17+
+
18+
+ // 2) Prepare a PKIid and endpoint.
19+
+ pkiID := common.PKIidType("test-pki-id")
20+
+ endpoint := "localhost:1234"
21+
+
22+
+ // 3) Under lock, insert an entry into aliveLastTS for that PKIid.
23+
+ // 4) Do NOT insert the corresponding entry into id2Member (simulate it being purged).
24+
+ d.lock.Lock()
25+
+ d.aliveLastTS[string(pkiID)] = &timestamp{
26+
+ incTime: time.Now(),
27+
+ seqNum: 1,
28+
+ lastSeen: time.Now(),
29+
+ }
30+
+ d.lock.Unlock()
31+
+
32+
+ // 5) Build a valid AliveMessage and wrap it with protoext.NoopSign.
33+
+ aliveMsg := &proto.GossipMessage{
34+
+ Tag: proto.GossipMessage_EMPTY,
35+
+ Content: &proto.GossipMessage_AliveMsg{
36+
+ AliveMsg: &proto.AliveMessage{
37+
+ Membership: &proto.Member{
38+
+ PkiId: pkiID,
39+
+ Endpoint: endpoint,
40+
+ },
41+
+ Timestamp: &proto.PeerTime{
42+
+ IncNum: uint64(time.Now().UnixNano()),
43+
+ SeqNum: 2,
44+
+ },
45+
+ },
46+
+ },
47+
+ }
48+
+ signedMsg, err := protoext.NoopSign(aliveMsg)
49+
+ require.NoError(t, err)
50+
+
51+
+ // We invoke learnExistingMembers() directly to deterministically reproduce
52+
+ // the inconsistent state where the member is present in aliveLastTS but
53+
+ // missing from id2Member.
54+
+ //
55+
+ // In the real flow, handleAliveMessage first reads state under a read lock,
56+
+ // and then learnExistingMembers acquires a write lock. A concurrent purge
57+
+ // can remove the member between these two steps, leading to a nil access.
58+
+ //
59+
+ // Reproducing this via the full handleAliveMessage path would require a
60+
+ // timing-dependent race, so we simulate the exact post-condition directly
61+
+ // to keep the test deterministic and reliable.
62+
+ require.NotPanics(t, func() {
63+
+ d.learnExistingMembers([]*protoext.SignedGossipMessage{signedMsg})
64+
+ }, "learnExistingMembers should not panic when member is nil in id2Member")
65+
+}

gossip/discovery/discovery_impl.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,10 @@ func (d *gossipDiscoveryImpl) learnExistingMembers(aliveArr []*protoext.SignedGo
843843

844844
// update member's data
845845
member := d.id2Member[string(am.Membership.PkiId)]
846+
if member == nil {
847+
// skip safely, do not panic
848+
continue
849+
}
846850
member.Endpoint = am.Membership.Endpoint
847851
member.Metadata = am.Membership.Metadata
848852
member.InternalEndpoint = internalEndpoint

gossip/discovery/discovery_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1975,3 +1975,55 @@ func TestHandleAliveMessage_RelearnsMemberAfterConcurrentPurge(t *testing.T) {
19751975
require.True(t, inID2Member, "member should be present in id2Member after re-learning")
19761976
require.True(t, inAliveLastTS, "member should be present in aliveLastTS after re-learning")
19771977
}
1978+
1979+
func TestLearnExistingMembers_NilMemberAfterConcurrentPurge(t *testing.T) {
1980+
inst := createDiscoveryInstanceWithNoGossip(0, "testNilMemberAfterPurgeInst", nil)
1981+
defer func() {
1982+
inst.Stop()
1983+
time.Sleep(50 * time.Millisecond)
1984+
}()
1985+
1986+
d := inst.discoveryImpl()
1987+
1988+
pkiID := common.PKIidType("test-pki-id")
1989+
endpoint := "localhost:1234"
1990+
1991+
// This test simulates the post-condition of a real race.
1992+
// In the real flow, handleAliveMessage reads under RLock and later calls
1993+
// learnExistingMembers under Lock. A concurrent purge can remove the member
1994+
// between these two phases, leaving it present in aliveLastTS but missing
1995+
// from id2Member.
1996+
//
1997+
// Calling learnExistingMembers directly avoids timing-dependent races
1998+
// while deterministically reproducing the inconsistent state.
1999+
d.lock.Lock()
2000+
d.aliveLastTS[string(pkiID)] = &timestamp{
2001+
incTime: time.Now(),
2002+
seqNum: 1,
2003+
lastSeen: time.Now(),
2004+
}
2005+
d.lock.Unlock()
2006+
2007+
aliveMsg := &proto.GossipMessage{
2008+
Tag: proto.GossipMessage_EMPTY,
2009+
Content: &proto.GossipMessage_AliveMsg{
2010+
AliveMsg: &proto.AliveMessage{
2011+
Membership: &proto.Member{
2012+
PkiId: pkiID,
2013+
Endpoint: endpoint,
2014+
},
2015+
Timestamp: &proto.PeerTime{
2016+
IncNum: uint64(time.Now().UnixNano()),
2017+
SeqNum: 2,
2018+
},
2019+
},
2020+
},
2021+
}
2022+
2023+
signedMsg, err := protoext.NoopSign(aliveMsg)
2024+
require.NoError(t, err)
2025+
2026+
require.NotPanics(t, func() {
2027+
d.learnExistingMembers([]*protoext.SignedGossipMessage{signedMsg})
2028+
}, "learnExistingMembers should not panic when a member is missing from id2Member")
2029+
}

test_output.txt

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
=== RUN TestLearnExistingMembersNilMemberPanic
2+
discovery_test.go:2026:
3+
Error Trace: /home/aditya/fabric/gossip/discovery/discovery_test.go:2026
4+
Error: func (assert.PanicTestFunc)(0xba7a00) should not panic
5+
Panic value: runtime error: invalid memory address or nil pointer dereference
6+
Panic stack: goroutine 40 [running]:
7+
runtime/debug.Stack()
8+
/home/aditya/go/pkg/mod/golang.org/toolchain@v0.0.1-go1.26.1.linux-amd64/src/runtime/debug/stack.go:26 +0x5e
9+
github.com/stretchr/testify/assert.didPanic.func1()
10+
/home/aditya/fabric/vendor/github.com/stretchr/testify/assert/assertions.go:1234 +0x67
11+
panic({0xcecc20?, 0x15dddc0?})
12+
/home/aditya/go/pkg/mod/golang.org/toolchain@v0.0.1-go1.26.1.linux-amd64/src/runtime/panic.go:860 +0x13a
13+
github.com/hyperledger/fabric/gossip/discovery.(*gossipDiscoveryImpl).learnExistingMembers(0x1066f128c420, {0x1066f1392378, 0x1, 0x1})
14+
/home/aditya/fabric/gossip/discovery/discovery_impl.go:846 +0x34c
15+
github.com/hyperledger/fabric/gossip/discovery.TestLearnExistingMembersNilMemberPanic.func1()
16+
/home/aditya/fabric/gossip/discovery/discovery_test.go:2027 +0x65
17+
github.com/stretchr/testify/assert.didPanic(0x15e0ea0?)
18+
/home/aditya/fabric/vendor/github.com/stretchr/testify/assert/assertions.go:1239 +0x70
19+
github.com/stretchr/testify/assert.NotPanics({0xe52900, 0x1066f157ab48}, 0x1066f12fa360, {0x1066f12f1f48, 0x1, 0x1})
20+
/home/aditya/fabric/vendor/github.com/stretchr/testify/assert/assertions.go:1310 +0x7e
21+
github.com/stretchr/testify/require.NotPanics({0xe58588, 0x1066f157ab48}, 0x1066f12fa360, {0x1066f12f1f48, 0x1, 0x1})
22+
/home/aditya/fabric/vendor/github.com/stretchr/testify/require/require.go:1669 +0xa6
23+
github.com/hyperledger/fabric/gossip/discovery.TestLearnExistingMembersNilMemberPanic(0x1066f157ab48)
24+
/home/aditya/fabric/gossip/discovery/discovery_test.go:2026 +0x4f3
25+
testing.tRunner(0x1066f157ab48, 0xe47c10)
26+
/home/aditya/go/pkg/mod/golang.org/toolchain@v0.0.1-go1.26.1.linux-amd64/src/testing/testing.go:2036 +0xea
27+
created by testing.(*T).Run in goroutine 1
28+
/home/aditya/go/pkg/mod/golang.org/toolchain@v0.0.1-go1.26.1.linux-amd64/src/testing/testing.go:2101 +0x4c5
29+
Test: TestLearnExistingMembersNilMemberPanic
30+
Messages: learnExistingMembers should not panic when member is nil in id2Member
31+
--- FAIL: TestLearnExistingMembersNilMemberPanic (0.00s)
32+
FAIL
33+
FAIL github.com/hyperledger/fabric/gossip/discovery 0.028s
34+
FAIL

0 commit comments

Comments
 (0)