Skip to content

Commit b2e8967

Browse files
authored
fix(core/fetcher): resubscribe if consensus node goes offline (#4096)
1 parent 482f452 commit b2e8967

File tree

2 files changed

+140
-0
lines changed

2 files changed

+140
-0
lines changed

Diff for: core/fetcher.go

+10
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
coregrpc "github.com/tendermint/tendermint/rpc/grpc"
1414
"github.com/tendermint/tendermint/types"
1515
"google.golang.org/grpc"
16+
"google.golang.org/grpc/status"
1617

1718
libhead "github.com/celestiaorg/go-header"
1819
)
@@ -173,9 +174,12 @@ func (f *BlockFetcher) SubscribeNewBlockEvent(ctx context.Context) (<-chan types
173174

174175
subscription, err := f.client.SubscribeNewHeights(ctx, &coregrpc.SubscribeNewHeightsRequest{})
175176
if err != nil {
177+
close(f.doneCh)
178+
f.isListeningForBlocks.Store(false)
176179
return nil, err
177180
}
178181

182+
log.Debug("created a subscription. Start listening for new blocks...")
179183
signedBlockCh := make(chan types.EventDataSignedBlock)
180184
go func() {
181185
defer close(f.doneCh)
@@ -189,6 +193,12 @@ func (f *BlockFetcher) SubscribeNewBlockEvent(ctx context.Context) (<-chan types
189193
resp, err := subscription.Recv()
190194
if err != nil {
191195
log.Errorw("fetcher: error receiving new height", "err", err.Error())
196+
_, ok := status.FromError(err) // parsing the gRPC error
197+
if ok {
198+
// ok means that err contains a gRPC status error.
199+
// stop the receive loop so that the caller can create a new subscription.
200+
return
201+
}
192202
continue
193203
}
194204
withTimeout, ctxCancel := context.WithTimeout(ctx, 10*time.Second)

Diff for: core/fetcher_test.go

+130
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@ package core
22

33
import (
44
"context"
5+
"errors"
56
"net"
67
"testing"
78
"time"
89

910
"github.com/stretchr/testify/assert"
1011
"github.com/stretchr/testify/require"
12+
coregrpc "github.com/tendermint/tendermint/rpc/grpc"
13+
"google.golang.org/grpc"
1114
)
1215

1316
func TestBlockFetcher_GetBlock_and_SubscribeNewBlockEvent(t *testing.T) {
@@ -40,3 +43,130 @@ func TestBlockFetcher_GetBlock_and_SubscribeNewBlockEvent(t *testing.T) {
4043
}
4144
require.NoError(t, fetcher.Stop(ctx))
4245
}
46+
47+
type mockAPIService struct {
48+
coregrpc.UnimplementedBlockAPIServer
49+
50+
grpcServer *grpc.Server
51+
fetcher *BlockFetcher
52+
}
53+
54+
func (m *mockAPIService) SubscribeNewHeights(
55+
_ *coregrpc.SubscribeNewHeightsRequest,
56+
srv coregrpc.BlockAPI_SubscribeNewHeightsServer,
57+
) error {
58+
for i := 0; i < 20; i++ {
59+
b, err := m.fetcher.GetBlock(context.Background(), int64(i))
60+
if err != nil {
61+
return err
62+
}
63+
err = srv.Send(&coregrpc.NewHeightEvent{Height: b.Header.Height, Hash: b.Header.Hash()})
64+
if err != nil {
65+
return err
66+
}
67+
time.Sleep(time.Second)
68+
}
69+
return nil
70+
}
71+
72+
func (m *mockAPIService) BlockByHeight(
73+
req *coregrpc.BlockByHeightRequest,
74+
srv coregrpc.BlockAPI_BlockByHeightServer,
75+
) error {
76+
b, err := m.fetcher.client.BlockByHeight(context.Background(), &coregrpc.BlockByHeightRequest{Height: req.Height})
77+
if err != nil {
78+
return err
79+
}
80+
data, err := b.Recv()
81+
if err != nil {
82+
return err
83+
}
84+
err = srv.Send(data)
85+
if err != nil {
86+
return err
87+
}
88+
return nil
89+
}
90+
91+
func (m *mockAPIService) Start() error {
92+
listener, err := net.Listen("tcp", ":50051")
93+
if err != nil {
94+
return err
95+
}
96+
97+
grpcServer := grpc.NewServer()
98+
m.grpcServer = grpcServer
99+
coregrpc.RegisterBlockAPIServer(grpcServer, m)
100+
go func() {
101+
err = grpcServer.Serve(listener)
102+
if err != nil && !errors.Is(err, grpc.ErrServerStopped) {
103+
panic(err)
104+
}
105+
}()
106+
return nil
107+
}
108+
109+
func (m *mockAPIService) Stop() error {
110+
m.grpcServer.Stop()
111+
return nil
112+
}
113+
114+
func (m *mockAPIService) generateBlocksWithHeights(ctx context.Context, t *testing.T) {
115+
cfg := DefaultTestConfig()
116+
fetcher, cctx := createCoreFetcher(t, cfg)
117+
m.fetcher = fetcher
118+
generateNonEmptyBlocks(t, ctx, fetcher, cfg, cctx)
119+
require.NoError(t, fetcher.Stop(ctx))
120+
}
121+
122+
// TestStart_SubscribeNewBlockEvent_Resubscription ensures that subscription will not stuck in case
123+
// gRPC server was stopped.
124+
func TestStart_SubscribeNewBlockEvent_Resubscription(t *testing.T) {
125+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
126+
t.Cleanup(cancel)
127+
m := &mockAPIService{}
128+
m.generateBlocksWithHeights(ctx, t)
129+
130+
require.NoError(t, m.Start())
131+
132+
client := newTestClient(t, "localhost", "50051")
133+
134+
fetcher, err := NewBlockFetcher(client)
135+
require.NoError(t, err)
136+
// subscribe to block event to get blocks
137+
newBlockChan, err := fetcher.SubscribeNewBlockEvent(ctx)
138+
require.NoError(t, err)
139+
140+
select {
141+
case newBlockFromChan := <-newBlockChan:
142+
h := newBlockFromChan.Header.Height
143+
_, err := fetcher.GetSignedBlock(ctx, h)
144+
require.NoError(t, err)
145+
case <-ctx.Done():
146+
require.NoError(t, ctx.Err())
147+
}
148+
149+
require.NoError(t, m.Stop())
150+
151+
// stopping the server sends an error with the status code
152+
// to client, so the subscription loop will be finished.
153+
// check that newBlockChan was closed
154+
_, ok := <-newBlockChan
155+
require.False(t, ok)
156+
157+
// start server and try to get a new subscription
158+
require.NoError(t, m.Start())
159+
newBlockChan, err = fetcher.SubscribeNewBlockEvent(ctx)
160+
require.NoError(t, err)
161+
select {
162+
case newBlockFromChan := <-newBlockChan:
163+
h := newBlockFromChan.Header.Height
164+
_, err := fetcher.GetSignedBlock(ctx, h)
165+
require.NoError(t, err)
166+
case <-ctx.Done():
167+
require.NoError(t, ctx.Err())
168+
}
169+
require.NoError(t, m.Stop())
170+
require.NoError(t, m.fetcher.Stop(ctx))
171+
require.NoError(t, fetcher.Stop(ctx))
172+
}

0 commit comments

Comments
 (0)