Skip to content

Commit 0f88aa4

Browse files
wcatz and claude committed
revert(sync): disable keepalive — causes ChainSync stall in gouroboros
Enabling keepalive fixes the node-side ExceededTimeLimit disconnects but causes ChainSync to stall after ~2000 blocks due to a gouroboros muxer interaction between the keepalive and chainsync mini-protocols. This drops throughput from ~2500 blk/s to ~6 blk/s (300x slower).

Revert to keepalive=false: the node kills us every ~97s but the retry loop handles reconnects at ~1800 blk/s sustained (~2h full sync).

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 5235e1c commit 0f88aa4

File tree

2 files changed

+8
-12
lines changed

2 files changed

+8
-12
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,6 @@ duckBot
2929
vrf.skey
3030
keys/
3131

32-
# OMP config (local overrides)
32+
# AI agent config (local only)
33+
.claude/
3334
.omp/settings.local.json

sync.go

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/blinklabs-io/gouroboros/ledger/shelley"
1919
"github.com/blinklabs-io/gouroboros/protocol/chainsync"
2020
pcommon "github.com/blinklabs-io/gouroboros/protocol/common"
21-
"github.com/blinklabs-io/gouroboros/protocol/keepalive"
2221
)
2322

2423
// Shelley intersect points: last Byron block per network.
@@ -87,20 +86,16 @@ func (s *ChainSyncer) Start(ctx context.Context) error {
8786
)
8887

8988
// Connect to node via NtN (required for TCP connections)
90-
// Keepalive required: cardano-node's muxer expects keepalive probes from
91-
// the client and kills the connection after ~97s of inactivity.
92-
// Period=30s sends probes often enough to satisfy the node.
93-
// Timeout=90s is generous for response time during heavy ChainSync.
94-
keepAliveCfg := keepalive.NewConfig(
95-
keepalive.WithPeriod(30*time.Second),
96-
keepalive.WithTimeout(90*time.Second),
97-
)
89+
// Keepalive DISABLED during historical sync: enabling keepalive causes
90+
// ChainSync to stall after ~2000 blocks (gouroboros muxer interaction),
91+
// dropping throughput from ~2500 blk/s to ~6 blk/s.
92+
// Without keepalive the node kills us every ~97s (ExceededTimeLimit),
93+
// but the retry loop handles reconnects seamlessly at ~1800 blk/s sustained.
9894
errChan := make(chan error, 1)
9995
conn, connErr := ouroboros.NewConnection(
10096
ouroboros.WithNetworkMagic(uint32(s.networkMagic)),
10197
ouroboros.WithNodeToNode(true),
102-
ouroboros.WithKeepAlive(true),
103-
ouroboros.WithKeepAliveConfig(keepAliveCfg),
98+
ouroboros.WithKeepAlive(false),
10499
ouroboros.WithChainSyncConfig(chainSyncCfg),
105100
ouroboros.WithErrorChan(errChan),
106101
)

0 commit comments

Comments
 (0)