Skip to content

Commit 5cea668

Browse files
committed
flamenco, runtime: add writer tile
1 parent 56a71de commit 5cea668

22 files changed

+955
-122
lines changed

src/app/firedancer-dev/main.c

+2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ extern fd_topo_run_tile_t fd_tile_repair;
8080
extern fd_topo_run_tile_t fd_tile_storei;
8181
extern fd_topo_run_tile_t fd_tile_replay;
8282
extern fd_topo_run_tile_t fd_tile_execor;
83+
extern fd_topo_run_tile_t fd_tile_writer;
8384
extern fd_topo_run_tile_t fd_tile_batch;
8485
extern fd_topo_run_tile_t fd_tile_pohi;
8586
extern fd_topo_run_tile_t fd_tile_sender;
@@ -110,6 +111,7 @@ fd_topo_run_tile_t * TILES[] = {
110111
&fd_tile_storei,
111112
&fd_tile_replay,
112113
&fd_tile_execor,
114+
&fd_tile_writer,
113115
&fd_tile_batch,
114116
&fd_tile_pohi,
115117
&fd_tile_sender,

src/app/firedancer/config/default.toml

+17-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,23 @@
22
bank_tile_count = 1
33

44
# TODO: add docs on exec_tile_count / exec tile
5-
exec_tile_count = 2
5+
exec_tile_count = 4
6+
7+
# How many writer tiles to run. Writer tiles are responsible for
8+
# writing account changes made by the exec tiles back to the
9+
# accounts DB.
10+
#
11+
# Since the accounts DB is designed to be highly concurrent, most of
12+
# the time account writeback can be done in parallel without
13+
# blocking. Multiple writer tiles exploit this parallelism supported
14+
# by the accounts DB.
15+
#
16+
# If sufficient cores are available, it is recommended to set this to
17+
# the same as the number of exec tiles. However, there's no reason
18+
# for this to exceed the number of exec tiles, because the number
19+
# of in-flight transactions between exec tiles and writer tiles is
20+
# bounded by the number of exec tiles.
21+
writer_tile_count = 4
622

723
[gossip]
824
port = 8700

src/app/firedancer/main.c

+2
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ extern fd_topo_run_tile_t fd_tile_repair;
7171
extern fd_topo_run_tile_t fd_tile_storei;
7272
extern fd_topo_run_tile_t fd_tile_replay;
7373
extern fd_topo_run_tile_t fd_tile_execor;
74+
extern fd_topo_run_tile_t fd_tile_writer;
7475
extern fd_topo_run_tile_t fd_tile_batch;
7576
extern fd_topo_run_tile_t fd_tile_pohi;
7677
extern fd_topo_run_tile_t fd_tile_sender;
@@ -98,6 +99,7 @@ fd_topo_run_tile_t * TILES[] = {
9899
&fd_tile_storei,
99100
&fd_tile_replay,
100101
&fd_tile_execor,
102+
&fd_tile_writer,
101103
&fd_tile_batch,
102104
&fd_tile_pohi,
103105
&fd_tile_sender,

src/app/firedancer/topology.c

+52-13
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ fd_topo_initialize( config_t * config ) {
203203
ulong verify_tile_cnt = config->layout.verify_tile_count;
204204
ulong bank_tile_cnt = config->layout.bank_tile_count;
205205
ulong exec_tile_cnt = config->layout.exec_tile_count;
206+
ulong writer_tile_cnt = config->layout.writer_tile_count;
206207

207208
int enable_rpc = ( config->rpc.port != 0 );
208209

@@ -236,6 +237,8 @@ fd_topo_initialize( config_t * config ) {
236237
fd_topob_wksp( topo, "sign_gossip" );
237238

238239
fd_topob_wksp( topo, "replay_exec" );
240+
fd_topob_wksp( topo, "replay_wtr" );
241+
fd_topob_wksp( topo, "exec_writer" );
239242

240243
fd_topob_wksp( topo, "voter_sign" );
241244
fd_topob_wksp( topo, "sign_voter" );
@@ -282,6 +285,7 @@ fd_topo_initialize( config_t * config ) {
282285
fd_topob_wksp( topo, "replay" );
283286
fd_topob_wksp( topo, "runtime_pub" );
284287
fd_topob_wksp( topo, "exec" );
288+
fd_topob_wksp( topo, "writer" );
285289
fd_topob_wksp( topo, "bhole" );
286290
fd_topob_wksp( topo, "bstore" );
287291
fd_topob_wksp( topo, "tcache" );
@@ -294,6 +298,7 @@ fd_topo_initialize( config_t * config ) {
294298
fd_topob_wksp( topo, "restart" );
295299
fd_topob_wksp( topo, "exec_spad" );
296300
fd_topob_wksp( topo, "exec_fseq" );
301+
fd_topob_wksp( topo, "writer_fseq" );
297302

298303
if( enable_rpc ) fd_topob_wksp( topo, "rpcsrv" );
299304

@@ -319,7 +324,20 @@ fd_topo_initialize( config_t * config ) {
319324
/**/ fd_topob_link( topo, "sign_gossip", "sign_gossip", 128UL, 64UL, 1UL );
320325
/* TODO: The MTU is currently relatively arbitrary and needs to be resized to the size of the largest
321326
message that is outbound from the replay to exec. */
322-
FOR(exec_tile_cnt) fd_topob_link( topo, "replay_exec", "replay_exec", 128UL, 10240UL, exec_tile_cnt );
327+
FOR(exec_tile_cnt) fd_topob_link( topo, "replay_exec", "replay_exec", 128UL, 10240UL, exec_tile_cnt );
328+
FOR(writer_tile_cnt) fd_topob_link( topo, "replay_wtr", "replay_wtr", 128UL, FD_REPLAY_WRITER_MTU, 1UL );
329+
/* Assuming the number of writer tiles is sufficient to keep up with
330+
the number of exec tiles, under equilibrium, we should have at least
331+
enough link space to buffer worst case input shuffling done by the
332+
stem. That is, when a link is so unlucky that the stem RNG decides
333+
to process every other link except this one, for all writer tiles.
334+
This would be fd_ulong_pow2_up( exec_tile_cnt*writer_tile_cnt+1UL ).
335+
336+
This is all assuming we have true pipelining between exec and writer
337+
tiles. Right now, we don't. So in reality there can be at most 1
338+
in-flight transaction per exec tile, and hence a depth of 1 is in
339+
theory sufficient for each exec_writer link. */
340+
FOR(exec_tile_cnt) fd_topob_link( topo, "exec_writer", "exec_writer", 128UL, FD_EXEC_WRITER_MTU, 1UL );
323341

324342
/**/ fd_topob_link( topo, "gossip_verif", "gossip_verif", config->tiles.verify.receive_buffer_size, FD_TPU_MTU, 1UL );
325343
/**/ fd_topob_link( topo, "gossip_eqvoc", "gossip_eqvoc", 128UL, FD_TPU_MTU, 1UL );
@@ -412,6 +430,7 @@ fd_topo_initialize( config_t * config ) {
412430
413431
/**/ fd_topob_tile( topo, "replay", "replay", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
414432
FOR(exec_tile_cnt) fd_topob_tile( topo, "exec", "exec", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
433+
FOR(writer_tile_cnt) fd_topob_tile( topo, "writer", "writer", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
415434
/**/ fd_topob_tile( topo, "batch", "batch", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
416435
/* TODO: not launching the restart tile if in_wen_restart is false */
417436
//if( FD_UNLIKELY( config->tiles.restart.in_wen_restart ) ) {
@@ -425,7 +444,6 @@ fd_topo_initialize( config_t * config ) {
425444
fd_topo_tile_t * repair_tile = &topo->tiles[ fd_topo_find_tile( topo, "repair", 0UL ) ];
426445
fd_topo_tile_t * batch_tile = &topo->tiles[ fd_topo_find_tile( topo, "batch" , 0UL ) ];
427446
fd_topo_tile_t * pack_tile = &topo->tiles[ fd_topo_find_tile( topo, "pack" , 0UL ) ];
428-
fd_topo_tile_t * exec_tile = &topo->tiles[ fd_topo_find_tile( topo, "exec" , 0UL ) ];
429447

430448
/* Create a shared blockstore to be used by store and replay. */
431449
fd_topo_obj_t * blockstore_obj = setup_topo_blockstore( topo,
@@ -448,8 +466,9 @@ fd_topo_initialize( config_t * config ) {
448466
fd_topo_obj_t * runtime_pub_obj = setup_topo_runtime_pub( topo, "runtime_pub" );
449467
fd_topob_tile_uses( topo, replay_tile, runtime_pub_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
450468
fd_topob_tile_uses( topo, batch_tile, runtime_pub_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
451-
fd_topob_tile_uses( topo, pack_tile, runtime_pub_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
452-
fd_topob_tile_uses( topo, exec_tile, runtime_pub_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
469+
fd_topob_tile_uses( topo, pack_tile, runtime_pub_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
470+
FOR(exec_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], runtime_pub_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
471+
FOR(writer_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "writer", i ) ], runtime_pub_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
453472
FD_TEST( fd_pod_insertf_ulong( topo->props, runtime_pub_obj->id, "runtime_pub" ) );
454473

455474
/* Create a txncache to be used by replay. */
@@ -470,15 +489,26 @@ fd_topo_initialize( config_t * config ) {
470489
for( ulong i=0UL; i<exec_tile_cnt; i++ ) {
471490
fd_topo_obj_t * exec_spad_obj = fd_topob_obj( topo, "exec_spad", "exec_spad" );
472491
fd_topob_tile_uses( topo, replay_tile, exec_spad_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
473-
fd_topob_tile_uses( topo, exec_tile, exec_spad_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
492+
fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], exec_spad_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
493+
for( ulong j=0UL; j<writer_tile_cnt; j++ ) {
494+
/* For txn_ctx. */
495+
fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "writer", j ) ], exec_spad_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
496+
}
474497
FD_TEST( fd_pod_insertf_ulong( topo->props, exec_spad_obj->id, "exec_spad.%lu", i ) );
475498
}
476499

477500
for( ulong i=0UL; i<exec_tile_cnt; i++ ) {
478-
fd_topo_obj_t * exec_spad_obj = fd_topob_obj( topo, "fseq", "exec_fseq" );
479-
fd_topob_tile_uses( topo, exec_tile, exec_spad_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
480-
fd_topob_tile_uses( topo, replay_tile, exec_spad_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
481-
FD_TEST( fd_pod_insertf_ulong( topo->props, exec_spad_obj->id, "exec_fseq.%lu", i ) );
501+
fd_topo_obj_t * exec_fseq_obj = fd_topob_obj( topo, "fseq", "exec_fseq" );
502+
fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], exec_fseq_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
503+
fd_topob_tile_uses( topo, replay_tile, exec_fseq_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
504+
FD_TEST( fd_pod_insertf_ulong( topo->props, exec_fseq_obj->id, "exec_fseq.%lu", i ) );
505+
}
506+
507+
for( ulong i=0UL; i<writer_tile_cnt; i++ ) {
508+
fd_topo_obj_t * writer_fseq_obj = fd_topob_obj( topo, "fseq", "writer_fseq" );
509+
fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "writer", i ) ], writer_fseq_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
510+
fd_topob_tile_uses( topo, replay_tile, writer_fseq_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
511+
FD_TEST( fd_pod_insertf_ulong( topo->props, writer_fseq_obj->id, "writer_fseq.%lu", i ) );
482512
}
483513

484514
/* There's another special fseq that's used to communicate the shred
@@ -607,10 +637,16 @@ fd_topo_initialize( config_t * config ) {
607637
/**/ fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "batch_replay", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
608638
/**/ fd_topob_tile_out( topo, "replay", 0UL, "replay_voter", 0UL );
609639
FOR(bank_tile_cnt) fd_topob_tile_out( topo, "replay", 0UL, "replay_poh", i );
610-
FOR(exec_tile_cnt) fd_topob_tile_out( topo, "replay", 0UL, "replay_exec", i ); /* TODO check order in fd_replay.c macros*/
611-
640+
FOR(exec_tile_cnt) fd_topob_tile_out( topo, "replay", 0UL, "replay_exec", i ); /* TODO check order in fd_replay.c macros*/
641+
FOR(writer_tile_cnt) fd_topob_tile_out( topo, "replay", 0UL, "replay_wtr", i );
612642

613-
FOR(exec_tile_cnt) fd_topob_tile_in( topo, "exec", i, "metric_in", "replay_exec", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
643+
FOR(exec_tile_cnt) fd_topob_tile_in( topo, "exec", i, "metric_in", "replay_exec", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
644+
FOR(exec_tile_cnt) fd_topob_tile_out( topo, "exec", i, "exec_writer", i );
645+
/* All writer tiles read from all exec tiles. Each exec tile has a
646+
single out link, over which all the writer tiles round-robin. */
647+
FOR(writer_tile_cnt) for( ulong j=0UL; j<exec_tile_cnt; j++ )
648+
fd_topob_tile_in( topo, "writer", i, "metric_in", "exec_writer", j, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
649+
FOR(writer_tile_cnt) fd_topob_tile_in( topo, "writer", i, "metric_in", "replay_wtr", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
614650
615651
/**/ fd_topob_tile_in( topo, "sender", 0UL, "metric_in", "stake_out", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
616652
/**/ fd_topob_tile_in( topo, "sender", 0UL, "metric_in", "gossip_voter", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
@@ -816,7 +852,8 @@ fd_topo_initialize( config_t * config ) {
816852
strncpy( tile->replay.status_cache, config->tiles.replay.status_cache, sizeof(tile->replay.status_cache) );
817853
strncpy( tile->replay.cluster_version, config->tiles.replay.cluster_version, sizeof(tile->replay.cluster_version) );
818854
tile->replay.bank_tile_count = config->layout.bank_tile_count;
819-
tile->replay.exec_tile_count = config->layout.exec_tile_count;
855+
tile->replay.exec_tile_count = config->layout.exec_tile_count;
856+
tile->replay.writer_tile_cuont = config->layout.writer_tile_count;
820857
strncpy( tile->replay.tower_checkpt, config->tiles.replay.tower_checkpt, sizeof(tile->replay.tower_checkpt) );
821858

822859
/* not specified by [tiles.replay] */
@@ -885,6 +922,8 @@ fd_topo_initialize( config_t * config ) {
885922

886923
} else if( FD_UNLIKELY( !strcmp( tile->name, "exec" ) ) ) {
887924
strncpy( tile->exec.funk_file, config->tiles.replay.funk_file, sizeof(tile->exec.funk_file) );
925+
} else if( FD_UNLIKELY( !strcmp( tile->name, "writer" ) ) ) {
926+
strncpy( tile->writer.funk_file, config->tiles.replay.funk_file, sizeof(tile->writer.funk_file) );
888927
} else if( FD_UNLIKELY( !strcmp( tile->name, "rstart" ) ) ) {
889928
tile->restart.in_wen_restart = config->tiles.restart.in_wen_restart;
890929
strncpy( tile->restart.funk_file, config->tiles.replay.funk_file, sizeof(tile->replay.funk_file) );

src/app/shared/fd_config.h

+1
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ struct fd_config {
230230
uint bank_tile_count;
231231
uint shred_tile_count;
232232
uint exec_tile_count; /* TODO: redundant ish with bank tile cnt */
233+
uint writer_tile_count;
233234
} layout;
234235

235236
struct {

src/app/shared/fd_config_parse.c

+9
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ fdctl_pod_to_cfg( config_t * config,
380380
/* Firedancer-only configuration */
381381

382382
CFG_POP ( uint, layout.exec_tile_count );
383+
CFG_POP ( uint, layout.writer_tile_count );
383384

384385
CFG_POP ( ulong, blockstore.shred_max );
385386
CFG_POP ( ulong, blockstore.block_max );
@@ -481,6 +482,14 @@ fdctl_cfg_validate( config_t * cfg ) {
481482
CFG_HAS_NON_ZERO ( layout.bank_tile_count );
482483
CFG_HAS_NON_ZERO ( layout.shred_tile_count );
483484

485+
if( 0U!=cfg->layout.writer_tile_count ) {
486+
if( FD_UNLIKELY( cfg->layout.writer_tile_count>cfg->layout.exec_tile_count ) ) {
487+
/* There can be at most 1 in-flight transaction per exec tile
488+
awaiting finalization. */
489+
FD_LOG_ERR(( "More writer tiles (%u) than exec tiles (%u)", cfg->layout.writer_tile_count, cfg->layout.exec_tile_count ));
490+
}
491+
}
492+
484493
CFG_HAS_NON_EMPTY( hugetlbfs.mount_path );
485494
CFG_HAS_NON_EMPTY( hugetlbfs.max_page_size );
486495

src/disco/fd_disco_base.h

+5
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@
5858
#define FD_SHRED_REPAIR_MTU (FD_SHRED_DATA_HEADER_SZ + FD_SHRED_MERKLE_ROOT_SZ)
5959
FD_STATIC_ASSERT( FD_SHRED_REPAIR_MTU == 120 , update FD_SHRED_REPAIR_MTU );
6060

61+
/* Maximum size of frags going into the writer tile. */
62+
#define FD_REPLAY_WRITER_MTU (128UL)
63+
#define FD_EXEC_WRITER_MTU (128UL)
64+
65+
6166
#define FD_NETMUX_SIG_MIN_HDR_SZ ( 42UL) /* The default header size, which means no vlan tags and no IP options. */
6267
#define FD_NETMUX_SIG_IGNORE_HDR_SZ (102UL) /* Outside the allowable range, but still fits in 4 bits when compressed */
6368

src/disco/topo/fd_topo.h

+10-5
Original file line numberDiff line numberDiff line change
@@ -284,22 +284,23 @@ typedef struct {
284284
char tower_checkpt[ PATH_MAX ];
285285
int plugins_enabled;
286286

287-
/* not specified in TOML */
288-
289-
int incremental_src_type;
290-
int snapshot_src_type;
291-
292287
char identity_key_path[ PATH_MAX ];
293288
uint ip_addr;
294289
int vote;
295290
char vote_account_path[ PATH_MAX ];
296291
ulong bank_tile_count;
297292
ulong exec_tile_count;
293+
ulong writer_tile_cuont;
298294
ulong full_interval;
299295
ulong incremental_interval;
300296

301297
char blockstore_file[ PATH_MAX ];
302298
char blockstore_checkpt[ PATH_MAX ];
299+
300+
/* not specified in TOML */
301+
302+
int incremental_src_type;
303+
int snapshot_src_type;
303304
} replay;
304305

305306
struct {
@@ -316,6 +317,10 @@ typedef struct {
316317
char funk_file[ PATH_MAX ];
317318
} exec;
318319

320+
struct {
321+
char funk_file[ PATH_MAX ];
322+
} writer;
323+
319324
struct {
320325
ushort send_to_port;
321326
uint send_to_ip_addr;

src/disco/topo/fd_topob.c

+1
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ fd_topob_auto_layout( fd_topo_t * topo ) {
367367
"repair", /* FIREDANCER only */
368368
"replay", /* FIREDANCER only */
369369
"exec", /* FIREDANCER only */
370+
"writer", /* FIREDANCER only */
370371
"sender", /* FIREDANCER only */
371372
"eqvoc", /* FIREDANCER only */
372373
"rpcsrv", /* FIREDANCER only */

0 commit comments

Comments
 (0)