
Commit 0cc5b08

Merge pull request #5779 from IntersectMBO/bench-master
workbench: UTxO scaling + LMDB benchmarks, improved Nomad cluster handling
2 parents 3f5181c + 6c619b1 commit 0cc5b08

10 files changed (+267 additions, -50 deletions)


Makefile

Lines changed: 6 additions & 3 deletions
@@ -75,9 +75,9 @@ ps: ## Plain-text list of profiles
 ## Profile-based cluster shells (autogenerated targets)
 ##
 PROFILES_BASE := default default-p2p plutus plutus-secp-ecdsa plutus-secp-schnorr oldtracing idle tracer-only
-PROFILES_FAST := fast fast-p2p fast-plutus fast-notracer fast-oldtracing
+PROFILES_FAST := fast fast-p2p fast-plutus fast-notracer fast-oldtracing faststartup-24M
 PROFILES_CI_TEST := ci-test ci-test-p2p ci-test-plutus ci-test-notracer ci-test-rtview ci-test-dense10
-PROFILES_CI_BENCH := ci-bench ci-bench-p2p ci-bench-plutus ci-bench-plutus-secp-ecdsa ci-bench-plutus-secp-schnorr ci-bench-notracer ci-bench-rtview
+PROFILES_CI_BENCH := ci-bench ci-bench-p2p ci-bench-plutus ci-bench-plutus-secp-ecdsa ci-bench-plutus-secp-schnorr ci-bench-notracer ci-bench-rtview ci-bench-lmdb
 PROFILES_TRACE_BENCH := trace-bench trace-bench-notracer trace-bench-oldtracing trace-bench-rtview
 PROFILES_TRACE_FULL := trace-full trace-full-rtview
 PROFILES_EPOCHTRANS := epoch-transition
@@ -100,7 +100,9 @@ PROFILES_NOMAD_PERF := default-nomadperf ci-test-nomadperf ci-bench-nomadp
 PROFILES_NOMAD_PERF += plutus-nomadperf fast-nomadperf latency-nomadperf
 PROFILES_NOMAD_PERF_NOP2P := default-nomadperf-nop2p oldtracing-nomadperf-nop2p ci-test-nomadperf-nop2p ci-bench-nomadperf-nop2p
 PROFILES_NOMAD_PERF_NOP2P += value-nomadperf-nop2p value-oldtracing-nomadperf-nop2p plutus-nomadperf-nop2p fast-nomadperf-nop2p
-PROFILES_NOMAD_PERFSSD := fast-nomadperfssd
+PROFILES_NOMAD_PERFSSD := value-nomadperfssd fast-nomadperfssd latency-nomadperfssd
+# single node profiles on the NomadSSD cluster on AWS
+PROFILES_UTXOSCALE_SOLO := utxoscale-solo-24M64G-nomadperfssd utxoscale-solo-12M64G-nomadperfssd utxoscale-solo-12M16G-nomadperfssd

 LOCAL_PROFILES += $(PROFILES_BASE)
 LOCAL_PROFILES += $(PROFILES_FAST)
@@ -120,6 +122,7 @@ LOCAL_PROFILES += $(PROFILES_VENDOR)
 CLOUD_PROFILES += $(PROFILES_NOMAD_PERF)
 CLOUD_PROFILES += $(PROFILES_NOMAD_PERF_NOP2P)
 CLOUD_PROFILES += $(PROFILES_NOMAD_PERFSSD)
+CLOUD_PROFILES += $(PROFILES_UTXOSCALE_SOLO)


 ## Note: to enable a shell for a profile, just add its name (one of names from 'make ps') to SHELL_PROFILES

nix/nixos/cardano-node-service.nix

Lines changed: 29 additions & 1 deletion
@@ -128,6 +128,11 @@ let
         ];
       };
       instanceDbPath = cfg.databasePath i;
+      utxoLmdbParams = ["--v1-lmdb-ledger-db-backend"]
+        ++ lib.optionals (cfg.lmdbDatabasePath i != null)
+          [ "--ssd-database-dir ${cfg.lmdbDatabasePath i}"
+            # "--ssd-snapshot-tables"
+          ];
       cmd = builtins.filter (x: x != "") [
         "${cfg.executable} run"
         "--config ${nodeConfigFile}"
@@ -143,7 +148,8 @@ let
         "--tracer-socket-path-accept ${cfg.tracerSocketPathAccept i}"
       ] ++ lib.optionals (cfg.tracerSocketPathConnect i != null) [
         "--tracer-socket-path-connect ${cfg.tracerSocketPathConnect i}"
-      ] ++ consensusParams.${cfg.nodeConfig.Protocol} ++ cfg.extraArgs ++ cfg.rtsArgs;
+      ] ++ lib.optionals (cfg.withUtxoHdLmdb i) utxoLmdbParams
+        ++ consensusParams.${cfg.nodeConfig.Protocol} ++ cfg.extraArgs ++ cfg.rtsArgs;
     in ''
       echo "Starting: ${concatStringsSep "\"\n echo \"" cmd}"
       echo "..or, once again, in a single line:"
@@ -350,6 +356,16 @@ in {
         description = ''Node database path, for each instance.'';
       };

+      lmdbDatabasePath = mkOption {
+        type = funcToOr nullOrStr;
+        default = null;
+        apply = x : if builtins.isFunction x then x else if x == null then _: null else _: x;
+        description = ''
+          Node UTxO-HD LMDB path for performant disk I/O, for each instance.
+          This could point to a direct-access SSD, with a specifically created journal-less file system and optimized mount options.
+        '';
+      };
+
       socketPath = mkOption {
         type = funcToOr types.str;
         default = i : "${runtimeDir i}/node.socket";
@@ -648,6 +664,13 @@ in {
         default = false;
       };

+      withUtxoHdLmdb = mkOption {
+        type = funcToOr types.bool;
+        default = false;
+        apply = x: if builtins.isFunction x then x else _: x;
+        description = ''On an UTxO-HD enabled node, the in-memory backend is the default. This activates the on-disk backend (LMDB) instead.'';
+      };
+
       extraArgs = mkOption {
         type = types.listOf types.str;
         default = [];
@@ -692,6 +715,7 @@ in {
   config = mkIf cfg.enable ( let
     stateDirBase = "/var/lib/";
     runDirBase = "/run/";
+    lmdbPaths = filter (x: x != null) (map (e: cfg.lmdbDatabasePath e) (builtins.genList lib.trivial.id cfg.instances));
     genInstanceConf = f: listToAttrs (if cfg.instances > 1
       then genList (i: let n = "cardano-node-${toString i}"; in nameValuePair n (f n i)) cfg.instances
       else [ (nameValuePair "cardano-node" (f "cardano-node" 0)) ]); in lib.mkMerge [
@@ -793,6 +817,10 @@ in {
           assertion = !(cfg.systemdSocketActivation && cfg.useNewTopology);
           message = "Systemd socket activation cannot be used with p2p topology due to a systemd socket re-use issue.";
         }
+        {
+          assertion = (length lmdbPaths) == (length (lib.lists.unique lmdbPaths));
+          message = "When configuring multiple LMDB enabled nodes on one instance, lmdbDatabasePath must be unique.";
+        }
       ];
     }
   ]);
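
Usage sketch (not part of the commit): in a machine's NixOS configuration the two new options compose roughly as below. The two-instance layout and the /ssd1 mount point are assumptions for illustration; both options also accept a plain value instead of a function of the instance index.

{
  services.cardano-node = {
    enable = true;
    instances = 2;
    # Use the on-disk LMDB backend on UTxO-HD nodes (default is in-memory).
    withUtxoHdLmdb = true;
    # One LMDB directory per instance; the new assertion requires distinct
    # paths when several LMDB-enabled instances share one machine.
    lmdbDatabasePath = i: "/ssd1/lmdb-node-${toString i}";
  };
}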

nix/workbench/backend/nomad-job.nix

Lines changed: 42 additions & 14 deletions
@@ -476,14 +476,28 @@ let
           }
         ];
       };
-
-      # The Consul namespace in which group and task-level services within the
-      # group will be registered. Use of template to access Consul KV will read
-      # from the specified Consul namespace. Specifying namespace takes
-      # precedence over the -consul-namespace command line argument in job run.
-      # namespace = "";
-      # Not available as the documentations says: Extraneous JSON object property; No argument or block type is named "namespace".
-
+    }
+    //
+    # If it needs host volumes add the constraints (can't be "null" or "[]".)
+    ### - https://developer.hashicorp.com/nomad/tutorials/stateful-workloads/stateful-workloads-host-volumes
+    (lib.optionalAttrs (profileData.value.cluster.nomad.host_volumes != null) {
+      volume = lib.listToAttrs (lib.lists.imap0
+        (i: v: {
+          # Internal name, reference to mount in this group's tasks below.
+          name = "volume-${taskName}-${toString i}";
+          value = {
+            type = "host"; # We only support type "host".
+            read_only = v.read_only;
+            # How it is named in the Nomad Client's config.
+            # https://developer.hashicorp.com/nomad/docs/configuration/client#host_volume-block
+            source = v.source;
+          };
+        })
+        profileData.value.cluster.nomad.host_volumes
+      );
+    })
+    //
+    {
       # The task stanza creates an individual unit of work, such as a Docker
       # container, web application, or batch processing.
       # https://developer.hashicorp.com/nomad/docs/job-specification/task
@@ -557,12 +571,12 @@ let
           # address of an AWS EC2 instance set this to
           # ${attr.unique.platform.aws.public-ipv4}.
           address =
-            # When using the dedicated P&T Nomad cluster on AWS we use public
-            # IPs/routing, all the other cloud runs are behind a VPC/firewall.
-            # Local runs just use 12.0.0.1.
-            if lib.strings.hasInfix "-nomadperf" profileData.profileName
+            # When using dedicated Nomad clusters on AWS we want to use public
+            # IPs/routing, all the other cloud runs will run behind a
+            # VPC/firewall.
+            if profileData.value.cluster.aws.use_public_routing
             then "\${attr.unique.platform.aws.public-ipv4}"
-            else ""
+            else "" # Local runs just use 127.0.0.1.
           ;
           # Specifies the port to advertise for this service. The value of
           # port depends on which address_mode is being used:
@@ -591,6 +605,20 @@ let
           check = null;
         };

+        # If it needs host volumes mount them (defined above if any).
+        volume_mount = if profileData.value.cluster.nomad.host_volumes != null
+          then lib.lists.imap0
+            (i: v: {
+              # Internal name, defined above in the group's specification.
+              volume = "volume-${taskName}-${toString i}";
+              # Where it is going to be mounted inside the Task.
+              destination = v.destination;
+              read_only = v.read_only;
+            })
+            profileData.value.cluster.nomad.host_volumes
+          else null
+        ;
+
         # Specifies the set of templates to render for the task. Templates can
         # be used to inject both static and dynamic configuration with data
         # populated from environment variables, Consul and Vault.
@@ -1363,7 +1391,7 @@ let
           [
             # Address string to
             (
-              if lib.strings.hasInfix "-nomadperf" profileData.profileName
+              if profileData.value.cluster.aws.use_public_routing
              then ''--host-addr {{ env "attr.unique.platform.aws.local-ipv4" }}''
              else ''--host-addr 0.0.0.0''
             )
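
For orientation (illustrative, not part of the commit): given a single host_volumes entry of the shape suggested in prof0-defaults.jq, {source: "ssd1", destination: "/ssd1", read_only: false}, and a hypothetical taskName of "node-0", the group- and task-level fragments above evaluate to roughly this Nix value:

{
  # Group level: declare the volume, named after the task and its index,
  # pointing at the host_volume "ssd1" from the Nomad client's config.
  volume."volume-node-0-0" = {
    type      = "host";
    read_only = false;
    source    = "ssd1";
  };
  # Task level: mount that volume inside the task at its destination.
  volume_mount = [
    { volume      = "volume-node-0-0";
      destination = "/ssd1";
      read_only   = false;
    }
  ];
}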

nix/workbench/backend/nomad.sh

Lines changed: 14 additions & 3 deletions
@@ -2323,12 +2323,23 @@ backend_nomad() {
         # If the node in "${generator_task}" quits generators fails with:
         # tx-generator: MuxError MuxBearerClosed "<socket: 12> closed when reading data, waiting on next header True"
         # Service binary 'tx-generator' returned status: 1
+        msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with an error exit code!")"
+        # Give the node where tx-generator runs some time to quit.
+        msg "$(yellow "         Waiting 60s to check the status of supervisord program \"${generator_task}\" (inside Nomad Task \"${generator_task}\")")"
+        sleep 30
         if backend_nomad is-task-program-running "${dir}" "${generator_task}" "${generator_task}" 5
         then
           # This was not expected!
           # But check it wasn't a race condition of a stopping cluster!
           if ! test -f "${dir}"/flag/cluster-stopping
           then
+            msg "$(red "ERROR: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with an error exit code while supervisord program \"${generator_task}\" (inside Nomad Task \"${generator_task}\") is still running!")"
+            # The tx-generator can fail because something happened with
+            # the nodes (out of memory?), this gives the nodes more time
+            # to shutdown properly and/or show any possible cause of
+            # trouble before being killed.
+            msg "$(yellow "WARNING: Waiting one minute so nodes are not killed immediately")"
+            sleep 60
             touch "${dir}"/flag/cluster-stopping
             fatal "Generator quit unexpectedly!!!"
           fi
@@ -2337,14 +2348,14 @@ backend_nomad() {
             touch "${dir}"/generator/quit
             # Show the warning and continue with the counter
             echo -ne "\n"
-            msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\" quit with an error exit code")"
+            msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with an error exit code but expected when supervisord program \"${generator_task}\" (inside Nomad Task \"${generator_task}\") is not running")"
             msg_ne "nomad: $(blue Waiting) until all pool nodes are stopped: 000000"
           fi
         else
           touch "${dir}"/generator/quit
           # Show the warning and continue with the counter
           echo -ne "\n"
-          msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\" quit with a non-error exit code")"
+          msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with a non-error exit code")"
           msg_ne "nomad: $(blue Waiting) until all pool nodes are stopped: 000000"
         fi
       fi # Finish generator checks.
@@ -3855,7 +3866,7 @@ client {

   # Specifies an arbitrary string used to logically group client nodes by
   # user-defined class. This can be used during job placement as a filter.
-  node_class = "perf" # Using the "world.dev.cardano.org" testing class for "perf".
+  node_class = "" # Make sure we are not using namespaces locally.

   # "artifact" parameters (fail fast!!!)
   ######################################

nix/workbench/backend/supervisor-conf.nix

Lines changed: 19 additions & 18 deletions
@@ -120,24 +120,25 @@ let
         startretries = 0;
         # Seconds it needs to stay running to consider the start successful
         # In cases with a big genesis file, like the "value" profile with ~600
-        # mega, if this file has an error the node can fail after the 5 seconds
-        # we use as default for the other programs and the error will be catched
-        # later by the healthcheck service with a misleading message.
-        # We found with our reference machines (c5.2xlarge, 16 MB and 8 cores),
-        # when running the "value" profile, that with 50 seconds at least one
-        # node was assummed successful (its socket was created). So to the
-        # default 5 we add 45 seconds when the UTxO size is the one of the
-        # "value" profile and seconds proportionaly to this for the others.
-        ### derived.utxo_generated
-        ### - fast: 18000 (Default of 5s is OK)
-        ### - ci-test: 18000 (Default of 5s is OK)
-        ### - default: 43200 (Default of 5s is OK)
-        ### - plutus: 61200 (Default of 5s is OK)
-        ### - forge-stress-pre: 72000
-        ### - forge-stress-large: 144000
-        ### - value: 1536000 (30s more needed)
-        ### - chainsync-early-alonzo: 31104000
-        startsecs = 5 + (profileData.derived.utxo_generated / (1536000 / 50));
+        # mega, if this file has a format error the node can fail after the 5
+        # seconds we use as default for the other "program"s and the error will
+        # be caught later by the healthcheck service with a misleading message.
+        # We found with our AWS reference machines (c5.2xlarge, 16 MB and 8
+        # cores), when running the "value" profile, that with 50 seconds at
+        # least one node was assumed successful (its socket was created). So to
+        # the default 5 we add 50 seconds when the UTxO set size is the one of
+        # the "value" profile and seconds proportionally to this for the others.
+        # Not directly related to "genesis.extra_future_offset" or
+        # "derived.genesis_future_offset".
+        ### derived.dataset_measure
+        ### - fast: 0 (Default of 5s is OK)
+        ### - ci-test: 0 (Default of 5s is OK)
+        ### - default: 0 (Default of 5s is OK)
+        ### - plutus: 0 (Default of 5s is OK)
+        ### - forge-stress-pre: 5000000
+        ### - forge-stress-large: 11300000
+        ### - value: 5000000 (50s more needed)
+        startsecs = 5 + (profileData.derived.dataset_measure / (5000000 / 50));
       })
       nodeSpecs))
     //
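
The new formula scales the extra grace period linearly with derived.dataset_measure, calibrated so that the "value" profile (5,000,000) gets the 50 extra seconds observed on the reference machines. A quick check of the arithmetic, with an illustrative helper name (Nix integer division):

let
  # startsecs as computed above: 5 + dataset_measure / 100000.
  startsecsFor = dataset_measure: 5 + (dataset_measure / (5000000 / 50));
in {
  fast-or-default    = startsecsFor        0;  #   5 s (default is enough)
  value              = startsecsFor  5000000;  #  55 s
  forge-stress-large = startsecsFor 11300000;  # 118 s
}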

nix/workbench/nomad.sh

Lines changed: 1 addition & 1 deletion
@@ -429,7 +429,7 @@ EOL
   local nomad_class
   nomad_class="$(jq -r .cluster.nomad.class "${WB_SHELL_PROFILE_DATA}"/profile.json)"
   local perf_nodes
-  perf_nodes="$(nomad node status -filter 'Status=="ready"' -filter "NodeClass==\"${nomad_class}\"" -json)"
+  perf_nodes="$(nomad node status -filter "Status==\"ready\" and NodeClass==\"${nomad_class}\"" -json)"
   # Create the base JSON string but without the "attributes" because those
   # are only available when fetching the status of individual nodes.
   local nodes_json

nix/workbench/profile/prof0-defaults.jq

Lines changed: 8 additions & 1 deletion
@@ -61,10 +61,12 @@ def era_defaults($era):

   , node:
     { rts_flags_override: []
+    , heap_limit: null ## optional: heap limit in MB (translates to RTS flag -M)
     , shutdown_on_slot_synced: null
     , shutdown_on_block_synced: null
     , tracing_backend: "trace-dispatcher" ## or "iohk-monitoring"
     , tracer: true
+    , utxo_lmdb: false ## use LMDB backend (instead of default in-mem) on a UTxO-HD node; will be ignored by non-UTxO-HD nodes
     , verbatim:
       {
       }
@@ -96,20 +98,25 @@ def era_defaults($era):
       { producer: {cores: 2, memory: 15000, memory_max: 16000}
       , explorer: {cores: 2, memory: 15000, memory_max: 16000}
       }
+      # Volumes like {source: "ssd1", destination: "/ssd1", read_only: false}
+    , host_volumes: null
     , fetch_logs_ssh: false
     }
   , aws:
     { instance_type:
       { producer: "c5.2xlarge"
       , explorer: "m5.4xlarge"
       }
+      # "attr.unique.platform.aws.public-ipv4" to bind and service definition.
+    , use_public_routing: false
     }
   , minimun_storage:
     { producer: 12582912 # 12×1024×1024
     , explorer: 14155776 # 13.5×1024×1024
     }
   , keep_running: false
+  , ssd_directory: null
   }
-
   }
+
 } | (.common * (.[$era] // {}));
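
Summary of the new knobs and their defaults (illustrative only; written as a Nix attrset here, while the actual defaults live in this jq module):

{
  node = {
    heap_limit = null;   # optional heap limit in MB; translates to RTS flag -M
    utxo_lmdb  = false;  # use the on-disk LMDB backend on a UTxO-HD node
  };
  cluster = {
    nomad.host_volumes = null;       # e.g. [ { source = "ssd1"; destination = "/ssd1"; read_only = false; } ]
    aws.use_public_routing = false;  # bind/advertise attr.unique.platform.aws.public-ipv4 when true
    ssd_directory = null;
  };
}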
