Skip to content

Commit a3096c3

Browse files
authored
Merge pull request #5997 from IntersectMBO/mkarg/tracer-prometheus
cardano-tracer: OpenMetrics compliance for Prometheus; fix `forHuman` output in journald
2 parents ea98b81 + fbe6675 commit a3096c3

File tree

34 files changed

+545
-217
lines changed

34 files changed

+545
-217
lines changed

cardano-node/cardano-node.cabal

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,8 @@ library
206206
, strict-sop-core
207207
, strict-stm
208208
, time
209-
, trace-dispatcher ^>= 2.6.0
210-
, trace-forward ^>= 2.2.6
209+
, trace-dispatcher ^>= 2.7.0
210+
, trace-forward ^>= 2.2.7
211211
, trace-resources ^>= 0.2.2
212212
, tracer-transformers
213213
, transformers

cardano-node/src/Cardano/Node/Tracing/Documentation.hs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ import Ouroboros.Network.TxSubmission.Inbound (TraceTxSubmissionInboun
9696
import Ouroboros.Network.TxSubmission.Outbound (TraceTxSubmissionOutbound)
9797

9898
import Control.Exception (SomeException)
99+
import Control.Monad (forM_)
99100
import Data.Aeson.Types (ToJSON)
100101
import Data.Proxy (Proxy (..))
101102
import qualified Data.Text.IO as T
@@ -110,6 +111,7 @@ data TraceDocumentationCmd
110111
= TraceDocumentationCmd
111112
{ tdcConfigFile :: FilePath
112113
, tdcOutput :: FilePath
114+
, tdMetricsHelp :: Maybe FilePath
113115
}
114116

115117
parseTraceDocumentationCmd :: Opt.Parser TraceDocumentationCmd
@@ -124,14 +126,20 @@ parseTraceDocumentationCmd =
124126
(TraceDocumentationCmd
125127
<$> Opt.strOption
126128
( Opt.long "config"
127-
<> Opt.metavar "NODE-CONFIGURATION"
129+
<> Opt.metavar "FILE"
128130
<> Opt.help "Configuration file for the cardano-node"
129131
)
130132
<*> Opt.strOption
131133
( Opt.long "output-file"
132134
<> Opt.metavar "FILE"
133-
<> Opt.help "Generated documentation output file"
135+
<> Opt.help "Generated documentation output file (Markdown)"
134136
)
137+
<*> Opt.optional (Opt.strOption
138+
( Opt.long "output-metric-help"
139+
<> Opt.metavar "FILE"
140+
<> Opt.help "Metrics helptext file for cardano-tracer (JSON)"
141+
)
142+
)
135143
Opt.<**> Opt.helper)
136144
$ mconcat [ Opt.progDesc "Generate the trace documentation" ]
137145
]
@@ -147,18 +155,19 @@ runTraceDocumentationCmd
147155
:: TraceDocumentationCmd
148156
-> IO ()
149157
runTraceDocumentationCmd TraceDocumentationCmd{..} = do
150-
docTracers tdcConfigFile tdcOutput
158+
docTracers tdcConfigFile tdcOutput tdMetricsHelp
151159

152160
-- Have to repeat the construction of the tracers here,
153161
-- as the tracers are behind old tracer interface after construction in mkDispatchTracers.
154162
-- Can be changed, when old tracers have gone
155163
docTracers ::
156164
FilePath
157165
-> FilePath
166+
-> Maybe FilePath
158167
-> IO ()
159-
docTracers configFileName outputFileName = do
168+
docTracers configFileName outputFileName mbMetricsHelpFilename = do
160169
(bl, trConfig) <- docTracersFirstPhase (Just configFileName)
161-
docTracersSecondPhase outputFileName trConfig bl
170+
docTracersSecondPhase outputFileName mbMetricsHelpFilename trConfig bl
162171

163172

164173
-- Have to repeat the construction of the tracers here,
@@ -761,12 +770,16 @@ docTracersFirstPhase condConfigFileName = do
761770

762771
docTracersSecondPhase ::
763772
FilePath
773+
-> Maybe FilePath
764774
-> TraceConfig
765775
-> DocTracer
766776
-> IO ()
767-
docTracersSecondPhase outputFileName trConfig bl = do
768-
content <- docuResultsToText bl trConfig
769-
handle <- openFile outputFileName WriteMode
770-
hSetEncoding handle utf8
771-
T.hPutStr handle content
772-
hClose handle
777+
docTracersSecondPhase outputFileName mbMetricsHelpFilename trConfig bl = do
778+
docuResultsToText bl trConfig
779+
>>= doWrite outputFileName
780+
forM_ mbMetricsHelpFilename $ \f ->
781+
doWrite f (docuResultsToMetricsHelptext bl)
782+
where
783+
doWrite outfile text =
784+
withFile outfile WriteMode $ \handle ->
785+
hSetEncoding handle utf8 >> T.hPutStr handle text

cardano-tracer/CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
# ChangeLog
22

3-
## 0.3 (September 20, 2024)
3+
## 0.3 (September 26, 2024)
44

55
* Abandon `snap` webserver in favour of `wai`/`warp` for Prometheus and EKG Monitoring.
66
* Add dynamic routing to EKG stores of all connected nodes.
77
* Derive URL compliant routes from connected node names (instead of plain node names).
88
* Remove the requirement of two distinct ports for the EKG backend (changing `hasEKG` config type).
9+
* Improved OpenMetrics compliance of Prometheus exposition; also addresses [issue#5140][i5140].
10+
* Prometheus help annotations can be provided via the new optional config value `metricsHelp`.
911
* For optional RTView component only: Disable SSL/https connections. Force `snap-server`
1012
dependency to build with `-flag -openssl`.
1113
* Add JSON responses when listing connected nodes for both Prometheus and EKG Monitoring.
14+
* Fix: actually send `forHuman` rendering output to journald when specified.
1215
* Add consistency check for redundant port values in the config.
1316

1417
## 0.2.4 (August 13, 2024)
@@ -48,3 +51,7 @@
4851
## 0.1.0
4952

5053
Initial version.
54+
55+
56+
57+
[i5140]: https://github.com/IntersectMBO/cardano-node/issues/5140

cardano-tracer/bench/cardano-tracer-bench.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ main = do
7575
, teReforwardTraceObjects = \_-> pure ()
7676
, teRegistry = handleRegistry
7777
, teStateDir = Nothing
78+
, teMetricsHelp = []
7879
}
7980

8081
tracerEnvRTView :: TracerEnvRTView
@@ -148,6 +149,7 @@ main = do
148149
, rotation = Nothing
149150
, verbosity = Nothing
150151
, metricsComp = Nothing
152+
, metricsHelp = Nothing
151153
, hasForwarding = Nothing
152154
, resourceFreq = Nothing
153155
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
{
2+
"Forge.DelegMapSize": "Delegation map size",
3+
"Forge.UtxoSize": "UTxO set size",
4+
"Mem.resident": "Kernel-reported RSS (resident set size)",
5+
"RTS.alloc": "RTS-reported bytes allocated",
6+
"RTS.gcHeapBytes": "RTS-reported heap bytes",
7+
"RTS.gcLiveBytes": "RTS-reported live bytes",
8+
"RTS.gcMajorNum": "Major GCs",
9+
"RTS.gcMinorNum": "Minor GCs",
10+
"RTS.gcticks": "RTS-reported CPU ticks spent on GC",
11+
"RTS.mutticks": "RTS-reported CPU ticks spent on mutator",
12+
"RTS.threads": "RTS green thread count",
13+
"Stat.cputicks": "Kernel-reported CPU ticks (1/100th of a second), since process start",
14+
"Stat.fsRd": "FS bytes read",
15+
"Stat.fsWr": "FS bytes written",
16+
"Stat.netRd": "IP packet bytes read",
17+
"Stat.netWr": "IP packet bytes written",
18+
"SuppressedMessages...": "",
19+
"aboutToLeadSlotLast": "",
20+
"adoptedOwnBlockSlotLast": "",
21+
"adoptionThreadDied": "",
22+
"blockContext": "",
23+
"blockFromFuture": "",
24+
"blockNum": "Number of blocks in this chain fragment.",
25+
"blockReplayProgress": "Progress in percent",
26+
"blockfetchclient.blockdelay": "",
27+
"blockfetchclient.blockdelay.cdfFive": "",
28+
"blockfetchclient.blockdelay.cdfOne": "",
29+
"blockfetchclient.blockdelay.cdfThree": "",
30+
"blockfetchclient.blocksize": "",
31+
"blockfetchclient.lateblocks": "",
32+
"blocksForged": "How many blocks did this node forge?",
33+
"cardano_build_info": "Cardano node build info",
34+
"cardano_version_major": "Cardano node version information",
35+
"cardano_version_minor": "Cardano node version information",
36+
"cardano_version_patch": "Cardano node version information",
37+
"connectedPeers": "Number of connected peers",
38+
"connectionManager.duplexConns": "",
39+
"connectionManager.fullDuplexConns": "",
40+
"connectionManager.inboundConns": "",
41+
"connectionManager.outboundConns": "",
42+
"connectionManager.unidirectionalConns": "",
43+
"couldNotForgeSlotLast": "",
44+
"currentKESPeriod": "",
45+
"density": "The actual number of blocks created over the maximum expected number of blocks that could be created over the span of the last @k@ blocks.",
46+
"epoch": "In which epoch is the tip of the current chain.",
47+
"forgedInvalidSlotLast": "",
48+
"forgedSlotLast": "",
49+
"forging_enabled": "Can this node forge blocks? (Is it provided with block forging credentials) 0 = no, 1 = yes",
50+
"haskell_compiler_major": "Cardano compiler version information",
51+
"haskell_compiler_minor": "Cardano compiler version information",
52+
"headersServed": "A counter triggered on any header event",
53+
"headersServed.falling": "A counter triggered only on header event with falling edge",
54+
"inboundGovernor.Cold": "",
55+
"inboundGovernor.Hot": "",
56+
"inboundGovernor.Idle": "",
57+
"inboundGovernor.Warm": "",
58+
"ledgerState": "",
59+
"ledgerView": "",
60+
"localInboundGovernor.cold": "",
61+
"localInboundGovernor.hot": "",
62+
"localInboundGovernor.idle": "",
63+
"localInboundGovernor.warm": "",
64+
"mempoolBytes": "Byte size of the mempool",
65+
"nodeCannotForge": "How many times was this node unable to forge [a block]?",
66+
"nodeIsLeader": "How many times was this node slot leader?",
67+
"nodeNotLeader": "",
68+
"notAdoptedSlotLast": "",
69+
"operationalCertificateExpiryKESPeriod": "",
70+
"operationalCertificateStartKESPeriod": "",
71+
"peerSelection.ActiveBigLedgerPeers": "Number of active big ledger peers",
72+
"peerSelection.ActiveBigLedgerPeersDemotions": "Number of active big ledger peers demotions",
73+
"peerSelection.ActiveBootstrapPeers": "Number of active bootstrap peers",
74+
"peerSelection.ActiveBootstrapPeersDemotions": "Number of active bootstrap peers demotions",
75+
"peerSelection.ActiveLocalRootPeers": "Number of active local root peers",
76+
"peerSelection.ActiveLocalRootPeersDemotions": "Number of active local root peers demotions",
77+
"peerSelection.ActiveNonRootPeers": "Number of active non root peers",
78+
"peerSelection.ActiveNonRootPeersDemotions": "Number of active non root peers demotions",
79+
"peerSelection.ActivePeers": "Number of active peers",
80+
"peerSelection.ActivePeersDemotions": "Number of active peers demotions",
81+
"peerSelection.Cold": "Number of cold peers",
82+
"peerSelection.ColdBigLedgerPeers": "Number of cold big ledger peers",
83+
"peerSelection.ColdBigLedgerPeersPromotions": "Number of cold big ledger peers promotions",
84+
"peerSelection.ColdBootstrapPeersPromotions": "Number of cold bootstrap peers promotions",
85+
"peerSelection.ColdNonRootPeersPromotions": "Number of cold non root peers promotions",
86+
"peerSelection.ColdPeersPromotions": "Number of cold peers promotions",
87+
"peerSelection.EstablishedBigLedgerPeers": "Number of established big ledger peers",
88+
"peerSelection.EstablishedBootstrapPeers": "Number of established bootstrap peers",
89+
"peerSelection.EstablishedLocalRootPeers": "Number of established local root peers",
90+
"peerSelection.EstablishedNonRootPeers": "Number of established non root peers",
91+
"peerSelection.EstablishedPeers": "Number of established peers",
92+
"peerSelection.Hot": "Number of hot peers",
93+
"peerSelection.HotBigLedgerPeers": "Number of hot big ledger peers",
94+
"peerSelection.KnownBigLedgerPeers": "Number of known big ledger peers",
95+
"peerSelection.KnownBootstrapPeers": "Number of known bootstrap peers",
96+
"peerSelection.KnownLocalRootPeers": "Number of known local root peers",
97+
"peerSelection.KnownNonRootPeers": "Number of known non root peers",
98+
"peerSelection.KnownPeers": "Number of known peers",
99+
"peerSelection.LocalRoots": "Numbers of warm & hot local roots",
100+
"peerSelection.RootPeers": "Number of root peers",
101+
"peerSelection.Warm": "Number of warm peers",
102+
"peerSelection.WarmBigLedgerPeers": "Number of warm big ledger peers",
103+
"peerSelection.WarmBigLedgerPeersDemotions": "Number of warm big ledger peers demotions",
104+
"peerSelection.WarmBigLedgerPeersPromotions": "Number of warm big ledger peers promotions",
105+
"peerSelection.WarmBootstrapPeersDemotions": "Number of warm bootstrap peers demotions",
106+
"peerSelection.WarmBootstrapPeersPromotions": "Number of warm bootstrap peers promotions",
107+
"peerSelection.WarmLocalRootPeersPromotions": "Number of warm local root peers promotions",
108+
"peerSelection.WarmNonRootPeersDemotions": "Number of warm non root peers demotions",
109+
"peerSelection.WarmNonRootPeersPromotions": "Number of warm non root peers promotions",
110+
"peerSelection.WarmPeersDemotions": "Number of warm peers demotions",
111+
"peerSelection.WarmPeersPromotions": "Number of warm peers promotions",
112+
"peerSelection.churn.DecreasedActiveBigLedgerPeers": "number of decreased active big ledger peers",
113+
"peerSelection.churn.DecreasedActivePeers": "number of decreased active peers",
114+
"peerSelection.churn.DecreasedEstablishedBigLedgerPeers": "number of decreased established big ledger peers",
115+
"peerSelection.churn.DecreasedEstablishedPeers": "number of decreased established peers",
116+
"peerSelection.churn.DecreasedKnownBigLedgerPeers": "number of decreased known big ledger peers",
117+
"peerSelection.churn.DecreasedKnownPeers": "number of decreased known peers",
118+
"peerSelection.churn.IncreasedActiveBigLedgerPeers": "number of increased active big ledger peers",
119+
"peerSelection.churn.IncreasedActivePeers": "number of increased active peers",
120+
"peerSelection.churn.IncreasedEstablishedBigLedgerPeers": "number of increased established big ledger peers",
121+
"peerSelection.churn.IncreasedEstablishedPeers": "number of increased established peers",
122+
"peerSelection.churn.IncreasedKnownBigLedgerPeers": "number of increased known big ledger peers",
123+
"peerSelection.churn.IncreasedKnownPeers": "number of increased known peers",
124+
"peersFromNodeKernel": "",
125+
"remainingKESPeriods": "",
126+
"served.block": "",
127+
"slotInEpoch": "Relative slot number of the tip of the current chain within the epoch.",
128+
"slotIsImmutable": "",
129+
"slotNum": "Number of slots in this chain fragment.",
130+
"slotsMissed": "How many slots did this node miss?",
131+
"submissions.accepted": "",
132+
"submissions.rejected": "",
133+
"submissions.submitted": "",
134+
"systemStartTime": "The UTC time this node was started.",
135+
"txsInMempool": "Transactions in mempool",
136+
"txsProcessedNum": ""
137+
}

cardano-tracer/docs/cardano-tracer.md

Lines changed: 58 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,24 @@
44

55
# Contents
66

7-
1. [Introduction](#Introduction)
8-
1. [Motivation](#Motivation)
9-
3. [Overview](#Overview)
10-
2. [Build and run](#Build-and-run)
11-
3. [Configuration](#Configuration)
12-
1. [Distributed Scenario](#Distributed-scenario)
13-
2. [Local Scenario](#Local-scenario)
14-
3. [Network Magic](#Network-magic)
15-
4. [Requests](#Requests)
16-
5. [Logging](#Logging)
17-
6. [Logs Rotation](#Logs-rotation)
18-
7. [Prometheus](#Prometheus)
19-
8. [EKG Monitoring](#EKG-monitoring)
20-
9. [Verbosity](#Verbosity)
21-
10. [RTView](#RTView)
7+
- [Cardano Tracer](#cardano-tracer)
8+
- [Contents](#contents)
9+
- [Introduction](#introduction)
10+
- [Motivation](#motivation)
11+
- [Overview](#overview)
12+
- [Build and run](#build-and-run)
13+
- [Configuration](#configuration)
14+
- [Distributed Scenario](#distributed-scenario)
15+
- [Important](#important)
16+
- [Local Scenario](#local-scenario)
17+
- [Network Magic](#network-magic)
18+
- [Requests](#requests)
19+
- [Logging](#logging)
20+
- [Logs Rotation](#logs-rotation)
21+
- [Prometheus](#prometheus)
22+
- [EKG Monitoring](#ekg-monitoring)
23+
- [Verbosity](#verbosity)
24+
- [RTView](#rtview)
2225

2326
# Introduction
2427

@@ -390,20 +393,51 @@ $ curl --silent -H "Accept: application/json" '127.0.0.1:3200' | jq '.'
390393
}
391394
```
392395

393-
The Promethus output is a map from Prometheus metric to value:
396+
Prometheus uses the text-based exposition format, complete with `# TYPE` and `# HELP` annotations. The `# HELP` texts have to be supplied via the `metricsHelp` config value (see below).
397+
398+
The output should be [OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#text-format) compliant. Example snippet:
394399

395400
```
396401
$ curl '127.0.0.1:3200/12700130004'
397-
blockNum_int 35
398-
rts_gc_init_cpu_ms 5
399-
rts_gc_par_tot_bytes_copied 0
400-
served_block_counter 31
401-
submissions_accepted_counter 2771
402-
density_real 5.7692307692307696e-2
403-
blocksForged_int 6
404-
402+
# TYPE Mem_resident_int gauge
403+
# HELP Mem_resident_int Kernel-reported RSS (resident set size)
404+
Mem_resident_int 103792640
405+
# TYPE rts_gc_max_bytes_used gauge
406+
rts_gc_max_bytes_used 5811512
407+
# TYPE rts_gc_gc_cpu_ms counter
408+
rts_gc_gc_cpu_ms 50
409+
# TYPE RTS_gcMajorNum_int gauge
410+
# HELP RTS_gcMajorNum_int Major GCs
411+
RTS_gcMajorNum_int 4
412+
# TYPE rts_gc_par_avg_bytes_copied gauge
413+
rts_gc_par_avg_bytes_copied 0
414+
# TYPE rts_gc_num_bytes_usage_samples counter
415+
rts_gc_num_bytes_usage_samples 4
416+
# TYPE remainingKESPeriods_int gauge
417+
remainingKESPeriods_int 62
418+
# TYPE rts_gc_bytes_copied counter
419+
rts_gc_bytes_copied 17114384
420+
# TYPE nodeCannotForge_int gauge
421+
# HELP nodeCannotForge_int How many times was this node unable to forge [a block]?
422+
# EOF
423+
```
424+
425+
Passing metric help annotations to the service can be done in the config file, either as a key-value map from metric name to help text, or as a separate JSON file containing such a map.
426+
The system's internal metric names have to be used as keys (cf. [metrics documentation](https://github.com/input-output-hk/cardano-node-wiki/blob/main/docs/new-tracing/tracers_doc_generated.md#metrics)).
427+
```
428+
"metricsHelp": "path/to/key-value-map.json"
429+
```
430+
or
431+
```
432+
"metricsHelp": {
433+
"Mem.resident": "Kernel-reported RSS (resident set size)",
434+
"RTS.gcMajorNum": "Major GCs",
435+
"nodeCannotForge": "How many times was this node unable to forge [a block]?"
436+
}
405437
```
406438

439+
440+
407441
## EKG Monitoring
408442

409443
At top-level route `/` EKG gives a list of connected nodes.

0 commit comments

Comments
 (0)