Skip to content

Commit 4fd4b15

Browse files
committed
feat: add prometheus metrics
1 parent 8e5b47e commit 4fd4b15

13 files changed

Lines changed: 542 additions & 5 deletions

File tree

.env.example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,10 @@ NOTIFICATION_TELEGRAM_BOT_TOKEN=secret
55
NOTIFICATION_TELEGRAM_CHAT_ID=secret
66
NOTIFICATION_SLACK_WEBHOOK=https://hooks.slack.com/services/secret/secret/secret
77
NOTIFICATION_GENERIC_WEBHOOK=http://host:port/path
8+
9+
# Prometheus metrics
10+
METRICS_ENABLED=false
11+
METRICS_PORT=8000
12+
METRICS_ADDRESS=0.0.0.0
13+
14+
LOG_LEVEL=INFO

README.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ docker run \
3131
-e NOTIFICATION_TELEGRAM_CHAT_ID="secret" \
3232
-e NOTIFICATION_SLACK_WEBHOOK="https://hooks.slack.com/services/secret/secret/secret" \
3333
-e NOTIFICATION_GENERIC_WEBHOOK="http://host:port/path" \
34+
-e METRICS_ENABLED="true" \
35+
-e METRICS_PORT="8000" \
36+
-e LOG_LEVEL="INFO" \
3437
ghcr.io/flare-foundation/fsp-observer:main
3538
```
3639

@@ -44,3 +47,46 @@ RPC_BASE_URL="https://flare-api.flare.network" \
4447
IDENTITY_ADDRESS="0x0000000000000000000000000000000000000000" \
4548
python main.py
4649
```
50+
51+
## Environment variables
52+
53+
| Variable | Required | Default | Description |
54+
|---|---|---|---|
55+
| `RPC_BASE_URL` | yes | - | RPC base URL without `/ext/bc/C/rpc` suffix |
56+
| `IDENTITY_ADDRESS` | yes | - | Identity address of the observed entity |
57+
| `FEE_THRESHOLD` | no | `25` | Balance threshold in FLR to trigger low balance warning |
58+
| `NOTIFICATION_DISCORD_WEBHOOK` | no | - | Discord webhook URL (comma-separated for multiple) |
59+
| `NOTIFICATION_DISCORD_EMBED_WEBHOOK` | no | - | Discord embed webhook URL |
60+
| `NOTIFICATION_SLACK_WEBHOOK` | no | - | Slack webhook URL |
61+
| `NOTIFICATION_TELEGRAM_BOT_TOKEN` | no | - | Telegram bot token (comma-separated for multiple) |
62+
| `NOTIFICATION_TELEGRAM_CHAT_ID` | no | - | Telegram chat ID (comma-separated, paired with bot tokens) |
63+
| `NOTIFICATION_GENERIC_WEBHOOK` | no | - | Generic HTTP POST webhook URL |
64+
| `METRICS_ENABLED` | no | `false` | Enable Prometheus metrics endpoint |
65+
| `METRICS_PORT` | no | `8000` | Prometheus metrics server port |
66+
| `METRICS_ADDRESS` | no | `0.0.0.0` | Prometheus metrics server bind address |
67+
| `LOG_LEVEL` | no | `INFO` | Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
68+
69+
## Prometheus metrics
70+
71+
When `METRICS_ENABLED=true`, metrics are exposed at `http://host:METRICS_PORT/metrics`.
72+
73+
| Metric | Type | Description |
74+
|---|---|---|
75+
| `flare_fsp_submit_ok_total` | Counter | Successful submissions per protocol/phase |
76+
| `flare_fsp_submit_late_total` | Counter | Late submissions per protocol/phase |
77+
| `flare_fsp_submit_early_total` | Counter | Early submissions per protocol/phase |
78+
| `flare_fsp_submit_missing_total` | Counter | Missing submissions per protocol/phase |
79+
| `flare_fsp_address_balance_wei` | Gauge | Address balance in wei per role |
80+
| `flare_fsp_registered_current_epoch` | Gauge | 1 if registered in current reward epoch |
81+
| `flare_fsp_registered_next_epoch` | Gauge | 1 if registered for next reward epoch |
82+
| `flare_fsp_voting_round_current` | Gauge | Current voting round ID |
83+
| `flare_fsp_reward_epoch_current` | Gauge | Current reward epoch ID |
84+
| `flare_fsp_node_uptime_ratio` | Gauge | Node uptime ratio per node ID |
85+
| `flare_fsp_fast_update_blocks_since_last` | Gauge | Blocks since last fast update submission |
86+
| `flare_fsp_ftso_anchor_feeds_success_rate_bips` | Gauge | FTSO anchor feeds success rate in bips |
87+
| `flare_fsp_fdc_participation_rate_bips` | Gauge | FDC participation rate in bips |
88+
| `flare_fsp_reveal_offence_total` | Counter | Reveal offences per protocol |
89+
| `flare_fsp_signature_grace_period_missed_total` | Counter | Signature submissions past grace period |
90+
| `flare_fsp_signature_mismatch_total` | Counter | Signature mismatches per protocol |
91+
| `flare_fsp_contract_address_wrong_total` | Counter | Wrong contract address detections |
92+
| `flare_fsp_unclaimed_rewards_wei` | Gauge | Unclaimed reward amount in wei |

configuration/config.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Configuration,
99
Contracts,
1010
Epoch,
11+
MetricsConfig,
1112
Notification,
1213
NotificationDiscord,
1314
NotificationGeneric,
@@ -132,6 +133,13 @@ def get_notification_config() -> Notification:
132133
)
133134

134135

136+
def get_metrics_config() -> MetricsConfig:
137+
enabled = os.environ.get("METRICS_ENABLED", "false").lower() == "true"
138+
port = int(os.environ.get("METRICS_PORT", "8000"))
139+
address = os.environ.get("METRICS_ADDRESS", "0.0.0.0")
140+
return MetricsConfig(enabled=enabled, port=port, address=address)
141+
142+
135143
def get_config() -> Configuration:
136144
rpc_base_url = os.environ.get("RPC_BASE_URL")
137145
if rpc_base_url is None:
@@ -155,6 +163,8 @@ def get_config() -> Configuration:
155163
_fee_threshold = os.environ.get("FEE_THRESHOLD", "25")
156164
fee_threshold = int(_fee_threshold)
157165

166+
log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
167+
158168
config = Configuration(
159169
rpc_url=rpc_url,
160170
p_chain_rpc_url=p_chain_rpc_url,
@@ -164,6 +174,8 @@ def get_config() -> Configuration:
164174
epoch=get_epoch(chain_id),
165175
notification=get_notification_config(),
166176
fee_threshold=fee_threshold,
177+
metrics=get_metrics_config(),
178+
log_level=log_level,
167179
)
168180

169181
return config

configuration/types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,13 @@ class Notification:
250250
generic: NotificationGeneric
251251

252252

253+
@frozen
class MetricsConfig:
    """Immutable settings for the Prometheus metrics endpoint."""

    # Whether the metrics endpoint should be served at all.
    enabled: bool
    # TCP port for the metrics server.
    port: int
    # Bind address for the metrics server.
    address: str
258+
259+
253260
@frozen
254261
class Configuration:
255262
identity_address: ChecksumAddress
@@ -260,3 +267,5 @@ class Configuration:
260267
epoch: Epoch
261268
notification: Notification
262269
fee_threshold: int
270+
metrics: MetricsConfig
271+
log_level: str

observer/address.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from web3 import AsyncWeb3
55

66
from configuration.types import Configuration
7+
from observer import metrics
78
from observer.message import Message, MessageLevel
89

910

@@ -20,6 +21,9 @@ async def check_addresses(
2021

2122
for name, addr in address_list:
2223
balance = await w.eth.get_balance(addr, "latest")
24+
metrics.ADDRESS_BALANCE.labels(
25+
identity_address=metrics.identity_address, address=addr, role=name
26+
).set(balance)
2327
if balance < config.fee_threshold * 1e18:
2428
level = MessageLevel.WARNING
2529
if balance <= 5e18:

observer/contract_manager.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from attrs import define
44

55
from configuration.types import Contract, Contracts
6+
from observer import metrics
67
from observer.message import Message, MessageLevel
78

89

@@ -21,6 +22,9 @@ def check_submission_address(self, address) -> Sequence[Message]:
2122
mb = Message.builder()
2223
messages = []
2324
if address != self.contracts.Submission.address:
25+
metrics.CONTRACT_ADDRESS_WRONG.labels(
26+
identity_address=metrics.identity_address, contract="submission"
27+
).inc()
2428
messages.append(
2529
mb.build(MessageLevel.CRITICAL, "Incorrect Submmission address")
2630
)
@@ -30,5 +34,8 @@ def check_relay_address(self, address) -> Sequence[Message]:
3034
mb = Message.builder()
3135
messages = []
3236
if address != self.contracts.Relay.address:
37+
metrics.CONTRACT_ADDRESS_WRONG.labels(
38+
identity_address=metrics.identity_address, contract="relay"
39+
).inc()
3340
messages.append(mb.build(MessageLevel.CRITICAL, "Incorrect Relay address"))
3441
return messages

observer/metrics.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
from prometheus_client import Counter, Gauge, start_http_server
2+
3+
# Identity address set once at startup via setup()
4+
identity_address: str = ""
5+
6+
7+
def setup(ia: str) -> None:
8+
global identity_address
9+
identity_address = ia
10+
11+
12+
# ---------------------------------------------------------------------------
13+
# State
14+
# ---------------------------------------------------------------------------
15+
16+
# Unlabelled gauge: ID of the current voting round.
VOTING_ROUND = Gauge(
    name="flare_fsp_voting_round_current",
    documentation="Current voting round ID",
)

# Unlabelled gauge: ID of the current reward epoch.
REWARD_EPOCH = Gauge(
    name="flare_fsp_reward_epoch_current",
    documentation="Current reward epoch ID",
)

# 0/1 flag per identity: member of the active signing policy this epoch.
REGISTERED_CURRENT_EPOCH = Gauge(
    "flare_fsp_registered_current_epoch",
    "Whether the entity is in the active signing policy for the current epoch (0 or 1)",
    labelnames=("identity_address",),
)

# 0/1 flag per identity: registration for the next epoch has been made.
REGISTERED_NEXT_EPOCH = Gauge(
    "flare_fsp_registered_next_epoch",
    "Whether the entity has registered for the next epoch (0 or 1)",
    labelnames=("identity_address",),
)
37+
38+
# ---------------------------------------------------------------------------
39+
# Submissions — Counters per protocol (ftso, fdc) and phase
40+
# (submit1, submit2, signatures)
41+
# ---------------------------------------------------------------------------
42+
43+
# All four submission counters share the same label set:
# identity_address, protocol and phase.

# Rounds with a present and valid submission.
SUBMIT_OK = Counter(
    "flare_fsp_submit_ok_total",
    "Total rounds where submission was present and valid",
    labelnames=("identity_address", "protocol", "phase"),
)

# Rounds with no submission at all.
SUBMIT_MISSING = Counter(
    "flare_fsp_submit_missing_total",
    "Total rounds where submission was absent",
    labelnames=("identity_address", "protocol", "phase"),
)

# Rounds where the submission arrived after the allowed window.
SUBMIT_LATE = Counter(
    "flare_fsp_submit_late_total",
    "Total rounds where submission was sent after the allowed window",
    labelnames=("identity_address", "protocol", "phase"),
)

# Rounds where the submission arrived before the allowed window.
SUBMIT_EARLY = Counter(
    "flare_fsp_submit_early_total",
    "Total rounds where submission was sent before the allowed window",
    labelnames=("identity_address", "protocol", "phase"),
)
66+
67+
# ---------------------------------------------------------------------------
68+
# Minimal conditions
69+
# ---------------------------------------------------------------------------
70+
71+
# FTSO anchor feeds success rate, expressed in bips, per identity.
FTSO_ANCHOR_FEEDS_SUCCESS_RATE = Gauge(
    "flare_fsp_ftso_anchor_feeds_success_rate_bips",
    "FTSO anchor feeds success rate in bips (0-10000) over the last 2 hours",
    labelnames=("identity_address",),
)

# Block distance from the most recent fast update submission, per identity.
FAST_UPDATE_BLOCKS_SINCE_LAST = Gauge(
    "flare_fsp_fast_update_blocks_since_last",
    "Number of blocks elapsed since the last fast update submission",
    labelnames=("identity_address",),
)

# Uptime ratio of each node, keyed by identity and node ID.
NODE_UPTIME_RATIO = Gauge(
    "flare_fsp_node_uptime_ratio",
    "Node uptime ratio over the sliding window (0.0 to 1.0)",
    labelnames=("identity_address", "node_id"),
)

# FDC participation rate, expressed in bips, per identity.
FDC_PARTICIPATION_RATE = Gauge(
    "flare_fsp_fdc_participation_rate_bips",
    "FDC participation rate in bips (0-10000) over the last 2 hours",
    labelnames=("identity_address",),
)
94+
95+
# ---------------------------------------------------------------------------
96+
# Balance
97+
# ---------------------------------------------------------------------------
98+
99+
# Balance in wei of one address; labelled by identity, the address and its role.
ADDRESS_BALANCE = Gauge(
    "flare_fsp_address_balance_wei",
    "Address balance in wei",
    labelnames=("identity_address", "address", "role"),
)
104+
105+
# ---------------------------------------------------------------------------
106+
# Validation issues
107+
# ---------------------------------------------------------------------------
108+
109+
# The three validation counters are labelled by identity_address and protocol.

# Rounds with a reveal offence (no reveal after a commit, or a hash mismatch).
REVEAL_OFFENCE = Counter(
    "flare_fsp_reveal_offence_total",
    "Total rounds where a reveal offence occurred"
    " (missing reveal after commit, or hash mismatch)",
    labelnames=("identity_address", "protocol"),
)

# Rounds where submitSignatures landed after the grace period deadline.
SIGNATURE_GRACE_PERIOD_MISSED = Counter(
    "flare_fsp_signature_grace_period_missed_total",
    "Total rounds where submitSignatures was sent after the grace period deadline",
    labelnames=("identity_address", "protocol"),
)

# Rounds where the submitted signature disagreed with the finalization.
SIGNATURE_MISMATCH = Counter(
    "flare_fsp_signature_mismatch_total",
    "Total rounds where submitSignatures signature did not match finalization",
    labelnames=("identity_address", "protocol"),
)
127+
128+
# ---------------------------------------------------------------------------
129+
# Contract address issues
130+
# ---------------------------------------------------------------------------
131+
132+
# Detections of an unexpected contract address; the ``contract`` label tells
# which contract was affected.
CONTRACT_ADDRESS_WRONG = Counter(
    "flare_fsp_contract_address_wrong_total",
    "Total times a wrong contract address was detected (submission or relay)",
    labelnames=("identity_address", "contract"),
)
137+
138+
# ---------------------------------------------------------------------------
139+
# Unclaimed rewards
140+
# ---------------------------------------------------------------------------
141+
142+
# Unclaimed reward amount in wei, one series per address / reward epoch /
# claim type.
# NOTE(review): the ``reward_epoch`` label adds a new series for every epoch
# value observed — confirm that callers remove stale series.
UNCLAIMED_REWARDS = Gauge(
    "flare_fsp_unclaimed_rewards_wei",
    "Unclaimed reward amount in wei per address/epoch/claim_type",
    labelnames=("identity_address", "address", "reward_epoch", "claim_type"),
)
147+
148+
149+
def initialize_labels(node_ids: list[str] | None = None) -> None:
150+
"""Pre-initialize all label combinations so time series appear immediately at 0."""
151+
assert identity_address, "metrics.setup() must be called before initialize_labels()"
152+
for protocol, phases in [
153+
("ftso", ["submit1", "submit2", "signatures"]),
154+
("fdc", ["submit2", "signatures"]),
155+
]:
156+
for phase in phases:
157+
SUBMIT_OK.labels(
158+
identity_address=identity_address, protocol=protocol, phase=phase
159+
)
160+
SUBMIT_MISSING.labels(
161+
identity_address=identity_address, protocol=protocol, phase=phase
162+
)
163+
SUBMIT_LATE.labels(
164+
identity_address=identity_address, protocol=protocol, phase=phase
165+
)
166+
SUBMIT_EARLY.labels(
167+
identity_address=identity_address, protocol=protocol, phase=phase
168+
)
169+
170+
REVEAL_OFFENCE.labels(identity_address=identity_address, protocol=protocol)
171+
SIGNATURE_GRACE_PERIOD_MISSED.labels(
172+
identity_address=identity_address, protocol=protocol
173+
)
174+
SIGNATURE_MISMATCH.labels(identity_address=identity_address, protocol=protocol)
175+
176+
for contract in ["submission", "relay"]:
177+
CONTRACT_ADDRESS_WRONG.labels(
178+
identity_address=identity_address, contract=contract
179+
)
180+
181+
FAST_UPDATE_BLOCKS_SINCE_LAST.labels(identity_address=identity_address).set(0)
182+
183+
if node_ids:
184+
for node_id in node_ids:
185+
NODE_UPTIME_RATIO.labels(
186+
identity_address=identity_address, node_id=node_id
187+
).set(0)
188+
189+
190+
def start_metrics_server(port: int, address: str = "0.0.0.0") -> None:
    """Start the Prometheus client HTTP server.

    Args:
        port: Port passed to ``start_http_server``.
        address: Bind address passed as ``addr``; defaults to ``"0.0.0.0"``.
    """
    # Any value returned by start_http_server is intentionally discarded.
    start_http_server(port=port, addr=address)

0 commit comments

Comments
 (0)