Skip to content

Commit 2cdeff4

Browse files
authored
feat(balances): Update write alert scripts with helpful prompts (#5628)
### Description - Updates the write alert scripts with helpful prompts and general QoL improvements - Checks if the proposed change introduces a regression (i.e., removes a chain from the existing alert); if so, logs an error and aborts - Reads the actual alert threshold via the Grafana API, compares it with the proposed thresholds, and prompts the user to confirm that they want to introduce these changes ![image](https://github.com/user-attachments/assets/dc61b247-d131-4b07-b6c5-634094865e5f) - Checks if the proposed update will lead to the alert firing and prompts the user to confirm these changes ![image](https://github.com/user-attachments/assets/eb9972b1-3a7f-401e-9ae5-33d234da20bc) ### Testing Manual
1 parent 10d60a6 commit 2cdeff4

6 files changed

Lines changed: 283 additions & 27 deletions

File tree

typescript/infra/scripts/funding/calculate-relayer-daily-burn.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import {
2020
sortThresholds,
2121
} from '../../src/funding/balances.js';
2222
import {
23+
LOCAL_PROM_URL,
24+
PROMETHEUS_LOCAL_PORT,
2325
PrometheusInstantResult,
2426
fetchPrometheusInstantExpression,
2527
portForwardPrometheusServer,
@@ -38,7 +40,6 @@ const SCRAPER_READ_ONLY_DB_SECRET_NAME =
3840

3941
const LOOK_BACK_DAYS = 10; // the number of days to look back for average destination tx costs
4042
const MIN_NUMBER_OF_TXS = 100; // the minimum number of txs to consider for daily burn
41-
const PROMETHEUS_LOCAL_PORT = 9090;
4243
const MIN_BURN_INCREASE_FACTOR = 0.05; // burn should be at least 5% higher than current to be updated
4344
const LOW_PROPOSED_BURN_FACTOR = 0.5; // proposed burn should be at least 50% lower than current to initiate user review
4445

@@ -146,8 +147,6 @@ async function getSealevelBurnProm(
146147
PROMETHEUS_LOCAL_PORT,
147148
);
148149

149-
const promUrl = `http://localhost:${PROMETHEUS_LOCAL_PORT}`;
150-
151150
const burn: ChainMap<number> = {};
152151

153152
const rangeHours = LOOK_BACK_DAYS * 24;
@@ -186,7 +185,10 @@ async function getSealevelBurnProm(
186185
let results: PrometheusInstantResult[];
187186

188187
try {
189-
results = await fetchPrometheusInstantExpression(promUrl, promQlQuery);
188+
results = await fetchPrometheusInstantExpression(
189+
LOCAL_PROM_URL,
190+
promQlQuery,
191+
);
190192
} finally {
191193
portForwardProcess.kill();
192194
rootLogger.info('Prometheus server port-forward process killed');

typescript/infra/scripts/funding/write-alert.ts

Lines changed: 252 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import { confirm } from '@inquirer/prompts';
2+
import { ChildProcess } from 'child_process';
3+
14
import { ChainMap } from '@hyperlane-xyz/sdk';
25
import { rootLogger } from '@hyperlane-xyz/utils';
36

@@ -9,66 +12,295 @@ import {
912
} from '../../src/config/funding/balances.js';
1013
import {
1114
AlertType,
15+
ProvisionedAlertRule,
1216
alertConfigMapping,
1317
} from '../../src/config/funding/grafanaAlerts.js';
18+
import { parseBalancesPromQLQuery } from '../../src/funding/alerts.js';
1419
import { validateThresholds } from '../../src/funding/balances.js';
1520
import {
1621
fetchGrafanaAlert,
1722
fetchGrafanaServiceAccountToken,
1823
generateQuery,
1924
updateGrafanaAlert,
2025
} from '../../src/infrastructure/monitoring/grafana.js';
26+
import {
27+
LOCAL_PROM_URL,
28+
PROMETHEUS_LOCAL_PORT,
29+
fetchPrometheusInstantExpression,
30+
portForwardPrometheusServer,
31+
} from '../../src/infrastructure/monitoring/prometheus.js';
2132
import { readJSONAtPath } from '../../src/utils/utils.js';
2233

34+
interface AlertUpdateInfo {
35+
alertType: AlertType;
36+
grafanaAlertId: string;
37+
provisionedAlertRule: ProvisionedAlertRule;
38+
query: string;
39+
}
40+
41+
interface RegressionError {
42+
alertType: AlertType;
43+
missingChains: string[];
44+
}
45+
2346
async function main() {
47+
// runs a validation check to ensure the threshold configs are valid relative to each other
48+
await validateBalanceThresholdConfigs();
49+
2450
const saToken = await fetchGrafanaServiceAccountToken();
51+
const portForwardProcess = await portForwardPrometheusServer(
52+
PROMETHEUS_LOCAL_PORT,
53+
);
54+
55+
const alertsToUpdate = Object.values(AlertType);
56+
const alertUpdateInfo: AlertUpdateInfo[] = [];
57+
const missingChainErrors: RegressionError[] = [];
58+
59+
try {
60+
for (const alert of alertsToUpdate) {
61+
// fetch alertRule config from Grafana via the Grafana API
62+
const alertRule = await fetchGrafanaAlert(alert, saToken);
63+
64+
// read the proposed thresholds from the config file
65+
let proposedThresholds: ChainMap<number> = {};
66+
try {
67+
proposedThresholds = readJSONAtPath(
68+
`${THRESHOLD_CONFIG_PATH}/${alertConfigMapping[alert].configFileName}`,
69+
);
70+
} catch (e) {
71+
rootLogger.error(`Error reading ${alert} config: ${e}`);
72+
process.exit(1);
73+
}
74+
75+
// parse the current thresholds from the existing query
76+
const existingQuery = alertRule.queries[0];
77+
const currentThresholds = parseBalancesPromQLQuery(
78+
existingQuery,
79+
alertConfigMapping[alert].walletName,
80+
);
81+
82+
// log an error if a chain is defined in current thresholds but not in the proposed thresholds
83+
// this is to ensure that we don't introduce a regression where a chain is no longer being monitored
84+
const missingChains = Object.keys(currentThresholds).filter(
85+
(chain) => !proposedThresholds[chain],
86+
);
87+
if (missingChains.length > 0) {
88+
missingChainErrors.push({
89+
alertType: alert,
90+
missingChains,
91+
});
92+
rootLogger.error(
93+
`Missing thresholds for chains: ${missingChains.join(
94+
', ',
95+
)} for ${alert} config, skipping updating this alert`,
96+
);
97+
continue;
98+
}
99+
100+
// generate a table of the differences in the thresholds, prompt the user to confirm the changes
101+
const diffTable = generateDiffTable(
102+
currentThresholds,
103+
proposedThresholds,
104+
);
105+
if (diffTable.length > 0) {
106+
rootLogger.info(`Differences in ${alert} thresholds:`);
107+
console.table(diffTable);
108+
109+
const confirmed = await confirm({
110+
message: `Do you want to update thresholds for ${alert}?`,
111+
});
112+
113+
if (!confirmed) {
114+
rootLogger.info(
115+
`Exiting without updating any alerts, this is to avoid thresholds from being out of sync`,
116+
);
117+
process.exit(0);
118+
}
119+
} else {
120+
rootLogger.info(
121+
`Proposed thresholds for ${alert} are the same as existing thresholds, skipping`,
122+
);
123+
continue;
124+
}
125+
126+
// prompt the user to confirm that they are ok with the alert firing for chains after the update
127+
const query = generateQuery(alert, proposedThresholds);
128+
await confirmFiringAlerts(
129+
alert,
130+
query,
131+
currentThresholds,
132+
proposedThresholds,
133+
);
134+
135+
alertUpdateInfo.push({
136+
alertType: alert,
137+
grafanaAlertId: alertConfigMapping[alert].grafanaAlertId,
138+
provisionedAlertRule: alertRule.rawData,
139+
query,
140+
});
141+
}
142+
143+
// abort if there are any missing thresholds in the config to avoid introducing a regression
144+
handleMissingChainErrors(missingChainErrors);
145+
146+
// update the alerts with the new thresholds via the Grafana API
147+
await updateAlerts(alertUpdateInfo, saToken, portForwardProcess);
148+
} finally {
149+
portForwardProcess.kill();
150+
}
151+
}
25152

153+
async function validateBalanceThresholdConfigs() {
26154
const balanceThresholdTypes = Object.values(BalanceThresholdType);
27-
const balanceThresholdConfigs: ThresholdsData = balanceThresholdTypes.reduce(
155+
const balanceThresholdConfigs = balanceThresholdTypes.reduce(
28156
(acc, balanceThresholdType) => {
29157
const thresholds = readJSONAtPath(
30158
`${THRESHOLD_CONFIG_PATH}/${balanceThresholdConfigMapping[balanceThresholdType].configFileName}`,
31159
) as ChainMap<string>;
32160

33161
return {
34162
...acc,
35-
[balanceThresholdType]: {
36-
thresholds,
37-
},
163+
[balanceThresholdType]: thresholds,
38164
};
39165
},
40166
{} as ThresholdsData,
41167
);
42168

43169
validateThresholds(balanceThresholdConfigs);
170+
}
44171

45-
const alertsToUpdate = Object.values(AlertType);
172+
async function fetchFiringThresholdAlert(query: string): Promise<string[]> {
173+
const results = await fetchPrometheusInstantExpression(LOCAL_PROM_URL, query);
174+
175+
const alertingChains: string[] = [];
176+
177+
for (const series of results) {
178+
const chain = series.metric.chain;
179+
180+
if (series.value && parseFloat(series.value[1]) < 0) {
181+
alertingChains.push(chain);
182+
} else if (series.histogram) {
183+
rootLogger.warn(
184+
`Unexpected histogram data found for "${chain}" in Prometheus, skipping.`,
185+
);
186+
}
187+
}
188+
189+
return alertingChains;
190+
}
46191

47-
for (const alert of alertsToUpdate) {
48-
// fetch alertRule config from Grafana
49-
const alertRule = await fetchGrafanaAlert(alert, saToken);
192+
async function updateAlerts(
193+
alertUpdateInfo: AlertUpdateInfo[],
194+
saToken: string,
195+
portForwardProcess: ChildProcess,
196+
) {
197+
// sort alertUpdateInfo by alertConfigMapping writePriority in descending order
198+
// the intention is to update alerts with higher writePriority first
199+
// if there are any errors, we don't want to continue updating alert thresholds with lower writePriority
200+
// to avoid the thresholds being out of sync, this is only effective when we are increasing thresholds which is the most common case
201+
alertUpdateInfo.sort(
202+
(a, b) =>
203+
alertConfigMapping[b.alertType].writePriority -
204+
alertConfigMapping[a.alertType].writePriority,
205+
);
50206

51-
let thresholds: ChainMap<string> = {};
207+
for (const alertInfo of alertUpdateInfo) {
52208
try {
53-
thresholds = readJSONAtPath(
54-
`${THRESHOLD_CONFIG_PATH}/${alertConfigMapping[alert].configFileName}`,
209+
await updateGrafanaAlert(
210+
alertInfo.grafanaAlertId,
211+
alertInfo.provisionedAlertRule,
212+
alertInfo.query,
213+
saToken,
55214
);
215+
rootLogger.info(`Updated ${alertInfo.alertType} alert`);
56216
} catch (e) {
57-
rootLogger.error(`Error reading ${alert} config: ${e}`);
217+
rootLogger.error(
218+
`Error updating ${alertInfo.alertType} alert, aborting updating the rest of the alerts: ${e}`,
219+
);
220+
// exiting here so we don't continue updating alerts with lower writePriority
221+
portForwardProcess.kill();
58222
process.exit(1);
59223
}
224+
}
225+
}
60226

61-
const query = generateQuery(alert, thresholds);
227+
function generateDiffTable(
228+
currentThresholds: ChainMap<number>,
229+
proposedThresholds: ChainMap<number>,
230+
) {
231+
const diffTable = Object.entries(proposedThresholds).reduce(
232+
(acc, [chain, newThreshold]) => {
233+
const currentThreshold = currentThresholds[chain];
234+
if (currentThreshold !== proposedThresholds[chain]) {
235+
acc.push({
236+
chain,
237+
current: currentThreshold,
238+
new: newThreshold,
239+
change:
240+
currentThreshold === undefined
241+
? 'new'
242+
: currentThreshold < newThreshold
243+
? 'increase'
244+
: 'decrease',
245+
});
246+
}
247+
return acc;
248+
},
249+
[] as {
250+
chain: string;
251+
current: number;
252+
new: number;
253+
change: 'increase' | 'decrease' | 'new';
254+
}[],
255+
);
256+
257+
return diffTable;
258+
}
62259

63-
// only change the query
64-
await updateGrafanaAlert(
65-
alertConfigMapping[alert].grafanaAlertId,
66-
alertRule.rawData,
67-
query,
68-
saToken,
260+
function handleMissingChainErrors(missingChainErrors: RegressionError[]) {
261+
if (missingChainErrors.length === 0) return;
262+
263+
for (const error of missingChainErrors) {
264+
rootLogger.error(
265+
`Missing thresholds for chains: ${error.missingChains.join(', ')} for ${
266+
error.alertType
267+
} config`,
69268
);
269+
}
270+
rootLogger.error(
271+
`Aborting updating alerts due to missing thresholds in config`,
272+
);
273+
process.exit(1);
274+
}
70275

71-
rootLogger.info(`Updated ${alert} alert`);
276+
async function confirmFiringAlerts(
277+
alert: AlertType,
278+
query: string,
279+
currentThresholds: ChainMap<number>,
280+
proposedThresholds: ChainMap<number>,
281+
) {
282+
const alertingChains = await fetchFiringThresholdAlert(query);
283+
if (alertingChains.length === 0) return;
284+
285+
rootLogger.warn(
286+
`updating ${alert} alert will result in alerting for the following chains`,
287+
);
288+
console.table(
289+
alertingChains.map((chain) => ({
290+
chain,
291+
current: currentThresholds[chain],
292+
proposed: proposedThresholds[chain],
293+
})),
294+
);
295+
296+
const confirmed = await confirm({
297+
message: `Do you want to proceed with updating the alert thresholds for ${alert}?`,
298+
});
299+
if (!confirmed) {
300+
rootLogger.info(
301+
`Exiting without updating any alerts, this is to avoid thresholds from being out of sync as we do not want to update the ${alert} alert`,
302+
);
303+
process.exit(0);
72304
}
73305
}
74306

0 commit comments

Comments
 (0)