1+ import { confirm } from '@inquirer/prompts' ;
2+ import { ChildProcess } from 'child_process' ;
3+
14import { ChainMap } from '@hyperlane-xyz/sdk' ;
25import { rootLogger } from '@hyperlane-xyz/utils' ;
36
@@ -9,66 +12,295 @@ import {
912} from '../../src/config/funding/balances.js' ;
1013import {
1114 AlertType ,
15+ ProvisionedAlertRule ,
1216 alertConfigMapping ,
1317} from '../../src/config/funding/grafanaAlerts.js' ;
18+ import { parseBalancesPromQLQuery } from '../../src/funding/alerts.js' ;
1419import { validateThresholds } from '../../src/funding/balances.js' ;
1520import {
1621 fetchGrafanaAlert ,
1722 fetchGrafanaServiceAccountToken ,
1823 generateQuery ,
1924 updateGrafanaAlert ,
2025} from '../../src/infrastructure/monitoring/grafana.js' ;
26+ import {
27+ LOCAL_PROM_URL ,
28+ PROMETHEUS_LOCAL_PORT ,
29+ fetchPrometheusInstantExpression ,
30+ portForwardPrometheusServer ,
31+ } from '../../src/infrastructure/monitoring/prometheus.js' ;
2132import { readJSONAtPath } from '../../src/utils/utils.js' ;
2233
/**
 * Everything needed to push one alert's regenerated query to Grafana.
 * Instances are collected in `main` and consumed by `updateAlerts`.
 */
interface AlertUpdateInfo {
  /** The alert this update applies to. */
  alertType: AlertType;
  /** Grafana alert id, looked up from `alertConfigMapping`. */
  grafanaAlertId: string;
  /** The rule as fetched from Grafana (`alertRule.rawData`). */
  provisionedAlertRule: ProvisionedAlertRule;
  /** The query generated from the proposed thresholds. */
  query: string;
}
40+
/**
 * Records chains that have a threshold in the live Grafana query but none in
 * the proposed config for the given alert.
 */
interface RegressionError {
  /** The alert whose proposed config is missing chains. */
  alertType: AlertType;
  /** Chains present in the current thresholds but absent from the proposed ones. */
  missingChains: string[];
}
45+
/**
 * Script entry point.
 *
 * For every `AlertType`: fetches the current Grafana rule, reads the proposed
 * thresholds from the config file, refuses to drop chains that are currently
 * covered, shows a diff and asks for confirmation, then checks which chains
 * would fire under the new query. Only after all alerts have been reviewed are
 * the updates written to Grafana.
 *
 * A Prometheus port-forward child process is kept open for the duration of the
 * run and is stopped on every exit path.
 */
async function main() {
  // runs a validation check to ensure the threshold configs are valid relative to each other
  await validateBalanceThresholdConfigs();

  const saToken = await fetchGrafanaServiceAccountToken();
  const portForwardProcess = await portForwardPrometheusServer(
    PROMETHEUS_LOCAL_PORT,
  );

  // `process.exit()` does not run `finally` blocks, and several paths below
  // (including helpers such as handleMissingChainErrors/confirmFiringAlerts)
  // exit that way. Hook the 'exit' event as well so the port-forward child is
  // never left running. `killed` guards against signalling it twice.
  const stopPortForward = () => {
    if (!portForwardProcess.killed) {
      portForwardProcess.kill();
    }
  };
  process.once('exit', stopPortForward);

  const alertsToUpdate = Object.values(AlertType);
  const alertUpdateInfo: AlertUpdateInfo[] = [];
  const missingChainErrors: RegressionError[] = [];

  try {
    for (const alert of alertsToUpdate) {
      // fetch alertRule config from Grafana via the Grafana API
      const alertRule = await fetchGrafanaAlert(alert, saToken);

      // read the proposed thresholds from the config file
      let proposedThresholds: ChainMap<number> = {};
      try {
        proposedThresholds = readJSONAtPath(
          `${THRESHOLD_CONFIG_PATH}/${alertConfigMapping[alert].configFileName}`,
        );
      } catch (e) {
        rootLogger.error(`Error reading ${alert} config: ${e}`);
        process.exit(1);
      }

      // parse the current thresholds from the existing query
      const existingQuery = alertRule.queries[0];
      const currentThresholds = parseBalancesPromQLQuery(
        existingQuery,
        alertConfigMapping[alert].walletName,
      );

      // log an error if a chain is defined in current thresholds but not in the proposed thresholds
      // this is to ensure that we don't introduce a regression where a chain is no longer being monitored
      // (the check is a falsy test, so a proposed threshold of 0 also counts as missing)
      const missingChains = Object.keys(currentThresholds).filter(
        (chain) => !proposedThresholds[chain],
      );
      if (missingChains.length > 0) {
        missingChainErrors.push({
          alertType: alert,
          missingChains,
        });
        rootLogger.error(
          `Missing thresholds for chains: ${missingChains.join(
            ', ',
          )} for ${alert} config, skipping updating this alert`,
        );
        continue;
      }

      // generate a table of the differences in the thresholds, prompt the user to confirm the changes
      const diffTable = generateDiffTable(
        currentThresholds,
        proposedThresholds,
      );
      if (diffTable.length > 0) {
        rootLogger.info(`Differences in ${alert} thresholds:`);
        console.table(diffTable);

        const confirmed = await confirm({
          message: `Do you want to update thresholds for ${alert}?`,
        });

        if (!confirmed) {
          rootLogger.info(
            `Exiting without updating any alerts, this is to avoid thresholds from being out of sync`,
          );
          process.exit(0);
        }
      } else {
        rootLogger.info(
          `Proposed thresholds for ${alert} are the same as existing thresholds, skipping`,
        );
        continue;
      }

      // prompt the user to confirm that they are ok with the alert firing for chains after the update
      const query = generateQuery(alert, proposedThresholds);
      await confirmFiringAlerts(
        alert,
        query,
        currentThresholds,
        proposedThresholds,
      );

      alertUpdateInfo.push({
        alertType: alert,
        grafanaAlertId: alertConfigMapping[alert].grafanaAlertId,
        provisionedAlertRule: alertRule.rawData,
        query,
      });
    }

    // abort if there are any missing thresholds in the config to avoid introducing a regression
    handleMissingChainErrors(missingChainErrors);

    // update the alerts with the new thresholds via the Grafana API
    await updateAlerts(alertUpdateInfo, saToken, portForwardProcess);
  } finally {
    stopPortForward();
  }
}
25152
/**
 * Loads the threshold config file for every `BalanceThresholdType` and passes
 * the combined map to `validateThresholds`.
 *
 * Any error thrown by `readJSONAtPath` or `validateThresholds` propagates to
 * the caller as a rejected promise.
 */
async function validateBalanceThresholdConfigs() {
  const balanceThresholdTypes = Object.values(BalanceThresholdType);
  const balanceThresholdConfigs = balanceThresholdTypes.reduce(
    (acc, balanceThresholdType) => {
      const thresholds = readJSONAtPath(
        `${THRESHOLD_CONFIG_PATH}/${balanceThresholdConfigMapping[balanceThresholdType].configFileName}`,
      ) as ChainMap<string>;

      // each threshold type maps directly to its chain -> threshold map
      return {
        ...acc,
        [balanceThresholdType]: thresholds,
      };
    },
    {} as ThresholdsData,
  );

  validateThresholds(balanceThresholdConfigs);
}
44171
45- const alertsToUpdate = Object . values ( AlertType ) ;
/**
 * Evaluates `query` as an instant expression against the local Prometheus
 * endpoint (`LOCAL_PROM_URL`) and returns the `chain` label of every series
 * whose sample value is below zero.
 *
 * NOTE(review): this presumes the generated query yields a negative value for
 * a chain that is under its threshold; confirm against `generateQuery`.
 *
 * @param query - PromQL expression to evaluate.
 * @returns The chains whose series currently evaluate to a negative value.
 */
async function fetchFiringThresholdAlert(query: string): Promise<string[]> {
  const results = await fetchPrometheusInstantExpression(LOCAL_PROM_URL, query);

  const alertingChains: string[] = [];

  for (const series of results) {
    const chain = series.metric.chain;

    // value[1] is the sample value as a string; value[0] is not used here
    if (series.value && parseFloat(series.value[1]) < 0) {
      alertingChains.push(chain);
    } else if (series.histogram) {
      rootLogger.warn(
        `Unexpected histogram data found for "${chain}" in Prometheus, skipping.`,
      );
    }
  }

  return alertingChains;
}
46191
47- for ( const alert of alertsToUpdate ) {
48- // fetch alertRule config from Grafana
49- const alertRule = await fetchGrafanaAlert ( alert , saToken ) ;
/**
 * Writes the prepared alert updates to Grafana, one at a time, highest
 * `writePriority` first.
 *
 * On the first failed update the port-forward process is killed and the script
 * exits with code 1, so alerts with lower priority are left untouched.
 *
 * @param alertUpdateInfo - Updates to apply; the array itself is not modified.
 * @param saToken - Grafana service account token.
 * @param portForwardProcess - Prometheus port-forward child, killed before exiting on failure.
 */
async function updateAlerts(
  alertUpdateInfo: AlertUpdateInfo[],
  saToken: string,
  portForwardProcess: ChildProcess,
) {
  // sort by alertConfigMapping writePriority in descending order
  // the intention is to update alerts with higher writePriority first
  // if there are any errors, we don't want to continue updating alert thresholds with lower writePriority
  // to avoid the thresholds being out of sync, this is only effective when we are increasing thresholds which is the most common case
  // a copy is sorted so the caller's array keeps its original order
  const orderedUpdates = [...alertUpdateInfo].sort(
    (a, b) =>
      alertConfigMapping[b.alertType].writePriority -
      alertConfigMapping[a.alertType].writePriority,
  );

  for (const alertInfo of orderedUpdates) {
    try {
      await updateGrafanaAlert(
        alertInfo.grafanaAlertId,
        alertInfo.provisionedAlertRule,
        alertInfo.query,
        saToken,
      );
      rootLogger.info(`Updated ${alertInfo.alertType} alert`);
    } catch (e) {
      rootLogger.error(
        `Error updating ${alertInfo.alertType} alert, aborting updating the rest of the alerts: ${e}`,
      );
      // exiting here so we don't continue updating alerts with lower writePriority
      portForwardProcess.kill();
      process.exit(1);
    }
  }
}
60226
61- const query = generateQuery ( alert , thresholds ) ;
/**
 * Builds one row per chain whose proposed threshold differs from the current
 * one. Only chains present in `proposedThresholds` are considered.
 *
 * @param currentThresholds - Thresholds parsed from the existing alert query.
 * @param proposedThresholds - Thresholds read from the config file.
 * @returns Rows in the iteration order of `proposedThresholds`; empty when
 *   nothing differs. `current` is `undefined` for a chain that has no current
 *   threshold, in which case `change` is `'new'`.
 */
function generateDiffTable(
  currentThresholds: ChainMap<number>,
  proposedThresholds: ChainMap<number>,
) {
  const diffTable: {
    chain: string;
    current: number | undefined;
    new: number;
    change: 'increase' | 'decrease' | 'new';
  }[] = [];

  for (const [chain, newThreshold] of Object.entries(proposedThresholds)) {
    const currentThreshold: number | undefined = currentThresholds[chain];
    if (currentThreshold === newThreshold) {
      continue;
    }

    let change: 'increase' | 'decrease' | 'new';
    if (currentThreshold === undefined) {
      change = 'new';
    } else if (currentThreshold < newThreshold) {
      change = 'increase';
    } else {
      change = 'decrease';
    }

    diffTable.push({
      chain,
      current: currentThreshold,
      new: newThreshold,
      change,
    });
  }

  return diffTable;
}
62259
63- // only change the query
64- await updateGrafanaAlert (
65- alertConfigMapping [ alert ] . grafanaAlertId ,
66- alertRule . rawData ,
67- query ,
68- saToken ,
/**
 * Aborts the script when any alert's proposed config is missing chains.
 *
 * Returns without doing anything when `missingChainErrors` is empty. Otherwise
 * logs one error per affected alert, logs a final abort message, and exits the
 * process with code 1.
 *
 * @param missingChainErrors - Missing-chain records collected across all alerts.
 */
function handleMissingChainErrors(missingChainErrors: RegressionError[]) {
  if (missingChainErrors.length === 0) return;

  for (const error of missingChainErrors) {
    rootLogger.error(
      `Missing thresholds for chains: ${error.missingChains.join(', ')} for ${
        error.alertType
      } config`,
    );
  }
  rootLogger.error(
    `Aborting updating alerts due to missing thresholds in config`,
  );
  process.exit(1);
}
70275
71- rootLogger . info ( `Updated ${ alert } alert` ) ;
/**
 * Warns the operator about chains that would be alerting under the proposed
 * query and asks whether to proceed.
 *
 * Returns without prompting when no chain would alert. If the operator
 * declines, the process exits with code 0 so that no alert is updated.
 *
 * @param alert - The alert being reviewed.
 * @param query - Query generated from the proposed thresholds.
 * @param currentThresholds - Thresholds parsed from the existing alert query.
 * @param proposedThresholds - Thresholds read from the config file.
 */
async function confirmFiringAlerts(
  alert: AlertType,
  query: string,
  currentThresholds: ChainMap<number>,
  proposedThresholds: ChainMap<number>,
) {
  const alertingChains = await fetchFiringThresholdAlert(query);
  if (alertingChains.length === 0) return;

  rootLogger.warn(
    `updating ${alert} alert will result in alerting for the following chains`,
  );
  // show current vs proposed threshold for each chain that would alert
  console.table(
    alertingChains.map((chain) => ({
      chain,
      current: currentThresholds[chain],
      proposed: proposedThresholds[chain],
    })),
  );

  const confirmed = await confirm({
    message: `Do you want to proceed with updating the alert thresholds for ${alert}?`,
  });
  if (!confirmed) {
    rootLogger.info(
      `Exiting without updating any alerts, this is to avoid thresholds from being out of sync as we do not want to update the ${alert} alert`,
    );
    process.exit(0);
  }
}
74306
0 commit comments