Skip to content

Commit 381cf06

Browse files
Update utility functions to handle potential null values for at-risk and watchdog data, avoiding an error in historyAlertStatus that caused all charts to be reported as unavailable
1 parent af9951b commit 381cf06

File tree

2 files changed

+87
-50
lines changed

2 files changed

+87
-50
lines changed

ui/src/services/platformlibrary/metrics.ts

Lines changed: 84 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import {
22
PORT_NODE_EXPORTER,
33
STATUS_CRITICAL,
4+
STATUS_HEALTH,
45
STATUS_WARNING,
56
} from '../../constants';
67
import { queryPromtheusMetrics } from '../prometheus/fetchMetrics';
@@ -1186,6 +1187,16 @@ function convertSegmentToAlert(segment) {
11861187
description:
11871188
'Alerting services were unavailable during this period of time',
11881189
};
1190+
1191+
case STATUS_HEALTH:
1192+
return {
1193+
...baseSegment,
1194+
severity: 'healthy',
1195+
labels: {
1196+
alertname: 'NoAlerts',
1197+
},
1198+
description: 'The cluster is healthy',
1199+
};
11891200
}
11901201
}
11911202

@@ -1201,62 +1212,86 @@ export const getClusterAlertSegmentQuery = (duration: number) => {
12011212
startingTimeISO,
12021213
currentTimeISO,
12031214
frequency,
1204-
}: TimeSpanProps) =>
1205-
queryPrometheusRange(
1215+
}: TimeSpanProps) => {
1216+
const prometheusPromise = queryPrometheusRange(
12061217
startingTimeISO,
12071218
currentTimeISO,
12081219
frequency,
12091220
encodeURIComponent(query),
1210-
)?.then((resolve) => {
1211-
if (resolve.status === 'error') {
1212-
throw resolve.error;
1213-
}
1221+
);
12141222

1215-
if (resolve.data.resultType !== 'matrix') {
1216-
throw new Error('Failed to fetch data from Prometheus');
1217-
}
1223+
if (!prometheusPromise) {
1224+
throw new Error(
1225+
'queryPrometheusRange returned null/undefined - Prometheus might not be available',
1226+
);
1227+
}
12181228

1219-
const clusterAtRiskResult = resolve.data.result.find(
1220-
(result) => result.metric.alertname === 'ClusterAtRisk',
1221-
) || {
1222-
values: [],
1223-
};
1229+
return prometheusPromise
1230+
?.then((resolve) => {
1231+
if (resolve.status === 'error') {
1232+
throw new Error(resolve.error);
1233+
}
12241234

1225-
const clusterDegradedResult = resolve.data.result.find(
1226-
(result) => result.metric.alertname === 'ClusterDegraded',
1227-
) || {
1228-
values: [],
1229-
};
1235+
const clusterAtRiskResult = resolve.data.result.find(
1236+
// @ts-expect-error - FIXME when you are working on it
1237+
(result) => result.metric.alertname === 'ClusterAtRisk',
1238+
) || {
1239+
values: [],
1240+
};
12301241

1231-
const watchdogResult = resolve.data.result.find(
1232-
(result) => result.metric.alertname === 'Watchdog',
1233-
) || {
1234-
values: [],
1235-
};
1236-
const pointsAtRisk = addMissingDataPoint(
1237-
clusterAtRiskResult.values,
1238-
Date.parse(startingTimeISO) / 1000,
1239-
Date.parse(currentTimeISO) / 1000 - Date.parse(startingTimeISO) / 1000,
1240-
frequency,
1241-
);
1242-
const pointsDegraded = addMissingDataPoint(
1243-
clusterDegradedResult.values,
1244-
Date.parse(startingTimeISO) / 1000,
1245-
Date.parse(currentTimeISO) / 1000 - Date.parse(startingTimeISO) / 1000,
1246-
frequency,
1247-
);
1248-
const pointsWatchdog = addMissingDataPoint(
1249-
watchdogResult.values,
1250-
Date.parse(startingTimeISO) / 1000,
1251-
Date.parse(currentTimeISO) / 1000 - Date.parse(startingTimeISO) / 1000,
1252-
frequency,
1253-
);
1254-
return getSegments({
1255-
pointsDegraded,
1256-
pointsAtRisk,
1257-
pointsWatchdog,
1258-
}).map(convertSegmentToAlert);
1259-
});
1242+
const clusterDegradedResult = resolve.data.result.find(
1243+
// @ts-expect-error - FIXME when you are working on it
1244+
(result) => result.metric.alertname === 'ClusterDegraded',
1245+
) || {
1246+
values: [],
1247+
};
1248+
1249+
const watchdogResult = resolve.data.result.find(
1250+
// @ts-expect-error - FIXME when you are working on it
1251+
(result) => result.metric.alertname === 'Watchdog',
1252+
) || {
1253+
values: [],
1254+
};
1255+
const pointsAtRisk = addMissingDataPoint(
1256+
// @ts-expect-error - FIXME when you are working on it
1257+
clusterAtRiskResult.values,
1258+
Date.parse(startingTimeISO) / 1000,
1259+
Date.parse(currentTimeISO) / 1000 -
1260+
Date.parse(startingTimeISO) / 1000,
1261+
frequency,
1262+
);
1263+
1264+
const pointsDegraded = addMissingDataPoint(
1265+
// @ts-expect-error - FIXME when you are working on it
1266+
clusterDegradedResult.values,
1267+
Date.parse(startingTimeISO) / 1000,
1268+
Date.parse(currentTimeISO) / 1000 -
1269+
Date.parse(startingTimeISO) / 1000,
1270+
frequency,
1271+
);
1272+
1273+
const pointsWatchdog = addMissingDataPoint(
1274+
// @ts-expect-error - FIXME when you are working on it
1275+
watchdogResult.values,
1276+
Date.parse(startingTimeISO) / 1000,
1277+
Date.parse(currentTimeISO) / 1000 -
1278+
Date.parse(startingTimeISO) / 1000,
1279+
frequency,
1280+
);
1281+
1282+
const segments = getSegments({
1283+
pointsDegraded,
1284+
pointsAtRisk,
1285+
pointsWatchdog,
1286+
});
1287+
console.log('DEBUG: segments', segments);
1288+
return segments.map(convertSegmentToAlert);
1289+
})
1290+
.catch((error) => {
1291+
// Re-throw the error to see the original error in React Query
1292+
throw error;
1293+
});
1294+
};
12601295

12611296
return {
12621297
queryKey: ['clusterAlertsNumber', duration],
@@ -1279,6 +1314,7 @@ export const getClusterAlertSegmentQuery = (duration: number) => {
12791314
refetchOnWindowFocus: false,
12801315
};
12811316
};
1317+
12821318
export const prometheusKey = {
12831319
query: (query) => ['query', query],
12841320
queryRange: ['queryRange'],

ui/src/services/utils.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,10 @@ export function getNaNSegments(points: [number, number | string | null][]): {
324324
}
325325
export function getSegments({ pointsAtRisk, pointsDegraded, pointsWatchdog }) {
326326
return pointsDegraded.reduce((agg, [timestamp, degradedValue], index) => {
327-
const atRiskValue = pointsAtRisk[index][1];
327+
const atRiskValue = pointsAtRisk[index]?.[1] || 0;
328+
const watchdogValue = pointsWatchdog[index]?.[1] || '0';
328329
const currentType =
329-
pointsWatchdog[index][1] !== '1'
330+
watchdogValue !== '1'
330331
? NAN_STRING
331332
: atRiskValue > 0
332333
? STATUS_CRITICAL

0 commit comments

Comments
 (0)