Skip to content

Commit af0176b

Browse files
committed
feat: agent service healthchecks
1 parent c250fc7 commit af0176b

File tree

7 files changed

+142
-2
lines changed

7 files changed

+142
-2
lines changed

packages/fasset-bots-cli/src/run/run-agent.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import "dotenv/config";
22
import "source-map-support/register";
33

4-
import { AgentBotRunner, PricePublisherService, startActivityTimestampUpdater, stopActivityTimestampUpdater, TimeKeeperService, TimekeeperTimingConfig } from "@flarenetwork/fasset-bots-core";
4+
import { AgentBotRunner, PricePublisherService, startActivityTimestampUpdater, startHealthcheckMonitor, stopActivityTimestampUpdater, TimeKeeperService, TimekeeperTimingConfig } from "@flarenetwork/fasset-bots-core";
55
import { closeBotConfig, createBotConfig, loadAgentConfigFile, Secrets } from "@flarenetwork/fasset-bots-core/config";
66
import { assertCmd, assertNotNullCmd, authenticatedHttpProvider, CommandLineError, formatFixed, initWeb3, isNotNull, logger, sendWeb3Transaction, toBN, toBNExp, web3 } from "@flarenetwork/fasset-bots-core/utils";
77
import BN from "bn.js";
88
import { programWithCommonOptions } from "../utils/program";
99
import { toplevelRun } from "../utils/toplevel";
10+
import { AgentNotifier } from "../../../fasset-bots-core/src/utils/notifier/AgentNotifier";
1011

1112
const timekeeperConfig: TimekeeperTimingConfig = {
1213
queryWindow: 172800,
@@ -17,6 +18,7 @@ const timekeeperConfig: TimekeeperTimingConfig = {
1718
}
1819

1920
const activityUpdateInterval = 60000; // 1min
21+
const healthcheckMonitorInterval = 60000; // 1min
2022

2123
const program = programWithCommonOptions("agent", "all_fassets");
2224

@@ -104,6 +106,9 @@ program.action(async () => {
104106
}
105107
// start activity update
106108
startActivityTimestampUpdater(botConfig.orm.em, activityUpdateInterval);
109+
// start healthcheck monitor
110+
const agent = new AgentNotifier(owner.address, runner.notifierTransports);
111+
startHealthcheckMonitor(botConfig, agent, healthcheckMonitorInterval);
107112
// run
108113
try {
109114
console.log("Agent bot started, press CTRL+C to end");
packages/fasset-bots-core/src/actors/service-health-checker.ts

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import { web3 } from "@flarenetwork/fasset-bots-core/utils";
2+
import { AgentNotifier } from "../utils/notifier/AgentNotifier";
3+
import { BlockchainIndexerHelper } from "../underlying-chain/BlockchainIndexerHelper";
4+
import { FlareDataConnectorClientHelper } from "../underlying-chain/FlareDataConnectorClientHelper";
5+
import { IBlockChainWallet } from "../underlying-chain/interfaces/IBlockChainWallet";
6+
import { AgentBotConfig } from "../config";
7+
import { logger } from "../utils/logger";
8+
9+
/** Maximum allowed age, in seconds, of the latest native-chain block before the RPC is reported as lagging. */
const MAX_BLOCK_TIMESTAMP_DELAY_SECONDS = 30

/**
 * Checks the native chain RPC by fetching the latest block through the `web3` instance
 * and comparing the block timestamp with the local clock.
 *
 * @returns `undefined` when the RPC looks healthy, otherwise a short description of the problem
 *          (a stale latest block, or the message of whatever the RPC call threw).
 */
async function checkNativeRpcHealth(): Promise<string | undefined> {
    try {
        const height = await web3.eth.getBlockNumber()
        const block = await web3.eth.getBlock(height)
        const now = Math.floor(Date.now() / 1000)
        if (Number(block.timestamp) < now - MAX_BLOCK_TIMESTAMP_DELAY_SECONDS) {
            return 'block height too far behind'
        }
        return undefined
    } catch (e: any) {
        // Thrown values are not guaranteed to carry a `message`; fall back to the value itself
        // so the report never degrades to the literal string "undefined".
        return String(e?.message ?? e)
    }
}
23+
24+
/**
 * Checks the blockchain indexer by calling its `getHealth()` endpoint wrapper.
 *
 * @param indexer indexer helper whose `getHealth()` resolves to `true` when the service is healthy
 * @returns `undefined` when healthy, otherwise a short description of the problem
 *          (a negative health response, or the message of whatever the call threw).
 */
async function checkIndexerHealth(
    indexer: BlockchainIndexerHelper
): Promise<string | undefined> {
    try {
        const ok = await indexer.getHealth()
        if (!ok) {
            return 'health check failed'
        }
        return undefined
    } catch (e: any) {
        // Thrown values are not guaranteed to carry a `message`; fall back to the value itself
        // so the report never degrades to the literal string "undefined".
        return String(e?.message ?? e)
    }
}
36+
37+
/**
 * Checks the data access layer (DAL) clients of the Flare data connector.
 *
 * @param fdc data connector helper; `getHealth()` resolves to one boolean per configured client,
 *            index-aligned with `dataAccessLayerUrls`
 * @returns `undefined` when every client is healthy, otherwise a short description of the problem:
 *          no clients configured, the comma-separated URLs of the unhealthy clients,
 *          or the message of whatever the health call threw.
 */
async function checkDalHealth(
    fdc: FlareDataConnectorClientHelper
): Promise<string | undefined> {
    try {
        // With an empty client list there is nothing to query, which is itself a failure.
        if (fdc.dataAccessLayerClients.length === 0) {
            return 'no clients available'
        }
        const healthy = await fdc.getHealth()
        // Keep only the URLs of clients that reported unhealthy (healthy ones map to '' and are dropped).
        const unhealthyUrls = healthy
            .map((ok, i) => (ok ? '' : fdc.dataAccessLayerUrls[i]))
            .filter(url => url !== '')
        if (unhealthyUrls.length > 0) {
            return `clients ${unhealthyUrls.join(',')} are unhealthy`
        }
        return undefined
    } catch (e: any) {
        // Thrown values are not guaranteed to carry a `message`; fall back to the value itself
        // so the report never degrades to the literal string "undefined".
        return String(e?.message ?? e)
    }
}
53+
54+
/**
 * Checks the underlying-chain wallet client by requesting the current payment transaction fee.
 *
 * @param wallet wallet whose `getTransactionFee` is used as the probe
 * @returns `undefined` when the client answered with a positive fee, otherwise a short description
 *          of the problem (a non-positive fee, or the message of whatever the call threw).
 */
async function checkUnderlyingWalletHealth(
    wallet: IBlockChainWallet
): Promise<string | undefined> {
    try {
        const fee = await wallet.getTransactionFee({ isPayment: true })
        // A fee of zero or less is treated as a malformed answer from the client.
        if (fee.lten(0)) {
            return 'client returned invalid response'
        }
        return undefined
    } catch (e: any) {
        // Thrown values are not guaranteed to carry a `message`; fall back to the value itself
        // so the report never degrades to the literal string "undefined".
        return String(e?.message ?? e)
    }
}
66+
67+
/**
 * Runs one round of health checks against the external services the agent bot depends on
 * and sends a notification for every service found unhealthy.
 *
 * The native RPC check takes no per-asset input, so it runs once per round. The indexer,
 * DAL and underlying wallet checks run for every configured f-asset, and each failure is
 * reported independently so that one failing service does not mask another.
 *
 * @param config bot configuration providing the per-asset service clients
 * @param notifier notifier used to report unhealthy services
 */
async function serviceHealthcheck(
    config: AgentBotConfig,
    notifier: AgentNotifier
): Promise<void> {
    const errors: [string, string][] = []
    const rpcHealth = await checkNativeRpcHealth()
    if (rpcHealth != null) {
        errors.push(['native rpc', rpcHealth])
    }
    for (const c of config.fAssets.values()) {
        const indexerHealth = await checkIndexerHealth(c.blockchainIndexerClient)
        // @ts-ignore flareDataConnector has the right implementation
        const dalHealth = await checkDalHealth(c.flareDataConnector)
        const walletHealth = await checkUnderlyingWalletHealth(c.wallet)
        if (indexerHealth != null) {
            errors.push(['FDC verifier', indexerHealth])
        }
        if (dalHealth != null) {
            errors.push(['DAL', dalHealth])
        }
        if (walletHealth != null) {
            errors.push(['underlying rpc', walletHealth])
        }
    }
    for (const [service, reason] of errors) {
        try {
            await notifier.serviceUnhealthy(service, reason)
        } catch (e: any) {
            // This function is started fire-and-forget, so a failing notifier must not
            // surface as an unhandled promise rejection.
            logger.error(`Failed to send unhealthy notification for ${service}: ${String(e?.message ?? e)}`)
        }
    }
}
92+
93+
/** Handle of the running healthcheck interval, or `null` when no monitor is active. */
let healthcheckMonitorTimer: NodeJS.Timeout | null = null;

/**
 * Starts the periodic service healthcheck: runs one check immediately and then one
 * every `healthcheckInterval` milliseconds.
 *
 * Calling this while a monitor is already running replaces the previous interval
 * instead of leaking it.
 *
 * @param config bot configuration providing the service clients to check
 * @param notifier notifier used to report unhealthy services
 * @param healthcheckInterval delay between checks, in milliseconds
 */
export function startHealthcheckMonitor(
    config: AgentBotConfig,
    notifier: AgentNotifier,
    healthcheckInterval: number
) {
    if (healthcheckMonitorTimer !== null) {
        // Without this, the old interval would keep firing with no handle left to clear it.
        clearInterval(healthcheckMonitorTimer);
    }
    void serviceHealthcheck(config, notifier);
    healthcheckMonitorTimer = setInterval(
        () => void serviceHealthcheck(config, notifier),
        healthcheckInterval
    );
}
106+
107+
/**
 * Stops the periodic service healthcheck, if one is running.
 *
 * The timer handle is reset after clearing, so calling this again is a silent no-op
 * rather than re-clearing a dead handle and logging a second time.
 */
export function stopHealthcheckMonitor() {
    if (healthcheckMonitorTimer === null) {
        return;
    }
    clearInterval(healthcheckMonitorTimer);
    healthcheckMonitorTimer = null;
    logger.info("Healthcheck monitor timer was cleared.");
    console.log("Healthcheck monitor timer was cleared.");
}

packages/fasset-bots-core/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ export * from "./config/config-file-loader";
2626
export { encryptText, decryptText, EncryptionMethod } from "@flarenetwork/simple-wallet";
2727
export { isJSON, promptForPassword } from "./utils/prompt";
2828
export { startActivityTimestampUpdater, stopActivityTimestampUpdater, lastActivityTimestampSeconds } from "./actors/activity-updater";
29+
export { startHealthcheckMonitor, stopHealthcheckMonitor } from "./actors/service-health-checker";
2930
export * from "./fasset/Conversions";
3031
export * from "./state/CollateralPrice";
3132
export * from "./underlying-chain/interfaces/IBlockChain";

packages/fasset-bots-core/src/underlying-chain/BlockchainIndexerHelper.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,11 @@ export class BlockchainIndexerHelper implements IBlockChain {
202202
return blockHeight;
203203
}
204204

205+
/**
 * Queries the verifier's `/api/health` endpoint.
 *
 * @returns `true` when the response status is "OK", `false` otherwise
 */
async getHealth(): Promise<boolean> {
    const { status } = await this.verifier.get<ApiWrapper<boolean>>(`/api/health`, "getHealth");
    return status === "OK";
}
209+
205210
async getCurrentBlockHeightFromIndexer(): Promise<number> {
206211
const respdata = await this.verifier.get<ApiWrapper<number>>(`/api/indexer/block-height-tip`, "getCurrentBlockHeightFromIndexer");
207212
const status = respdata.status;

packages/fasset-bots-core/src/underlying-chain/FlareDataConnectorClientHelper.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import {
33
ConfirmedBlockHeightExists, Payment, ReferencedPaymentNonexistence, decodeAttestationName
44
} from "@flarenetwork/state-connector-protocol";
55
import { IFdcHubInstance, IFdcRequestFeeConfigurationsInstance, IFdcVerificationInstance, IRelayInstance } from "../../typechain-truffle";
6-
import { FspStatusResult } from "../utils/data-access-layer-types";
6+
import { DalHealthResult, FspStatusResult } from "../utils/data-access-layer-types";
77
import { findRequiredEvent } from "../utils/events/truffle";
88
import { formatArgs } from "../utils/formatting";
99
import { DEFAULT_RETRIES, ZERO_BYTES32, retry, sleep } from "../utils/helpers";
@@ -257,6 +257,13 @@ export class FlareDataConnectorClientHelper implements IFlareDataConnectorClient
257257
return proof;
258258
}
259259

260+
/**
 * Queries `/api/health` on every configured data access layer client.
 *
 * Each client is probed independently: a client whose request fails is reported as
 * unhealthy (`false`) instead of rejecting the whole call, so callers always get one
 * entry per client and can tell which ones are down.
 *
 * @returns one boolean per entry of `dataAccessLayerClients`, in the same order
 */
async getHealth(): Promise<boolean[]> {
    return await Promise.all(this.dataAccessLayerClients.map(async (client) => {
        try {
            const response = await client.get<DalHealthResult>('/api/health', 'getHealth', 0);
            // Anything other than an explicit `true` counts as unhealthy.
            return response?.healthy === true;
        } catch {
            // A failed request means this client is unhealthy.
            return false;
        }
    }));
}
266+
260267
/* istanbul ignore next */
261268
private async verifyProof(proofData: AttestationProof): Promise<boolean> {
262269
const normalizedProofData = web3DeepNormalize(proofData);

packages/fasset-bots-core/src/utils/data-access-layer-types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@ export interface FtsoFeedResult {
2121
turnoutBIPS: number | string;
2222
decimals: number | string;
2323
}
24+
25+
/** Response body type used when requesting a data access layer client's `/api/health` endpoint. */
export interface DalHealthResult {
26+
/** Health flag reported by the client. */
healthy: boolean;
27+
}

packages/fasset-bots-core/src/utils/notifier/AgentNotifier.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ export enum AgentNotificationKey {
118118
// unexpected errors - typically external service or network errors
119119
GENERIC_INFO = "INFO",
120120
UNEXPECTED_ERROR = "UNEXPECTED ERROR",
121+
SERVICE_UNHEALTHY = "SERVICE UNHEALTHY",
121122
}
122123

123124
export const agentNotifierThrottlingTimes: NotifierThrottlingConfigs = {
@@ -142,6 +143,10 @@ export class AgentNotifier extends BaseNotifier<AgentNotificationKey> {
142143
await this.danger(AgentNotificationKey.UNEXPECTED_ERROR, fullMessage);
143144
}
144145

146+
/**
 * Reports an unhealthy external service through `this.danger` under the
 * `SERVICE_UNHEALTHY` notification key.
 *
 * @param service name of the affected service
 * @param reason description of what the healthcheck found
 */
async serviceUnhealthy(service: string, reason: string) {
    const message = `Service ${service} is unhealthy: "${reason}".`;
    await this.danger(AgentNotificationKey.SERVICE_UNHEALTHY, message);
}
149+
145150
async agentCreationFailed(error: string) {
146151
await this.danger(AgentNotificationKey.AGENT_CREATED_ERROR, `Failed to create agent: ${error}.`);
147152
}

0 commit comments

Comments
 (0)