Skip to content

Commit f79517d

Browse files
authored
feat: Add retry script (#7053)
1 parent 1a5a1d0 commit f79517d

2 files changed

Lines changed: 278 additions & 1 deletion

File tree

docs/ai-agents/operational-debugging.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@ Claude uses optimized, progressive queries to minimize tokens and maximize relev
610610

611611
### Base Agent Query Templates (with noise filtering)
612612

613-
**Relayer Query:**
613+
**Relayer Query (Optimized with Enhanced Noise Filtering):**
614614

615615
```
616616
resource.type="k8s_container"
@@ -623,7 +623,20 @@ labels."k8s-pod/app_kubernetes_io/instance"="omniscient-relayer"
623623
labels."k8s-pod/app_kubernetes_io/name"="hyperlane-agent"
624624
-jsonPayload.fields.message="No message found in DB for leaf index"
625625
-jsonPayload.fields.message="Found log(s) in index range"
626+
-jsonPayload.fields.message="Dispatching get_public_key"
627+
NOT "Instantiated AWS signer"
628+
-jsonPayload.fields.message="Ingesting leaf"
629+
-jsonPayload.fields.message="Message already marked as processed in DB"
630+
-jsonPayload.fields.message="Message destined for self, skipping"
631+
-jsonPayload.fields.message="Message has already been delivered, marking as submitted."
632+
-jsonPayload.fields.message="Processor working on message"
633+
-jsonPayload.fields.message="Message destined for unknown domain, skipping"
626634
-jsonPayload.fields.message="Popped OpQueue operations"
635+
-jsonPayload.fields.message="Validator returned latest index"
636+
-jsonPayload.fields.message="Found signed checkpoint"
637+
-jsonPayload.fields.return="Ok(None)"
638+
-jsonPayload.fields.message="Fast forwarded current sequence"
639+
-jsonPayload.fields.message="Cursor can't make progress, sleeping"
627640
-jsonPayload.fields.message="fallback_request"
628641
```
629642

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
import { spawn } from 'child_process';
2+
import yargs from 'yargs';
3+
4+
import { Contexts } from '../../config/contexts.js';
5+
import { DeployEnvironment } from '../../src/config/environment.js';
6+
import {
7+
assertCorrectKubeContext,
8+
getArgs,
9+
withContext,
10+
} from '../agent-utils.js';
11+
import { getConfigsBasedOnArgs } from '../core-utils.js';
12+
13+
/**
 * Options accepted by {@link retryMessage}.
 *
 * Exactly one retry mode is used, in priority order:
 *  1. `messageId` — retry a single message by ID;
 *  2. any of the domain/address filters — retry all messages matching them;
 *  3. none of the above — retry every eligible message.
 */
interface RetryOptions {
  environment: DeployEnvironment;
  // Specific message ID to retry (takes precedence over the filters below).
  messageId?: string;
  // Filter: origin domain ID of messages to retry.
  originDomain?: number;
  // Filter: destination domain ID of messages to retry.
  destinationDomain?: number;
  // Filter: sender address of messages to retry.
  sender?: string;
  // Filter: recipient address of messages to retry.
  recipient?: string;
  // Agent context; defaults to Contexts.Hyperlane (affects helm release name).
  context?: Contexts;
  // Kubernetes namespace; defaults to the environment name.
  namespace?: string;
  // Local port for the kubectl port-forward; defaults to 9090.
  port?: number;
}
24+
25+
async function retryMessage(options: RetryOptions) {
26+
const {
27+
environment,
28+
messageId,
29+
originDomain,
30+
destinationDomain,
31+
sender,
32+
recipient,
33+
context = Contexts.Hyperlane,
34+
namespace,
35+
port = 9090,
36+
} = options;
37+
38+
console.log(`🔄 Starting message retry for environment: ${environment}`);
39+
40+
// Get environment config and ensure correct kube context
41+
const { envConfig } = await getConfigsBasedOnArgs({ environment, context });
42+
await assertCorrectKubeContext(envConfig);
43+
44+
// Determine namespace - use provided or derive from environment
45+
const ns = namespace || environment;
46+
47+
// Construct pod name based on context following actual helm release naming:
48+
// Helm release: omniscient-relayer (default) or omniscient-relayer-{context}
49+
// Pod name: {helm-release}-hyperlane-agent-relayer-0
50+
const helmRelease =
51+
context === Contexts.Hyperlane
52+
? 'omniscient-relayer'
53+
: `omniscient-relayer-${context}`;
54+
const podName = `${helmRelease}-hyperlane-agent-relayer-0`;
55+
56+
console.log(`📡 Setting up port-forward to ${podName} in namespace ${ns}...`);
57+
58+
// Start port-forward process
59+
const portForward = spawn('kubectl', [
60+
'port-forward',
61+
podName,
62+
`${port}:9090`,
63+
'-n',
64+
ns,
65+
]);
66+
67+
let isConnected = false;
68+
let retries = 0;
69+
const maxRetries = 30; // 30 seconds max wait
70+
71+
// Wait for port-forward to be ready
72+
await new Promise<void>((resolve, reject) => {
73+
const checkConnection = async () => {
74+
try {
75+
await fetch(`http://localhost:${port}/health`, {
76+
signal: AbortSignal.timeout(1000),
77+
});
78+
isConnected = true;
79+
console.log(`✅ Port-forward established on port ${port}`);
80+
resolve();
81+
} catch (error) {
82+
retries++;
83+
if (retries >= maxRetries) {
84+
reject(
85+
new Error('Port-forward failed to establish after 30 seconds'),
86+
);
87+
return;
88+
}
89+
// Wait 1 second and try again
90+
setTimeout(checkConnection, 1000);
91+
}
92+
};
93+
94+
// Start checking after 2 seconds to let kubectl initialize
95+
setTimeout(checkConnection, 2000);
96+
});
97+
98+
try {
99+
// Prepare request body based on retry method - API expects array of rules
100+
let requestBody: any[] = [];
101+
102+
if (messageId) {
103+
requestBody = [{ messageid: messageId }];
104+
console.log(`🎯 Triggering retry for specific message: ${messageId}`);
105+
} else if (originDomain || destinationDomain || sender || recipient) {
106+
// Use correct API field names from relayer implementation
107+
requestBody = [
108+
{
109+
...(originDomain && { origindomain: originDomain }),
110+
...(destinationDomain && { destinationdomain: destinationDomain }),
111+
...(sender && { senderaddress: sender }),
112+
...(recipient && { recipientaddress: recipient }),
113+
},
114+
];
115+
116+
console.log(`🎯 Triggering retry with filters:`, requestBody[0]);
117+
} else {
118+
requestBody = [];
119+
console.log(`🔄 Triggering retry for all eligible messages`);
120+
}
121+
122+
console.log(`📤 Sending retry request to relayer API...`);
123+
console.log(`🔍 Request body:`, JSON.stringify(requestBody, null, 2));
124+
125+
// Make retry request
126+
const response = await fetch(`http://localhost:${port}/message_retry`, {
127+
method: 'POST',
128+
headers: {
129+
'Content-Type': 'application/json',
130+
},
131+
body: JSON.stringify(requestBody),
132+
});
133+
134+
if (!response.ok) {
135+
throw new Error(`HTTP error! status: ${response.status}`);
136+
}
137+
138+
const result = await response.json();
139+
console.log(`✅ Retry request successful:`, result);
140+
console.log(
141+
`📊 Messages evaluated: ${result.evaluated}, matched: ${result.matched}`,
142+
);
143+
144+
if (result.matched > 0) {
145+
console.log(
146+
`🚀 ${result.matched} message(s) moved to front of processing queue`,
147+
);
148+
} else {
149+
console.log(`ℹ️ No messages matched the retry criteria`);
150+
}
151+
} catch (error) {
152+
console.error('❌ Error making retry request:', error);
153+
throw error;
154+
} finally {
155+
// Clean up port-forward
156+
console.log('🧹 Cleaning up port-forward...');
157+
portForward.kill('SIGTERM');
158+
159+
// Give it a moment to clean up gracefully
160+
await new Promise((resolve) => setTimeout(resolve, 1000));
161+
}
162+
}
163+
164+
async function main() {
165+
const argv = await withContext(getArgs())
166+
.option('message-id', {
167+
alias: 'm',
168+
describe: 'Specific message ID to retry',
169+
type: 'string',
170+
})
171+
.option('origin-domain', {
172+
alias: 'o',
173+
describe: 'Origin domain ID to filter messages',
174+
type: 'number',
175+
})
176+
.option('destination-domain', {
177+
alias: 'd',
178+
describe: 'Destination domain ID to filter messages',
179+
type: 'number',
180+
})
181+
.option('sender', {
182+
alias: 's',
183+
describe: 'Sender address to filter messages',
184+
type: 'string',
185+
})
186+
.option('recipient', {
187+
alias: 'r',
188+
describe: 'Recipient address to filter messages',
189+
type: 'string',
190+
})
191+
.option('namespace', {
192+
alias: 'n',
193+
describe: 'Kubernetes namespace (auto-detected if not provided)',
194+
type: 'string',
195+
})
196+
.option('port', {
197+
alias: 'p',
198+
describe: 'Local port for port-forward',
199+
type: 'number',
200+
default: 9090,
201+
})
202+
.conflicts('message-id', [
203+
'origin-domain',
204+
'destination-domain',
205+
'sender',
206+
'recipient',
207+
])
208+
.help()
209+
.alias('h', 'help')
210+
.example([
211+
['$0 -e mainnet3', 'Retry all eligible messages in mainnet3'],
212+
['$0 -e mainnet3 -m 0xe202b08d...', 'Retry specific message by ID'],
213+
[
214+
'$0 -e mainnet3 -o 56 -d 1',
215+
'Retry messages from BSC (56) to Ethereum (1)',
216+
],
217+
[
218+
'$0 -e mainnet3 -s 0x1234... -r 0x5678...',
219+
'Retry messages from sender to recipient',
220+
],
221+
[
222+
'$0 -e testnet4 -x neutron',
223+
'Retry messages in testnet4 with neutron context',
224+
],
225+
]).argv;
226+
227+
// Validate that at least one filter method is provided when using whitelist
228+
if (
229+
!argv.messageId &&
230+
(argv.originDomain ||
231+
argv.destinationDomain ||
232+
argv.sender ||
233+
argv.recipient)
234+
) {
235+
if (
236+
!argv.originDomain &&
237+
!argv.destinationDomain &&
238+
!argv.sender &&
239+
!argv.recipient
240+
) {
241+
console.error(
242+
'❌ When using whitelist filtering, at least one filter must be specified',
243+
);
244+
process.exit(1);
245+
}
246+
}
247+
248+
try {
249+
await retryMessage(argv);
250+
} catch (error) {
251+
console.error('💥 Message retry failed:', error);
252+
process.exit(1);
253+
}
254+
}
255+
256+
main()
257+
.then(() => {
258+
console.log('🎉 Message retry completed successfully');
259+
process.exit(0);
260+
})
261+
.catch((err) => {
262+
console.error('💥 Error in message retry:', err);
263+
process.exit(1);
264+
});

0 commit comments

Comments
 (0)