Skip to content

Commit 6a72af3

Browse files
authored
chore: temporal slack alerting [IN-904] (#1574)
Signed-off-by: Joana Maia <jmaia@contractor.linuxfoundation.org>
1 parent c7a519f commit 6a72af3

File tree

4 files changed

+71
-2
lines changed

4 files changed

+71
-2
lines changed

submodules/crowd.dev

workers/temporal/base/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"@crowd/temporal": "workspace:*",
1717
"@crowd/telemetry": "workspace:*",
1818
"@crowd/logging": "workspace:*",
19+
"@crowd/slack": "workspace:*",
1920
"@temporalio/worker": "~1.11.1",
2021
"@temporalio/activity": "~1.11.1",
2122
"@temporalio/workflow": "~1.11.1"
Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
// Copyright (c) 2025 The Linux Foundation and each contributor.
22
// SPDX-License-Identifier: MIT
3+
import { getServiceChildLogger } from '@crowd/logging'
4+
import { SlackChannel, SlackPersona, sendSlackNotificationAsync } from '@crowd/slack'
35
import telemetry from '@crowd/telemetry'
46

7+
const log = getServiceChildLogger('activity-interceptor')
8+
59
async function telemetryDistribution(
610
name: string,
711
value: number,
@@ -10,4 +14,15 @@ async function telemetryDistribution(
1014
telemetry.distribution(name, value, tags)
1115
}
1216

13-
export { telemetryDistribution }
17+
async function slackNotify(message: string, persona: SlackPersona) {
18+
// Accept string to allow workflow code to pass string literals without importing enum
19+
await sendSlackNotificationAsync(
20+
SlackChannel.ALERTS,
21+
persona as SlackPersona,
22+
'Temporal Alert',
23+
message,
24+
)
25+
log.info('Slack notification sent from Temporal activity')
26+
}
27+
28+
export { telemetryDistribution, slackNotify }

workers/temporal/base/src/interceptors.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright (c) 2025 The Linux Foundation and each contributor.
22
// SPDX-License-Identifier: MIT
33
import {
4+
ActivityFailure,
5+
ApplicationFailure,
46
Next,
57
WorkflowExecuteInput,
68
WorkflowInboundCallsInterceptor,
@@ -10,10 +12,41 @@ import {
1012

1113
import * as activities from './activities'
1214

15+
// Use string literal instead of enum to avoid bundling @slack/webhook in workflow code
16+
// The activity will handle the type conversion
17+
const SLACK_PERSONA_ERROR_REPORTER = 'ERROR_REPORTER' as const
18+
1319
const activity = proxyActivities<typeof activities>({
1420
startToCloseTimeout: '10 seconds',
1521
})
1622

23+
/**
24+
* Extract detailed error information when an activity reaches retry limit
25+
*/
26+
function getActivityRetryLimitDetails(err: ActivityFailure): string {
27+
let details = `*Activity:* \`${err.activityType}\`\n`
28+
details += `*Activity ID:* \`${err.activityId || 'N/A'}\`\n`
29+
details += `*Retry State:* ${err.retryState}\n\n`
30+
31+
// Get the root cause error message and type
32+
if (err.cause) {
33+
details += `*Error:* ${err.cause.message}\n`
34+
35+
// If it's an ApplicationFailure, get the type (e.g., AxiosError)
36+
if (err.cause instanceof ApplicationFailure && err.cause.type) {
37+
details += `*Error Type:* ${err.cause.type}\n`
38+
}
39+
40+
// Add stack trace (first 10 lines for context)
41+
if (err.cause.stack) {
42+
const stackLines = err.cause.stack.split('\n').slice(0, 10)
43+
details += `\n*Stack Trace (first 10 lines):*\n\`\`\`\n${stackLines.join('\n')}\n\`\`\``
44+
}
45+
}
46+
47+
return details
48+
}
49+
1750
export class WorkflowMonitoringInterceptor implements WorkflowInboundCallsInterceptor {
1851
async execute(
1952
input: WorkflowExecuteInput,
@@ -33,6 +66,26 @@ export class WorkflowMonitoringInterceptor implements WorkflowInboundCallsInterc
3366
try {
3467
const result = await next(input)
3568
return result
69+
} catch (err) {
70+
// Type guard to ensure err is an Error object
71+
const error = err instanceof Error ? err : new Error(String(err))
72+
73+
if (error.message !== 'Workflow continued as new') {
74+
// Only send detailed notification if it's an activity that reached retry limit
75+
if (err instanceof ActivityFailure && err.retryState === 'MAXIMUM_ATTEMPTS_REACHED') {
76+
const errorDetails = getActivityRetryLimitDetails(err)
77+
const message = `*Workflow Failed: Activity Retry Limit Reached*\n\n*Workflow:* \`${info.workflowType}\`\n*Workflow ID:* \`${info.workflowId}\`\n*Run ID:* \`${info.runId}\`\n\n${errorDetails}`
78+
79+
await activity.slackNotify(message, SLACK_PERSONA_ERROR_REPORTER)
80+
} else {
81+
// For other errors, send a simpler notification
82+
const message = `*Workflow Failed*\n\n*Workflow:* \`${info.workflowType}\`\n*Workflow ID:* \`${info.workflowId}\`\n*Run ID:* \`${info.runId}\`\n*Error:* ${error.message}`
83+
84+
await activity.slackNotify(message, SLACK_PERSONA_ERROR_REPORTER)
85+
}
86+
}
87+
88+
throw err
3689
} finally {
3790
const end = new Date()
3891
const duration = end.getTime() - start.getTime()

0 commit comments

Comments
 (0)