Skip to content

Commit 7016510

Browse files
committed
don't track failures for benign application errors
1 parent 89418df commit 7016510

4 files changed

Lines changed: 20 additions & 6 deletions

File tree

core/src/telemetry/metrics.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use temporal_sdk_core_api::telemetry::metrics::{
1313
NoOpCoreMeter,
1414
};
1515
use temporal_sdk_core_protos::temporal::api::enums::v1::WorkflowTaskFailedCause;
16+
use temporal_sdk_core_protos::temporal::api::failure::v1::Failure;
1617

1718
/// Used to track context associated with metrics, and record/update them
1819
///
@@ -592,6 +593,19 @@ pub(super) const TASK_SLOTS_AVAILABLE_NAME: &str = "worker_task_slots_available"
592593
pub(super) const TASK_SLOTS_USED_NAME: &str = "worker_task_slots_used";
593594
pub(super) const STICKY_CACHE_SIZE_NAME: &str = "sticky_cache_size";
594595

596+
/// Calls the provided metric function only if the failure is not a benign application failure.
597+
pub(crate) fn record_failure_metric(
598+
failure: &Option<Failure>,
599+
metric_fn: impl FnOnce(),
600+
) {
601+
let is_benign = failure
602+
.as_ref()
603+
.map_or(false, |f| f.is_benign_application_failure());
604+
if !is_benign {
605+
metric_fn();
606+
}
607+
}
608+
595609
/// Helps define buckets once in terms of millis, but also generates a seconds version
596610
macro_rules! define_latency_buckets {
597611
($(($metric_name:pat, $name:ident, $sec_name:ident, [$($bucket:expr),*])),*) => {

core/src/worker/activities.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::{
1313
UsedMeteredSemPermit,
1414
},
1515
pollers::{BoxedActPoller, PermittedTqResp, TrackedPermittedTqResp, new_activity_task_poller},
16-
telemetry::metrics::{MetricsContext, activity_type, eager, workflow_type},
16+
telemetry::metrics::{MetricsContext, activity_type, eager, workflow_type, record_failure_metric},
1717
worker::{
1818
activities::activity_heartbeat_manager::ActivityHeartbeatError, client::WorkerClient,
1919
},
@@ -349,7 +349,7 @@ impl WorkerActivityTasks {
349349
.err()
350350
}
351351
aer::Status::Failed(ar::Failure { failure }) => {
352-
act_metrics.act_execution_failed();
352+
record_failure_metric(&failure, || act_metrics.act_execution_failed());
353353
client
354354
.fail_activity_task(task_token.clone(), failure)
355355
.await

core/src/worker/activities/local_activities.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::{
33
abstractions::{MeteredPermitDealer, OwnedMeteredSemPermit, UsedMeteredSemPermit, dbg_panic},
44
protosext::ValidScheduleLA,
55
retry_logic::RetryPolicyExt,
6-
telemetry::metrics::{activity_type, workflow_type},
6+
telemetry::metrics::{activity_type, workflow_type, record_failure_metric},
77
worker::workflow::HeartbeatTimeoutMsg,
88
};
99
use futures_util::{
@@ -583,7 +583,7 @@ impl LocalActivityManager {
583583
la_metrics.la_exec_latency(runtime);
584584
let outcome = match &status {
585585
LocalActivityExecutionResult::Failed(fail) => {
586-
la_metrics.la_execution_failed();
586+
record_failure_metric(&fail.failure, || la_metrics.la_execution_failed());
587587
Outcome::FailurePath {
588588
backoff: calc_backoff!(fail),
589589
}

core/src/worker/workflow/managed_run.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,8 +1125,8 @@ impl ManagedRun {
11251125
Some(CmdAttribs::CompleteWorkflowExecutionCommandAttributes(_)) => {
11261126
self.metrics.wf_completed();
11271127
}
1128-
Some(CmdAttribs::FailWorkflowExecutionCommandAttributes(_)) => {
1129-
self.metrics.wf_failed();
1128+
Some(CmdAttribs::FailWorkflowExecutionCommandAttributes(attrs)) => {
1129+
metrics::record_failure_metric(&attrs.failure, || self.metrics.wf_failed());
11301130
}
11311131
Some(CmdAttribs::ContinueAsNewWorkflowExecutionCommandAttributes(_)) => {
11321132
self.metrics.wf_continued_as_new();

0 commit comments

Comments
 (0)