Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions core/src/telemetry/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use temporal_sdk_core_api::telemetry::metrics::{
NoOpCoreMeter,
};
use temporal_sdk_core_protos::temporal::api::enums::v1::WorkflowTaskFailedCause;
use temporal_sdk_core_protos::temporal::api::failure::v1::Failure;

/// Used to track context associated with metrics, and record/update them
///
Expand Down Expand Up @@ -592,6 +593,19 @@ pub(super) const TASK_SLOTS_AVAILABLE_NAME: &str = "worker_task_slots_available"
pub(super) const TASK_SLOTS_USED_NAME: &str = "worker_task_slots_used";
pub(super) const STICKY_CACHE_SIZE_NAME: &str = "sticky_cache_size";

/// Invokes `metric_fn` unless `failure` holds a benign application failure.
///
/// * `failure` - the optional failure attached to the outcome being reported. When it is
///   `None` there is nothing to classify as benign, so the metric is still recorded.
/// * `metric_fn` - the closure that actually records the metric; it is called at most once.
pub(crate) fn record_failure_metric(failure: &Option<Failure>, metric_fn: impl FnOnce()) {
    // Only a present failure that reports itself as benign suppresses the metric.
    let is_benign = failure
        .as_ref()
        .is_some_and(|f| f.is_benign_application_failure());
    if !is_benign {
        metric_fn();
    }
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we really need this function - the closure passing is a bit performative. I think it'd be easier to do this check at the call site, or, if you want to save a few lines, you can make a trait that handles the option stuff and implement it for `Option<Failure>`. That way you can call `should_record_metric()` directly on the `Option<Failure>`.

/// Helps define buckets once in terms of millis, but also generates a seconds version
macro_rules! define_latency_buckets {
($(($metric_name:pat, $name:ident, $sec_name:ident, [$($bucket:expr),*])),*) => {
Expand Down
4 changes: 2 additions & 2 deletions core/src/worker/activities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::{
UsedMeteredSemPermit,
},
pollers::{BoxedActPoller, PermittedTqResp, TrackedPermittedTqResp, new_activity_task_poller},
telemetry::metrics::{MetricsContext, activity_type, eager, workflow_type},
telemetry::metrics::{MetricsContext, activity_type, eager, workflow_type, record_failure_metric},
worker::{
activities::activity_heartbeat_manager::ActivityHeartbeatError, client::WorkerClient,
},
Expand Down Expand Up @@ -349,7 +349,7 @@ impl WorkerActivityTasks {
.err()
}
aer::Status::Failed(ar::Failure { failure }) => {
act_metrics.act_execution_failed();
record_failure_metric(&failure, || act_metrics.act_execution_failed());
client
.fail_activity_task(task_token.clone(), failure)
.await
Expand Down
4 changes: 2 additions & 2 deletions core/src/worker/activities/local_activities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{
abstractions::{MeteredPermitDealer, OwnedMeteredSemPermit, UsedMeteredSemPermit, dbg_panic},
protosext::ValidScheduleLA,
retry_logic::RetryPolicyExt,
telemetry::metrics::{activity_type, workflow_type},
telemetry::metrics::{activity_type, workflow_type, record_failure_metric},
worker::workflow::HeartbeatTimeoutMsg,
};
use futures_util::{
Expand Down Expand Up @@ -583,7 +583,7 @@ impl LocalActivityManager {
la_metrics.la_exec_latency(runtime);
let outcome = match &status {
LocalActivityExecutionResult::Failed(fail) => {
la_metrics.la_execution_failed();
record_failure_metric(&fail.failure, || la_metrics.la_execution_failed());
Outcome::FailurePath {
backoff: calc_backoff!(fail),
}
Expand Down
4 changes: 2 additions & 2 deletions core/src/worker/workflow/managed_run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1125,8 +1125,8 @@ impl ManagedRun {
Some(CmdAttribs::CompleteWorkflowExecutionCommandAttributes(_)) => {
self.metrics.wf_completed();
}
Some(CmdAttribs::FailWorkflowExecutionCommandAttributes(_)) => {
self.metrics.wf_failed();
Some(CmdAttribs::FailWorkflowExecutionCommandAttributes(attrs)) => {
metrics::record_failure_metric(&attrs.failure, || self.metrics.wf_failed());
}
Some(CmdAttribs::ContinueAsNewWorkflowExecutionCommandAttributes(_)) => {
self.metrics.wf_continued_as_new();
Expand Down
Loading
Loading