From 7e67eb5a6fde7cceb8ffabe1d3138a1f6df53cbb Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Wed, 22 Apr 2026 12:59:42 -0600 Subject: [PATCH 01/15] bring rust sdk metrics into alignment with standardization plan --- examples/metrics_example.rs | 26 +- harness/src/main.rs | 2 +- src/client/conductor_client.rs | 35 +- src/client/workflow_client.rs | 66 ++- src/events/dispatcher.rs | 66 ++- src/events/mod.rs | 8 +- src/events/task_runner_events.rs | 184 +++++++++ src/http/api_client.rs | 349 ++++++---------- src/http/mod.rs | 2 + src/metrics/collector.rs | 680 +++++++++++++++++++++---------- src/metrics/settings.rs | 11 +- src/worker/task_handler.rs | 20 +- src/worker/task_runner.rs | 73 +++- 13 files changed, 1033 insertions(+), 489 deletions(-) diff --git a/examples/metrics_example.rs b/examples/metrics_example.rs index 4759f2b..2f98f61 100644 --- a/examples/metrics_example.rs +++ b/examples/metrics_example.rs @@ -103,19 +103,25 @@ async fn main() -> Result<()> { println!("{}", "=".repeat(70)); println!("\nMetrics endpoint: http://localhost:9090/metrics"); println!("Health endpoint: http://localhost:9090/health"); - println!("\nAvailable Metrics:"); + println!("\nAvailable Metrics (canonical Conductor SDK catalog):"); println!(" Counter metrics:"); - println!(" - conductor_task_poll_total{{task_type}}"); - println!(" - conductor_task_poll_error_total{{task_type, error_type}}"); - println!(" - conductor_task_execute_error_total{{task_type, error_type}}"); - println!(" - conductor_task_update_error_total{{task_type}}"); - println!(" - conductor_task_paused_total{{task_type}}"); + println!(" - task_poll_total{{taskType}}"); + println!(" - task_poll_error_total{{taskType, exception}}"); + println!(" - task_execution_started_total{{taskType}}"); + println!(" - task_execute_error_total{{taskType, exception}}"); + println!(" - task_update_error_total{{taskType, exception}}"); + println!(" - task_paused_total{{taskType}}"); + println!(" - 
thread_uncaught_exceptions_total{{taskType, exception}}"); + println!(" - workflow_start_error_total{{workflowType, exception}}"); println!("\n Histogram metrics:"); - println!(" - conductor_task_poll_time_seconds{{task_type, status}}"); - println!(" - conductor_task_execute_time_seconds{{task_type, status}}"); + println!(" - task_poll_time_seconds{{taskType, status}}"); + println!(" - task_execute_time_seconds{{taskType, status}}"); + println!(" - task_update_time_seconds{{taskType, status}}"); + println!(" - http_api_client_request_seconds{{method, uri, status}}"); println!("\n Gauge metrics:"); - println!(" - conductor_task_result_size_bytes{{task_type}}"); - println!(" - conductor_active_workers{{task_type}}"); + println!(" - task_result_size_bytes{{taskType}}"); + println!(" - workflow_input_size_bytes{{workflowType}}"); + println!(" - active_workers{{taskType}}"); println!("\nWorkers:"); println!(" - quick_task: Fast execution (~50ms)"); println!(" - variable_task: Variable execution time"); diff --git a/harness/src/main.rs b/harness/src/main.rs index 71d62c1..a514337 100644 --- a/harness/src/main.rs +++ b/harness/src/main.rs @@ -167,7 +167,7 @@ async fn main() { } let governor = Arc::new(WorkflowGovernor::new( - client.workflow_client(), + handler.conductor_client().workflow_client(), WORKFLOW_NAME.to_string(), workflows_per_sec, )); diff --git a/src/client/conductor_client.rs b/src/client/conductor_client.rs index fa2184a..33f3089 100644 --- a/src/client/conductor_client.rs +++ b/src/client/conductor_client.rs @@ -3,6 +3,7 @@ use crate::configuration::Configuration; use crate::error::Result; +use crate::events::EventDispatcher; use crate::http::ApiClient; use super::{ @@ -17,18 +18,46 @@ use super::{ #[derive(Clone)] pub struct ConductorClient { api: ApiClient, + /// Shared event dispatcher used by service clients that emit events + /// (currently [`WorkflowClient`]). 
Defaults to an empty dispatcher; + /// replace with [`Self::with_event_dispatcher`] to wire up listeners + /// such as the metrics collector. + events: EventDispatcher, } impl ConductorClient { /// Create a new Conductor client with the given configuration pub fn new(config: Configuration) -> Result { let api = ApiClient::new(config)?; - Ok(Self { api }) + Ok(Self { + api, + events: EventDispatcher::default(), + }) } /// Create from an existing API client pub fn from_api_client(api: ApiClient) -> Self { - Self { api } + Self { + api, + events: EventDispatcher::default(), + } + } + + /// Share an [`EventDispatcher`] with this client so that service clients + /// (such as [`WorkflowClient`]) publish events to it. + /// + /// Typically used to route workflow-lifecycle events through the same + /// dispatcher as [`TaskHandler`](crate::worker::TaskHandler), allowing a + /// single `MetricsCollector` to observe both task- and workflow-level + /// metrics. + pub fn with_event_dispatcher(mut self, events: EventDispatcher) -> Self { + self.events = events; + self + } + + /// Access the shared event dispatcher. 
+ pub fn event_dispatcher(&self) -> &EventDispatcher { + &self.events } /// Get the task client for polling and updating tasks @@ -43,7 +72,7 @@ impl ConductorClient { /// Get the workflow client for workflow operations pub fn workflow_client(&self) -> WorkflowClient { - WorkflowClient::new(self.api.clone()) + WorkflowClient::new_with_events(self.api.clone(), self.events.clone()) } /// Alias for workflow_client() - matches Python SDK naming diff --git a/src/client/workflow_client.rs b/src/client/workflow_client.rs index e8b06a8..758f708 100644 --- a/src/client/workflow_client.rs +++ b/src/client/workflow_client.rs @@ -6,6 +6,7 @@ use std::time::Duration; use tracing::{debug, info}; use crate::error::Result; +use crate::events::{exception_label, EventDispatcher, WorkflowStartFailure, WorkflowStarted}; use crate::http::ApiClient; use crate::models::{StartWorkflowRequest, Workflow, WorkflowDef}; @@ -13,12 +14,30 @@ use crate::models::{StartWorkflowRequest, Workflow, WorkflowDef}; #[derive(Clone)] pub struct WorkflowClient { api: ApiClient, + /// Event dispatcher used to publish `WorkflowStarted` / + /// `WorkflowStartFailure`. Defaults to an empty dispatcher (no-op); + /// construct via [`WorkflowClient::new_with_events`] to hook metrics in. + events: EventDispatcher, } impl WorkflowClient { - /// Create a new workflow client + /// Create a new workflow client without an event dispatcher. + /// + /// The `WorkflowStarted` / `WorkflowStartFailure` events will still be + /// published, but no listeners will see them. Use + /// [`WorkflowClient::new_with_events`] to wire a shared dispatcher (e.g. + /// one owned by [`TaskHandler`](crate::worker::TaskHandler)) so the + /// `MetricsCollector` can observe workflow-start metrics. pub fn new(api: ApiClient) -> Self { - Self { api } + Self { + api, + events: EventDispatcher::default(), + } + } + + /// Create a new workflow client wired to an existing [`EventDispatcher`]. 
+ pub fn new_with_events(api: ApiClient, events: EventDispatcher) -> Self { + Self { api, events } } /// Start a workflow asynchronously @@ -28,15 +47,40 @@ impl WorkflowClient { "Starting workflow" ); - let workflow_id: String = self.api.post_text("/workflow", request).await?; - - info!( - workflow_name = %request.name, - workflow_id = %workflow_id, - "Workflow started" - ); - - Ok(workflow_id) + // Compute input byte size up-front so it is available for both the + // success-path gauge and for the failure-path tracing. Uses the same + // JSON serialization that the transport will perform, so the reported + // bytes match what actually leaves this process. + let input_size_bytes = serde_json::to_vec(&request.input) + .map(|v| v.len()) + .unwrap_or(0); + + match self.api.post_text::("/workflow", request).await { + Ok(workflow_id) => { + info!( + workflow_name = %request.name, + workflow_id = %workflow_id, + "Workflow started" + ); + + self.events.publish_workflow_started(&WorkflowStarted::new( + &request.name, + request.version.unwrap_or(1), + input_size_bytes, + )); + + Ok(workflow_id) + } + Err(e) => { + let exception = exception_label(&e); + self.events + .publish_workflow_start_failure(&WorkflowStartFailure::new( + &request.name, + exception, + )); + Err(e) + } + } } /// Execute a workflow synchronously and wait for completion diff --git a/src/events/dispatcher.rs b/src/events/dispatcher.rs index d960afa..9aa3c6f 100644 --- a/src/events/dispatcher.rs +++ b/src/events/dispatcher.rs @@ -6,8 +6,9 @@ use std::sync::Arc; use tracing::error; use super::{ - PollCompleted, PollFailure, PollStarted, TaskExecutionCompleted, TaskExecutionFailure, - TaskExecutionStarted, TaskRunnerEventsListener, TaskUpdateFailure, + PollCompleted, PollFailure, PollSkippedPaused, PollStarted, TaskExecutionCompleted, + TaskExecutionFailure, TaskExecutionStarted, TaskRunnerEventsListener, TaskUpdateCompleted, + TaskUpdateFailure, ThreadUncaughtException, WorkflowStartFailure, 
WorkflowStarted, }; /// Async event dispatcher for task runner events @@ -64,7 +65,6 @@ impl EventDispatcher { /// Publish a poll started event pub fn publish_poll_started(&self, event: &PollStarted) { - // Clone listeners and release lock before calling listeners let listeners = self.get_listeners(); for listener in listeners { if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { @@ -99,6 +99,18 @@ impl EventDispatcher { } } + /// Publish a poll-skipped-due-to-pause event + pub fn publish_poll_skipped_paused(&self, event: &PollSkippedPaused) { + let listeners = self.get_listeners(); + for listener in listeners { + if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + listener.on_poll_skipped_paused(event); + })) { + error!("Listener panicked on poll_skipped_paused: {:?}", e); + } + } + } + /// Publish a task execution started event pub fn publish_task_execution_started(&self, event: &TaskExecutionStarted) { let listeners = self.get_listeners(); @@ -135,6 +147,18 @@ impl EventDispatcher { } } + /// Publish a task update completed event + pub fn publish_task_update_completed(&self, event: &TaskUpdateCompleted) { + let listeners = self.get_listeners(); + for listener in listeners { + if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + listener.on_task_update_completed(event); + })) { + error!("Listener panicked on task_update_completed: {:?}", e); + } + } + } + /// Publish a task update failure event pub fn publish_task_update_failure(&self, event: &TaskUpdateFailure) { let listeners = self.get_listeners(); @@ -146,6 +170,42 @@ impl EventDispatcher { } } } + + /// Publish an uncaught-panic event + pub fn publish_thread_uncaught_exception(&self, event: &ThreadUncaughtException) { + let listeners = self.get_listeners(); + for listener in listeners { + if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + listener.on_thread_uncaught_exception(event); + })) { + error!("Listener 
panicked on thread_uncaught_exception: {:?}", e); + } + } + } + + /// Publish a workflow started event + pub fn publish_workflow_started(&self, event: &WorkflowStarted) { + let listeners = self.get_listeners(); + for listener in listeners { + if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + listener.on_workflow_started(event); + })) { + error!("Listener panicked on workflow_started: {:?}", e); + } + } + } + + /// Publish a workflow start failure event + pub fn publish_workflow_start_failure(&self, event: &WorkflowStartFailure) { + let listeners = self.get_listeners(); + for listener in listeners { + if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + listener.on_workflow_start_failure(event); + })) { + error!("Listener panicked on workflow_start_failure: {:?}", e); + } + } + } } /// Synchronous event dispatcher (for use in non-async contexts) diff --git a/src/events/mod.rs b/src/events/mod.rs index 2dca0ca..47b77a9 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -2,10 +2,14 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. 
mod dispatcher; +mod exception; mod task_runner_events; pub use dispatcher::{EventDispatcher, SyncEventDispatcher}; +pub use exception::{exception_label, exception_label_for_panic, type_name_of}; pub use task_runner_events::{ - PollCompleted, PollFailure, PollStarted, TaskExecutionCompleted, TaskExecutionFailure, - TaskExecutionStarted, TaskRunnerEvent, TaskRunnerEventsListener, TaskUpdateFailure, + PollCompleted, PollFailure, PollSkippedPaused, PollStarted, TaskExecutionCompleted, + TaskExecutionFailure, TaskExecutionStarted, TaskRunnerEvent, TaskRunnerEventsListener, + TaskUpdateCompleted, TaskUpdateFailure, ThreadUncaughtException, WorkflowStartFailure, + WorkflowStarted, }; diff --git a/src/events/task_runner_events.rs b/src/events/task_runner_events.rs index 3d79e06..ea81cfc 100644 --- a/src/events/task_runner_events.rs +++ b/src/events/task_runner_events.rs @@ -90,7 +90,10 @@ pub struct PollFailure { pub task_type: String, pub worker_id: String, pub duration: Duration, + /// Human-readable error message (from `Display`). Not a metric label. pub error: String, + /// Canonical exception *type name* for the `exception` metric label. + pub exception: String, pub timestamp: DateTime, } @@ -100,12 +103,14 @@ impl PollFailure { worker_id: impl Into, duration: Duration, error: impl Into, + exception: impl Into, ) -> Self { Self { task_type: task_type.into(), worker_id: worker_id.into(), duration, error: error.into(), + exception: exception.into(), timestamp: Utc::now(), } } @@ -121,6 +126,34 @@ impl TaskRunnerEvent for PollFailure { } } +/// Event published when the runner skips a poll because the worker is paused. 
+#[derive(Debug, Clone)] +pub struct PollSkippedPaused { + pub task_type: String, + pub worker_id: String, + pub timestamp: DateTime, +} + +impl PollSkippedPaused { + pub fn new(task_type: impl Into, worker_id: impl Into) -> Self { + Self { + task_type: task_type.into(), + worker_id: worker_id.into(), + timestamp: Utc::now(), + } + } +} + +impl TaskRunnerEvent for PollSkippedPaused { + fn task_type(&self) -> &str { + &self.task_type + } + + fn timestamp(&self) -> DateTime { + self.timestamp + } +} + /// Event published when task execution starts #[derive(Debug, Clone)] pub struct TaskExecutionStarted { @@ -209,12 +242,16 @@ pub struct TaskExecutionFailure { pub workflow_instance_id: String, pub worker_id: String, pub duration: Duration, + /// Human-readable error message. Not a metric label. pub error: String, + /// Canonical exception *type name* for the `exception` metric label. + pub exception: String, pub is_retryable: bool, pub timestamp: DateTime, } impl TaskExecutionFailure { + #[allow(clippy::too_many_arguments)] pub fn new( task_type: impl Into, task_id: impl Into, @@ -222,6 +259,7 @@ impl TaskExecutionFailure { worker_id: impl Into, duration: Duration, error: impl Into, + exception: impl Into, is_retryable: bool, ) -> Self { Self { @@ -231,6 +269,7 @@ impl TaskExecutionFailure { worker_id: worker_id.into(), duration, error: error.into(), + exception: exception.into(), is_retryable, timestamp: Utc::now(), } @@ -247,6 +286,49 @@ impl TaskRunnerEvent for TaskExecutionFailure { } } +/// Event published when a `TaskClient::update_task*` call succeeds. +/// +/// Fires once per completed task; used to observe the +/// `task_update_time_seconds{status="SUCCESS"}` histogram. 
+#[derive(Debug, Clone)] +pub struct TaskUpdateCompleted { + pub task_type: String, + pub task_id: String, + pub workflow_instance_id: String, + pub worker_id: String, + pub duration: Duration, + pub timestamp: DateTime, +} + +impl TaskUpdateCompleted { + pub fn new( + task_type: impl Into, + task_id: impl Into, + workflow_instance_id: impl Into, + worker_id: impl Into, + duration: Duration, + ) -> Self { + Self { + task_type: task_type.into(), + task_id: task_id.into(), + workflow_instance_id: workflow_instance_id.into(), + worker_id: worker_id.into(), + duration, + timestamp: Utc::now(), + } + } +} + +impl TaskRunnerEvent for TaskUpdateCompleted { + fn task_type(&self) -> &str { + &self.task_type + } + + fn timestamp(&self) -> DateTime { + self.timestamp + } +} + /// Event published when task update fails after all retries #[derive(Debug, Clone)] pub struct TaskUpdateFailure { @@ -254,18 +336,26 @@ pub struct TaskUpdateFailure { pub task_id: String, pub workflow_instance_id: String, pub worker_id: String, + /// Wall-clock duration of the (final, failed) update attempt sequence. + pub duration: Duration, + /// Human-readable error message. Not a metric label. pub error: String, + /// Canonical exception *type name* for the `exception` metric label. 
+ pub exception: String, pub retry_count: u32, pub timestamp: DateTime, } impl TaskUpdateFailure { + #[allow(clippy::too_many_arguments)] pub fn new( task_type: impl Into, task_id: impl Into, workflow_instance_id: impl Into, worker_id: impl Into, + duration: Duration, error: impl Into, + exception: impl Into, retry_count: u32, ) -> Self { Self { @@ -273,7 +363,9 @@ impl TaskUpdateFailure { task_id: task_id.into(), workflow_instance_id: workflow_instance_id.into(), worker_id: worker_id.into(), + duration, error: error.into(), + exception: exception.into(), retry_count, timestamp: Utc::now(), } @@ -290,6 +382,83 @@ impl TaskRunnerEvent for TaskUpdateFailure { } } +/// Event published when a spawned worker task terminates with an uncaught +/// panic. Used to populate `thread_uncaught_exceptions_total`. +#[derive(Debug, Clone)] +pub struct ThreadUncaughtException { + pub task_type: String, + /// Canonical exception *type name* for the `exception` metric label. + pub exception: String, + pub timestamp: DateTime, +} + +impl ThreadUncaughtException { + pub fn new(task_type: impl Into, exception: impl Into) -> Self { + Self { + task_type: task_type.into(), + exception: exception.into(), + timestamp: Utc::now(), + } + } +} + +impl TaskRunnerEvent for ThreadUncaughtException { + fn task_type(&self) -> &str { + &self.task_type + } + + fn timestamp(&self) -> DateTime { + self.timestamp + } +} + +/// Event published by [`WorkflowClient::start_workflow`](crate::client::WorkflowClient::start_workflow) +/// after a successful call. Carries the serialized input byte size so the +/// `workflow_input_size_bytes` gauge can be updated. 
+#[derive(Debug, Clone)] +pub struct WorkflowStarted { + pub workflow_type: String, + pub version: i32, + pub input_size_bytes: usize, + pub timestamp: DateTime, +} + +impl WorkflowStarted { + pub fn new( + workflow_type: impl Into, + version: i32, + input_size_bytes: usize, + ) -> Self { + Self { + workflow_type: workflow_type.into(), + version, + input_size_bytes, + timestamp: Utc::now(), + } + } +} + +/// Event published by [`WorkflowClient::start_workflow`](crate::client::WorkflowClient::start_workflow) +/// when the HTTP call returns an error. Used to populate +/// `workflow_start_error_total{workflowType, exception}`. +#[derive(Debug, Clone)] +pub struct WorkflowStartFailure { + pub workflow_type: String, + /// Canonical exception *type name* for the `exception` metric label. + pub exception: String, + pub timestamp: DateTime, +} + +impl WorkflowStartFailure { + pub fn new(workflow_type: impl Into, exception: impl Into) -> Self { + Self { + workflow_type: workflow_type.into(), + exception: exception.into(), + timestamp: Utc::now(), + } + } +} + /// Listener trait for task runner events /// /// Implement this trait to receive task execution lifecycle events. 
@@ -305,6 +474,9 @@ pub trait TaskRunnerEventsListener: Send + Sync { /// Called when polling fails fn on_poll_failure(&self, _event: &PollFailure) {} + /// Called when a poll is skipped because the worker is paused + fn on_poll_skipped_paused(&self, _event: &PollSkippedPaused) {} + /// Called when task execution begins fn on_task_execution_started(&self, _event: &TaskExecutionStarted) {} @@ -314,8 +486,20 @@ pub trait TaskRunnerEventsListener: Send + Sync { /// Called when task execution fails fn on_task_execution_failure(&self, _event: &TaskExecutionFailure) {} + /// Called when a task update to the server completes successfully + fn on_task_update_completed(&self, _event: &TaskUpdateCompleted) {} + /// Called when task update fails after all retries fn on_task_update_failure(&self, _event: &TaskUpdateFailure) {} + + /// Called when a spawned worker task terminates via uncaught panic + fn on_thread_uncaught_exception(&self, _event: &ThreadUncaughtException) {} + + /// Called after a workflow is successfully started via `WorkflowClient` + fn on_workflow_started(&self, _event: &WorkflowStarted) {} + + /// Called when a `WorkflowClient::start_workflow` call fails + fn on_workflow_start_failure(&self, _event: &WorkflowStartFailure) {} } #[cfg(test)] diff --git a/src/http/api_client.rs b/src/http/api_client.rs index e454531..5f2556b 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -10,6 +10,7 @@ use tracing::{debug, error, info, warn}; use crate::configuration::Configuration; use crate::error::{ConductorError, Result}; +use crate::http::metrics::{HttpMetricsObserver, NoopHttpMetricsObserver}; /// Token response from authentication endpoint #[derive(Debug, serde::Deserialize)] @@ -55,6 +56,12 @@ pub struct ApiClient { token_refresh_lock: Arc>, /// Cached result of OSS detection (None = not yet probed) is_oss: Arc>>, + /// Shared HTTP metrics observer. 
Starts as a no-op; can be replaced at + /// runtime via [`ApiClient::set_http_metrics`] so that a + /// `MetricsCollector` built later (e.g. inside `TaskHandler::enable_metrics`) + /// can observe requests made by clients already vended from this + /// `ApiClient` (since clones share the same inner `Arc`). + http_metrics: Arc>>, } impl ApiClient { @@ -80,6 +87,7 @@ impl ApiClient { last_refresh_attempt: Arc::new(RwLock::new(None)), token_refresh_lock: Arc::new(Mutex::new(())), is_oss: Arc::new(RwLock::new(None)), + http_metrics: Arc::new(parking_lot::RwLock::new(NoopHttpMetricsObserver::arc())), }) } @@ -88,6 +96,76 @@ impl ApiClient { &self.base_url } + /// Install an [`HttpMetricsObserver`] that will be invoked after every + /// request completes. Replaces the previously-installed observer. + /// + /// This swap is visible to every clone of this `ApiClient` (they share the + /// same inner `Arc>`), so metrics can be enabled after service + /// clients have already been vended. + pub fn set_http_metrics(&self, observer: Arc) { + *self.http_metrics.write() = observer; + } + + /// Snapshot the current observer. Internal helper so we drop the read + /// lock before the request hot-path actually calls `observe`. + #[inline] + fn http_metrics(&self) -> Arc { + Arc::clone(&self.http_metrics.read()) + } + + /// Unified post-request bookkeeping: tracing log + observer callback. + /// + /// `path` should be the interpolated request path (no query string). + /// `status_str` is the HTTP status code rendered as a string, or `"0"` + /// for pre-response transport errors. 
+ #[inline] + fn record_request( + &self, + method: &str, + path: &str, + status_str: &str, + duration: Duration, + ) { + debug!( + method = method, + url = %format!("{}{}", self.base_url, path), + status = status_str, + duration_ms = %duration.as_millis(), + "API request completed" + ); + self.http_metrics() + .observe(method, path, status_str, duration); + } + + /// Convenience: call [`record_request`](Self::record_request) with a + /// successful response's status code. + #[inline] + fn record_response(&self, method: &str, path: &str, status: StatusCode, duration: Duration) { + self.record_request(method, path, status.as_str(), duration); + } + + /// Send a prepared request, recording the outcome (success *and* transport + /// failures) to the `http_metrics` observer and the `debug!` tracing log. + /// + /// Transport-level failures (no HTTP status produced) are observed with + /// `status = "0"`, matching the convention of the canonical SDK metrics + /// harmonization plan. + async fn send_observed( + &self, + method: &str, + path: &str, + request: reqwest::RequestBuilder, + ) -> Result { + let start = Instant::now(); + let result = request.send().await; + let duration = start.elapsed(); + match &result { + Ok(resp) => self.record_response(method, path, resp.status(), duration), + Err(_) => self.record_request(method, path, "0", duration), + } + result.map_err(ConductorError::Http) + } + /// GET request pub async fn get(&self, path: &str) -> Result { self.request::<(), T>(reqwest::Method::GET, path, None) @@ -101,23 +179,12 @@ impl ApiClient { params: &[(&str, &str)], ) -> Result { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.get(&url); request = self.add_auth_header(request).await?; request = request.query(params); - let response = request.send().await?; - let duration = start.elapsed(); - - debug!( - method = "GET", - url = %url, - status = %response.status(), - duration_ms = 
%duration.as_millis(), - "API request completed" - ); - + let response = self.send_observed("GET", path, request).await?; self.handle_response(response).await } @@ -134,23 +201,13 @@ impl ApiClient { /// POST request returning raw text pub async fn post_text(&self, path: &str, body: &B) -> Result { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = request.send().await?; - let duration = start.elapsed(); - + let response = self.send_observed("POST", path, request).await?; let status = response.status(); - debug!( - method = "POST", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); if status.is_success() { Ok(response.text().await?) @@ -178,23 +235,13 @@ impl ApiClient { /// DELETE request with no response body pub async fn delete_no_content(&self, path: &str) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.delete(&url); request = self.add_auth_header(request).await?; - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("DELETE", path, request).await?; let status = response.status(); - debug!( - method = "DELETE", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() || status == StatusCode::NO_CONTENT { Ok(()) } else { @@ -209,24 +256,14 @@ impl ApiClient { body: &B, ) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.delete(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("DELETE", path, request).await?; let status = 
response.status(); - debug!( - method = "DELETE", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() || status == StatusCode::NO_CONTENT { Ok(()) } else { @@ -237,24 +274,14 @@ impl ApiClient { /// DELETE request with query parameters pub async fn delete_with_params(&self, path: &str, params: &[(&str, &str)]) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.delete(&url); request = self.add_auth_header(request).await?; request = request.query(params); - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("DELETE", path, request).await?; let status = response.status(); - debug!( - method = "DELETE", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() || status == StatusCode::NO_CONTENT { Ok(()) } else { @@ -269,24 +296,14 @@ impl ApiClient { body: &B, ) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("POST", path, request).await?; let status = response.status(); - debug!( - method = "POST", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() { Ok(()) } else { @@ -297,45 +314,24 @@ impl ApiClient { /// POST request with no body pub async fn post_no_body(&self, path: &str) -> Result { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; - let response = request.send().await?; - let duration = start.elapsed(); - - debug!( - method 
= "POST", - url = %url, - status = %response.status(), - duration_ms = %duration.as_millis(), - "API request completed" - ); - + let response = self.send_observed("POST", path, request).await?; self.handle_response(response).await } /// POST request with no body and no response pub async fn post_no_body_no_response(&self, path: &str) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("POST", path, request).await?; let status = response.status(); - debug!( - method = "POST", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() { Ok(()) } else { @@ -346,24 +342,14 @@ impl ApiClient { /// PUT request with no response pub async fn put_no_response(&self, path: &str, body: &B) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.put(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("PUT", path, request).await?; let status = response.status(); - debug!( - method = "PUT", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() { Ok(()) } else { @@ -374,25 +360,15 @@ impl ApiClient { /// PUT request with raw text body pub async fn put_raw(&self, path: &str, body: &str) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.put(&url); request = self.add_auth_header(request).await?; request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = 
request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("PUT", path, request).await?; let status = response.status(); - debug!( - method = "PUT", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() { Ok(()) } else { @@ -408,24 +384,13 @@ impl ApiClient { params: &[(&str, &str)], ) -> Result { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; request = request.query(params); request = request.json(body); - let response = request.send().await?; - let duration = start.elapsed(); - - debug!( - method = "POST", - url = %url, - status = %response.status(), - duration_ms = %duration.as_millis(), - "API request completed" - ); - + let response = self.send_observed("POST", path, request).await?; self.handle_response(response).await } @@ -437,7 +402,6 @@ impl ApiClient { params: &[(&str, &str)], ) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; @@ -445,18 +409,9 @@ impl ApiClient { request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("POST", path, request).await?; let status = response.status(); - debug!( - method = "POST", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() { Ok(()) } else { @@ -472,7 +427,6 @@ impl ApiClient { params: &[(&str, &str)], ) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.put(&url); request = self.add_auth_header(request).await?; @@ -480,18 +434,9 @@ impl ApiClient { request = 
request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("PUT", path, request).await?; let status = response.status(); - debug!( - method = "PUT", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() { Ok(()) } else { @@ -502,23 +447,13 @@ impl ApiClient { /// GET request with no response pub async fn get_no_response(&self, path: &str) -> Result<()> { let url = format!("{}{}", self.base_url, path); - let start = Instant::now(); let mut request = self.client.get(&url); request = self.add_auth_header(request).await?; - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed("GET", path, request).await?; let status = response.status(); - debug!( - method = "GET", - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - if status.is_success() { Ok(()) } else { @@ -534,9 +469,8 @@ impl ApiClient { body: Option<&B>, ) -> Result { let url = format!("{}{}", self.base_url, path); + let method_str = method.as_str().to_string(); - // First attempt - let start = Instant::now(); let mut request = self.client.request(method.clone(), &url); request = self.add_auth_header(request).await?; @@ -544,46 +478,22 @@ impl ApiClient { request = request.json(b); } - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed(&method_str, path, request).await?; let status = response.status(); - debug!( - method = %method, - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - // If 401, try refreshing token and retry once - if self.is_token_expired_error(status) { + if self.is_token_expired_error(status) && self.force_refresh_token().await.is_ok() { debug!(method = %method, url = 
%url, "Got 401, refreshing token and retrying"); - // Force refresh token - if self.force_refresh_token().await.is_ok() { - // Retry the request - let start = Instant::now(); - let mut request = self.client.request(method.clone(), &url); - request = self.add_auth_header(request).await?; + let mut request = self.client.request(method.clone(), &url); + request = self.add_auth_header(request).await?; - if let Some(b) = body { - request = request.json(b); - } - - let response = request.send().await?; - let duration = start.elapsed(); - - debug!( - method = %method, - url = %url, - status = %response.status(), - duration_ms = %duration.as_millis(), - "API request retry completed" - ); - - return self.handle_response(response).await; + if let Some(b) = body { + request = request.json(b); } + + let response = self.send_observed(&method_str, path, request).await?; + return self.handle_response(response).await; } self.handle_response(response).await @@ -597,50 +507,25 @@ impl ApiClient { body: &B, ) -> Result { let url = format!("{}{}", self.base_url, path); + let method_str = method.as_str().to_string(); - // First attempt - let start = Instant::now(); let mut request = self.client.request(method.clone(), &url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = request.send().await?; - let duration = start.elapsed(); + let response = self.send_observed(&method_str, path, request).await?; let status = response.status(); - debug!( - method = %method, - url = %url, - status = %status, - duration_ms = %duration.as_millis(), - "API request completed" - ); - // If 401, try refreshing token and retry once - if self.is_token_expired_error(status) { + if self.is_token_expired_error(status) && self.force_refresh_token().await.is_ok() { debug!(method = %method, url = %url, "Got 401, refreshing token and retrying"); - // Force refresh token - if self.force_refresh_token().await.is_ok() { - // Retry the request - let start = Instant::now(); 
- let mut request = self.client.request(method.clone(), &url); - request = self.add_auth_header(request).await?; - request = request.json(body); - - let response = request.send().await?; - let duration = start.elapsed(); - - debug!( - method = %method, - url = %url, - status = %response.status(), - duration_ms = %duration.as_millis(), - "API request retry completed" - ); - - return self.handle_response(response).await; - } + let mut request = self.client.request(method.clone(), &url); + request = self.add_auth_header(request).await?; + request = request.json(body); + + let response = self.send_observed(&method_str, path, request).await?; + return self.handle_response(response).await; } self.handle_response(response).await @@ -839,12 +724,15 @@ impl ApiClient { "keySecret": secret }); - let response = match self.client.post(&url).json(&body).send().await { + let response = match self + .send_observed("POST", "/token", self.client.post(&url).json(&body)) + .await + { Ok(resp) => resp, Err(e) => { *self.auth_failures.write().await += 1; error!(error = %e, "Network error during token refresh"); - return Err(e.into()); + return Err(e); } }; @@ -927,7 +815,10 @@ impl ApiClient { // Probe /token let url = format!("{}/token", self.base_url); let body = serde_json::json!({"keyId": "probe", "keySecret": "probe"}); - let is_oss = match self.client.post(&url).json(&body).send().await { + let is_oss = match self + .send_observed("POST", "/token", self.client.post(&url).json(&body)) + .await + { Ok(resp) => resp.status() == StatusCode::NOT_FOUND, Err(_) => false, }; diff --git a/src/http/mod.rs b/src/http/mod.rs index 02d6756..f37d512 100644 --- a/src/http/mod.rs +++ b/src/http/mod.rs @@ -2,5 +2,7 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. 
mod api_client; +mod metrics; pub use api_client::ApiClient; +pub use metrics::{HttpMetricsObserver, NoopHttpMetricsObserver}; diff --git a/src/metrics/collector.rs b/src/metrics/collector.rs index 20190a7..7fc75d7 100644 --- a/src/metrics/collector.rs +++ b/src/metrics/collector.rs @@ -1,204 +1,298 @@ // Copyright {{.Year}} Conductor OSS // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. +//! Prometheus implementation of the canonical Conductor SDK metric catalog. +//! +//! Metric names, label names, label values, and types here are intentionally +//! identical to the Java, Go, and Python SDKs. See `sdk-metrics-harmonization.md` +//! in the `longrunning-wfstest` repo for the source-of-truth catalog. + use parking_lot::RwLock; use prometheus::{CounterVec, GaugeVec, HistogramOpts, HistogramVec, Opts, Registry}; use std::collections::HashMap; use std::sync::Arc; +use std::time::Duration; use tracing::{error, info}; use crate::events::{ - PollCompleted, PollFailure, PollStarted, TaskExecutionCompleted, TaskExecutionFailure, - TaskExecutionStarted, TaskRunnerEventsListener, TaskUpdateFailure, + PollCompleted, PollFailure, PollSkippedPaused, PollStarted, TaskExecutionCompleted, + TaskExecutionFailure, TaskExecutionStarted, TaskRunnerEventsListener, TaskUpdateCompleted, + TaskUpdateFailure, ThreadUncaughtException, WorkflowStartFailure, WorkflowStarted, }; +use crate::http::HttpMetricsObserver; use super::MetricsSettings; -/// Prometheus metrics collector implementing the TaskRunnerEventsListener trait +/// Canonical time histogram buckets — identical to Java/Go/Python SDKs. +/// +/// These buckets are finer-grained at the millisecond range than Prometheus' +/// defaults, reflecting Conductor's sub-second worker poll/update latencies. 
+const SECONDS_BUCKETS: &[f64] = &[ + 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, +]; + +/// Prometheus metrics collector implementing the canonical Conductor SDK +/// metric catalog. +/// +/// Also implements [`HttpMetricsObserver`] so the same collector can be +/// installed into [`ApiClient`](crate::http::ApiClient) and receive HTTP-level +/// observations for `http_api_client_request_seconds`. pub struct MetricsCollector { settings: MetricsSettings, registry: Registry, - // Counters + // -- Counters -- task_poll_total: CounterVec, task_poll_error_total: CounterVec, + task_execution_started_total: CounterVec, task_execute_error_total: CounterVec, task_update_error_total: CounterVec, task_paused_total: CounterVec, - - // Histograms + task_ack_error_total: CounterVec, + task_ack_failed_total: CounterVec, + task_execution_queue_full_total: CounterVec, + external_payload_used_total: CounterVec, + thread_uncaught_exceptions_total: CounterVec, + workflow_start_error_total: CounterVec, + + // -- Histograms -- task_poll_time_seconds: HistogramVec, task_execute_time_seconds: HistogramVec, - #[allow(dead_code)] // Registered for future use when task update success events are added task_update_time_seconds: HistogramVec, + http_api_client_request_seconds: HistogramVec, - // Gauges + // -- Gauges -- task_result_size_bytes: GaugeVec, + workflow_input_size_bytes: GaugeVec, active_workers: GaugeVec, - // Internal tracking + // Internal tracking — keeps the `active_workers` gauge in sync with the + // real active-task count when started/completed/failed events arrive. active_task_counts: Arc>>, } +/// Helper: build a `CounterVec` with the canonical namespace and label set, +/// panicking with a clear message if construction or registration fails. 
+fn make_counter( + registry: &Registry, + namespace: &str, + name: &'static str, + help: &'static str, + labels: &[&str], +) -> CounterVec { + let counter = CounterVec::new(Opts::new(name, help).namespace(namespace), labels) + .unwrap_or_else(|e| panic!("Failed to create counter {name}: {e}")); + registry + .register(Box::new(counter.clone())) + .unwrap_or_else(|e| panic!("Failed to register counter {name}: {e}")); + counter +} + +/// Helper: build a histogram with the canonical SDK buckets. +fn make_histogram( + registry: &Registry, + namespace: &str, + name: &'static str, + help: &'static str, + labels: &[&str], +) -> HistogramVec { + let histogram = HistogramVec::new( + HistogramOpts::new(name, help) + .namespace(namespace) + .buckets(SECONDS_BUCKETS.to_vec()), + labels, + ) + .unwrap_or_else(|e| panic!("Failed to create histogram {name}: {e}")); + registry + .register(Box::new(histogram.clone())) + .unwrap_or_else(|e| panic!("Failed to register histogram {name}: {e}")); + histogram +} + +/// Helper: build a gauge vector. 
+fn make_gauge( + registry: &Registry, + namespace: &str, + name: &'static str, + help: &'static str, + labels: &[&str], +) -> GaugeVec { + let gauge = GaugeVec::new(Opts::new(name, help).namespace(namespace), labels) + .unwrap_or_else(|e| panic!("Failed to create gauge {name}: {e}")); + registry + .register(Box::new(gauge.clone())) + .unwrap_or_else(|e| panic!("Failed to register gauge {name}: {e}")); + gauge +} + impl MetricsCollector { /// Create a new metrics collector pub fn new(settings: MetricsSettings) -> Self { let registry = Registry::new(); - let namespace = &settings.namespace; - - // Create counters - let task_poll_total = CounterVec::new( - Opts::new("task_poll_total", "Total number of task poll attempts").namespace(namespace), - &["task_type"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_poll_total counter: {}", e); - }); - - let task_poll_error_total = CounterVec::new( - Opts::new("task_poll_error_total", "Total number of task poll errors") - .namespace(namespace), - &["task_type", "error_type"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_poll_error_total counter: {}", e); - }); - - let task_execute_error_total = CounterVec::new( - Opts::new( - "task_execute_error_total", - "Total number of task execution errors", - ) - .namespace(namespace), - &["task_type", "error_type"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_execute_error_total counter: {}", e); - }); - - let task_update_error_total = CounterVec::new( - Opts::new( - "task_update_error_total", - "Total number of task update errors", - ) - .namespace(namespace), - &["task_type"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_update_error_total counter: {}", e); - }); - - let task_paused_total = CounterVec::new( - Opts::new("task_paused_total", "Number of polls while worker paused") - .namespace(namespace), - &["task_type"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_paused_total counter: {}", e); - }); - - // 
Create histograms with default buckets - let buckets = vec![ - 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, - ]; - - let task_poll_time_seconds = HistogramVec::new( - HistogramOpts::new("task_poll_time_seconds", "Task poll latency in seconds") - .namespace(namespace) - .buckets(buckets.clone()), - &["task_type", "status"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_poll_time_seconds histogram: {}", e); - }); - - let task_execute_time_seconds = HistogramVec::new( - HistogramOpts::new( - "task_execute_time_seconds", - "Task execution time in seconds", - ) - .namespace(namespace) - .buckets(buckets.clone()), - &["task_type", "status"], - ) - .unwrap_or_else(|e| { - panic!( - "Failed to create task_execute_time_seconds histogram: {}", - e - ); - }); - - let task_update_time_seconds = HistogramVec::new( - HistogramOpts::new("task_update_time_seconds", "Task update latency in seconds") - .namespace(namespace) - .buckets(buckets), - &["task_type", "status"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_update_time_seconds histogram: {}", e); - }); - - // Create gauges - let task_result_size_bytes = GaugeVec::new( - Opts::new("task_result_size_bytes", "Size of task result payload").namespace(namespace), - &["task_type"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create task_result_size_bytes gauge: {}", e); - }); - - let active_workers = GaugeVec::new( - Opts::new("active_workers", "Number of active workers").namespace(namespace), - &["task_type"], - ) - .unwrap_or_else(|e| { - panic!("Failed to create active_workers gauge: {}", e); - }); - - // Register metrics - if let Err(e) = registry.register(Box::new(task_poll_total.clone())) { - panic!("Failed to register task_poll_total: {}", e); - } - if let Err(e) = registry.register(Box::new(task_poll_error_total.clone())) { - panic!("Failed to register task_poll_error_total: {}", e); - } - if let Err(e) = registry.register(Box::new(task_execute_error_total.clone())) 
{ - panic!("Failed to register task_execute_error_total: {}", e); - } - if let Err(e) = registry.register(Box::new(task_update_error_total.clone())) { - panic!("Failed to register task_update_error_total: {}", e); - } - if let Err(e) = registry.register(Box::new(task_paused_total.clone())) { - panic!("Failed to register task_paused_total: {}", e); - } - if let Err(e) = registry.register(Box::new(task_poll_time_seconds.clone())) { - panic!("Failed to register task_poll_time_seconds: {}", e); - } - if let Err(e) = registry.register(Box::new(task_execute_time_seconds.clone())) { - panic!("Failed to register task_execute_time_seconds: {}", e); - } - if let Err(e) = registry.register(Box::new(task_update_time_seconds.clone())) { - panic!("Failed to register task_update_time_seconds: {}", e); - } - if let Err(e) = registry.register(Box::new(task_result_size_bytes.clone())) { - panic!("Failed to register task_result_size_bytes: {}", e); - } - if let Err(e) = registry.register(Box::new(active_workers.clone())) { - panic!("Failed to register active_workers: {}", e); - } + let ns = &settings.namespace; + + // Counters + let task_poll_total = make_counter( + &registry, + ns, + "task_poll_total", + "Total number of task poll attempts", + &["taskType"], + ); + let task_poll_error_total = make_counter( + &registry, + ns, + "task_poll_error_total", + "Total number of task poll errors", + &["taskType", "exception"], + ); + let task_execution_started_total = make_counter( + &registry, + ns, + "task_execution_started_total", + "Count of task executions started", + &["taskType"], + ); + let task_execute_error_total = make_counter( + &registry, + ns, + "task_execute_error_total", + "Total number of task execution errors", + &["taskType", "exception"], + ); + let task_update_error_total = make_counter( + &registry, + ns, + "task_update_error_total", + "Total number of task update errors", + &["taskType", "exception"], + ); + let task_paused_total = make_counter( + &registry, + ns, + "task_paused_total", 
+ "Number of polls skipped because worker was paused", + &["taskType"], + ); + let task_ack_error_total = make_counter( + &registry, + ns, + "task_ack_error_total", + "Count of task acknowledgement errors (surface-only in rust-sdk)", + &["taskType", "exception"], + ); + let task_ack_failed_total = make_counter( + &registry, + ns, + "task_ack_failed_total", + "Count of task acknowledgement failures (surface-only in rust-sdk)", + &["taskType"], + ); + let task_execution_queue_full_total = make_counter( + &registry, + ns, + "task_execution_queue_full_total", + "Count of executions dropped because the local execution queue was full \ + (surface-only in rust-sdk; tokio `Semaphore` never rejects)", + &["taskType"], + ); + let external_payload_used_total = make_counter( + &registry, + ns, + "external_payload_used_total", + "Count of times an external payload store was used \ + (surface-only in rust-sdk; reserved for future large-payload support)", + &["entityName", "operation", "payloadType"], + ); + let thread_uncaught_exceptions_total = make_counter( + &registry, + ns, + "thread_uncaught_exceptions_total", + "Count of panics escaping worker task bodies", + &["taskType", "exception"], + ); + let workflow_start_error_total = make_counter( + &registry, + ns, + "workflow_start_error_total", + "Count of WorkflowClient::start_workflow failures", + &["workflowType", "exception"], + ); + + // Histograms + let task_poll_time_seconds = make_histogram( + &registry, + ns, + "task_poll_time_seconds", + "Task poll latency in seconds", + &["taskType", "status"], + ); + let task_execute_time_seconds = make_histogram( + &registry, + ns, + "task_execute_time_seconds", + "Task execution time in seconds", + &["taskType", "status"], + ); + let task_update_time_seconds = make_histogram( + &registry, + ns, + "task_update_time_seconds", + "Task update latency in seconds", + &["taskType", "status"], + ); + let http_api_client_request_seconds = make_histogram( + &registry, + ns, + "http_api_client_request_seconds", + 
"Conductor API HTTP client request latency in seconds", + &["method", "uri", "status"], + ); + + // Gauges + let task_result_size_bytes = make_gauge( + &registry, + ns, + "task_result_size_bytes", + "Size of task result payload in bytes", + &["taskType"], + ); + let workflow_input_size_bytes = make_gauge( + &registry, + ns, + "workflow_input_size_bytes", + "Size of workflow input payload in bytes at start_workflow time", + &["workflowType"], + ); + let active_workers = make_gauge( + &registry, + ns, + "active_workers", + "Number of in-flight task executions", + &["taskType"], + ); Self { settings, registry, task_poll_total, task_poll_error_total, + task_execution_started_total, task_execute_error_total, task_update_error_total, task_paused_total, + task_ack_error_total, + task_ack_failed_total, + task_execution_queue_full_total, + external_payload_used_total, + thread_uncaught_exceptions_total, + workflow_start_error_total, task_poll_time_seconds, task_execute_time_seconds, task_update_time_seconds, + http_api_client_request_seconds, task_result_size_bytes, + workflow_input_size_bytes, active_workers, active_task_counts: Arc::new(RwLock::new(HashMap::new())), } @@ -225,18 +319,51 @@ impl MetricsCollector { }) } - /// Increment task paused counter - pub fn increment_task_paused(&self, task_type: &str) { - self.task_paused_total.with_label_values(&[task_type]).inc(); - } - - /// Set active worker count + /// Set the active worker count for a task type. pub fn set_active_workers(&self, task_type: &str, count: f64) { self.active_workers .with_label_values(&[task_type]) .set(count); } + /// Surface-only: increment `task_ack_error_total`. The current rust-sdk + /// does not perform a separate ack RPC (poll returns tasks directly), so + /// this counter is registered to keep the metric surface identical to + /// Java/Go/Python but is never incremented by the SDK itself. Kept + /// available for user code that performs its own acknowledgement flow. 
+ pub fn increment_task_ack_error(&self, task_type: &str, exception: &str) { + self.task_ack_error_total + .with_label_values(&[task_type, exception]) + .inc(); + } + + /// Surface-only: increment `task_ack_failed_total`. + pub fn increment_task_ack_failed(&self, task_type: &str) { + self.task_ack_failed_total + .with_label_values(&[task_type]) + .inc(); + } + + /// Surface-only: increment `task_execution_queue_full_total`. + pub fn increment_task_execution_queue_full(&self, task_type: &str) { + self.task_execution_queue_full_total + .with_label_values(&[task_type]) + .inc(); + } + + /// Surface-only: increment `external_payload_used_total`. Reserved for + /// future large-payload external-storage support. + pub fn increment_external_payload_used( + &self, + entity_name: &str, + operation: &str, + payload_type: &str, + ) { + self.external_payload_used_total + .with_label_values(&[entity_name, operation, payload_type]) + .inc(); + } + /// Start HTTP metrics server (if configured) pub async fn start_http_server(&self) -> Option> { if let Some(port) = self.settings.http_port { @@ -271,22 +398,27 @@ impl MetricsCollector { let mut buffer = Vec::new(); match encoder.encode(&metric_families, &mut buffer) { - Ok(()) => { - Response::builder() - .status(200) - .header("Content-Type", "text/plain; charset=utf-8") - .body(Body::from(buffer)) - .unwrap_or_else(|e| { - error!(error = %e, "Failed to build metrics response"); - Response::new(Body::from("Internal Server Error")) - }) - } + Ok(()) => Response::builder() + .status(200) + .header( + "Content-Type", + "text/plain; charset=utf-8", + ) + .body(Body::from(buffer)) + .unwrap_or_else(|e| { + error!(error = %e, "Failed to build metrics response"); + Response::new(Body::from("Internal Server Error")) + }), Err(e) => { error!(error = %e, "Failed to encode metrics"); Response::builder() .status(500) .body(Body::from("Internal Server Error")) - .unwrap_or_else(|_| Response::new(Body::from("Internal Server Error"))) + 
.unwrap_or_else(|_| { + Response::new(Body::from( + "Internal Server Error", + )) + }) } } } else if req.uri().path() == health_path { @@ -326,6 +458,10 @@ impl MetricsCollector { } } +/// Canonical `status` label values. Match Java `Status.SUCCESS` / `Status.FAILURE`. +const STATUS_SUCCESS: &str = "SUCCESS"; +const STATUS_FAILURE: &str = "FAILURE"; + impl TaskRunnerEventsListener for MetricsCollector { fn on_poll_started(&self, event: &PollStarted) { self.task_poll_total @@ -335,22 +471,31 @@ impl TaskRunnerEventsListener for MetricsCollector { fn on_poll_completed(&self, event: &PollCompleted) { self.task_poll_time_seconds - .with_label_values(&[&event.task_type, "success"]) + .with_label_values(&[&event.task_type, STATUS_SUCCESS]) .observe(event.duration.as_secs_f64()); } fn on_poll_failure(&self, event: &PollFailure) { self.task_poll_time_seconds - .with_label_values(&[&event.task_type, "failure"]) + .with_label_values(&[&event.task_type, STATUS_FAILURE]) .observe(event.duration.as_secs_f64()); self.task_poll_error_total - .with_label_values(&[&event.task_type, "poll_error"]) + .with_label_values(&[&event.task_type, &event.exception]) + .inc(); + } + + fn on_poll_skipped_paused(&self, event: &PollSkippedPaused) { + self.task_paused_total + .with_label_values(&[&event.task_type]) .inc(); } fn on_task_execution_started(&self, event: &TaskExecutionStarted) { - // Track active tasks + self.task_execution_started_total + .with_label_values(&[&event.task_type]) + .inc(); + let mut counts = self.active_task_counts.write(); let count = counts.entry(event.task_type.clone()).or_insert(0); *count += 1; @@ -361,7 +506,7 @@ impl TaskRunnerEventsListener for MetricsCollector { fn on_task_execution_completed(&self, event: &TaskExecutionCompleted) { self.task_execute_time_seconds - .with_label_values(&[&event.task_type, "success"]) + .with_label_values(&[&event.task_type, STATUS_SUCCESS]) .observe(event.duration.as_secs_f64()); if let Some(size) = event.output_size_bytes { @@ 
-370,96 +515,209 @@ impl TaskRunnerEventsListener for MetricsCollector { .set(size as f64); } - // Track active tasks - let mut counts = self.active_task_counts.write(); - if let Some(count) = counts.get_mut(&event.task_type) { - *count = (*count - 1).max(0); - self.active_workers - .with_label_values(&[&event.task_type]) - .set(*count as f64); - } + self.decrement_active(&event.task_type); } fn on_task_execution_failure(&self, event: &TaskExecutionFailure) { self.task_execute_time_seconds - .with_label_values(&[&event.task_type, "failure"]) + .with_label_values(&[&event.task_type, STATUS_FAILURE]) .observe(event.duration.as_secs_f64()); self.task_execute_error_total - .with_label_values(&[&event.task_type, "execution_error"]) + .with_label_values(&[&event.task_type, &event.exception]) + .inc(); + + self.decrement_active(&event.task_type); + } + + fn on_task_update_completed(&self, event: &TaskUpdateCompleted) { + self.task_update_time_seconds + .with_label_values(&[&event.task_type, STATUS_SUCCESS]) + .observe(event.duration.as_secs_f64()); + } + + fn on_task_update_failure(&self, event: &TaskUpdateFailure) { + self.task_update_time_seconds + .with_label_values(&[&event.task_type, STATUS_FAILURE]) + .observe(event.duration.as_secs_f64()); + + self.task_update_error_total + .with_label_values(&[&event.task_type, &event.exception]) + .inc(); + } + + fn on_thread_uncaught_exception(&self, event: &ThreadUncaughtException) { + self.thread_uncaught_exceptions_total + .with_label_values(&[&event.task_type, &event.exception]) + .inc(); + } + + fn on_workflow_started(&self, event: &WorkflowStarted) { + self.workflow_input_size_bytes + .with_label_values(&[&event.workflow_type]) + .set(event.input_size_bytes as f64); + } + + fn on_workflow_start_failure(&self, event: &WorkflowStartFailure) { + self.workflow_start_error_total + .with_label_values(&[&event.workflow_type, &event.exception]) .inc(); + } +} - // Track active tasks +impl MetricsCollector { + #[inline] + fn 
decrement_active(&self, task_type: &str) { let mut counts = self.active_task_counts.write(); - if let Some(count) = counts.get_mut(&event.task_type) { + if let Some(count) = counts.get_mut(task_type) { *count = (*count - 1).max(0); self.active_workers - .with_label_values(&[&event.task_type]) + .with_label_values(&[task_type]) .set(*count as f64); } } +} - fn on_task_update_failure(&self, event: &TaskUpdateFailure) { - self.task_update_error_total - .with_label_values(&[&event.task_type]) - .inc(); +impl HttpMetricsObserver for MetricsCollector { + fn observe(&self, method: &str, uri: &str, status: &str, duration: Duration) { + self.http_api_client_request_seconds + .with_label_values(&[method, uri, status]) + .observe(duration.as_secs_f64()); } } #[cfg(test)] mod tests { use super::*; - use std::time::Duration; #[test] fn test_metrics_collector_creation() { - let settings = MetricsSettings::default(); - let collector = MetricsCollector::new(settings); + let collector = MetricsCollector::new(MetricsSettings::default()); - // Should be able to gather metrics (some labels need data first) let event = PollStarted::new("test_task", "worker-1", 10); collector.on_poll_started(&event); let output = collector.gather(); assert!(output.contains("task_poll_total")); + assert!( + !output.contains("conductor_task_poll_total"), + "default namespace should be empty" + ); } #[test] fn test_poll_metrics() { - let settings = MetricsSettings::default(); - let collector = MetricsCollector::new(settings); + let collector = MetricsCollector::new(MetricsSettings::default()); - let event = PollStarted::new("test_task", "worker-1", 10); - collector.on_poll_started(&event); - - let event = PollCompleted::new("test_task", "worker-1", Duration::from_millis(50), 5); - collector.on_poll_completed(&event); + collector.on_poll_started(&PollStarted::new("test_task", "worker-1", 10)); + collector.on_poll_completed(&PollCompleted::new( + "test_task", + "worker-1", + Duration::from_millis(50), + 5, + 
)); let output = collector.gather(); - assert!(output.contains("conductor_task_poll_total")); + assert!(output.contains("task_poll_total")); assert!(output.contains("test_task")); + assert!(output.contains("taskType=\"test_task\"")); + assert!(output.contains("status=\"SUCCESS\"")); } #[test] fn test_execution_metrics() { - let settings = MetricsSettings::default(); - let collector = MetricsCollector::new(settings); - - let start_event = TaskExecutionStarted::new("test_task", "task-1", "wf-1", "worker-1"); - collector.on_task_execution_started(&start_event); + let collector = MetricsCollector::new(MetricsSettings::default()); - let complete_event = TaskExecutionCompleted::new( + collector.on_task_execution_started(&TaskExecutionStarted::new( + "test_task", + "task-1", + "wf-1", + "worker-1", + )); + collector.on_task_execution_completed(&TaskExecutionCompleted::new( "test_task", "task-1", "wf-1", "worker-1", Duration::from_millis(100), Some(1024), + )); + + let output = collector.gather(); + assert!(output.contains("task_execute_time_seconds")); + assert!(output.contains("task_result_size_bytes")); + assert!(output.contains("task_execution_started_total")); + } + + #[test] + fn test_failure_metrics_use_exception_label() { + let collector = MetricsCollector::new(MetricsSettings::default()); + + collector.on_task_execution_failure(&TaskExecutionFailure::new( + "test_task", + "task-1", + "wf-1", + "worker-1", + Duration::from_millis(5), + "boom", + "Worker", + true, + )); + + let output = collector.gather(); + assert!(output.contains("task_execute_error_total")); + assert!(output.contains("exception=\"Worker\"")); + assert!(output.contains("status=\"FAILURE\"")); + } + + #[test] + fn test_workflow_metrics() { + let collector = MetricsCollector::new(MetricsSettings::default()); + + collector.on_workflow_started(&WorkflowStarted::new("wf_a", 1, 128)); + collector.on_workflow_start_failure(&WorkflowStartFailure::new("wf_b", "Server")); + + let output = 
collector.gather(); + assert!(output.contains("workflow_input_size_bytes")); + assert!(output.contains("workflowType=\"wf_a\"")); + assert!(output.contains("workflow_start_error_total")); + assert!(output.contains("workflowType=\"wf_b\"")); + assert!(output.contains("exception=\"Server\"")); + } + + #[test] + fn test_http_observer() { + let collector = MetricsCollector::new(MetricsSettings::default()); + + ::observe( + &collector, + "GET", + "/tasks/poll/batch/my_worker", + "200", + Duration::from_millis(12), ); - collector.on_task_execution_completed(&complete_event); let output = collector.gather(); - assert!(output.contains("conductor_task_execute_time_seconds")); - assert!(output.contains("conductor_task_result_size_bytes")); + assert!(output.contains("http_api_client_request_seconds")); + assert!(output.contains("method=\"GET\"")); + assert!(output.contains("uri=\"/tasks/poll/batch/my_worker\"")); + assert!(output.contains("status=\"200\"")); + } + + #[test] + fn test_poll_skipped_paused_metric() { + let collector = MetricsCollector::new(MetricsSettings::default()); + collector.on_poll_skipped_paused(&PollSkippedPaused::new("paused_task", "worker-1")); + let output = collector.gather(); + assert!(output.contains("task_paused_total")); + assert!(output.contains("paused_task")); + } + + #[test] + fn test_namespace_prefix_when_set() { + let collector = MetricsCollector::new(MetricsSettings::default().with_namespace("myapp")); + collector.on_poll_started(&PollStarted::new("test_task", "worker-1", 10)); + let output = collector.gather(); + assert!(output.contains("myapp_task_poll_total")); } } diff --git a/src/metrics/settings.rs b/src/metrics/settings.rs index 0cd7074..12d851e 100644 --- a/src/metrics/settings.rs +++ b/src/metrics/settings.rs @@ -21,7 +21,12 @@ pub struct MetricsSettings { /// Update interval for metrics pub update_interval: Duration, - /// Namespace prefix for all metrics + /// Optional namespace prefix for all metric names. 
Defaults to `""` so + /// that metric names emitted by this SDK match the canonical Conductor + /// SDK metric catalog used by the Java, Go, and Python SDKs (which do + /// not prefix metric names). Set this via [`Self::with_namespace`] if you + /// need to isolate Conductor SDK metrics from other metrics sharing the + /// same Prometheus registry. pub namespace: String, } @@ -33,7 +38,7 @@ impl Default for MetricsSettings { metrics_path: "/metrics".to_string(), health_path: "/health".to_string(), update_interval: Duration::from_secs(1), - namespace: "conductor".to_string(), + namespace: String::new(), } } } @@ -84,7 +89,7 @@ mod tests { let settings = MetricsSettings::default(); assert!(settings.enabled); assert_eq!(settings.metrics_path, "/metrics"); - assert_eq!(settings.namespace, "conductor"); + assert_eq!(settings.namespace, ""); } #[test] diff --git a/src/worker/task_handler.rs b/src/worker/task_handler.rs index d592481..026ff5b 100644 --- a/src/worker/task_handler.rs +++ b/src/worker/task_handler.rs @@ -66,11 +66,21 @@ impl TaskHandler { self.event_dispatcher.register(listener); } - /// Enable metrics collection + /// Enable metrics collection. + /// + /// Registers a [`MetricsCollector`] both as a task-runner event listener + /// (to populate task/workflow metrics) *and* as the + /// [`HttpMetricsObserver`](crate::http::HttpMetricsObserver) for the + /// underlying [`ApiClient`] (to populate + /// `http_api_client_request_seconds`). The observer swap is visible to + /// every `ApiClient` clone vended from this handler, including those + /// returned by [`conductor_client`](Self::conductor_client). 
pub fn enable_metrics(&mut self, settings: MetricsSettings) { let collector = Arc::new(MetricsCollector::new(settings)); self.event_dispatcher .register(collector.clone() as Arc); + self.api_client + .set_http_metrics(collector.clone() as Arc); self.metrics_collector = Some(collector); } @@ -94,9 +104,15 @@ impl TaskHandler { MetadataClient::new(self.api_client.clone()) } - /// Get the Conductor client + /// Get the Conductor client wired to this handler's event dispatcher. + /// + /// Sharing the dispatcher means `WorkflowStarted` / + /// `WorkflowStartFailure` events emitted by the returned client's + /// `WorkflowClient` flow into the same `MetricsCollector` that + /// [`enable_metrics`](Self::enable_metrics) installed. pub fn conductor_client(&self) -> ConductorClient { ConductorClient::from_api_client(self.api_client.clone()) + .with_event_dispatcher(self.event_dispatcher.clone()) } /// Get a schema client diff --git a/src/worker/task_runner.rs b/src/worker/task_runner.rs index e350ad0..f1a5527 100644 --- a/src/worker/task_runner.rs +++ b/src/worker/task_runner.rs @@ -2,10 +2,12 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. 
use std::collections::HashSet; +use std::panic::AssertUnwindSafe; use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; use std::sync::Arc; use std::time::{Duration, Instant}; +use futures::FutureExt; use tokio::sync::Semaphore; use tracing::{debug, error, info, warn}; @@ -13,8 +15,9 @@ use crate::client::TaskClient; use crate::configuration::{resolve_worker_config, WorkerConfig}; use crate::error::Result; use crate::events::{ - EventDispatcher, PollCompleted, PollFailure, PollStarted, TaskExecutionCompleted, - TaskExecutionFailure, TaskExecutionStarted, TaskUpdateFailure, + exception_label, EventDispatcher, PollCompleted, PollFailure, PollSkippedPaused, PollStarted, + TaskExecutionCompleted, TaskExecutionFailure, TaskExecutionStarted, TaskUpdateCompleted, + TaskUpdateFailure, ThreadUncaughtException, }; use crate::models::Task; @@ -193,6 +196,11 @@ impl TaskRunner { async fn run_once(&self) -> Result<()> { // Check if paused if self.paused.load(Ordering::SeqCst) { + self.event_dispatcher + .publish_poll_skipped_paused(&PollSkippedPaused::new( + &self.config.task_definition_name, + &self.config.worker_id, + )); tokio::time::sleep(Duration::from_millis(100)).await; return Ok(()); } @@ -268,13 +276,15 @@ impl TaskRunner { } } Err(e) => { - // Publish poll failure event + let exception = exception_label(&e); + self.event_dispatcher .publish_poll_failure(&PollFailure::new( &self.config.task_definition_name, &self.config.worker_id, poll_duration, e.to_string(), + exception, )); self.consecutive_empty_polls.fetch_add(1, Ordering::SeqCst); @@ -311,6 +321,8 @@ impl TaskRunner { let running_tasks = Arc::clone(&self.running_tasks); let spawned_task_count = Arc::clone(&self.spawned_task_count); + let task_type = self.config.task_definition_name.clone(); + tokio::spawn(async move { // Acquire semaphore permit FIRST - this is the actual concurrency control let _permit = match semaphore.acquire().await { @@ -328,13 +340,18 @@ impl TaskRunner { 
active_task_count.fetch_add(1, Ordering::SeqCst); running_tasks.lock().insert(task_id.clone()); - let result = Self::execute_and_update_task( + // Catch panics escaping the worker so we can publish + // `ThreadUncaughtException` and still clean up tracking state. + // Tokio's default panic behavior is to unwind the task; we need + // observability before the unwind reaches the reactor. + let outcome = AssertUnwindSafe(Self::execute_and_update_task( &worker, &task_client, &event_dispatcher, &config, task, - ) + )) + .catch_unwind() .await; // Cleanup: remove from tracking @@ -342,12 +359,25 @@ impl TaskRunner { active_task_count.fetch_sub(1, Ordering::SeqCst); spawned_task_count.fetch_sub(1, Ordering::SeqCst); - if let Err(e) = result { - error!( - task_id = %task_id, - error = %e, - "Task execution failed" - ); + match outcome { + Ok(Ok(())) => {} + Ok(Err(e)) => { + error!( + task_id = %task_id, + error = %e, + "Task execution failed" + ); + } + Err(_panic_payload) => { + error!( + task_id = %task_id, + task_type = %task_type, + "Uncaught panic in worker task" + ); + event_dispatcher.publish_thread_uncaught_exception( + &ThreadUncaughtException::new(&task_type, "Panic"), + ); + } } }); } @@ -414,8 +444,9 @@ impl TaskRunner { } Err(e) => { let error_msg = e.to_string(); + let exception = exception_label(&e); + let is_retryable = e.is_retryable(); - // Publish execution failure event event_dispatcher.publish_task_execution_failure(&TaskExecutionFailure::new( task_type, task_id, @@ -423,7 +454,8 @@ impl TaskRunner { &config.worker_id, exec_duration, &error_msg, - e.is_retryable(), + exception, + is_retryable, )); WorkerOutput::Failed(error_msg).into_task_result(&task, &config.worker_id) @@ -431,20 +463,33 @@ impl TaskRunner { }; // Update task with retry + let update_start = Instant::now(); match task_client.update_task_with_retry(&task_result, 4).await { Ok(_) => { + let update_duration = update_start.elapsed(); debug!(task_id = %task_id, "Task updated 
successfully"); + + event_dispatcher.publish_task_update_completed(&TaskUpdateCompleted::new( + task_type, + task_id, + workflow_id, + &config.worker_id, + update_duration, + )); } Err(e) => { + let update_duration = update_start.elapsed(); error!(task_id = %task_id, error = %e, "Failed to update task after retries"); + let exception = exception_label(&e); - // Publish task update failure event event_dispatcher.publish_task_update_failure(&TaskUpdateFailure::new( task_type, task_id, workflow_id, &config.worker_id, + update_duration, e.to_string(), + exception, 4, )); } From e719920e91d9088b27222da680cdacb2fafd475c Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Thu, 23 Apr 2026 13:10:17 -0600 Subject: [PATCH 02/15] metrics standardization --- METRICS.md | 153 +++++++++++++++++++++++++++++++ src/client/workflow_client.rs | 2 +- src/events/exception.rs | 108 ++++++++++++++++++++++ src/events/task_runner_events.rs | 4 +- src/http/api_client.rs | 22 ++++- src/http/metrics.rs | 45 +++++++++ src/metrics/collector.rs | 14 ++- 7 files changed, 339 insertions(+), 9 deletions(-) create mode 100644 METRICS.md create mode 100644 src/events/exception.rs create mode 100644 src/http/metrics.rs diff --git a/METRICS.md b/METRICS.md new file mode 100644 index 0000000..63c6f33 --- /dev/null +++ b/METRICS.md @@ -0,0 +1,153 @@ +# Metrics Documentation + +The Conductor Rust SDK includes built-in metrics collection using Prometheus to +monitor worker performance, API requests, and task execution. + +All metric names, label names, label values, and Prometheus types emitted by +this SDK match the canonical catalog in +[`sdk-metrics-harmonization.md`](https://github.com/conductor-oss/longrunning-wfstest/blob/main/sdk-metrics-harmonization.md). +Because the Rust SDK is unreleased, there are no legacy/deprecated metric +names to carry forward — the emitted surface is canonical on day one. 
+ +## Table of Contents + +- [Quick Reference](#quick-reference) +- [Configuration](#configuration) +- [Intentional divergences](#intentional-divergences) +- [Examples](#examples) + +## Quick Reference + +### Canonical metrics emitted by the SDK + +| Metric | Type | Labels | Meaning | +|---|---|---|---| +| `task_poll_total` | Counter | `taskType` | Incremented for every poll request issued to the server. | +| `task_poll_error_total` | Counter | `taskType`, `exception` | Client-side poll failures. `exception` is the unqualified `ConductorError` variant name. | +| `task_execution_started_total` | Counter | `taskType` | Incremented when a polled task is dispatched to the user worker function. | +| `task_execute_error_total` | Counter | `taskType`, `exception` | User worker returned `Err(_)`. | +| `task_update_error_total` | Counter | `taskType`, `exception` | Task-result update back to the server failed after all retries. | +| `task_paused_total` | Counter | `taskType` | Poll skipped because the runner is paused. | +| `thread_uncaught_exceptions_total` | Counter | `exception` | Panic escaped a spawned worker task; `exception` is always `"Panic"`. | +| `workflow_start_error_total` | Counter | `workflowType`, `exception` | `WorkflowClient::start_workflow` failed client-side. | +| `task_ack_error_total` | Counter | `taskType`, `exception` | **Surface-only.** Not incremented by the internal runner (see [Intentional divergences](#intentional-divergences)). | +| `task_ack_failed_total` | Counter | `taskType` | **Surface-only.** Not incremented by the internal runner. | +| `task_execution_queue_full_total` | Counter | `taskType` | **Surface-only.** Not incremented by the internal runner. | +| `external_payload_used_total` | Counter | `entityName`, `operation`, `payloadType` | **Surface-only.** Reserved for future large-payload external-storage support. | +| `task_poll_time_seconds` | Histogram | `taskType`, `status` | Poll latency. `status ∈ {SUCCESS, FAILURE}`. 
| +| `task_execute_time_seconds` | Histogram | `taskType`, `status` | User worker function wall-clock. | +| `task_update_time_seconds` | Histogram | `taskType`, `status` | Latency of the `UpdateTask` call (including retries). | +| `http_api_client_request_seconds` | Histogram | `method`, `uri`, `status` | Latency of every Conductor API HTTP request. `status` is the HTTP status code as a string, or `"0"` for network errors. | +| `task_result_size_bytes` | Gauge | `taskType` | Last-seen serialized task-result size. | +| `workflow_input_size_bytes` | Gauge | `workflowType`, `version` | Last-seen serialized `StartWorkflowRequest.input` size. `version` is the workflow version as a string, or `""` when unset. | +| `active_workers` | Gauge | `taskType` | Current number of in-flight task executions. | + +The Histogram bucket set is the canonical +`(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0)` +seconds. + +### Label values + +- `status` on task time histograms: uppercase `"SUCCESS"` / `"FAILURE"`. +- `status` on `http_api_client_request_seconds`: HTTP status code rendered as + a string (e.g. `"200"`), or `"0"` when the transport layer fails before + receiving a status. +- `uri`: the full interpolated request path (server path prefix + endpoint + path) without query string (e.g. `/api/tasks/poll/batch/my_worker` when + the server URL is `http://host:8080/api`). See the note below about + Phase 4 path templating. +- `exception`: the unqualified `ConductorError` variant name + (`Http`, `Json`, `Auth`, `Server`, …), the short type name for + non-`ConductorError` errors, or `"Panic"` for uncaught panics. + +### `uri` label — interpolated path, not templated + +Like the Java / Go / Python SDKs in Phase 1 of the harmonization plan, the +`uri` label on `http_api_client_request_seconds` carries the **interpolated** +request path including the server URL's path prefix (e.g.
+`/api/tasks/poll/batch/my_task` when `CONDUCTOR_SERVER_URL` ends in `/api`), +not the templated path (`/api/tasks/poll/batch/{taskType}`). +High-cardinality worker names or task IDs will therefore appear in the label. + +Operators who need bounded cardinality today should apply a Prometheus +`metric_relabel_configs` rule at scrape time that rewrites well-known +parametric path segments. Template extraction is tracked as **Phase 4** of the +canonical SDK metrics harmonization plan. + +## Configuration + +Metrics are wired up by calling [`TaskHandler::enable_metrics`]. This: + +- Registers a shared `MetricsCollector` as a `TaskRunnerEventsListener` for + task-level events. +- Installs the same `MetricsCollector` as the `HttpMetricsObserver` inside the + handler's `ApiClient`, capturing every HTTP request (including requests + made by `ConductorClient` instances vended via `TaskHandler::conductor_client()`). +- Optionally starts an HTTP scrape endpoint (`/metrics`, `/health`). + +Example: + +```rust +use conductor::{ + configuration::Configuration, + metrics::MetricsSettings, + worker::TaskHandler, +}; + +let config = Configuration::from_env(); +let mut handler = TaskHandler::new(config)?; + +handler.enable_metrics( + MetricsSettings::new() + .with_http_port(9991) + .with_metrics_path("/metrics"), +); + +// Workflow-start events will flow through the same dispatcher as tasks: +let conductor = handler.conductor_client(); +let workflow_client = conductor.workflow_client(); +``` + +By default `MetricsSettings::namespace` is `""`, so metric names appear +unprefixed (e.g. `task_poll_total`, matching Java/Go/Python). Call +`.with_namespace("myapp")` to prefix names if you need to isolate Conductor +SDK metrics from other metrics in the same registry.
+ +## Intentional divergences + +Some asymmetries with the canonical catalog are kept by design rather than +papered over: + +| Metric | Status in Rust SDK | Reason | +|---|---|---| +| `task_ack_error_total`, `task_ack_failed_total` | Registered; never incremented by the internal runner. Public helpers `MetricsCollector::increment_task_ack_error` / `increment_task_ack_failed` exposed for user code. | Matches the Go SDK's runtime model: the batch-poll response itself acts as the ack, so there is no separate ack call for the SDK to instrument. | +| `task_execution_queue_full_total` | Registered; never incremented by the internal runner. | Rust's worker scheduling uses a `tokio::sync::Semaphore`; acquisition awaits rather than rejecting, so there is no "queue full" condition for the SDK to surface. | +| `external_payload_used_total` | Registered; never incremented by the internal runner. | The Rust client does not yet integrate with the external-payload-storage branch of the Conductor API. Helper method retained for user code that implements its own external-payload plumbing. | +| `worker_restart_total` | Not emitted. | Python-only metric: Python has a multi-process worker supervisor; Rust spawns Tokio tasks, so there is no equivalent "restart a subprocess" event. | +| `task_execution_completed_total` | Not emitted. | Canonical catalog exposes task execution completion only through `task_execute_time_seconds_count{status="SUCCESS"}`, which is already present. | +| `active_workers` labels | `{taskType}` | Matches canonical. | +| Metric name prefix | `""` (none) by default | Matches Java/Go/Python. Can be overridden via `MetricsSettings::with_namespace`. | + +## Examples + +See [`examples/metrics_example.rs`](./examples/metrics_example.rs) for a +runnable end-to-end demo that spins up workers, serves `/metrics` on a +configurable port, and exercises every metric in the catalog. 
+ +```prometheus +# HTTP API client request latency +http_api_client_request_seconds_bucket{method="GET",uri="/api/tasks/poll/batch/my_worker",status="200",le="0.1"} 97 +http_api_client_request_seconds_bucket{method="GET",uri="/api/tasks/poll/batch/my_worker",status="200",le="+Inf"} 100 +http_api_client_request_seconds_count{method="GET",uri="/api/tasks/poll/batch/my_worker",status="200"} 100 +http_api_client_request_seconds_sum{method="GET",uri="/api/tasks/poll/batch/my_worker",status="200"} 8.21 + +# Task poll +task_poll_total{taskType="my_worker"} 124 + +# Task execute time (SUCCESS) +task_execute_time_seconds_bucket{taskType="my_worker",status="SUCCESS",le="0.25"} 42 +task_execute_time_seconds_count{taskType="my_worker",status="SUCCESS"} 42 + +# Workflow start error +workflow_start_error_total{workflowType="my_wf",exception="Server"} 2 +``` diff --git a/src/client/workflow_client.rs b/src/client/workflow_client.rs index 758f708..6d86b6c 100644 --- a/src/client/workflow_client.rs +++ b/src/client/workflow_client.rs @@ -65,7 +65,7 @@ impl WorkflowClient { self.events.publish_workflow_started(&WorkflowStarted::new( &request.name, - request.version.unwrap_or(1), + request.version, input_size_bytes, )); diff --git a/src/events/exception.rs b/src/events/exception.rs new file mode 100644 index 0000000..5435ace --- /dev/null +++ b/src/events/exception.rs @@ -0,0 +1,108 @@ +// Copyright {{.Year}} Conductor OSS +// Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +//! Bounded-cardinality exception label helpers for metrics. +//! +//! The canonical Conductor SDK metric catalog specifies that any `exception` +//! label on a metric should carry an exception *type name*, never a raw message +//! or stack trace. This mirrors Python's `type(err).__name__`, Go's +//! `fmt.Sprintf("%T", err)`, and Java's `e.getClass().getSimpleName()`.
+ +use crate::error::ConductorError; + +/// Return the canonical `exception` label value for a [`ConductorError`]. +/// +/// Returns the unqualified variant name (e.g. `"Http"`, `"Json"`, `"Auth"`), +/// which is stable, compact, and bounded in cardinality. +pub fn exception_label(err: &ConductorError) -> &'static str { + match err { + ConductorError::Http(_) => "Http", + ConductorError::Json(_) => "Json", + ConductorError::Config(_) => "Config", + ConductorError::Auth(_) => "Auth", + ConductorError::TaskExecution(_) => "TaskExecution", + ConductorError::TaskNotFound(_) => "TaskNotFound", + ConductorError::WorkflowNotFound(_) => "WorkflowNotFound", + ConductorError::Workflow(_) => "Workflow", + ConductorError::Worker(_) => "Worker", + ConductorError::Timeout(_) => "Timeout", + ConductorError::Server { .. } => "Server", + ConductorError::Api { .. } => "Api", + ConductorError::Internal(_) => "Internal", + ConductorError::Io(_) => "Io", + ConductorError::Channel(_) => "Channel", + } +} + +/// Return the canonical `exception` label value for any type. +/// +/// Uses [`std::any::type_name`] with the module path stripped, so generic +/// and nested types still produce a single, short label value. Intended for +/// values that aren't `ConductorError` — for those, prefer [`exception_label`] +/// which is guaranteed to be `&'static str` and doesn't allocate. +pub fn type_name_of(_value: &T) -> &'static str { + last_type_segment(std::any::type_name::()) +} + +/// Return the canonical `exception` label value for a panic payload produced +/// by [`std::panic::catch_unwind`] / [`futures::FutureExt::catch_unwind`]. +/// +/// Panic payloads are `Box` and don't carry a useful type +/// name by themselves, so we always report `"Panic"` to keep cardinality +/// bounded. Callers that need the panic message should log it separately. 
+pub fn exception_label_for_panic(_payload: &(dyn std::any::Any + Send)) -> &'static str { + "Panic" +} + +/// Strip everything before the final `::` from a Rust type path. +fn last_type_segment(full: &'static str) -> &'static str { + // Walk back from the end until we find `::` outside generic `<...>` nesting. + // Simpler approach: split on `<` first to get the base, then take the last + // `::`-delimited segment of that base. + let base = match full.find('<') { + Some(idx) => &full[..idx], + None => full, + }; + match base.rfind("::") { + Some(idx) => &base[idx + 2..], + None => base, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn conductor_error_variant_names() { + assert_eq!( + exception_label(&ConductorError::Auth("bad creds".into())), + "Auth" + ); + assert_eq!( + exception_label(&ConductorError::Worker("oops".into())), + "Worker" + ); + assert_eq!( + exception_label(&ConductorError::Server { + status: 500, + message: "boom".into(), + }), + "Server" + ); + } + + #[test] + fn strips_module_path() { + assert_eq!(last_type_segment("std::io::Error"), "Error"); + assert_eq!(last_type_segment("reqwest::Error"), "Error"); + assert_eq!(last_type_segment("Foo"), "Foo"); + assert_eq!(last_type_segment("core::option::Option"), "Option"); + } + + #[test] + fn panic_payload_label() { + let payload: Box = Box::new("panicked"); + assert_eq!(exception_label_for_panic(payload.as_ref()), "Panic"); + } +} diff --git a/src/events/task_runner_events.rs b/src/events/task_runner_events.rs index ea81cfc..369ed1d 100644 --- a/src/events/task_runner_events.rs +++ b/src/events/task_runner_events.rs @@ -418,7 +418,7 @@ impl TaskRunnerEvent for ThreadUncaughtException { #[derive(Debug, Clone)] pub struct WorkflowStarted { pub workflow_type: String, - pub version: i32, + pub version: Option, pub input_size_bytes: usize, pub timestamp: DateTime, } @@ -426,7 +426,7 @@ pub struct WorkflowStarted { impl WorkflowStarted { pub fn new( workflow_type: impl Into, - version: 
i32, + version: Option, input_size_bytes: usize, ) -> Self { Self { diff --git a/src/http/api_client.rs b/src/http/api_client.rs index 5f2556b..9d7ec86 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -48,6 +48,10 @@ pub struct ApiClient { client: Client, config: Arc>, base_url: String, + /// Path component of `base_url` (e.g. `"/api"`), prepended to endpoint + /// paths when recording the `uri` metric label so the label matches the + /// full request path as seen by all other SDKs. + base_path: String, /// Track consecutive auth failures for backoff auth_failures: Arc>, /// Last time we attempted token refresh (for backoff) @@ -79,10 +83,25 @@ impl ApiClient { let base_url = config.server_api_url.trim_end_matches('/').to_string(); + // Extract the path component of the server URL so it can be prepended + // to endpoint paths in metric labels. + // "http://host:8080/api" → "/api", "http://host:8080" → "" + let base_path = base_url + .find("://") + .and_then(|scheme_end| { + let after_scheme = scheme_end + 3; + base_url[after_scheme..] 
+ .find('/') + .map(|slash| after_scheme + slash) + }) + .map(|abs_pos| base_url[abs_pos..].to_string()) + .unwrap_or_default(); + Ok(Self { client, config: Arc::new(RwLock::new(config)), base_url, + base_path, auth_failures: Arc::new(RwLock::new(0)), last_refresh_attempt: Arc::new(RwLock::new(None)), token_refresh_lock: Arc::new(Mutex::new(())), @@ -133,8 +152,9 @@ impl ApiClient { duration_ms = %duration.as_millis(), "API request completed" ); + let uri_label = format!("{}{}", self.base_path, path); self.http_metrics() - .observe(method, path, status_str, duration); + .observe(method, &uri_label, status_str, duration); } /// Convenience: call [`record_request`](Self::record_request) with a diff --git a/src/http/metrics.rs b/src/http/metrics.rs new file mode 100644 index 0000000..b886e38 --- /dev/null +++ b/src/http/metrics.rs @@ -0,0 +1,45 @@ +// Copyright {{.Year}} Conductor OSS +// Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +//! HTTP client metrics observer trait. +//! +//! The [`ApiClient`](super::ApiClient) invokes a trait object of +//! [`HttpMetricsObserver`] on every outbound request so that instrumentation +//! (e.g. the [`MetricsCollector`](crate::metrics::MetricsCollector) from the +//! `metrics` module) can record `http_api_client_request_seconds` without the +//! HTTP layer depending on the metrics layer. + +use std::sync::Arc; +use std::time::Duration; + +/// Observer invoked by [`ApiClient`](super::ApiClient) after every request +/// completes (either with a response or a transport error). +/// +/// Implementations should be fast and non-blocking — the observer runs on the +/// request hot-path. +pub trait HttpMetricsObserver: Send + Sync { + /// Record a completed HTTP request. + /// + /// - `method`: uppercase HTTP verb (e.g. `"GET"`). + /// - `uri`: interpolated request path, *without* query string (e.g. + /// `/api/tasks/poll/batch/my_worker`).
Template extraction is tracked as + /// Phase 4 of the canonical SDK metrics harmonization plan. + /// - `status`: HTTP status code as a string, or `"0"` if the transport + /// failed before a status was received. + /// - `duration`: wall-clock time from send to response-received (or error). + fn observe(&self, method: &str, uri: &str, status: &str, duration: Duration); +} + +/// No-op observer installed by default. +pub struct NoopHttpMetricsObserver; + +impl HttpMetricsObserver for NoopHttpMetricsObserver { + fn observe(&self, _method: &str, _uri: &str, _status: &str, _duration: Duration) {} +} + +impl NoopHttpMetricsObserver { + /// Return a shared no-op observer instance. + pub fn arc() -> Arc { + Arc::new(Self) + } +} diff --git a/src/metrics/collector.rs b/src/metrics/collector.rs index 7fc75d7..5ea0479 100644 --- a/src/metrics/collector.rs +++ b/src/metrics/collector.rs @@ -209,7 +209,7 @@ impl MetricsCollector { ns, "thread_uncaught_exceptions_total", "Count of panics escaping worker task bodies", - &["taskType", "exception"], + &["exception"], ); let workflow_start_error_total = make_counter( ®istry, @@ -262,7 +262,7 @@ impl MetricsCollector { ns, "workflow_input_size_bytes", "Size of workflow input payload in bytes at start_workflow time", - &["workflowType"], + &["workflowType", "version"], ); let active_workers = make_gauge( ®istry, @@ -548,13 +548,17 @@ impl TaskRunnerEventsListener for MetricsCollector { fn on_thread_uncaught_exception(&self, event: &ThreadUncaughtException) { self.thread_uncaught_exceptions_total - .with_label_values(&[&event.task_type, &event.exception]) + .with_label_values(&[&event.exception]) .inc(); } fn on_workflow_started(&self, event: &WorkflowStarted) { + let version_str = event + .version + .map(|v| v.to_string()) + .unwrap_or_default(); self.workflow_input_size_bytes - .with_label_values(&[&event.workflow_type]) + .with_label_values(&[&event.workflow_type, &version_str]) .set(event.input_size_bytes as f64); } @@ -674,7 
+678,7 @@ mod tests { fn test_workflow_metrics() { let collector = MetricsCollector::new(MetricsSettings::default()); - collector.on_workflow_started(&WorkflowStarted::new("wf_a", 1, 128)); + collector.on_workflow_started(&WorkflowStarted::new("wf_a", Some(1), 128)); collector.on_workflow_start_failure(&WorkflowStartFailure::new("wf_b", "Server")); let output = collector.gather(); From 162f1e5273aeac95cb13680d9f54ed5ac3101d07 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 27 Apr 2026 10:19:37 -0600 Subject: [PATCH 03/15] fix link to metrics standardization doc --- METRICS.md | 2 +- src/metrics/collector.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/METRICS.md b/METRICS.md index 63c6f33..c957711 100644 --- a/METRICS.md +++ b/METRICS.md @@ -5,7 +5,7 @@ monitor worker performance, API requests, and task execution. All metric names, label names, label values, and Prometheus types emitted by this SDK match the canonical catalog in -[`sdk-metrics-harmonization.md`](https://github.com/conductor-oss/longrunning-wfstest/blob/main/sdk-metrics-harmonization.md). +[`sdk-metrics-harmonization.md`](https://github.com/orkes-io/certification-cloud-util/blob/main/sdk-metrics-harmonization.md). Because the Rust SDK is unreleased, there are no legacy/deprecated metric names to carry forward — the emitted surface is canonical on day one. diff --git a/src/metrics/collector.rs b/src/metrics/collector.rs index 5ea0479..b9a3549 100644 --- a/src/metrics/collector.rs +++ b/src/metrics/collector.rs @@ -5,7 +5,7 @@ //! //! Metric names, label names, label values, and types here are intentionally //! identical to the Java, Go, and Python SDKs. See `sdk-metrics-harmonization.md` -//! in the `longrunning-wfstest` repo for the source-of-truth catalog. +//! 
at https://github.com/orkes-io/certification-cloud-util/blob/main/sdk-metrics-harmonization.md use parking_lot::RwLock; use prometheus::{CounterVec, GaugeVec, HistogramOpts, HistogramVec, Opts, Registry}; From 8ad3dafc5166cf5ec19aaa9b0100624e15900b41 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Wed, 29 Apr 2026 10:24:34 -0600 Subject: [PATCH 04/15] implement canonical metrics, feature flagged to preserve legacy where legacy ones were released --- METRICS.md | 12 ++++---- src/metrics/collector.rs | 62 +++++++++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/METRICS.md b/METRICS.md index c957711..5432748 100644 --- a/METRICS.md +++ b/METRICS.md @@ -38,13 +38,15 @@ names to carry forward — the emitted surface is canonical on day one. | `task_execute_time_seconds` | Histogram | `taskType`, `status` | User worker function wall-clock. | | `task_update_time_seconds` | Histogram | `taskType`, `status` | Latency of the `UpdateTask` call (including retries). | | `http_api_client_request_seconds` | Histogram | `method`, `uri`, `status` | Latency of every Conductor API HTTP request. `status` is the HTTP status code as a string, or `"0"` for network errors. | -| `task_result_size_bytes` | Gauge | `taskType` | Last-seen serialized task-result size. | -| `workflow_input_size_bytes` | Gauge | `workflowType`, `version` | Last-seen serialized `StartWorkflowRequest.input` size. `version` is the workflow version as a string, or `""` when unset. | +| `task_result_size_bytes` | Histogram | `taskType` | Serialized byte size of task result output. | +| `workflow_input_size_bytes` | Histogram | `workflowType`, `version` | Serialized byte size of workflow input. `version` is the workflow version as a string, or `""` when unset. | | `active_workers` | Gauge | `taskType` | Current number of in-flight task executions. 
| -The Histogram bucket set is the canonical -`(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0)` -seconds. +Time histograms use the canonical seconds bucket set: +`(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0)`. + +Size histograms use the canonical size bucket set: +`(100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000)` bytes. ### Label values diff --git a/src/metrics/collector.rs b/src/metrics/collector.rs index b9a3549..e6877f0 100644 --- a/src/metrics/collector.rs +++ b/src/metrics/collector.rs @@ -31,6 +31,9 @@ const SECONDS_BUCKETS: &[f64] = &[ 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, ]; +/// Canonical size histogram buckets — identical to Java/Go/Python SDKs. +const SIZE_BUCKETS: &[f64] = &[100.0, 1_000.0, 10_000.0, 100_000.0, 1_000_000.0, 10_000_000.0]; + /// Prometheus metrics collector implementing the canonical Conductor SDK /// metric catalog. /// @@ -61,9 +64,11 @@ pub struct MetricsCollector { task_update_time_seconds: HistogramVec, http_api_client_request_seconds: HistogramVec, + // -- Size Histograms -- + task_result_size_bytes: HistogramVec, + workflow_input_size_bytes: HistogramVec, + // -- Gauges -- - task_result_size_bytes: GaugeVec, - workflow_input_size_bytes: GaugeVec, active_workers: GaugeVec, // Internal tracking — keeps the `active_workers` gauge in sync with the @@ -88,18 +93,19 @@ fn make_counter( counter } -/// Helper: build a histogram with the canonical SDK buckets. -fn make_histogram( +/// Helper: build a histogram with the given bucket set. 
+fn make_histogram_with_buckets( registry: &Registry, namespace: &str, name: &'static str, help: &'static str, labels: &[&str], + buckets: &[f64], ) -> HistogramVec { let histogram = HistogramVec::new( HistogramOpts::new(name, help) .namespace(namespace) - .buckets(SECONDS_BUCKETS.to_vec()), + .buckets(buckets.to_vec()), labels, ) .unwrap_or_else(|e| panic!("Failed to create histogram {name}: {e}")); @@ -109,6 +115,28 @@ fn make_histogram( histogram } +/// Helper: build a time histogram with the canonical seconds bucket set. +fn make_histogram( + registry: &Registry, + namespace: &str, + name: &'static str, + help: &'static str, + labels: &[&str], +) -> HistogramVec { + make_histogram_with_buckets(registry, namespace, name, help, labels, SECONDS_BUCKETS) +} + +/// Helper: build a size histogram with the canonical size bucket set. +fn make_size_histogram( + registry: &Registry, + namespace: &str, + name: &'static str, + help: &'static str, + labels: &[&str], +) -> HistogramVec { + make_histogram_with_buckets(registry, namespace, name, help, labels, SIZE_BUCKETS) +} + /// Helper: build a gauge vector. 
fn make_gauge( registry: &Registry, @@ -249,21 +277,23 @@ impl MetricsCollector { &["method", "uri", "status"], ); - // Gauges - let task_result_size_bytes = make_gauge( + // Size Histograms + let task_result_size_bytes = make_size_histogram( &registry, ns, "task_result_size_bytes", - "Size of task result payload in bytes", + "Serialized byte size of task result output", &["taskType"], ); - let workflow_input_size_bytes = make_gauge( + let workflow_input_size_bytes = make_size_histogram( &registry, ns, "workflow_input_size_bytes", - "Size of workflow input payload in bytes at start_workflow time", + "Serialized byte size of workflow input", &["workflowType", "version"], ); + + // Gauges let active_workers = make_gauge( &registry, ns, @@ -512,7 +542,7 @@ impl TaskRunnerEventsListener for MetricsCollector { if let Some(size) = event.output_size_bytes { self.task_result_size_bytes .with_label_values(&[&event.task_type]) - .set(size as f64); + .observe(size as f64); } self.decrement_active(&event.task_type); @@ -559,7 +589,7 @@ impl TaskRunnerEventsListener for MetricsCollector { .unwrap_or_default(); self.workflow_input_size_bytes .with_label_values(&[&event.workflow_type, &version_str]) - .set(event.input_size_bytes as f64); + .observe(event.input_size_bytes as f64); } fn on_workflow_start_failure(&self, event: &WorkflowStartFailure) { @@ -649,7 +679,9 @@ mod tests { let output = collector.gather(); assert!(output.contains("task_execute_time_seconds")); - assert!(output.contains("task_result_size_bytes")); + assert!(output.contains("task_result_size_bytes_bucket")); + assert!(output.contains("task_result_size_bytes_count")); + assert!(output.contains("task_result_size_bytes_sum")); assert!(output.contains("task_execution_started_total")); } @@ -682,7 +714,9 @@ mod tests { collector.on_workflow_start_failure(&WorkflowStartFailure::new("wf_b", "Server")); let output = collector.gather(); - assert!(output.contains("workflow_input_size_bytes")); +
assert!(output.contains("workflow_input_size_bytes_bucket")); + assert!(output.contains("workflow_input_size_bytes_count")); + assert!(output.contains("workflow_input_size_bytes_sum")); assert!(output.contains("workflowType=\"wf_a\"")); assert!(output.contains("workflow_start_error_total")); assert!(output.contains("workflowType=\"wf_b\"")); From 996bbe0fc0316e2bfd0d7f401861c90e43c73054 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 4 May 2026 11:52:45 -0600 Subject: [PATCH 05/15] apply cargo fmt --- src/client/workflow_client.rs | 6 +++++- src/http/api_client.rs | 8 +------- src/metrics/collector.rs | 16 ++++++++++------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/client/workflow_client.rs b/src/client/workflow_client.rs index 6d86b6c..9f0333c 100644 --- a/src/client/workflow_client.rs +++ b/src/client/workflow_client.rs @@ -55,7 +55,11 @@ impl WorkflowClient { .map(|v| v.len()) .unwrap_or(0); - match self.api.post_text::("/workflow", request).await { + match self + .api + .post_text::("/workflow", request) + .await + { Ok(workflow_id) => { info!( workflow_name = %request.name, diff --git a/src/http/api_client.rs b/src/http/api_client.rs index 9d7ec86..e53364d 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -138,13 +138,7 @@ impl ApiClient { /// `status_str` is the HTTP status code rendered as a string, or `"0"` /// for pre-response transport errors. #[inline] - fn record_request( - &self, - method: &str, - path: &str, - status_str: &str, - duration: Duration, - ) { + fn record_request(&self, method: &str, path: &str, status_str: &str, duration: Duration) { debug!( method = method, url = %format!("{}{}", self.base_url, path), diff --git a/src/metrics/collector.rs b/src/metrics/collector.rs index e6877f0..93f7a17 100644 --- a/src/metrics/collector.rs +++ b/src/metrics/collector.rs @@ -5,7 +5,7 @@ //! //! Metric names, label names, label values, and types here are intentionally //! 
identical to the Java, Go, and Python SDKs. See `sdk-metrics-harmonization.md` -//! at https://github.com/orkes-io/certification-cloud-util/blob/main/sdk-metrics-harmonization.md +//! at <https://github.com/orkes-io/certification-cloud-util/blob/main/sdk-metrics-harmonization.md> use parking_lot::RwLock; use prometheus::{CounterVec, GaugeVec, HistogramOpts, HistogramVec, Opts, Registry}; @@ -32,7 +32,14 @@ const SECONDS_BUCKETS: &[f64] = &[ ]; /// Canonical size histogram buckets — identical to Java/Go/Python SDKs. -const SIZE_BUCKETS: &[f64] = &[100.0, 1_000.0, 10_000.0, 100_000.0, 1_000_000.0, 10_000_000.0]; +const SIZE_BUCKETS: &[f64] = &[ + 100.0, + 1_000.0, + 10_000.0, + 100_000.0, + 1_000_000.0, + 10_000_000.0, +]; /// Prometheus metrics collector implementing the canonical Conductor SDK /// metric catalog. @@ -583,10 +590,7 @@ impl TaskRunnerEventsListener for MetricsCollector { } fn on_workflow_started(&self, event: &WorkflowStarted) { - let version_str = event - .version - .map(|v| v.to_string()) - .unwrap_or_default(); + let version_str = event.version.map(|v| v.to_string()).unwrap_or_default(); self.workflow_input_size_bytes .with_label_values(&[&event.workflow_type, &version_str]) .observe(event.input_size_bytes as f64); From f8b1bbd9f75adaff35cadfc2636490496ced140b Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Thu, 7 May 2026 09:26:47 -0600 Subject: [PATCH 06/15] update docs for metrics, point all references to metrics to one md file, note about historical comparison in design doc --- DESIGN.md | 47 +++++++++++++------------------------------- METRICS.md | 42 +++++++++++++++++++++++++++++++++++++++ WORKER_COMPARISON.md | 6 ++++++ docs/WORKER.md | 20 ++++++++++--------- 4 files changed, 73 insertions(+), 42 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index bb5820d..97f9625 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -892,44 +892,25 @@ handler.add_event_listener(Arc::new(MyListener)); ## Metrics -### MetricsSettings +The `MetricsCollector` in `src/metrics/collector.rs` implements the full +canonical Prometheus catalog using the
`prometheus` crate. It is wired into +the worker framework via `TaskHandler::enable_metrics`, which registers it as +both a `TaskRunnerEventsListener` (for task-level events) and as the +`HttpMetricsObserver` on `ApiClient` (for HTTP request latency). An optional +built-in HTTP server exposes `/metrics` and `/health` endpoints for scraping. ```rust -use conductor::MetricsSettings; +use conductor::metrics::MetricsSettings; -let settings = MetricsSettings::default() - .with_http_port(9090) // Serve metrics on :9090/metrics - .with_metrics_path("/metrics") - .with_namespace("conductor") - .with_update_interval(Duration::from_secs(5)); +handler.enable_metrics( + MetricsSettings::new() + .with_http_port(9090) + .with_metrics_path("/metrics"), +); ``` -### Prometheus Metrics - -| Metric | Type | Labels | Description | -|--------|------|--------|-------------| -| `conductor_task_poll_total` | Counter | `task_type` | Total poll attempts | -| `conductor_task_poll_error_total` | Counter | `task_type`, `error_type` | Poll errors | -| `conductor_task_execute_error_total` | Counter | `task_type`, `error_type` | Execution errors | -| `conductor_task_update_error_total` | Counter | `task_type` | Update errors | -| `conductor_task_paused_total` | Counter | `task_type` | Polls while paused | -| `conductor_task_poll_time_seconds` | Histogram | `task_type`, `status` | Poll latency | -| `conductor_task_execute_time_seconds` | Histogram | `task_type`, `status` | Execution time | -| `conductor_task_result_size_bytes` | Gauge | `task_type` | Result payload size | -| `conductor_active_workers` | Gauge | `task_type` | Active worker count | - -### Accessing Metrics - -```rust -// Via HTTP endpoint (if configured) -// GET http://localhost:9090/metrics - -// Programmatically -if let Some(collector) = handler.metrics_collector() { - let metrics_text = collector.gather(); - println!("{}", metrics_text); -} -``` +See [METRICS.md](METRICS.md) for the complete metric catalog, labels, bucket +sets, 
intentional divergences, and example scrape output. --- diff --git a/METRICS.md b/METRICS.md index 5432748..b976831 100644 --- a/METRICS.md +++ b/METRICS.md @@ -15,6 +15,7 @@ names to carry forward — the emitted surface is canonical on day one. - [Configuration](#configuration) - [Intentional divergences](#intentional-divergences) - [Examples](#examples) +- [Troubleshooting](#troubleshooting) ## Quick Reference @@ -153,3 +154,44 @@ task_execute_time_seconds_count{taskType="my_worker",status="SUCCESS"} 42 # Workflow start error workflow_start_error_total{workflowType="my_wf",exception="Server"} 2 ``` + +## Troubleshooting + +### Metrics are empty + +- Verify that `TaskHandler::enable_metrics` is called before `handler.start()`. +- Verify workers have polled or executed at least one task. Metrics are created + lazily when the corresponding event occurs, so a freshly started worker with + no traffic will have no series. +- Confirm the scrape endpoint is reachable at the expected host and port + (default: `http://localhost:9991/metrics`). + +### Missing HTTP or workflow metrics + +- `http_api_client_request_seconds` is recorded by the `HttpMetricsObserver` + installed on `ApiClient` by `enable_metrics`. If `enable_metrics` is not + called, no HTTP metrics are emitted. +- `workflow_start_error_total` and `workflow_input_size_bytes` require the + `WorkflowClient` to be obtained via `handler.conductor_client()` so that + events flow through the shared `EventDispatcher`. A standalone + `ConductorClient` created separately from the handler will not emit these + metrics. + +### High cardinality + +- The `uri` label on `http_api_client_request_seconds` carries the + interpolated request path, which may include worker names or task IDs. + Operators who need bounded cardinality should apply a Prometheus + `metric_relabel_configs` rule at scrape time. See the + [uri label note](#uri-label--interpolated-path-not-templated) above. 
+- Avoid embedding user identifiers or unbounded values in task type, workflow + type, or external payload labels. + +### No legacy/canonical gating + +Unlike the Python, Go, Java, JavaScript, and Ruby SDKs, the Rust SDK has no +released legacy metrics surface. It ships the canonical catalog directly with +no `WORKER_CANONICAL_METRICS` environment variable and no factory/switchout +pattern. If you operate a mixed fleet of Conductor workers across multiple +SDKs, the other SDKs require `WORKER_CANONICAL_METRICS=true` to emit the +same metric names and shapes that the Rust SDK emits by default. diff --git a/WORKER_COMPARISON.md b/WORKER_COMPARISON.md index d8b786d..9c07a6c 100644 --- a/WORKER_COMPARISON.md +++ b/WORKER_COMPARISON.md @@ -111,6 +111,12 @@ let tasks = task_client.batch_poll(..., available_slots, ...).await?; ### Prometheus Metrics +> **Note:** The table below is a historical snapshot from when the Python SDK +> was emitting its pre-harmonization (legacy) metrics surface. Worker metrics +> have since been harmonized across all Conductor SDKs under a single canonical +> catalog. For the complete and current Rust SDK metrics catalog, see +> [METRICS.md](METRICS.md). + | Metric | Python | Rust | |--------|--------|------| | `task_poll_total` | ✅ | ✅ | diff --git a/docs/WORKER.md b/docs/WORKER.md index 971d53e..c439974 100644 --- a/docs/WORKER.md +++ b/docs/WORKER.md @@ -471,18 +471,20 @@ let worker = FnWorker::new("batch_processor", |task: Task| async move { use conductor::metrics::MetricsSettings; let mut handler = TaskHandler::new(config)?; -handler.enable_metrics(MetricsSettings { - port: 9090, - enabled: true, -}); +handler.enable_metrics( + MetricsSettings::new() + .with_http_port(9090) + .with_metrics_path("/metrics"), +); ``` -Metrics available at `http://localhost:9090/metrics`: +Metrics are available at `http://localhost:9090/metrics`. 
The SDK emits the +full canonical Prometheus catalog (counters, histograms, and gauges) covering +worker polling, task execution, task result updates, HTTP API client latency, +and more. -- `conductor_worker_tasks_polled_total` - Total tasks polled -- `conductor_worker_tasks_executed_total` - Total tasks executed -- `conductor_worker_task_duration_seconds` - Task execution duration -- `conductor_worker_poll_errors_total` - Poll errors +See [METRICS.md](../METRICS.md) for the complete metric catalog, label +definitions, bucket sets, and configuration details. ### Event Listeners From 6b93bc223b0a17cf2e95d817d22c75eee0418a64 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Thu, 7 May 2026 11:15:04 -0600 Subject: [PATCH 07/15] add or update a changelog --- CHANGELOG.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d01b98d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +### Added + +- **Metrics harmonization** - canonical metric surface aligned with the cross-SDK catalog (no `WORKER_CANONICAL_METRICS` env var; the Rust SDK is unreleased so the emitted surface is canonical on day one) + - `MetricsCollector` now emits the harmonized cross-SDK catalog: counters (`task_poll_total`, `task_poll_error_total`, `task_execution_started_total`, `task_execute_error_total`, `task_update_error_total`, `task_paused_total`, `task_ack_error_total`, `task_ack_failed_total`, `task_execution_queue_full_total`, `external_payload_used_total`, `thread_uncaught_exceptions_total`, `workflow_start_error_total`), histograms (`task_poll_time_seconds`, `task_execute_time_seconds`, `task_update_time_seconds`, `http_api_client_request_seconds`, `task_result_size_bytes`, `workflow_input_size_bytes`), and `active_workers{taskType}` gauge. Time buckets `0.001…10s`; size buckets `100…10_000_000` bytes; labels are camelCase. + - `HttpMetricsObserver` trait and `NoopHttpMetricsObserver`. `MetricsCollector` implements `HttpMetricsObserver`; `TaskHandler::enable_metrics` automatically installs it on the underlying `ApiClient`. Transport failures record `status="0"` to match the cross-SDK convention. + - `events::exception::exception_label(&ConductorError)` produces bounded-cardinality `&'static str` labels (`"Http"`, `"Json"`, `"Server"`, etc.) used everywhere the canonical `exception` label is emitted. + - New event types in `events::task_runner_events`: `PollSkippedPaused`, `TaskUpdateCompleted`, `ThreadUncaughtException`, `WorkflowStarted`, `WorkflowStartFailure`. `WorkflowClient` and `ConductorClient` emit workflow events through the dispatcher. 
+ +### Changed + +- **Metrics harmonization** - label renames; no legacy mode (other Conductor SDKs that did release metrics — Python, Go, Java, JavaScript, Ruby — ship a gated switch via `WORKER_CANONICAL_METRICS`; Rust skips the gate) + - Metric labels renamed to camelCase (`task_type → taskType`, `error_type → exception`, plus `version`, `method`, `uri`, `status`, `entityName`, `operation`, `payloadType`). The pre-harmonization metrics that existed on `main` (snake_case labels, `conductor_*` prefix, mismatched buckets) are not preserved. + - Default `MetricsSettings::namespace` is now `""` (was implicitly `"conductor"`) to align with canonical naming. + - New top-level `METRICS.md` with the canonical catalog, bucket sets, label conventions, configuration via `TaskHandler::enable_metrics`, an "Intentional divergences" table for Rust-specific omissions (`task_ack_*`, `task_execution_queue_full_total`, `external_payload_used_total` are registered but not incremented; `worker_restart_total` and `task_execution_completed_total` are not emitted), and an explicit "No legacy/canonical gating" section. + - `DESIGN.md`, `docs/WORKER.md`, and `WORKER_COMPARISON.md` point to `METRICS.md`. 
From 6ea9efd6410db2531966a360183bec4d8d1403c4 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Fri, 15 May 2026 12:30:52 -0600 Subject: [PATCH 08/15] generate traffic for metrics that have uris in the test harness worker, correct the cardinality explosion --- CHANGELOG.md | 15 +- METRICS.md | 82 ++++++--- harness/src/main.rs | 55 ++++-- harness/src/workflow_governor.rs | 21 ++- harness/src/workflow_status_probe.rs | 87 +++++++++ src/client/authorization_client.rs | 117 ++++++------ src/client/event_client.rs | 24 ++- src/client/integration_client.rs | 47 +++-- src/client/metadata_client.rs | 12 +- src/client/orkes_metadata_client.rs | 18 +- src/client/prompt_client.rs | 31 ++-- src/client/scheduler_client.rs | 39 ++-- src/client/schema_client.rs | 14 +- src/client/secret_client.rs | 22 ++- src/client/task_client.rs | 66 +++++-- src/client/workflow_client.rs | 115 +++++++++--- src/http/api_client.rs | 254 ++++++++++++++++++--------- src/http/metrics.rs | 12 +- src/http/mod.rs | 2 +- 19 files changed, 670 insertions(+), 363 deletions(-) create mode 100644 harness/src/workflow_status_probe.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index d01b98d..c2aa778 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,16 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- **Metrics harmonization** - canonical metric surface aligned with the cross-SDK catalog (no `WORKER_CANONICAL_METRICS` env var; the Rust SDK is unreleased so the emitted surface is canonical on day one) - - `MetricsCollector` now emits the harmonized cross-SDK catalog: counters (`task_poll_total`, `task_poll_error_total`, `task_execution_started_total`, `task_execute_error_total`, `task_update_error_total`, `task_paused_total`, `task_ack_error_total`, `task_ack_failed_total`, `task_execution_queue_full_total`, `external_payload_used_total`, `thread_uncaught_exceptions_total`, `workflow_start_error_total`), histograms (`task_poll_time_seconds`, 
`task_execute_time_seconds`, `task_update_time_seconds`, `http_api_client_request_seconds`, `task_result_size_bytes`, `workflow_input_size_bytes`), and `active_workers{taskType}` gauge. Time buckets `0.001…10s`; size buckets `100…10_000_000` bytes; labels are camelCase. - - `HttpMetricsObserver` trait and `NoopHttpMetricsObserver`. `MetricsCollector` implements `HttpMetricsObserver`; `TaskHandler::enable_metrics` automatically installs it on the underlying `ApiClient`. Transport failures record `status="0"` to match the cross-SDK convention. - - `events::exception::exception_label(&ConductorError)` produces bounded-cardinality `&'static str` labels (`"Http"`, `"Json"`, `"Server"`, etc.) used everywhere the canonical `exception` label is emitted. - - New event types in `events::task_runner_events`: `PollSkippedPaused`, `TaskUpdateCompleted`, `ThreadUncaughtException`, `WorkflowStarted`, `WorkflowStartFailure`. `WorkflowClient` and `ConductorClient` emit workflow events through the dispatcher. +- Canonical metrics: harmonized metric surface aligned with the cross-SDK catalog -- see [METRICS.md](METRICS.md) for the full catalog, configuration, and implementation details +- Bounded `uri` label on `http_api_client_request_seconds`: uses path templates (e.g. `/workflow/{workflowId}`) instead of fully-resolved paths, preventing metric cardinality explosion from dynamic IDs +- `WorkflowStatusProbe` in harness: opt-in probe (via `HARNESS_PROBE_RATE_PER_SEC`) that exercises UUID-bearing endpoints to validate template URI metrics ### Changed -- **Metrics harmonization** - label renames; no legacy mode (other Conductor SDKs that did release metrics — Python, Go, Java, JavaScript, Ruby — ship a gated switch via `WORKER_CANONICAL_METRICS`; Rust skips the gate) - - Metric labels renamed to camelCase (`task_type → taskType`, `error_type → exception`, plus `version`, `method`, `uri`, `status`, `entityName`, `operation`, `payloadType`). 
The pre-harmonization metrics that existed on `main` (snake_case labels, `conductor_*` prefix, mismatched buckets) are not preserved. - - Default `MetricsSettings::namespace` is now `""` (was implicitly `"conductor"`) to align with canonical naming. - - New top-level `METRICS.md` with the canonical catalog, bucket sets, label conventions, configuration via `TaskHandler::enable_metrics`, an "Intentional divergences" table for Rust-specific omissions (`task_ack_*`, `task_execution_queue_full_total`, `external_payload_used_total` are registered but not incremented; `worker_restart_total` and `task_execution_completed_total` are not emitted), and an explicit "No legacy/canonical gating" section. - - `DESIGN.md`, `docs/WORKER.md`, and `WORKER_COMPARISON.md` point to `METRICS.md`. +- The Rust SDK is unreleased, so the emitted metric surface is canonical on day one; there is no legacy mode or migration path +- `ApiClient` public methods accept `impl Into<ApiPath>` to pair resolved paths with bounded-cardinality metric templates -- see [METRICS.md](METRICS.md#detailed-technical-notes) diff --git a/METRICS.md b/METRICS.md index b976831..be6a3e6 100644 --- a/METRICS.md +++ b/METRICS.md @@ -16,6 +16,7 @@ names to carry forward — the emitted surface is canonical on day one. - [Intentional divergences](#intentional-divergences) - [Examples](#examples) - [Troubleshooting](#troubleshooting) +- [Detailed Technical Notes](#detailed-technical-notes) ## Quick Reference @@ -55,27 +56,27 @@ Size histograms use the canonical size bucket set: - `status` on `http_api_client_request_seconds`: HTTP status code rendered as a string (e.g. `"200"`), or `"0"` when the transport layer fails before receiving a status. -- `uri`: the full interpolated request path (server path prefix + endpoint - path) without query string (e.g. `/api/tasks/poll/batch/my_worker` when - the server URL is `http://host:8080/api`). See the note below about - Phase 4 path templating.
+- `uri`: the API-relative path template without the server URL's path prefix + (e.g. `/tasks/poll/batch/{taskType}`, not `/api/tasks/poll/batch/my_worker`). + Dynamic path segments retain their `{placeholder}` tokens so that metric + label cardinality is bounded. See the + [path template note](#uri-label--path-templates) below. - `exception`: the unqualified `ConductorError` variant name (`Http`, `Json`, `Auth`, `Server`, …), the short type name for non- `ConductorError` errors, or `"Panic"` for uncaught panics. -### `uri` label — interpolated path, not templated +### `uri` label — path templates -Like the Java / Go / Python SDKs in Phase 1 of the harmonization plan, the -`uri` label on `http_api_client_request_seconds` carries the **interpolated** -request path including the server URL's path prefix (e.g. -`/api/tasks/poll/batch/my_task` when `CONDUCTOR_SERVER_URL` ends in `/api`), -not the templated path (`/api/tasks/poll/batch/{taskType}`). -High-cardinality worker names or task IDs will therefore appear in the label. +The `uri` label on `http_api_client_request_seconds` carries the **path +template** (e.g. `/tasks/poll/batch/{taskType}`) rather than the +fully-resolved request path. The server URL's path prefix (e.g. `/api`) is +never included. This keeps metric cardinality bounded regardless of how many +unique workflow IDs, task types, or other dynamic path segments flow through +the SDK. -Operators who need bounded cardinality today should apply a Prometheus -`metric_relabel_configs` rule at scrape time that rewrites well-known -parametric path segments. Template extraction is tracked as **Phase 4** of the -canonical SDK metrics harmonization plan. +All Conductor SDKs (Go, Java, Python, Ruby, Rust) now follow this convention. +See [Detailed Technical Notes](#detailed-technical-notes) at the end of this +document for per-SDK implementation details. 
## Configuration @@ -138,11 +139,11 @@ runnable end-to-end demo that spins up workers, serves `/metrics` on a configurable port, and exercises every metric in the catalog. ```prometheus -# HTTP API client request latency -http_api_client_request_seconds_bucket{method="GET",uri="/tasks/poll/batch/my_worker",status="200",le="0.1"} 97 -http_api_client_request_seconds_bucket{method="GET",uri="/tasks/poll/batch/my_worker",status="200",le="+Inf"} 100 -http_api_client_request_seconds_count{method="GET",uri="/tasks/poll/batch/my_worker",status="200"} 100 -http_api_client_request_seconds_sum{method="GET",uri="/tasks/poll/batch/my_worker",status="200"} 8.21 +# HTTP API client request latency (uri is the path template, not the resolved path) +http_api_client_request_seconds_bucket{method="GET",uri="/tasks/poll/batch/{taskType}",status="200",le="0.1"} 97 +http_api_client_request_seconds_bucket{method="GET",uri="/tasks/poll/batch/{taskType}",status="200",le="+Inf"} 100 +http_api_client_request_seconds_count{method="GET",uri="/tasks/poll/batch/{taskType}",status="200"} 100 +http_api_client_request_seconds_sum{method="GET",uri="/tasks/poll/batch/{taskType}",status="200"} 8.21 # Task poll task_poll_total{taskType="my_worker"} 124 @@ -179,11 +180,10 @@ workflow_start_error_total{workflowType="my_wf",exception="Server"} 2 ### High cardinality -- The `uri` label on `http_api_client_request_seconds` carries the - interpolated request path, which may include worker names or task IDs. - Operators who need bounded cardinality should apply a Prometheus - `metric_relabel_configs` rule at scrape time. See the - [uri label note](#uri-label--interpolated-path-not-templated) above. +- The `uri` label on `http_api_client_request_seconds` uses path templates + (e.g. `/workflow/{workflowId}`) to keep cardinality bounded. If you see + fully-resolved paths in your metrics, verify that HTTP requests are going + through the SDK's `ApiClient` rather than a standalone HTTP client. 
- Avoid embedding user identifiers or unbounded values in task type, workflow type, or external payload labels. @@ -195,3 +195,35 @@ no `WORKER_CANONICAL_METRICS` environment variable and no factory/switchout pattern. If you operate a mixed fleet of Conductor workers across multiple SDKs, the other SDKs require `WORKER_CANONICAL_METRICS=true` to emit the same metric names and shapes that the Rust SDK emits by default. + +--- + +## Detailed Technical Notes + +### Path template `uri` label — cross-SDK implementation + +All Conductor SDKs preserve the API resource path template before path +parameter substitution and use it as the `uri` label on +`http_api_client_request_seconds`. This prevents cardinality explosion from +dynamic path segments (UUIDs, task type names, etc.) and excludes the server +URL's base path prefix. + +Each SDK implements this using the mechanism most natural to its HTTP stack: + +| SDK | Mechanism | Where template is captured | Where template is consumed | +|---|---|---|---| +| **Go** | `context.WithValue` with `pathTemplateKey` / `rawPathKey` | Each API resource method calls `metrics.WithPathTemplate(ctx, template)` before building the resolved URL. `executeCall` sets `WithRawPath` as fallback. | `metricsRoundTripper.RoundTrip` reads template from context; prefers template > rawPath > URL path. | +| **Java** | OkHttp `Request.tag(PathTemplateTag.class)` | `ConductorClient.buildRequest()` saves the un-substituted path as a `PathTemplateTag` on the request before replacing path params. | `ApiClientMetricsInterceptor` reads the tag at response time; falls back to `request.url().encodedPath()`. | +| **Python** | `metric_uri` keyword argument | `api_client.__call_api_no_retry()` saves `resource_path` before substitution and passes it as `metric_uri` through the call chain. | `CanonicalMetricsCollector.record_api_request_time()` prefers `metric_uri` over the resolved `uri`. 
| +| **Ruby** | `metric_uri` keyword argument | `ApiClient#call_api_no_retry` saves `resource_path` before substitution and passes it as `metric_uri:` to `RestClient#request`. | `RestClient#emit_http_event` uses `metric_uri` when present; falls back to `URI.parse(url).request_uri`. | +| **Rust** | `ApiPath` struct with `impl Into<ApiPath<'_>>` on public `ApiClient` methods | Static paths pass a plain `&str` (the `From<&str>` impl uses the same string for both path and metric label). Dynamic paths use `ApiPath::templated(&path, "/template/{id}")`. | `ApiClient::record_request` passes `metric_uri` directly to `HttpMetricsObserver::observe` as the `uri` label. | + +In all cases the template string is the API-relative resource path (e.g. +`/workflow/{workflowId}`), never the fully-qualified URL or the base-path- +prefixed path. This means: + +- `/workflow/{workflowId}` rather than `/api/workflow/abc-123-def` +- `/tasks/poll/batch/{taskType}` rather than `/tasks/poll/batch/my_worker` + +Endpoints without path parameters (e.g. `/tasks/search`) use the raw resource +path directly, which is already a stable template.
diff --git a/harness/src/main.rs b/harness/src/main.rs index a514337..fe72f76 100644 --- a/harness/src/main.rs +++ b/harness/src/main.rs @@ -3,6 +3,7 @@ mod simulated_task_worker; mod workflow_governor; +mod workflow_status_probe; use std::process; use std::sync::Arc; @@ -15,6 +16,7 @@ use conductor::worker::TaskHandler; use simulated_task_worker::SimulatedTaskWorker; use workflow_governor::WorkflowGovernor; +use workflow_status_probe::WorkflowStatusProbe; const WORKFLOW_NAME: &str = "rust_simulated_tasks_workflow"; @@ -129,6 +131,7 @@ async fn main() { let workflows_per_sec = env_int_or_default("HARNESS_WORKFLOWS_PER_SEC", 2); let batch_size = env_int_or_default("HARNESS_BATCH_SIZE", 20); let poll_interval_ms = env_int_or_default("HARNESS_POLL_INTERVAL_MS", 100); + let probe_rate = env_int_or_default("HARNESS_PROBE_RATE_PER_SEC", 0); let mut handler = match TaskHandler::new(config) { Ok(h) => h, @@ -166,25 +169,53 @@ async fn main() { process::exit(1); } - let governor = Arc::new(WorkflowGovernor::new( - handler.conductor_client().workflow_client(), - WORKFLOW_NAME.to_string(), - workflows_per_sec, - )); + let workflow_client = handler.conductor_client().workflow_client(); - let governor_handle = tokio::spawn({ - let governor = Arc::clone(&governor); - async move { - governor.run().await; - } - }); + // Build governor, optionally wired to the status probe + let probe_handle = if probe_rate > 0 { + let (tx, rx) = tokio::sync::mpsc::channel::<String>(512); + + let probe = WorkflowStatusProbe::new(workflow_client.clone(), rx, probe_rate); + let handle = tokio::spawn(async move { probe.run().await }); + + let governor = Arc::new( + WorkflowGovernor::new(workflow_client, WORKFLOW_NAME.to_string(), workflows_per_sec) + .with_id_sink(tx), + ); + let governor_handle = tokio::spawn({ + let governor = Arc::clone(&governor); + async move { governor.run().await } + }); + + println!( + "WorkflowStatusProbe enabled at {}/sec", + probe_rate, + ); + + Some((governor_handle, handle)) + }
else { + let governor = Arc::new(WorkflowGovernor::new( + workflow_client, + WORKFLOW_NAME.to_string(), + workflows_per_sec, + )); + let governor_handle = tokio::spawn({ + let governor = Arc::clone(&governor); + async move { governor.run().await } + }); + + Some((governor_handle, tokio::spawn(async {}))) + }; tokio::signal::ctrl_c() .await .expect("Failed to listen for ctrl-c"); println!("Shutting down..."); - governor_handle.abort(); + if let Some((gov, probe)) = probe_handle { + gov.abort(); + probe.abort(); + } if let Err(e) = handler.stop().await { eprintln!("Error stopping workers: {}", e); diff --git a/harness/src/workflow_governor.rs b/harness/src/workflow_governor.rs index 6ffe762..c489689 100644 --- a/harness/src/workflow_governor.rs +++ b/harness/src/workflow_governor.rs @@ -3,12 +3,14 @@ use conductor::client::WorkflowClient; use conductor::models::StartWorkflowRequest; +use tokio::sync::mpsc; use tokio::time::{self, Duration}; pub struct WorkflowGovernor { workflow_client: WorkflowClient, workflow_name: String, workflows_per_second: usize, + id_sink: Option<mpsc::Sender<String>>, } impl WorkflowGovernor { @@ -21,9 +23,15 @@ impl WorkflowGovernor { workflow_client, workflow_name, workflows_per_second, + id_sink: None, } } + pub fn with_id_sink(mut self, tx: mpsc::Sender<String>) -> Self { + self.id_sink = Some(tx); + self + } + pub async fn run(&self) { println!( "WorkflowGovernor started: workflow={}, rate={}/sec", @@ -41,9 +49,16 @@ impl WorkflowGovernor { async fn start_batch(&self) { for _ in 0..self.workflows_per_second { let request = StartWorkflowRequest::new(&self.workflow_name).with_version(1); - if let Err(e) = self.workflow_client.start_workflow(&request).await { - println!("Governor: error starting workflows: {}", e); - return; + match self.workflow_client.start_workflow(&request).await { + Ok(workflow_id) => { + if let Some(ref tx) = self.id_sink { + let _ = tx.try_send(workflow_id); + } + } + Err(e) => { + println!("Governor: error starting workflows: {}", e); +
return; + } } } println!( diff --git a/harness/src/workflow_status_probe.rs b/harness/src/workflow_status_probe.rs new file mode 100644 index 0000000..ece6829 --- /dev/null +++ b/harness/src/workflow_status_probe.rs @@ -0,0 +1,87 @@ +// Copyright 2024 Conductor OSS +// Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. + +use std::collections::VecDeque; +use conductor::client::WorkflowClient; +use tokio::sync::mpsc; +use tokio::time::{self, Duration}; + +const MAX_IDS: usize = 256; + +/// Exercises UUID-bearing workflow endpoints to generate high-cardinality +/// traffic, validating that the path template system keeps the `uri` metric +/// label bounded. Off by default; enabled via `HARNESS_PROBE_RATE_PER_SEC`. +pub struct WorkflowStatusProbe { + workflow_client: WorkflowClient, + rx: mpsc::Receiver, + rate_per_sec: usize, +} + +impl WorkflowStatusProbe { + pub fn new( + workflow_client: WorkflowClient, + rx: mpsc::Receiver, + rate_per_sec: usize, + ) -> Self { + Self { + workflow_client, + rx, + rate_per_sec, + } + } + + pub async fn run(mut self) { + println!( + "WorkflowStatusProbe started: rate={}/sec", + self.rate_per_sec, + ); + + let mut ids: VecDeque = VecDeque::with_capacity(MAX_IDS); + let mut tick_count: u64 = 0; + let mut interval = time::interval(Duration::from_secs(1)); + + loop { + interval.tick().await; + + // Drain any new IDs from the governor + while let Ok(id) = self.rx.try_recv() { + if ids.len() >= MAX_IDS { + ids.pop_front(); + } + ids.push_back(id); + } + + if ids.is_empty() { + continue; + } + + for i in 0..self.rate_per_sec { + let idx = ((tick_count as usize) * self.rate_per_sec + i) % ids.len(); + let id = &ids[idx]; + + // Alternate between get_workflow and get_workflow_status + if (tick_count as usize + i) % 2 == 0 { + match self.workflow_client.get_workflow(id, false).await { + Ok(_) => {} + Err(e) => { + println!("Probe: get_workflow error: {}", e); + } + } + } else { + match self 
+ .workflow_client + .get_workflow_status(id, false, false) + .await + { + Ok(_) => {} + Err(e) => { + println!("Probe: get_workflow_status error: {}", e); + } + } + } + } + + tick_count += 1; + } + } +} diff --git a/src/client/authorization_client.rs b/src/client/authorization_client.rs index a4cd1ff..33e80a4 100644 --- a/src/client/authorization_client.rs +++ b/src/client/authorization_client.rs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::{ AccessKey, AccessType, ConductorApplication, ConductorUser, CreateOrUpdateApplicationRequest, CreatedAccessKey, GrantedPermission, Group, MetadataTag, SubjectRef, TargetRef, @@ -31,20 +31,18 @@ impl AuthorizationClient { &self, request: &CreateOrUpdateApplicationRequest, ) -> Result { - let path = "/applications"; - self.api.post(path, request).await + self.api.post("/applications", request).await } /// Get an application by ID pub async fn get_application(&self, application_id: &str) -> Result { let path = format!("/applications/{}", application_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/applications/{applicationId}")).await } /// List all applications pub async fn list_applications(&self) -> Result> { - let path = "/applications"; - self.api.get(path).await + self.api.get("/applications").await } /// Update an application @@ -54,19 +52,19 @@ impl AuthorizationClient { application_id: &str, ) -> Result { let path = format!("/applications/{}", application_id); - self.api.put(&path, request).await + self.api.put(ApiPath::templated(&path, "/applications/{applicationId}"), request).await } /// Delete an application pub async fn delete_application(&self, application_id: &str) -> Result<()> { let path = format!("/applications/{}", application_id); - self.api.delete_no_content(&path).await + 
self.api.delete_no_content(ApiPath::templated(&path, "/applications/{applicationId}")).await } /// Get application ID by access key ID pub async fn get_app_by_access_key_id(&self, access_key_id: &str) -> Result { let path = format!("/applications/key/{}", access_key_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/applications/key/{accessKeyId}")).await } /// Add a role to application user @@ -76,7 +74,7 @@ impl AuthorizationClient { role: &str, ) -> Result<()> { let path = format!("/applications/{}/roles/{}", application_id, role); - self.api.post_no_body_no_response(&path).await + self.api.post_no_body_no_response(ApiPath::templated(&path, "/applications/{applicationId}/roles/{role}")).await } /// Remove a role from application user @@ -86,7 +84,7 @@ impl AuthorizationClient { role: &str, ) -> Result<()> { let path = format!("/applications/{}/roles/{}", application_id, role); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/applications/{applicationId}/roles/{role}")).await } /// Set tags for an application @@ -96,13 +94,13 @@ impl AuthorizationClient { application_id: &str, ) -> Result<()> { let path = format!("/applications/{}/tags", application_id); - self.api.put_no_response(&path, tags).await + self.api.put_no_response(ApiPath::templated(&path, "/applications/{applicationId}/tags"), tags).await } /// Get tags for an application pub async fn get_application_tags(&self, application_id: &str) -> Result> { let path = format!("/applications/{}/tags", application_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/applications/{applicationId}/tags")).await } /// Delete tags from an application @@ -112,19 +110,19 @@ impl AuthorizationClient { application_id: &str, ) -> Result<()> { let path = format!("/applications/{}/tags", application_id); - self.api.delete_with_body(&path, tags).await + self.api.delete_with_body(ApiPath::templated(&path, 
"/applications/{applicationId}/tags"), tags).await } /// Create an access key for an application pub async fn create_access_key(&self, application_id: &str) -> Result { let path = format!("/applications/{}/accessKeys", application_id); - self.api.post_no_body(&path).await + self.api.post_no_body(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys")).await } /// Get access keys for an application pub async fn get_access_keys(&self, application_id: &str) -> Result> { let path = format!("/applications/{}/accessKeys", application_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys")).await } /// Toggle the status of an access key @@ -137,13 +135,13 @@ impl AuthorizationClient { "/applications/{}/accessKeys/{}/status", application_id, key_id ); - self.api.post_no_body(&path).await + self.api.post_no_body(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys/{keyId}/status")).await } /// Delete an access key pub async fn delete_access_key(&self, application_id: &str, key_id: &str) -> Result<()> { let path = format!("/applications/{}/accessKeys/{}", application_id, key_id); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys/{keyId}")).await } // =========================== @@ -157,29 +155,28 @@ impl AuthorizationClient { user_id: &str, ) -> Result { let path = format!("/users/{}", user_id); - self.api.put(&path, request).await + self.api.put(ApiPath::templated(&path, "/users/{userId}"), request).await } /// Get a user by ID pub async fn get_user(&self, user_id: &str) -> Result { let path = format!("/users/{}", user_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/users/{userId}")).await } /// List all users pub async fn list_users(&self, apps: bool) -> Result> { - let path = "/users"; if apps { - self.api.get_with_params(path, &[("apps", "true")]).await + 
self.api.get_with_params("/users", &[("apps", "true")]).await } else { - self.api.get(path).await + self.api.get("/users").await } } /// Delete a user pub async fn delete_user(&self, user_id: &str) -> Result<()> { let path = format!("/users/{}", user_id); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/users/{userId}")).await } /// Get permissions granted to a user @@ -188,7 +185,7 @@ impl AuthorizationClient { user_id: &str, ) -> Result> { let path = format!("/users/{}/permissions", user_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/users/{userId}/permissions")).await } /// Check if user has permissions over a target @@ -202,7 +199,7 @@ impl AuthorizationClient { "/users/{}/permissions/{}/{}", user_id, target_type, target_id ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/users/{userId}/permissions/{targetType}/{targetId}")).await } // =========================== @@ -216,25 +213,24 @@ impl AuthorizationClient { group_id: &str, ) -> Result { let path = format!("/groups/{}", group_id); - self.api.put(&path, request).await + self.api.put(ApiPath::templated(&path, "/groups/{groupId}"), request).await } /// Get a group by ID pub async fn get_group(&self, group_id: &str) -> Result { let path = format!("/groups/{}", group_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/groups/{groupId}")).await } /// List all groups pub async fn list_groups(&self) -> Result> { - let path = "/groups"; - self.api.get(path).await + self.api.get("/groups").await } /// Delete a group pub async fn delete_group(&self, group_id: &str) -> Result<()> { let path = format!("/groups/{}", group_id); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/groups/{groupId}")).await } /// Get permissions granted to a group @@ -243,37 +239,37 @@ impl AuthorizationClient { group_id: &str, ) -> Result> { let path = 
format!("/groups/{}/permissions", group_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/groups/{groupId}/permissions")).await } /// Add a user to a group pub async fn add_user_to_group(&self, group_id: &str, user_id: &str) -> Result<()> { let path = format!("/groups/{}/users/{}", group_id, user_id); - self.api.post_no_body_no_response(&path).await + self.api.post_no_body_no_response(ApiPath::templated(&path, "/groups/{groupId}/users/{userId}")).await } /// Add multiple users to a group pub async fn add_users_to_group(&self, group_id: &str, user_ids: &[String]) -> Result<()> { let path = format!("/groups/{}/users", group_id); - self.api.post_no_response(&path, user_ids).await + self.api.post_no_response(ApiPath::templated(&path, "/groups/{groupId}/users"), user_ids).await } /// Get all users in a group pub async fn get_users_in_group(&self, group_id: &str) -> Result> { let path = format!("/groups/{}/users", group_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/groups/{groupId}/users")).await } /// Remove a user from a group pub async fn remove_user_from_group(&self, group_id: &str, user_id: &str) -> Result<()> { let path = format!("/groups/{}/users/{}", group_id, user_id); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/groups/{groupId}/users/{userId}")).await } /// Remove multiple users from a group pub async fn remove_users_from_group(&self, group_id: &str, user_ids: &[String]) -> Result<()> { let path = format!("/groups/{}/users", group_id); - self.api.delete_with_body(&path, user_ids).await + self.api.delete_with_body(ApiPath::templated(&path, "/groups/{groupId}/users"), user_ids).await } // =========================== @@ -287,13 +283,12 @@ impl AuthorizationClient { target: &TargetRef, access: &[AccessType], ) -> Result<()> { - let path = "/auth/authorization"; let body = serde_json::json!({ "subject": subject, "target": target, "access": access }); - 
self.api.post_no_response(path, &body).await + self.api.post_no_response("/auth/authorization", &body).await } /// Get permissions for a target @@ -308,7 +303,7 @@ impl AuthorizationClient { target_type_str.trim_matches('"'), target.id ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/auth/authorization/{targetType}/{targetId}")).await } /// Remove permissions from a subject over a target @@ -318,13 +313,12 @@ impl AuthorizationClient { target: &TargetRef, access: &[AccessType], ) -> Result<()> { - let path = "/auth/authorization"; let body = serde_json::json!({ "subject": subject, "target": target, "access": access }); - self.api.delete_with_body(path, &body).await + self.api.delete_with_body("/auth/authorization", &body).await } // =========================== @@ -333,38 +327,33 @@ impl AuthorizationClient { /// List all roles pub async fn list_all_roles(&self) -> Result> { - let path = "/roles"; - self.api.get(path).await + self.api.get("/roles").await } /// List system roles pub async fn list_system_roles(&self) -> Result> { - let path = "/roles/system"; - self.api.get(path).await + self.api.get("/roles/system").await } /// List custom roles pub async fn list_custom_roles(&self) -> Result> { - let path = "/roles/custom"; - self.api.get(path).await + self.api.get("/roles/custom").await } /// List available permissions pub async fn list_available_permissions(&self) -> Result> { - let path = "/roles/permissions"; - self.api.get(path).await + self.api.get("/roles/permissions").await } /// Create a custom role pub async fn create_role(&self, request: &serde_json::Value) -> Result { - let path = "/roles"; - self.api.post(path, request).await + self.api.post("/roles", request).await } /// Get a role by name pub async fn get_role(&self, role_name: &str) -> Result { let path = format!("/roles/{}", role_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/roles/{roleName}")).await } /// Update a custom role @@ -374,13 
+363,13 @@ impl AuthorizationClient { request: &serde_json::Value, ) -> Result { let path = format!("/roles/{}", role_name); - self.api.put(&path, request).await + self.api.put(ApiPath::templated(&path, "/roles/{roleName}"), request).await } /// Delete a custom role pub async fn delete_role(&self, role_name: &str) -> Result<()> { let path = format!("/roles/{}", role_name); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/roles/{roleName}")).await } // =========================== @@ -389,8 +378,7 @@ impl AuthorizationClient { /// Get user info from the current token pub async fn get_user_info_from_token(&self) -> Result { - let path = "/auth/userInfo"; - self.api.get(path).await + self.api.get("/auth/userInfo").await } /// Generate a token using access key credentials @@ -399,12 +387,11 @@ impl AuthorizationClient { key_id: &str, key_secret: &str, ) -> Result { - let path = "/token"; let body = serde_json::json!({ "keyId": key_id, "keySecret": key_secret }); - self.api.post(path, &body).await + self.api.post("/token", &body).await } // =========================== @@ -416,20 +403,18 @@ impl AuthorizationClient { &self, auth_config: &serde_json::Value, ) -> Result { - let path = "/api-gateway/auth-config"; - self.api.post(path, auth_config).await + self.api.post("/api-gateway/auth-config", auth_config).await } /// Get API Gateway authentication configuration by ID pub async fn get_gateway_auth_config(&self, config_id: &str) -> Result { let path = format!("/api-gateway/auth-config/{}", config_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/api-gateway/auth-config/{configId}")).await } /// List all API Gateway authentication configurations pub async fn list_gateway_auth_configs(&self) -> Result> { - let path = "/api-gateway/auth-config"; - self.api.get(path).await + self.api.get("/api-gateway/auth-config").await } /// Update API Gateway authentication configuration @@ -439,13 +424,13 @@ impl 
AuthorizationClient { auth_config: &serde_json::Value, ) -> Result { let path = format!("/api-gateway/auth-config/{}", config_id); - self.api.put(&path, auth_config).await + self.api.put(ApiPath::templated(&path, "/api-gateway/auth-config/{configId}"), auth_config).await } /// Delete API Gateway authentication configuration pub async fn delete_gateway_auth_config(&self, config_id: &str) -> Result<()> { let path = format!("/api-gateway/auth-config/{}", config_id); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/api-gateway/auth-config/{configId}")).await } } diff --git a/src/client/event_client.rs b/src/client/event_client.rs index 2bdc467..183a316 100644 --- a/src/client/event_client.rs +++ b/src/client/event_client.rs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use serde::{Deserialize, Serialize}; /// Client for event operations (queue configurations) @@ -73,7 +73,7 @@ impl EventClient { "/event/queue/config/{}/{}", queue_config.queue_type, queue_config.queue_name ); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/event/queue/config/{queueType}/{queueName}")).await } /// Get a Kafka queue configuration by topic @@ -91,7 +91,7 @@ impl EventClient { queue_name: &str, ) -> Result { let path = format!("/event/queue/config/{}/{}", queue_type, queue_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/event/queue/config/{queueType}/{queueName}")).await } /// Create or update a queue configuration @@ -101,14 +101,13 @@ impl EventClient { queue_config.queue_type, queue_config.queue_name ); self.api - .put_no_response(&path, &queue_config.configuration) + .put_no_response(ApiPath::templated(&path, "/event/queue/config/{queueType}/{queueName}"), &queue_config.configuration) 
.await } /// Get all queue configurations pub async fn get_all_queue_configurations(&self) -> Result> { - let path = "/event/queue/config"; - self.api.get(path).await + self.api.get("/event/queue/config").await } /// Get event handlers for a specific event @@ -122,31 +121,28 @@ impl EventClient { urlencoding::encode(event), active_only ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/event/{event}")).await } /// Get all event handlers pub async fn get_all_event_handlers(&self) -> Result> { - let path = "/event"; - self.api.get(path).await + self.api.get("/event").await } /// Register an event handler pub async fn register_event_handler(&self, event_handler: &serde_json::Value) -> Result<()> { - let path = "/event"; - self.api.post_no_response(path, event_handler).await + self.api.post_no_response("/event", event_handler).await } /// Update an event handler pub async fn update_event_handler(&self, event_handler: &serde_json::Value) -> Result<()> { - let path = "/event"; - self.api.put_no_response(path, event_handler).await + self.api.put_no_response("/event", event_handler).await } /// Remove an event handler pub async fn remove_event_handler(&self, name: &str) -> Result<()> { let path = format!("/event/{}", name); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/event/{event}")).await } } diff --git a/src/client/integration_client.rs b/src/client/integration_client.rs index 622cc2d..66bd9f4 100644 --- a/src/client/integration_client.rs +++ b/src/client/integration_client.rs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. 
use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::{ Integration, IntegrationApi, IntegrationApiUpdate, IntegrationUpdate, MetadataTag, PromptTemplate, @@ -31,7 +31,7 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/prompt/{}", ai_integration, model_name, prompt_name ); - self.api.post_no_body_no_response(&path).await + self.api.post_no_body_no_response(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/prompt/{promptName}")).await } /// Delete a specific integration API @@ -44,13 +44,13 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}", integration_name, api_name ); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}")).await } /// Delete an integration pub async fn delete_integration(&self, integration_name: &str) -> Result<()> { let path = format!("/integrations/provider/{}", integration_name); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/integrations/provider/{name}")).await } /// Get an integration API @@ -63,7 +63,7 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}", integration_name, api_name ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}")).await } /// Get all APIs for an integration @@ -72,19 +72,18 @@ impl IntegrationClient { integration_name: &str, ) -> Result> { let path = format!("/integrations/provider/{}/integration", integration_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration")).await } /// Get an integration pub async fn get_integration(&self, integration_name: &str) -> Result { let path = format!("/integrations/provider/{}", integration_name); - self.api.get(&path).await + 
self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}")).await } /// Get all integrations pub async fn get_integrations(&self) -> Result> { - let path = "/integrations/provider"; - self.api.get(path).await + self.api.get("/integrations/provider").await } /// Get prompts associated with an integration @@ -97,7 +96,7 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/prompt", ai_integration, model_name ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/prompt")).await } /// Get token usage for an integration API @@ -110,7 +109,7 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/metrics", integration_name, api_name ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/metrics")).await } /// Get token usage for an integration provider @@ -119,7 +118,7 @@ impl IntegrationClient { name: &str, ) -> Result { let path = format!("/integrations/provider/{}/metrics", name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/metrics")).await } /// Save (create or update) an integration API @@ -133,7 +132,7 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}", integration_name, api_name ); - self.api.put_no_response(&path, api_details).await + self.api.put_no_response(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}"), api_details).await } /// Save (create or update) an integration @@ -143,7 +142,7 @@ impl IntegrationClient { integration_details: &IntegrationUpdate, ) -> Result<()> { let path = format!("/integrations/provider/{}", integration_name); - self.api.put_no_response(&path, integration_details).await + self.api.put_no_response(ApiPath::templated(&path, "/integrations/provider/{name}"), integration_details).await } // Tags @@ -159,7 +158,7 @@ impl IntegrationClient { 
"/integrations/provider/{}/integration/{}/tags", integration_name, api_name ); - self.api.delete_with_body(&path, tags).await + self.api.delete_with_body(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/tags"), tags).await } /// Delete a tag from an integration provider @@ -169,7 +168,7 @@ impl IntegrationClient { name: &str, ) -> Result<()> { let path = format!("/integrations/provider/{}/tags", name); - self.api.delete_with_body(&path, tags).await + self.api.delete_with_body(ApiPath::templated(&path, "/integrations/provider/{name}/tags"), tags).await } /// Set tags for an integration @@ -183,7 +182,7 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/tags", integration_name, api_name ); - self.api.put_no_response(&path, tags).await + self.api.put_no_response(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/tags"), tags).await } /// Set tags for an integration provider @@ -193,7 +192,7 @@ impl IntegrationClient { name: &str, ) -> Result<()> { let path = format!("/integrations/provider/{}/tags", name); - self.api.put_no_response(&path, tags).await + self.api.put_no_response(ApiPath::templated(&path, "/integrations/provider/{name}/tags"), tags).await } /// Get tags for an integration @@ -206,13 +205,13 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/tags", integration_name, api_name ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/tags")).await } /// Get tags for an integration provider pub async fn get_tags_for_integration_provider(&self, name: &str) -> Result> { let path = format!("/integrations/provider/{}/tags", name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/tags")).await } /// Get available APIs for an integration provider @@ -221,19 +220,17 @@ impl IntegrationClient { integration_name: &str, ) -> Result> { let path = 
format!("/integrations/provider/{}/models", integration_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/models")).await } /// Get all integration provider definitions pub async fn get_integration_provider_defs(&self) -> Result> { - let path = "/integrations/def"; - self.api.get(path).await + self.api.get("/integrations/def").await } /// Get all providers and their integrations pub async fn get_providers_and_integrations(&self) -> Result { - let path = "/integrations"; - self.api.get(path).await + self.api.get("/integrations").await } } diff --git a/src/client/metadata_client.rs b/src/client/metadata_client.rs index df4515a..0bf1c2c 100644 --- a/src/client/metadata_client.rs +++ b/src/client/metadata_client.rs @@ -4,7 +4,7 @@ use tracing::{debug, info}; use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::{TaskDef, WorkflowDef}; /// Client for metadata operations (workflow and task definitions) @@ -82,13 +82,13 @@ impl MetadataClient { format!("/metadata/workflow/{}", name) }; - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/metadata/workflow/{name}")).await } /// Get all versions of a workflow definition pub async fn get_all_workflow_def_versions(&self, name: &str) -> Result> { let path = format!("/metadata/workflow/{}/versions", name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/metadata/workflow/{name}/versions")).await } /// Get all workflow definitions @@ -104,7 +104,7 @@ impl MetadataClient { /// Delete a workflow definition pub async fn delete_workflow_def(&self, name: &str, version: i32) -> Result<()> { let path = format!("/metadata/workflow/{}/{}", name, version); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/metadata/workflow/{name}/{version}")).await } // ==================== Task Definitions ==================== @@ -145,7 +145,7 @@ impl 
MetadataClient { /// Get a task definition by name pub async fn get_task_def(&self, name: &str) -> Result { let path = format!("/metadata/taskdefs/{}", name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/metadata/taskdefs/{name}")).await } /// Get all task definitions @@ -156,7 +156,7 @@ impl MetadataClient { /// Delete a task definition pub async fn delete_task_def(&self, name: &str) -> Result<()> { let path = format!("/metadata/taskdefs/{}", name); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/metadata/taskdefs/{name}")).await } /// Check if a task definition exists diff --git a/src/client/orkes_metadata_client.rs b/src/client/orkes_metadata_client.rs index 83b135c..b0339cb 100644 --- a/src/client/orkes_metadata_client.rs +++ b/src/client/orkes_metadata_client.rs @@ -5,7 +5,7 @@ use std::ops::Deref; use tracing::{debug, info}; use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::MetadataTag; use super::MetadataClient; @@ -62,7 +62,7 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/workflow/{}/tags", workflow_name); - self.api.post_no_response(&path, &[tag]).await?; + self.api.post_no_response(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), &[tag]).await?; info!( workflow_name = %workflow_name, @@ -76,7 +76,7 @@ impl OrkesMetadataClient { /// Get all tags for a workflow definition pub async fn get_workflow_tags(&self, workflow_name: &str) -> Result> { let path = format!("/metadata/workflow/{}/tags", workflow_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags")).await } /// Set tags for a workflow definition (replaces existing tags) @@ -88,7 +88,7 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/workflow/{}/tags", workflow_name); - self.api.put_no_response(&path, tags).await?; + 
self.api.put_no_response(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), tags).await?; info!( workflow_name = %workflow_name, @@ -108,7 +108,7 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/workflow/{}/tags", workflow_name); - self.api.delete_with_body(&path, &[tag]).await?; + self.api.delete_with_body(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), &[tag]).await?; info!( workflow_name = %workflow_name, @@ -130,7 +130,7 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.post_no_response(&path, &[tag]).await?; + self.api.post_no_response(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), &[tag]).await?; info!( task_name = %task_name, @@ -144,7 +144,7 @@ impl OrkesMetadataClient { /// Get all tags for a task definition pub async fn get_task_tags(&self, task_name: &str) -> Result> { let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags")).await } /// Set tags for a task definition (replaces existing tags) @@ -156,7 +156,7 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.put_no_response(&path, tags).await?; + self.api.put_no_response(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), tags).await?; info!( task_name = %task_name, @@ -176,7 +176,7 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.delete_with_body(&path, &[tag]).await?; + self.api.delete_with_body(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), &[tag]).await?; info!( task_name = %task_name, diff --git a/src/client/prompt_client.rs b/src/client/prompt_client.rs index cf559db..b8ba39a 100644 --- a/src/client/prompt_client.rs +++ b/src/client/prompt_client.rs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. 
See LICENSE in the project root for license information. use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::{MetadataTag, PromptTemplate}; use std::collections::HashMap; @@ -70,7 +70,7 @@ impl PromptClient { let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); self.api - .post_raw_with_params(&path, prompt_template, ¶ms_ref) + .post_raw_with_params(ApiPath::templated(&path, "/prompts/{promptName}"), prompt_template, ¶ms_ref) .await } @@ -102,7 +102,7 @@ impl PromptClient { let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); self.api - .put_raw_with_params(&path, prompt_template, ¶ms_ref) + .put_raw_with_params(ApiPath::templated(&path, "/prompts/{promptName}/{version}"), prompt_template, ¶ms_ref) .await } @@ -113,14 +113,14 @@ impl PromptClient { /// * `new_version` - If true, creates new versions for existing prompts; if false, updates existing versions pub async fn save_prompts(&self, prompts: &[PromptTemplate], new_version: bool) -> Result<()> { let path = format!("/prompts?newVersion={}", new_version); - let _: serde_json::Value = self.api.post(&path, prompts).await?; + let _: serde_json::Value = self.api.post(ApiPath::templated(&path, "/prompts"), prompts).await?; Ok(()) } /// Retrieves the latest version of a prompt template by name pub async fn get_prompt(&self, prompt_name: &str) -> Result { let path = format!("/prompts/{}", prompt_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/prompts/{promptName}")).await } /// Retrieves a specific version of a prompt template @@ -134,7 +134,7 @@ impl PromptClient { version: i32, ) -> Result { let path = format!("/prompts/{}/{}", prompt_name, version); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/prompts/{promptName}/{version}")).await } /// Retrieves all versions of a specific prompt template @@ -146,19 +146,18 @@ impl 
PromptClient { /// List of all versions of the prompt template, ordered by version number pub async fn get_all_prompt_versions(&self, prompt_name: &str) -> Result> { let path = format!("/prompts/{}/versions", prompt_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/prompts/{promptName}/versions")).await } /// Retrieves all prompt templates (latest versions only) pub async fn get_prompts(&self) -> Result> { - let path = "/prompts"; - self.api.get(path).await + self.api.get("/prompts").await } /// Deletes all versions of a prompt template pub async fn delete_prompt(&self, prompt_name: &str) -> Result<()> { let path = format!("/prompts/{}", prompt_name); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/prompts/{promptName}")).await } /// Deletes a specific version of a prompt template @@ -168,7 +167,7 @@ impl PromptClient { /// * `version` - The version number to delete pub async fn delete_prompt_version(&self, prompt_name: &str, version: i32) -> Result<()> { let path = format!("/prompts/{}/{}", prompt_name, version); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/prompts/{promptName}/{version}")).await } // ==================== Tag management ==================== @@ -179,7 +178,7 @@ impl PromptClient { prompt_name: &str, ) -> Result> { let path = format!("/prompts/{}/tags", prompt_name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/prompts/{promptName}/tags")).await } /// Adds or updates tags for a prompt template @@ -189,7 +188,7 @@ impl PromptClient { tags: &[MetadataTag], ) -> Result<()> { let path = format!("/prompts/{}/tags", prompt_name); - self.api.put_no_response(&path, tags).await + self.api.put_no_response(ApiPath::templated(&path, "/prompts/{promptName}/tags"), tags).await } /// Deletes specific tags from a prompt template @@ -199,7 +198,7 @@ impl PromptClient { tags: &[MetadataTag], ) -> Result<()> { 
let path = format!("/prompts/{}/tags", prompt_name); - self.api.delete_with_body(&path, tags).await + self.api.delete_with_body(ApiPath::templated(&path, "/prompts/{promptName}/tags"), tags).await } // ==================== Testing ==================== @@ -230,8 +229,6 @@ impl PromptClient { top_p: f32, stop_words: Option<&[String]>, ) -> Result { - let path = "/prompts/test"; - let mut body = serde_json::json!({ "prompt": prompt_text, "promptVariables": variables, @@ -245,7 +242,7 @@ impl PromptClient { body["stopWords"] = serde_json::json!(sw); } - self.api.post(path, &body).await + self.api.post("/prompts/test", &body).await } } diff --git a/src/client/scheduler_client.rs b/src/client/scheduler_client.rs index 5ee27ff..d9a7fb9 100644 --- a/src/client/scheduler_client.rs +++ b/src/client/scheduler_client.rs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::{ MetadataTag, SaveScheduleRequest, SearchResultWorkflowScheduleExecution, WorkflowSchedule, }; @@ -21,14 +21,13 @@ impl SchedulerClient { /// Save (create or update) a schedule pub async fn save_schedule(&self, request: &SaveScheduleRequest) -> Result<()> { - let path = "/scheduler/schedules"; - self.api.post_no_response(path, request).await + self.api.post_no_response("/scheduler/schedules", request).await } /// Get a schedule by name pub async fn get_schedule(&self, name: &str) -> Result { let path = format!("/scheduler/schedules/{}", name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/scheduler/schedules/{name}")).await } /// Get all schedules, optionally filtered by workflow name @@ -36,13 +35,12 @@ impl SchedulerClient { &self, workflow_name: Option<&str>, ) -> Result> { - let path = "/scheduler/schedules"; if let Some(wf_name) = workflow_name { self.api - .get_with_params(path, 
&[("workflowName", wf_name)]) + .get_with_params("/scheduler/schedules", &[("workflowName", wf_name)]) .await } else { - self.api.get(path).await + self.api.get("/scheduler/schedules").await } } @@ -54,7 +52,6 @@ impl SchedulerClient { schedule_end_time: Option, limit: Option, ) -> Result> { - let path = "/scheduler/nextFewSchedules"; let mut params: Vec<(&str, String)> = vec![("cronExpression", cron_expression.to_string())]; if let Some(start) = schedule_start_time { @@ -68,37 +65,35 @@ impl SchedulerClient { } let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.get_with_params(path, ¶ms_ref).await + self.api.get_with_params("/scheduler/nextFewSchedules", ¶ms_ref).await } /// Delete a schedule pub async fn delete_schedule(&self, name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}", name); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/scheduler/schedules/{name}")).await } /// Pause a schedule pub async fn pause_schedule(&self, name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/pause", name); - self.api.get_no_response(&path).await + self.api.get_no_response(ApiPath::templated(&path, "/scheduler/schedules/{name}/pause")).await } /// Pause all schedules pub async fn pause_all_schedules(&self) -> Result<()> { - let path = "/scheduler/admin/pause"; - self.api.get_no_response(path).await + self.api.get_no_response("/scheduler/admin/pause").await } /// Resume a schedule pub async fn resume_schedule(&self, name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/resume", name); - self.api.get_no_response(&path).await + self.api.get_no_response(ApiPath::templated(&path, "/scheduler/schedules/{name}/resume")).await } /// Resume all schedules pub async fn resume_all_schedules(&self) -> Result<()> { - let path = "/scheduler/admin/resume"; - self.api.get_no_response(path).await + 
self.api.get_no_response("/scheduler/admin/resume").await } /// Search schedule executions @@ -110,7 +105,6 @@ impl SchedulerClient { free_text: Option<&str>, query: Option<&str>, ) -> Result { - let path = "/scheduler/search/executions"; let mut params: Vec<(&str, String)> = Vec::new(); if let Some(s) = start { @@ -130,31 +124,30 @@ impl SchedulerClient { } let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.get_with_params(path, ¶ms_ref).await + self.api.get_with_params("/scheduler/search/executions", ¶ms_ref).await } /// Requeue all execution records pub async fn requeue_all_execution_records(&self) -> Result<()> { - let path = "/scheduler/admin/requeue"; - self.api.get_no_response(path).await + self.api.get_no_response("/scheduler/admin/requeue").await } /// Set tags for a schedule pub async fn set_scheduler_tags(&self, tags: &[MetadataTag], name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/tags", name); - self.api.put_no_response(&path, tags).await + self.api.put_no_response(ApiPath::templated(&path, "/scheduler/schedules/{name}/tags"), tags).await } /// Get tags for a schedule pub async fn get_scheduler_tags(&self, name: &str) -> Result> { let path = format!("/scheduler/schedules/{}/tags", name); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/scheduler/schedules/{name}/tags")).await } /// Delete tags from a schedule pub async fn delete_scheduler_tags(&self, tags: &[MetadataTag], name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/tags", name); - self.api.delete_with_body(&path, tags).await + self.api.delete_with_body(ApiPath::templated(&path, "/scheduler/schedules/{name}/tags"), tags).await } } diff --git a/src/client/schema_client.rs b/src/client/schema_client.rs index 433c9f2..197035b 100644 --- a/src/client/schema_client.rs +++ b/src/client/schema_client.rs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. 
See LICENSE in the project root for license information. use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::SchemaDef; /// Client for managing schema definitions @@ -19,36 +19,34 @@ impl SchemaClient { /// Register a new schema pub async fn register_schema(&self, schema: &SchemaDef) -> Result<()> { - let path = "/schema"; - self.api.post_no_response(path, schema).await + self.api.post_no_response("/schema", schema).await } /// Get a schema by name and version pub async fn get_schema(&self, schema_name: &str, version: i32) -> Result { let path = format!("/schema/{}", schema_name); self.api - .get_with_params(&path, &[("version", &version.to_string())]) + .get_with_params(ApiPath::templated(&path, "/schema/{schemaName}"), &[("version", &version.to_string())]) .await } /// Get all schemas pub async fn get_all_schemas(&self) -> Result> { - let path = "/schema"; - self.api.get(path).await + self.api.get("/schema").await } /// Delete a schema by name and version pub async fn delete_schema(&self, schema_name: &str, version: i32) -> Result<()> { let path = format!("/schema/{}", schema_name); self.api - .delete_with_params(&path, &[("version", &version.to_string())]) + .delete_with_params(ApiPath::templated(&path, "/schema/{schemaName}"), &[("version", &version.to_string())]) .await } /// Delete all versions of a schema by name pub async fn delete_schema_by_name(&self, schema_name: &str) -> Result<()> { let path = format!("/schema/{}/all", schema_name); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/schema/{schemaName}/all")).await } } diff --git a/src/client/secret_client.rs b/src/client/secret_client.rs index 8a45156..1e57f9a 100644 --- a/src/client/secret_client.rs +++ b/src/client/secret_client.rs @@ -2,7 +2,7 @@ // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. 
use crate::error::Result; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::MetadataTag; use std::collections::HashSet; @@ -21,58 +21,56 @@ impl SecretClient { /// Store a secret pub async fn put_secret(&self, key: &str, value: &str) -> Result<()> { let path = format!("/secrets/{}", key); - self.api.put_raw(&path, value).await + self.api.put_raw(ApiPath::templated(&path, "/secrets/{key}"), value).await } /// Get a secret value pub async fn get_secret(&self, key: &str) -> Result { let path = format!("/secrets/{}", key); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/secrets/{key}")).await } /// List all secret names pub async fn list_all_secret_names(&self) -> Result> { - let path = "/secrets"; - let names: Vec = self.api.get(path).await?; + let names: Vec = self.api.get("/secrets").await?; Ok(names.into_iter().collect()) } /// List secrets that the user can grant access to pub async fn list_secrets_that_user_can_grant_access_to(&self) -> Result> { - let path = "/secrets"; self.api - .get_with_params(path, &[("grantable", "true")]) + .get_with_params("/secrets", &[("grantable", "true")]) .await } /// Delete a secret pub async fn delete_secret(&self, key: &str) -> Result<()> { let path = format!("/secrets/{}", key); - self.api.delete_no_content(&path).await + self.api.delete_no_content(ApiPath::templated(&path, "/secrets/{key}")).await } /// Check if a secret exists pub async fn secret_exists(&self, key: &str) -> Result { let path = format!("/secrets/{}/exists", key); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/secrets/{key}/exists")).await } /// Set tags for a secret pub async fn set_secret_tags(&self, tags: &[MetadataTag], key: &str) -> Result<()> { let path = format!("/secrets/{}/tags", key); - self.api.put_no_response(&path, tags).await + self.api.put_no_response(ApiPath::templated(&path, "/secrets/{key}/tags"), tags).await } /// Get tags for a secret pub async fn 
get_secret_tags(&self, key: &str) -> Result> { let path = format!("/secrets/{}/tags", key); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/secrets/{key}/tags")).await } /// Delete tags from a secret pub async fn delete_secret_tags(&self, tags: &[MetadataTag], key: &str) -> Result<()> { let path = format!("/secrets/{}/tags", key); - self.api.delete_with_body(&path, tags).await + self.api.delete_with_body(ApiPath::templated(&path, "/secrets/{key}/tags"), tags).await } } diff --git a/src/client/task_client.rs b/src/client/task_client.rs index 789f9a9..6bcf819 100644 --- a/src/client/task_client.rs +++ b/src/client/task_client.rs @@ -5,7 +5,7 @@ use std::time::Duration; use tracing::{debug, warn}; use crate::error::{ConductorError, Result}; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::{Task, TaskResult}; /// Client for task operations (polling and updates) @@ -37,7 +37,10 @@ impl TaskClient { } let path = format!("/tasks/poll/{}", task_type); - let result: Option = self.api.get_with_params(&path, ¶ms).await?; + let result: Option = self + .api + .get_with_params(ApiPath::templated(&path, "/tasks/poll/{taskType}"), ¶ms) + .await?; Ok(result) } @@ -74,7 +77,10 @@ impl TaskClient { "Batch polling tasks" ); - let tasks: Vec = self.api.get_with_params(&path, ¶ms).await?; + let tasks: Vec = self + .api + .get_with_params(ApiPath::templated(&path, "/tasks/poll/batch/{taskType}"), ¶ms) + .await?; debug!( task_type = task_type, @@ -146,7 +152,7 @@ impl TaskClient { /// Get task by ID pub async fn get_task(&self, task_id: &str) -> Result { let path = format!("/tasks/{}", task_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/tasks/{taskId}")).await } /// Get tasks in progress for a task type @@ -168,13 +174,15 @@ impl TaskClient { } let path = format!("/tasks/in_progress/{}", task_type); - self.api.get_with_params(&path, ¶ms).await + self.api + .get_with_params(ApiPath::templated(&path, 
"/tasks/in_progress/{taskType}"), ¶ms) + .await } /// Add a log to a task pub async fn add_task_log(&self, task_id: &str, log: &str) -> Result<()> { let path = format!("/tasks/{}/log", task_id); - let _: serde_json::Value = self.api.post(&path, &log).await?; + let _: serde_json::Value = self.api.post(ApiPath::templated(&path, "/tasks/{taskId}/log"), &log).await?; Ok(()) } @@ -184,7 +192,7 @@ impl TaskClient { task_id: &str, ) -> Result> { let path = format!("/tasks/{}/log", task_id); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/tasks/{taskId}/log")).await } /// Get task queue sizes @@ -192,15 +200,16 @@ impl TaskClient { &self, task_types: &[&str], ) -> Result> { - let path = "/tasks/queue/sizes"; let params: Vec<(&str, &str)> = task_types.iter().map(|t| ("taskType", *t)).collect(); - self.api.get_with_params(path, ¶ms).await + self.api.get_with_params("/tasks/queue/sizes", ¶ms).await } /// Remove task from queue pub async fn remove_task_from_queue(&self, task_type: &str, task_id: &str) -> Result<()> { let path = format!("/tasks/queue/{}/{}", task_type, task_id); - self.api.delete_no_content(&path).await + self.api + .delete_no_content(ApiPath::templated(&path, "/tasks/queue/{taskType}/{taskId}")) + .await } /// Update task by reference name @@ -224,7 +233,12 @@ impl TaskClient { } // POST with output as body - self.api.post_text(&path, &output).await + self.api + .post_text( + ApiPath::templated(&path, "/tasks/{workflowId}/{taskRefName}/{status}"), + &output, + ) + .await } /// Update task synchronously and return the updated workflow @@ -248,7 +262,13 @@ impl TaskClient { } let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.post_with_params(&path, &output, ¶ms_ref).await + self.api + .post_with_params( + ApiPath::templated(&path, "/tasks/{workflowId}/{taskRefName}/{status}/sync"), + &output, + ¶ms_ref, + ) + .await } /// Get queue size for a specific task type @@ -260,13 +280,14 @@ impl 
TaskClient { /// Get poll data for a task type pub async fn get_task_poll_data(&self, task_type: &str) -> Result> { let path = format!("/tasks/queue/polldata/{}", task_type); - self.api.get(&path).await + self.api + .get(ApiPath::templated(&path, "/tasks/queue/polldata/{taskType}")) + .await } /// Get all poll data pub async fn get_all_poll_data(&self) -> Result> { - let path = "/tasks/queue/polldata/all"; - self.api.get(path).await + self.api.get("/tasks/queue/polldata/all").await } /// Get poll data (alias for get_task_poll_data) @@ -293,7 +314,9 @@ impl TaskClient { let params: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.get_with_params("/tasks/search", ¶ms).await + self.api + .get_with_params("/tasks/search", ¶ms) + .await } /// Search for tasks V2 (returns full task objects) @@ -315,13 +338,20 @@ impl TaskClient { let params: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.get_with_params("/tasks/search-v2", ¶ms).await + self.api + .get_with_params("/tasks/search-v2", ¶ms) + .await } /// Requeue pending tasks pub async fn requeue_pending_tasks(&self, task_type: &str) -> Result { let path = format!("/tasks/queue/requeue/{}", task_type); - self.api.post_text(&path, &serde_json::Value::Null).await + self.api + .post_text( + ApiPath::templated(&path, "/tasks/queue/requeue/{taskType}"), + &serde_json::Value::Null, + ) + .await } } diff --git a/src/client/workflow_client.rs b/src/client/workflow_client.rs index 9f0333c..61046c3 100644 --- a/src/client/workflow_client.rs +++ b/src/client/workflow_client.rs @@ -7,7 +7,7 @@ use tracing::{debug, info}; use crate::error::Result; use crate::events::{exception_label, EventDispatcher, WorkflowStartFailure, WorkflowStarted}; -use crate::http::ApiClient; +use crate::http::{ApiClient, ApiPath}; use crate::models::{StartWorkflowRequest, Workflow, WorkflowDef}; /// Client for workflow operations @@ -122,7 +122,10 @@ impl WorkflowClient { 
request_id, }; - let workflow: Workflow = self.api.post(&path, &exec_request).await?; + let workflow: Workflow = self + .api + .post(ApiPath::templated(&path, "/workflow/execute/{name}/{version}"), &exec_request) + .await?; info!( workflow_name = %request.name, @@ -137,7 +140,7 @@ impl WorkflowClient { /// Get workflow by ID pub async fn get_workflow(&self, workflow_id: &str, include_tasks: bool) -> Result { let path = format!("/workflow/{}?includeTasks={}", workflow_id, include_tasks); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/workflow/{workflowId}")).await } /// Get workflow status @@ -151,7 +154,7 @@ impl WorkflowClient { "/workflow/{}/status?includeOutput={}&includeVariables={}", workflow_id, include_output, include_variables ); - self.api.get(&path).await + self.api.get(ApiPath::templated(&path, "/workflow/{workflowId}/status")).await } /// Terminate a running workflow @@ -170,20 +173,31 @@ impl WorkflowClient { path.push_str(&format!("&reason={}", urlencoding::encode(r))); } - self.api.delete_no_content(&path).await + self.api + .delete_no_content(ApiPath::templated(&path, "/workflow/{workflowId}")) + .await } /// Pause a running workflow pub async fn pause_workflow(&self, workflow_id: &str) -> Result<()> { let path = format!("/workflow/{}/pause", workflow_id); - let _: serde_json::Value = self.api.put(&path, &serde_json::Value::Null).await?; + let _: serde_json::Value = self + .api + .put(ApiPath::templated(&path, "/workflow/{workflowId}/pause"), &serde_json::Value::Null) + .await?; Ok(()) } /// Resume a paused workflow pub async fn resume_workflow(&self, workflow_id: &str) -> Result<()> { let path = format!("/workflow/{}/resume", workflow_id); - let _: serde_json::Value = self.api.put(&path, &serde_json::Value::Null).await?; + let _: serde_json::Value = self + .api + .put( + ApiPath::templated(&path, "/workflow/{workflowId}/resume"), + &serde_json::Value::Null, + ) + .await?; Ok(()) } @@ -197,7 +211,13 @@ impl WorkflowClient { 
"/workflow/{}/retry?resumeSubworkflowTasks={}", workflow_id, resume_subworkflow_tasks ); - let _: serde_json::Value = self.api.post(&path, &serde_json::Value::Null).await?; + let _: serde_json::Value = self + .api + .post( + ApiPath::templated(&path, "/workflow/{workflowId}/retry"), + &serde_json::Value::Null, + ) + .await?; Ok(()) } @@ -207,7 +227,13 @@ impl WorkflowClient { "/workflow/{}/restart?useLatestDefinitions={}", workflow_id, use_latest_def ); - let _: serde_json::Value = self.api.post(&path, &serde_json::Value::Null).await?; + let _: serde_json::Value = self + .api + .post( + ApiPath::templated(&path, "/workflow/{workflowId}/restart"), + &serde_json::Value::Null, + ) + .await?; Ok(()) } @@ -237,7 +263,9 @@ impl WorkflowClient { workflow_input, }; - self.api.post(&path, &request).await + self.api + .post(ApiPath::templated(&path, "/workflow/{workflowId}/rerun"), &request) + .await } /// Update workflow variables @@ -247,7 +275,9 @@ impl WorkflowClient { variables: HashMap, ) -> Result { let path = format!("/workflow/{}/variables", workflow_id); - self.api.post(&path, &variables).await + self.api + .post(ApiPath::templated(&path, "/workflow/{workflowId}/variables"), &variables) + .await } /// Skip a task in a running workflow @@ -257,7 +287,13 @@ impl WorkflowClient { #[derive(serde::Serialize)] struct SkipRequest {} - let _: serde_json::Value = self.api.put(&path, &SkipRequest {}).await?; + let _: serde_json::Value = self + .api + .put( + ApiPath::templated(&path, "/workflow/{workflowId}/skiptask/{taskRefName}"), + &SkipRequest {}, + ) + .await?; Ok(()) } @@ -280,7 +316,9 @@ impl WorkflowClient { let params: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.get_with_params("/workflow/search", ¶ms).await + self.api + .get_with_params("/workflow/search", ¶ms) + .await } /// Search for workflows V2 (returns full workflow objects) @@ -321,8 +359,7 @@ impl WorkflowClient { &self, workflow_ids: &[String], ) -> Result> { - 
let path = "/workflow/bulk/pause"; - self.api.put(path, workflow_ids).await + self.api.put("/workflow/bulk/pause", workflow_ids).await } /// Bulk resume workflows @@ -330,8 +367,7 @@ impl WorkflowClient { &self, workflow_ids: &[String], ) -> Result> { - let path = "/workflow/bulk/resume"; - self.api.put(path, workflow_ids).await + self.api.put("/workflow/bulk/resume", workflow_ids).await } /// Bulk restart workflows @@ -344,7 +380,9 @@ impl WorkflowClient { "/workflow/bulk/restart?useLatestDefinitions={}", use_latest_def ); - self.api.post(&path, workflow_ids).await + self.api + .post(ApiPath::templated(&path, "/workflow/bulk/restart"), workflow_ids) + .await } /// Bulk retry workflows @@ -352,8 +390,7 @@ impl WorkflowClient { &self, workflow_ids: &[String], ) -> Result> { - let path = "/workflow/bulk/retry"; - self.api.post(path, workflow_ids).await + self.api.post("/workflow/bulk/retry", workflow_ids).await } /// Bulk terminate workflows @@ -366,7 +403,9 @@ impl WorkflowClient { if let Some(r) = reason { path.push_str(&format!("?reason={}", urlencoding::encode(r))); } - self.api.post(&path, workflow_ids).await + self.api + .post(ApiPath::templated(&path, "/workflow/bulk/terminate"), workflow_ids) + .await } /// Get running workflows by name @@ -408,18 +447,24 @@ impl WorkflowClient { ); } - self.api.get(&path).await + self.api + .get(ApiPath::templated(&path, "/workflow/running/{workflowName}")) + .await } /// Delete a workflow execution pub async fn delete_workflow(&self, workflow_id: &str, archive: bool) -> Result<()> { let path = format!("/workflow/{}?archiveWorkflow={}", workflow_id, archive); - self.api.delete_no_content(&path).await + self.api + .delete_no_content(ApiPath::templated(&path, "/workflow/{workflowId}")) + .await } /// Test a workflow (dry run) pub async fn test_workflow(&self, request: &TestWorkflowRequest) -> Result { - self.api.post("/workflow/test", request).await + self.api + .post("/workflow/test", request) + .await } /// Remove/delete a 
workflow @@ -439,7 +484,12 @@ impl WorkflowClient { "/workflow/{}/correlated?includeClosed={}&includeTasks={}", workflow_name, include_completed, include_tasks ); - self.api.post(&path, correlation_ids).await + self.api + .post( + ApiPath::templated(&path, "/workflow/{workflowName}/correlated"), + correlation_ids, + ) + .await } /// Get workflows by correlation IDs in batch @@ -453,7 +503,9 @@ impl WorkflowClient { "/workflow/correlated/batch?includeClosed={}&includeTasks={}", include_completed, include_tasks ); - self.api.post(&path, batch_request).await + self.api + .post(ApiPath::templated(&path, "/workflow/correlated/batch"), batch_request) + .await } /// Update workflow state @@ -481,7 +533,9 @@ impl WorkflowClient { path.push_str(¶ms.join("&")); } - self.api.post(&path, update_request).await + self.api + .post(ApiPath::templated(&path, "/workflow/{workflowId}/state"), update_request) + .await } /// Execute workflow with return strategy @@ -524,7 +578,12 @@ impl WorkflowClient { path.push_str(¶ms.join("&")); } - self.api.post(&path, request).await + self.api + .post( + ApiPath::templated(&path, "/workflow/execute/{name}/{version}"), + request, + ) + .await } } diff --git a/src/http/api_client.rs b/src/http/api_client.rs index e53364d..bab3d96 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -21,6 +21,43 @@ struct TokenResponse { /// Maximum consecutive auth failures before stopping retry attempts const MAX_AUTH_FAILURES: u32 = 5; +/// Pairs a resolved request path with a bounded-cardinality metric template. +/// +/// For **static** endpoints (no dynamic segments) simply pass a `&str` — +/// the [`From<&str>`] impl uses the same string for both the request path +/// and the metric label. 
+/// +/// For **dynamic** endpoints use [`ApiPath::templated`] to supply the +/// resolved path alongside its template: +/// +/// ```ignore +/// let path = format!("/workflow/{}", workflow_id); +/// api.get(ApiPath::templated(&path, "/workflow/{workflowId}")).await +/// ``` +pub struct ApiPath<'a> { + pub(crate) path: &'a str, + pub(crate) metric_uri: &'a str, +} + +impl<'a> ApiPath<'a> { + /// Use when the resolved path differs from the metric template. + pub fn templated(path: &'a str, template: &'a str) -> Self { + Self { + path, + metric_uri: template, + } + } +} + +impl<'a> From<&'a str> for ApiPath<'a> { + fn from(path: &'a str) -> Self { + Self { + path, + metric_uri: path, + } + } +} + /// HTTP API client for Conductor server /// /// Thread-safe and cloneable. Multiple clones share the same connection pool @@ -48,10 +85,6 @@ pub struct ApiClient { client: Client, config: Arc>, base_url: String, - /// Path component of `base_url` (e.g. `"/api"`), prepended to endpoint - /// paths when recording the `uri` metric label so the label matches the - /// full request path as seen by all other SDKs. - base_path: String, /// Track consecutive auth failures for backoff auth_failures: Arc>, /// Last time we attempted token refresh (for backoff) @@ -83,25 +116,10 @@ impl ApiClient { let base_url = config.server_api_url.trim_end_matches('/').to_string(); - // Extract the path component of the server URL so it can be prepended - // to endpoint paths in metric labels. - // "http://host:8080/api" → "/api", "http://host:8080" → "" - let base_path = base_url - .find("://") - .and_then(|scheme_end| { - let after_scheme = scheme_end + 3; - base_url[after_scheme..] 
- .find('/') - .map(|slash| after_scheme + slash) - }) - .map(|abs_pos| base_url[abs_pos..].to_string()) - .unwrap_or_default(); - Ok(Self { client, config: Arc::new(RwLock::new(config)), base_url, - base_path, auth_failures: Arc::new(RwLock::new(0)), last_refresh_attempt: Arc::new(RwLock::new(None)), token_refresh_lock: Arc::new(Mutex::new(())), @@ -134,11 +152,20 @@ impl ApiClient { /// Unified post-request bookkeeping: tracing log + observer callback. /// - /// `path` should be the interpolated request path (no query string). + /// `path` is the interpolated request path (for the tracing log). + /// `metric_uri` is the bounded-cardinality path template used as the + /// `uri` label (e.g. `/tasks/poll/batch/{taskType}`). /// `status_str` is the HTTP status code rendered as a string, or `"0"` /// for pre-response transport errors. #[inline] - fn record_request(&self, method: &str, path: &str, status_str: &str, duration: Duration) { + fn record_request( + &self, + method: &str, + path: &str, + metric_uri: &str, + status_str: &str, + duration: Duration, + ) { debug!( method = method, url = %format!("{}{}", self.base_url, path), @@ -146,16 +173,22 @@ impl ApiClient { duration_ms = %duration.as_millis(), "API request completed" ); - let uri_label = format!("{}{}", self.base_path, path); self.http_metrics() - .observe(method, &uri_label, status_str, duration); + .observe(method, metric_uri, status_str, duration); } /// Convenience: call [`record_request`](Self::record_request) with a /// successful response's status code. 
#[inline] - fn record_response(&self, method: &str, path: &str, status: StatusCode, duration: Duration) { - self.record_request(method, path, status.as_str(), duration); + fn record_response( + &self, + method: &str, + path: &str, + metric_uri: &str, + status: StatusCode, + duration: Duration, + ) { + self.record_request(method, path, metric_uri, status.as_str(), duration); } /// Send a prepared request, recording the outcome (success *and* transport @@ -168,59 +201,71 @@ impl ApiClient { &self, method: &str, path: &str, + metric_uri: &str, request: reqwest::RequestBuilder, ) -> Result { let start = Instant::now(); let result = request.send().await; let duration = start.elapsed(); match &result { - Ok(resp) => self.record_response(method, path, resp.status(), duration), - Err(_) => self.record_request(method, path, "0", duration), + Ok(resp) => self.record_response(method, path, metric_uri, resp.status(), duration), + Err(_) => self.record_request(method, path, metric_uri, "0", duration), } result.map_err(ConductorError::Http) } /// GET request - pub async fn get(&self, path: &str) -> Result { - self.request::<(), T>(reqwest::Method::GET, path, None) + pub async fn get( + &self, + path: impl Into>, + ) -> Result { + let p = path.into(); + self.request::<(), T>(reqwest::Method::GET, p.path, p.metric_uri, None) .await } /// GET request with query parameters pub async fn get_with_params( &self, - path: &str, + path: impl Into>, params: &[(&str, &str)], ) -> Result { - let url = format!("{}{}", self.base_url, path); + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.get(&url); request = self.add_auth_header(request).await?; request = request.query(params); - let response = self.send_observed("GET", path, request).await?; + let response = self.send_observed("GET", p.path, p.metric_uri, request).await?; self.handle_response(response).await } /// POST request pub async fn post( &self, - path: &str, + path: impl Into>, 
body: &B, ) -> Result { - self.request_with_body(reqwest::Method::POST, path, body) + let p = path.into(); + self.request_with_body(reqwest::Method::POST, p.path, p.metric_uri, body) .await } /// POST request returning raw text - pub async fn post_text(&self, path: &str, body: &B) -> Result { - let url = format!("{}{}", self.base_url, path); + pub async fn post_text( + &self, + path: impl Into>, + body: &B, + ) -> Result { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed("POST", path, request).await?; + let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -233,27 +278,36 @@ impl ApiClient { /// PUT request pub async fn put( &self, - path: &str, + path: impl Into>, body: &B, ) -> Result { - self.request_with_body(reqwest::Method::PUT, path, body) + let p = path.into(); + self.request_with_body(reqwest::Method::PUT, p.path, p.metric_uri, body) .await } /// DELETE request - pub async fn delete(&self, path: &str) -> Result { - self.request::<(), T>(reqwest::Method::DELETE, path, None) + pub async fn delete( + &self, + path: impl Into>, + ) -> Result { + let p = path.into(); + self.request::<(), T>(reqwest::Method::DELETE, p.path, p.metric_uri, None) .await } /// DELETE request with no response body - pub async fn delete_no_content(&self, path: &str) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + pub async fn delete_no_content( + &self, + path: impl Into>, + ) -> Result<()> { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.delete(&url); request = self.add_auth_header(request).await?; - let response = self.send_observed("DELETE", path, request).await?; + let response = self.send_observed("DELETE", p.path, p.metric_uri, 
request).await?; let status = response.status(); if status.is_success() || status == StatusCode::NO_CONTENT { @@ -266,16 +320,17 @@ impl ApiClient { /// DELETE request with body pub async fn delete_with_body( &self, - path: &str, + path: impl Into>, body: &B, ) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.delete(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed("DELETE", path, request).await?; + let response = self.send_observed("DELETE", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() || status == StatusCode::NO_CONTENT { @@ -286,14 +341,19 @@ impl ApiClient { } /// DELETE request with query parameters - pub async fn delete_with_params(&self, path: &str, params: &[(&str, &str)]) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + pub async fn delete_with_params( + &self, + path: impl Into>, + params: &[(&str, &str)], + ) -> Result<()> { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.delete(&url); request = self.add_auth_header(request).await?; request = request.query(params); - let response = self.send_observed("DELETE", path, request).await?; + let response = self.send_observed("DELETE", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() || status == StatusCode::NO_CONTENT { @@ -306,16 +366,17 @@ impl ApiClient { /// POST request with no response pub async fn post_no_response( &self, - path: &str, + path: impl Into>, body: &B, ) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = 
self.send_observed("POST", path, request).await?; + let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -326,24 +387,32 @@ impl ApiClient { } /// POST request with no body - pub async fn post_no_body(&self, path: &str) -> Result { - let url = format!("{}{}", self.base_url, path); + pub async fn post_no_body( + &self, + path: impl Into>, + ) -> Result { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; - let response = self.send_observed("POST", path, request).await?; + let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; self.handle_response(response).await } /// POST request with no body and no response - pub async fn post_no_body_no_response(&self, path: &str) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + pub async fn post_no_body_no_response( + &self, + path: impl Into>, + ) -> Result<()> { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; - let response = self.send_observed("POST", path, request).await?; + let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -354,14 +423,19 @@ impl ApiClient { } /// PUT request with no response - pub async fn put_no_response(&self, path: &str, body: &B) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + pub async fn put_no_response( + &self, + path: impl Into>, + body: &B, + ) -> Result<()> { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.put(&url); request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed("PUT", path, request).await?; + let response 
= self.send_observed("PUT", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -372,15 +446,20 @@ impl ApiClient { } /// PUT request with raw text body - pub async fn put_raw(&self, path: &str, body: &str) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + pub async fn put_raw( + &self, + path: impl Into>, + body: &str, + ) -> Result<()> { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.put(&url); request = self.add_auth_header(request).await?; request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = self.send_observed("PUT", path, request).await?; + let response = self.send_observed("PUT", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -393,29 +472,31 @@ impl ApiClient { /// POST request with JSON body and query parameters pub async fn post_with_params( &self, - path: &str, + path: impl Into>, body: &B, params: &[(&str, &str)], ) -> Result { - let url = format!("{}{}", self.base_url, path); + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; request = request.query(params); request = request.json(body); - let response = self.send_observed("POST", path, request).await?; + let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; self.handle_response(response).await } /// POST request with raw text body and query parameters pub async fn post_raw_with_params( &self, - path: &str, + path: impl Into>, body: &str, params: &[(&str, &str)], ) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; @@ -423,7 +504,7 @@ impl ApiClient { 
request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = self.send_observed("POST", path, request).await?; + let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -436,11 +517,12 @@ impl ApiClient { /// PUT request with raw text body and query parameters pub async fn put_raw_with_params( &self, - path: &str, + path: impl Into>, body: &str, params: &[(&str, &str)], ) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.put(&url); request = self.add_auth_header(request).await?; @@ -448,7 +530,7 @@ impl ApiClient { request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = self.send_observed("PUT", path, request).await?; + let response = self.send_observed("PUT", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -459,13 +541,17 @@ impl ApiClient { } /// GET request with no response - pub async fn get_no_response(&self, path: &str) -> Result<()> { - let url = format!("{}{}", self.base_url, path); + pub async fn get_no_response( + &self, + path: impl Into>, + ) -> Result<()> { + let p = path.into(); + let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.get(&url); request = self.add_auth_header(request).await?; - let response = self.send_observed("GET", path, request).await?; + let response = self.send_observed("GET", p.path, p.metric_uri, request).await?; let status = response.status(); if status.is_success() { @@ -480,6 +566,7 @@ impl ApiClient { &self, method: reqwest::Method, path: &str, + metric_uri: &str, body: Option<&B>, ) -> Result { let url = format!("{}{}", self.base_url, path); @@ -492,7 +579,7 @@ impl ApiClient { request = request.json(b); } - let response = 
self.send_observed(&method_str, path, request).await?; + let response = self.send_observed(&method_str, path, metric_uri, request).await?; let status = response.status(); // If 401, try refreshing token and retry once @@ -506,7 +593,7 @@ impl ApiClient { request = request.json(b); } - let response = self.send_observed(&method_str, path, request).await?; + let response = self.send_observed(&method_str, path, metric_uri, request).await?; return self.handle_response(response).await; } @@ -518,6 +605,7 @@ impl ApiClient { &self, method: reqwest::Method, path: &str, + metric_uri: &str, body: &B, ) -> Result { let url = format!("{}{}", self.base_url, path); @@ -527,7 +615,7 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed(&method_str, path, request).await?; + let response = self.send_observed(&method_str, path, metric_uri, request).await?; let status = response.status(); // If 401, try refreshing token and retry once @@ -538,7 +626,7 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed(&method_str, path, request).await?; + let response = self.send_observed(&method_str, path, metric_uri, request).await?; return self.handle_response(response).await; } @@ -739,7 +827,7 @@ impl ApiClient { }); let response = match self - .send_observed("POST", "/token", self.client.post(&url).json(&body)) + .send_observed("POST", "/token", "/token", self.client.post(&url).json(&body)) .await { Ok(resp) => resp, @@ -830,7 +918,7 @@ impl ApiClient { let url = format!("{}/token", self.base_url); let body = serde_json::json!({"keyId": "probe", "keySecret": "probe"}); let is_oss = match self - .send_observed("POST", "/token", self.client.post(&url).json(&body)) + .send_observed("POST", "/token", "/token", self.client.post(&url).json(&body)) .await { Ok(resp) => resp.status() == StatusCode::NOT_FOUND, diff --git a/src/http/metrics.rs 
b/src/http/metrics.rs index b886e38..eb11db5 100644 --- a/src/http/metrics.rs +++ b/src/http/metrics.rs @@ -8,6 +8,11 @@ //! (e.g. the [`MetricsCollector`](crate::metrics::MetricsCollector) from the //! `metrics` module) can record `http_api_client_request_seconds` without the //! HTTP layer depending on the metrics layer. +//! +//! The `uri` value passed to [`HttpMetricsObserver::observe`] is a +//! bounded-cardinality **path template** (e.g. `/tasks/poll/batch/{taskType}`) +//! rather than the interpolated request path. The server base-path prefix +//! (e.g. `/api`) is not included. use std::sync::Arc; use std::time::Duration; @@ -21,9 +26,10 @@ pub trait HttpMetricsObserver: Send + Sync { /// Record a completed HTTP request. /// /// - `method`: uppercase HTTP verb (e.g. `"GET"`). - /// - `uri`: interpolated request path, *without* query string (e.g. - /// `/tasks/poll/batch/my_worker`). Template extraction is tracked as - /// Phase 4 of the canonical SDK metrics harmonization plan. + /// - `uri`: bounded-cardinality path template, *without* query string + /// (e.g. `/tasks/poll/batch/{taskType}`). Dynamic segments such as + /// workflow IDs or task names are replaced by `{placeholder}` tokens. + /// The server base-path prefix (e.g. `/api`) is **not** included. /// - `status`: HTTP status code as a string, or `"0"` if the transport /// failed before a status was received. /// - `duration`: wall-clock time from send to response-received (or error). 
diff --git a/src/http/mod.rs b/src/http/mod.rs index f37d512..6ad5180 100644 --- a/src/http/mod.rs +++ b/src/http/mod.rs @@ -4,5 +4,5 @@ mod api_client; mod metrics; -pub use api_client::ApiClient; +pub use api_client::{ApiClient, ApiPath}; pub use metrics::{HttpMetricsObserver, NoopHttpMetricsObserver}; From f134422619677d70579195ad6f14bef63adb5457 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Fri, 15 May 2026 12:39:52 -0600 Subject: [PATCH 09/15] lint --- harness/src/main.rs | 13 +- harness/src/workflow_status_probe.rs | 2 +- src/client/authorization_client.rs | 205 ++++++++++++++++++++++----- src/client/event_client.rs | 27 +++- src/client/integration_client.rs | 126 +++++++++++++--- src/client/metadata_client.rs | 26 +++- src/client/orkes_metadata_client.rs | 56 ++++++-- src/client/prompt_client.rs | 55 +++++-- src/client/scheduler_client.rs | 55 +++++-- src/client/schema_client.rs | 14 +- src/client/secret_client.rs | 28 +++- src/client/task_client.rs | 45 ++++-- src/client/workflow_client.rs | 61 +++++--- src/http/api_client.rs | 117 +++++++++------ 14 files changed, 643 insertions(+), 187 deletions(-) diff --git a/harness/src/main.rs b/harness/src/main.rs index fe72f76..bd317f6 100644 --- a/harness/src/main.rs +++ b/harness/src/main.rs @@ -179,18 +179,19 @@ async fn main() { let handle = tokio::spawn(async move { probe.run().await }); let governor = Arc::new( - WorkflowGovernor::new(workflow_client, WORKFLOW_NAME.to_string(), workflows_per_sec) - .with_id_sink(tx), + WorkflowGovernor::new( + workflow_client, + WORKFLOW_NAME.to_string(), + workflows_per_sec, + ) + .with_id_sink(tx), ); let governor_handle = tokio::spawn({ let governor = Arc::clone(&governor); async move { governor.run().await } }); - println!( - "WorkflowStatusProbe enabled at {}/sec", - probe_rate, - ); + println!("WorkflowStatusProbe enabled at {}/sec", probe_rate,); Some((governor_handle, handle)) } else { diff --git a/harness/src/workflow_status_probe.rs 
b/harness/src/workflow_status_probe.rs index ece6829..ce6b555 100644 --- a/harness/src/workflow_status_probe.rs +++ b/harness/src/workflow_status_probe.rs @@ -1,8 +1,8 @@ // Copyright 2024 Conductor OSS // Licensed under the Apache License, Version 2.0. See LICENSE in the project root for license information. -use std::collections::VecDeque; use conductor::client::WorkflowClient; +use std::collections::VecDeque; use tokio::sync::mpsc; use tokio::time::{self, Duration}; diff --git a/src/client/authorization_client.rs b/src/client/authorization_client.rs index 33e80a4..afa57c1 100644 --- a/src/client/authorization_client.rs +++ b/src/client/authorization_client.rs @@ -37,7 +37,9 @@ impl AuthorizationClient { /// Get an application by ID pub async fn get_application(&self, application_id: &str) -> Result { let path = format!("/applications/{}", application_id); - self.api.get(ApiPath::templated(&path, "/applications/{applicationId}")).await + self.api + .get(ApiPath::templated(&path, "/applications/{applicationId}")) + .await } /// List all applications @@ -52,19 +54,28 @@ impl AuthorizationClient { application_id: &str, ) -> Result { let path = format!("/applications/{}", application_id); - self.api.put(ApiPath::templated(&path, "/applications/{applicationId}"), request).await + self.api + .put( + ApiPath::templated(&path, "/applications/{applicationId}"), + request, + ) + .await } /// Delete an application pub async fn delete_application(&self, application_id: &str) -> Result<()> { let path = format!("/applications/{}", application_id); - self.api.delete_no_content(ApiPath::templated(&path, "/applications/{applicationId}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/applications/{applicationId}")) + .await } /// Get application ID by access key ID pub async fn get_app_by_access_key_id(&self, access_key_id: &str) -> Result { let path = format!("/applications/key/{}", access_key_id); - self.api.get(ApiPath::templated(&path, 
"/applications/key/{accessKeyId}")).await + self.api + .get(ApiPath::templated(&path, "/applications/key/{accessKeyId}")) + .await } /// Add a role to application user @@ -74,7 +85,12 @@ impl AuthorizationClient { role: &str, ) -> Result<()> { let path = format!("/applications/{}/roles/{}", application_id, role); - self.api.post_no_body_no_response(ApiPath::templated(&path, "/applications/{applicationId}/roles/{role}")).await + self.api + .post_no_body_no_response(ApiPath::templated( + &path, + "/applications/{applicationId}/roles/{role}", + )) + .await } /// Remove a role from application user @@ -84,7 +100,12 @@ impl AuthorizationClient { role: &str, ) -> Result<()> { let path = format!("/applications/{}/roles/{}", application_id, role); - self.api.delete_no_content(ApiPath::templated(&path, "/applications/{applicationId}/roles/{role}")).await + self.api + .delete_no_content(ApiPath::templated( + &path, + "/applications/{applicationId}/roles/{role}", + )) + .await } /// Set tags for an application @@ -94,13 +115,23 @@ impl AuthorizationClient { application_id: &str, ) -> Result<()> { let path = format!("/applications/{}/tags", application_id); - self.api.put_no_response(ApiPath::templated(&path, "/applications/{applicationId}/tags"), tags).await + self.api + .put_no_response( + ApiPath::templated(&path, "/applications/{applicationId}/tags"), + tags, + ) + .await } /// Get tags for an application pub async fn get_application_tags(&self, application_id: &str) -> Result> { let path = format!("/applications/{}/tags", application_id); - self.api.get(ApiPath::templated(&path, "/applications/{applicationId}/tags")).await + self.api + .get(ApiPath::templated( + &path, + "/applications/{applicationId}/tags", + )) + .await } /// Delete tags from an application @@ -110,19 +141,34 @@ impl AuthorizationClient { application_id: &str, ) -> Result<()> { let path = format!("/applications/{}/tags", application_id); - self.api.delete_with_body(ApiPath::templated(&path, 
"/applications/{applicationId}/tags"), tags).await + self.api + .delete_with_body( + ApiPath::templated(&path, "/applications/{applicationId}/tags"), + tags, + ) + .await } /// Create an access key for an application pub async fn create_access_key(&self, application_id: &str) -> Result { let path = format!("/applications/{}/accessKeys", application_id); - self.api.post_no_body(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys")).await + self.api + .post_no_body(ApiPath::templated( + &path, + "/applications/{applicationId}/accessKeys", + )) + .await } /// Get access keys for an application pub async fn get_access_keys(&self, application_id: &str) -> Result> { let path = format!("/applications/{}/accessKeys", application_id); - self.api.get(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys")).await + self.api + .get(ApiPath::templated( + &path, + "/applications/{applicationId}/accessKeys", + )) + .await } /// Toggle the status of an access key @@ -135,13 +181,23 @@ impl AuthorizationClient { "/applications/{}/accessKeys/{}/status", application_id, key_id ); - self.api.post_no_body(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys/{keyId}/status")).await + self.api + .post_no_body(ApiPath::templated( + &path, + "/applications/{applicationId}/accessKeys/{keyId}/status", + )) + .await } /// Delete an access key pub async fn delete_access_key(&self, application_id: &str, key_id: &str) -> Result<()> { let path = format!("/applications/{}/accessKeys/{}", application_id, key_id); - self.api.delete_no_content(ApiPath::templated(&path, "/applications/{applicationId}/accessKeys/{keyId}")).await + self.api + .delete_no_content(ApiPath::templated( + &path, + "/applications/{applicationId}/accessKeys/{keyId}", + )) + .await } // =========================== @@ -155,19 +211,25 @@ impl AuthorizationClient { user_id: &str, ) -> Result { let path = format!("/users/{}", user_id); - self.api.put(ApiPath::templated(&path, 
"/users/{userId}"), request).await + self.api + .put(ApiPath::templated(&path, "/users/{userId}"), request) + .await } /// Get a user by ID pub async fn get_user(&self, user_id: &str) -> Result { let path = format!("/users/{}", user_id); - self.api.get(ApiPath::templated(&path, "/users/{userId}")).await + self.api + .get(ApiPath::templated(&path, "/users/{userId}")) + .await } /// List all users pub async fn list_users(&self, apps: bool) -> Result> { if apps { - self.api.get_with_params("/users", &[("apps", "true")]).await + self.api + .get_with_params("/users", &[("apps", "true")]) + .await } else { self.api.get("/users").await } @@ -176,7 +238,9 @@ impl AuthorizationClient { /// Delete a user pub async fn delete_user(&self, user_id: &str) -> Result<()> { let path = format!("/users/{}", user_id); - self.api.delete_no_content(ApiPath::templated(&path, "/users/{userId}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/users/{userId}")) + .await } /// Get permissions granted to a user @@ -185,7 +249,9 @@ impl AuthorizationClient { user_id: &str, ) -> Result> { let path = format!("/users/{}/permissions", user_id); - self.api.get(ApiPath::templated(&path, "/users/{userId}/permissions")).await + self.api + .get(ApiPath::templated(&path, "/users/{userId}/permissions")) + .await } /// Check if user has permissions over a target @@ -199,7 +265,12 @@ impl AuthorizationClient { "/users/{}/permissions/{}/{}", user_id, target_type, target_id ); - self.api.get(ApiPath::templated(&path, "/users/{userId}/permissions/{targetType}/{targetId}")).await + self.api + .get(ApiPath::templated( + &path, + "/users/{userId}/permissions/{targetType}/{targetId}", + )) + .await } // =========================== @@ -213,13 +284,17 @@ impl AuthorizationClient { group_id: &str, ) -> Result { let path = format!("/groups/{}", group_id); - self.api.put(ApiPath::templated(&path, "/groups/{groupId}"), request).await + self.api + .put(ApiPath::templated(&path, "/groups/{groupId}"), 
request) + .await } /// Get a group by ID pub async fn get_group(&self, group_id: &str) -> Result { let path = format!("/groups/{}", group_id); - self.api.get(ApiPath::templated(&path, "/groups/{groupId}")).await + self.api + .get(ApiPath::templated(&path, "/groups/{groupId}")) + .await } /// List all groups @@ -230,7 +305,9 @@ impl AuthorizationClient { /// Delete a group pub async fn delete_group(&self, group_id: &str) -> Result<()> { let path = format!("/groups/{}", group_id); - self.api.delete_no_content(ApiPath::templated(&path, "/groups/{groupId}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/groups/{groupId}")) + .await } /// Get permissions granted to a group @@ -239,37 +316,61 @@ impl AuthorizationClient { group_id: &str, ) -> Result> { let path = format!("/groups/{}/permissions", group_id); - self.api.get(ApiPath::templated(&path, "/groups/{groupId}/permissions")).await + self.api + .get(ApiPath::templated(&path, "/groups/{groupId}/permissions")) + .await } /// Add a user to a group pub async fn add_user_to_group(&self, group_id: &str, user_id: &str) -> Result<()> { let path = format!("/groups/{}/users/{}", group_id, user_id); - self.api.post_no_body_no_response(ApiPath::templated(&path, "/groups/{groupId}/users/{userId}")).await + self.api + .post_no_body_no_response(ApiPath::templated( + &path, + "/groups/{groupId}/users/{userId}", + )) + .await } /// Add multiple users to a group pub async fn add_users_to_group(&self, group_id: &str, user_ids: &[String]) -> Result<()> { let path = format!("/groups/{}/users", group_id); - self.api.post_no_response(ApiPath::templated(&path, "/groups/{groupId}/users"), user_ids).await + self.api + .post_no_response( + ApiPath::templated(&path, "/groups/{groupId}/users"), + user_ids, + ) + .await } /// Get all users in a group pub async fn get_users_in_group(&self, group_id: &str) -> Result> { let path = format!("/groups/{}/users", group_id); - self.api.get(ApiPath::templated(&path, 
"/groups/{groupId}/users")).await + self.api + .get(ApiPath::templated(&path, "/groups/{groupId}/users")) + .await } /// Remove a user from a group pub async fn remove_user_from_group(&self, group_id: &str, user_id: &str) -> Result<()> { let path = format!("/groups/{}/users/{}", group_id, user_id); - self.api.delete_no_content(ApiPath::templated(&path, "/groups/{groupId}/users/{userId}")).await + self.api + .delete_no_content(ApiPath::templated( + &path, + "/groups/{groupId}/users/{userId}", + )) + .await } /// Remove multiple users from a group pub async fn remove_users_from_group(&self, group_id: &str, user_ids: &[String]) -> Result<()> { let path = format!("/groups/{}/users", group_id); - self.api.delete_with_body(ApiPath::templated(&path, "/groups/{groupId}/users"), user_ids).await + self.api + .delete_with_body( + ApiPath::templated(&path, "/groups/{groupId}/users"), + user_ids, + ) + .await } // =========================== @@ -288,7 +389,9 @@ impl AuthorizationClient { "target": target, "access": access }); - self.api.post_no_response("/auth/authorization", &body).await + self.api + .post_no_response("/auth/authorization", &body) + .await } /// Get permissions for a target @@ -303,7 +406,12 @@ impl AuthorizationClient { target_type_str.trim_matches('"'), target.id ); - self.api.get(ApiPath::templated(&path, "/auth/authorization/{targetType}/{targetId}")).await + self.api + .get(ApiPath::templated( + &path, + "/auth/authorization/{targetType}/{targetId}", + )) + .await } /// Remove permissions from a subject over a target @@ -318,7 +426,9 @@ impl AuthorizationClient { "target": target, "access": access }); - self.api.delete_with_body("/auth/authorization", &body).await + self.api + .delete_with_body("/auth/authorization", &body) + .await } // =========================== @@ -353,7 +463,9 @@ impl AuthorizationClient { /// Get a role by name pub async fn get_role(&self, role_name: &str) -> Result { let path = format!("/roles/{}", role_name); - 
self.api.get(ApiPath::templated(&path, "/roles/{roleName}")).await + self.api + .get(ApiPath::templated(&path, "/roles/{roleName}")) + .await } /// Update a custom role @@ -363,13 +475,17 @@ impl AuthorizationClient { request: &serde_json::Value, ) -> Result { let path = format!("/roles/{}", role_name); - self.api.put(ApiPath::templated(&path, "/roles/{roleName}"), request).await + self.api + .put(ApiPath::templated(&path, "/roles/{roleName}"), request) + .await } /// Delete a custom role pub async fn delete_role(&self, role_name: &str) -> Result<()> { let path = format!("/roles/{}", role_name); - self.api.delete_no_content(ApiPath::templated(&path, "/roles/{roleName}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/roles/{roleName}")) + .await } // =========================== @@ -409,7 +525,12 @@ impl AuthorizationClient { /// Get API Gateway authentication configuration by ID pub async fn get_gateway_auth_config(&self, config_id: &str) -> Result { let path = format!("/api-gateway/auth-config/{}", config_id); - self.api.get(ApiPath::templated(&path, "/api-gateway/auth-config/{configId}")).await + self.api + .get(ApiPath::templated( + &path, + "/api-gateway/auth-config/{configId}", + )) + .await } /// List all API Gateway authentication configurations @@ -424,13 +545,23 @@ impl AuthorizationClient { auth_config: &serde_json::Value, ) -> Result { let path = format!("/api-gateway/auth-config/{}", config_id); - self.api.put(ApiPath::templated(&path, "/api-gateway/auth-config/{configId}"), auth_config).await + self.api + .put( + ApiPath::templated(&path, "/api-gateway/auth-config/{configId}"), + auth_config, + ) + .await } /// Delete API Gateway authentication configuration pub async fn delete_gateway_auth_config(&self, config_id: &str) -> Result<()> { let path = format!("/api-gateway/auth-config/{}", config_id); - self.api.delete_no_content(ApiPath::templated(&path, "/api-gateway/auth-config/{configId}")).await + self.api + 
.delete_no_content(ApiPath::templated( + &path, + "/api-gateway/auth-config/{configId}", + )) + .await } } diff --git a/src/client/event_client.rs b/src/client/event_client.rs index 183a316..d932c49 100644 --- a/src/client/event_client.rs +++ b/src/client/event_client.rs @@ -73,7 +73,12 @@ impl EventClient { "/event/queue/config/{}/{}", queue_config.queue_type, queue_config.queue_name ); - self.api.delete_no_content(ApiPath::templated(&path, "/event/queue/config/{queueType}/{queueName}")).await + self.api + .delete_no_content(ApiPath::templated( + &path, + "/event/queue/config/{queueType}/{queueName}", + )) + .await } /// Get a Kafka queue configuration by topic @@ -91,7 +96,12 @@ impl EventClient { queue_name: &str, ) -> Result { let path = format!("/event/queue/config/{}/{}", queue_type, queue_name); - self.api.get(ApiPath::templated(&path, "/event/queue/config/{queueType}/{queueName}")).await + self.api + .get(ApiPath::templated( + &path, + "/event/queue/config/{queueType}/{queueName}", + )) + .await } /// Create or update a queue configuration @@ -101,7 +111,10 @@ impl EventClient { queue_config.queue_type, queue_config.queue_name ); self.api - .put_no_response(ApiPath::templated(&path, "/event/queue/config/{queueType}/{queueName}"), &queue_config.configuration) + .put_no_response( + ApiPath::templated(&path, "/event/queue/config/{queueType}/{queueName}"), + &queue_config.configuration, + ) .await } @@ -121,7 +134,9 @@ impl EventClient { urlencoding::encode(event), active_only ); - self.api.get(ApiPath::templated(&path, "/event/{event}")).await + self.api + .get(ApiPath::templated(&path, "/event/{event}")) + .await } /// Get all event handlers @@ -142,7 +157,9 @@ impl EventClient { /// Remove an event handler pub async fn remove_event_handler(&self, name: &str) -> Result<()> { let path = format!("/event/{}", name); - self.api.delete_no_content(ApiPath::templated(&path, "/event/{event}")).await + self.api + .delete_no_content(ApiPath::templated(&path, 
"/event/{event}")) + .await } } diff --git a/src/client/integration_client.rs b/src/client/integration_client.rs index 66bd9f4..9bbd360 100644 --- a/src/client/integration_client.rs +++ b/src/client/integration_client.rs @@ -31,7 +31,12 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/prompt/{}", ai_integration, model_name, prompt_name ); - self.api.post_no_body_no_response(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/prompt/{promptName}")).await + self.api + .post_no_body_no_response(ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}/prompt/{promptName}", + )) + .await } /// Delete a specific integration API @@ -44,13 +49,20 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}", integration_name, api_name ); - self.api.delete_no_content(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}")).await + self.api + .delete_no_content(ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}", + )) + .await } /// Delete an integration pub async fn delete_integration(&self, integration_name: &str) -> Result<()> { let path = format!("/integrations/provider/{}", integration_name); - self.api.delete_no_content(ApiPath::templated(&path, "/integrations/provider/{name}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/integrations/provider/{name}")) + .await } /// Get an integration API @@ -63,7 +75,12 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}", integration_name, api_name ); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}", + )) + .await } /// Get all APIs for an integration @@ -72,13 +89,20 @@ impl IntegrationClient { integration_name: &str, ) -> Result> { let path = format!("/integrations/provider/{}/integration", 
integration_name); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/integration", + )) + .await } /// Get an integration pub async fn get_integration(&self, integration_name: &str) -> Result { let path = format!("/integrations/provider/{}", integration_name); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}")).await + self.api + .get(ApiPath::templated(&path, "/integrations/provider/{name}")) + .await } /// Get all integrations @@ -96,7 +120,12 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/prompt", ai_integration, model_name ); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/prompt")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}/prompt", + )) + .await } /// Get token usage for an integration API @@ -109,7 +138,12 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/metrics", integration_name, api_name ); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/metrics")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}/metrics", + )) + .await } /// Get token usage for an integration provider @@ -118,7 +152,12 @@ impl IntegrationClient { name: &str, ) -> Result { let path = format!("/integrations/provider/{}/metrics", name); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/metrics")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/metrics", + )) + .await } /// Save (create or update) an integration API @@ -132,7 +171,12 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}", integration_name, api_name ); - self.api.put_no_response(ApiPath::templated(&path, 
"/integrations/provider/{name}/integration/{apiName}"), api_details).await + self.api + .put_no_response( + ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}"), + api_details, + ) + .await } /// Save (create or update) an integration @@ -142,7 +186,12 @@ impl IntegrationClient { integration_details: &IntegrationUpdate, ) -> Result<()> { let path = format!("/integrations/provider/{}", integration_name); - self.api.put_no_response(ApiPath::templated(&path, "/integrations/provider/{name}"), integration_details).await + self.api + .put_no_response( + ApiPath::templated(&path, "/integrations/provider/{name}"), + integration_details, + ) + .await } // Tags @@ -158,7 +207,15 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/tags", integration_name, api_name ); - self.api.delete_with_body(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/tags"), tags).await + self.api + .delete_with_body( + ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}/tags", + ), + tags, + ) + .await } /// Delete a tag from an integration provider @@ -168,7 +225,12 @@ impl IntegrationClient { name: &str, ) -> Result<()> { let path = format!("/integrations/provider/{}/tags", name); - self.api.delete_with_body(ApiPath::templated(&path, "/integrations/provider/{name}/tags"), tags).await + self.api + .delete_with_body( + ApiPath::templated(&path, "/integrations/provider/{name}/tags"), + tags, + ) + .await } /// Set tags for an integration @@ -182,7 +244,15 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/tags", integration_name, api_name ); - self.api.put_no_response(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/tags"), tags).await + self.api + .put_no_response( + ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}/tags", + ), + tags, + ) + .await } /// Set tags for an integration provider @@ -192,7 +262,12 @@ 
impl IntegrationClient { name: &str, ) -> Result<()> { let path = format!("/integrations/provider/{}/tags", name); - self.api.put_no_response(ApiPath::templated(&path, "/integrations/provider/{name}/tags"), tags).await + self.api + .put_no_response( + ApiPath::templated(&path, "/integrations/provider/{name}/tags"), + tags, + ) + .await } /// Get tags for an integration @@ -205,13 +280,23 @@ impl IntegrationClient { "/integrations/provider/{}/integration/{}/tags", integration_name, api_name ); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/integration/{apiName}/tags")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/integration/{apiName}/tags", + )) + .await } /// Get tags for an integration provider pub async fn get_tags_for_integration_provider(&self, name: &str) -> Result> { let path = format!("/integrations/provider/{}/tags", name); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/tags")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/tags", + )) + .await } /// Get available APIs for an integration provider @@ -220,7 +305,12 @@ impl IntegrationClient { integration_name: &str, ) -> Result> { let path = format!("/integrations/provider/{}/models", integration_name); - self.api.get(ApiPath::templated(&path, "/integrations/provider/{name}/models")).await + self.api + .get(ApiPath::templated( + &path, + "/integrations/provider/{name}/models", + )) + .await } /// Get all integration provider definitions diff --git a/src/client/metadata_client.rs b/src/client/metadata_client.rs index 0bf1c2c..acbb6d0 100644 --- a/src/client/metadata_client.rs +++ b/src/client/metadata_client.rs @@ -82,13 +82,20 @@ impl MetadataClient { format!("/metadata/workflow/{}", name) }; - self.api.get(ApiPath::templated(&path, "/metadata/workflow/{name}")).await + self.api + .get(ApiPath::templated(&path, "/metadata/workflow/{name}")) + .await } /// Get all versions of a 
workflow definition pub async fn get_all_workflow_def_versions(&self, name: &str) -> Result> { let path = format!("/metadata/workflow/{}/versions", name); - self.api.get(ApiPath::templated(&path, "/metadata/workflow/{name}/versions")).await + self.api + .get(ApiPath::templated( + &path, + "/metadata/workflow/{name}/versions", + )) + .await } /// Get all workflow definitions @@ -104,7 +111,12 @@ impl MetadataClient { /// Delete a workflow definition pub async fn delete_workflow_def(&self, name: &str, version: i32) -> Result<()> { let path = format!("/metadata/workflow/{}/{}", name, version); - self.api.delete_no_content(ApiPath::templated(&path, "/metadata/workflow/{name}/{version}")).await + self.api + .delete_no_content(ApiPath::templated( + &path, + "/metadata/workflow/{name}/{version}", + )) + .await } // ==================== Task Definitions ==================== @@ -145,7 +157,9 @@ impl MetadataClient { /// Get a task definition by name pub async fn get_task_def(&self, name: &str) -> Result { let path = format!("/metadata/taskdefs/{}", name); - self.api.get(ApiPath::templated(&path, "/metadata/taskdefs/{name}")).await + self.api + .get(ApiPath::templated(&path, "/metadata/taskdefs/{name}")) + .await } /// Get all task definitions @@ -156,7 +170,9 @@ impl MetadataClient { /// Delete a task definition pub async fn delete_task_def(&self, name: &str) -> Result<()> { let path = format!("/metadata/taskdefs/{}", name); - self.api.delete_no_content(ApiPath::templated(&path, "/metadata/taskdefs/{name}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/metadata/taskdefs/{name}")) + .await } /// Check if a task definition exists diff --git a/src/client/orkes_metadata_client.rs b/src/client/orkes_metadata_client.rs index b0339cb..e7324ba 100644 --- a/src/client/orkes_metadata_client.rs +++ b/src/client/orkes_metadata_client.rs @@ -62,7 +62,12 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/workflow/{}/tags", workflow_name); - 
self.api.post_no_response(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), &[tag]).await?; + self.api + .post_no_response( + ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), + &[tag], + ) + .await?; info!( workflow_name = %workflow_name, @@ -76,7 +81,12 @@ impl OrkesMetadataClient { /// Get all tags for a workflow definition pub async fn get_workflow_tags(&self, workflow_name: &str) -> Result> { let path = format!("/metadata/workflow/{}/tags", workflow_name); - self.api.get(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags")).await + self.api + .get(ApiPath::templated( + &path, + "/metadata/workflow/{workflowName}/tags", + )) + .await } /// Set tags for a workflow definition (replaces existing tags) @@ -88,7 +98,12 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/workflow/{}/tags", workflow_name); - self.api.put_no_response(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), tags).await?; + self.api + .put_no_response( + ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), + tags, + ) + .await?; info!( workflow_name = %workflow_name, @@ -108,7 +123,12 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/workflow/{}/tags", workflow_name); - self.api.delete_with_body(ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), &[tag]).await?; + self.api + .delete_with_body( + ApiPath::templated(&path, "/metadata/workflow/{workflowName}/tags"), + &[tag], + ) + .await?; info!( workflow_name = %workflow_name, @@ -130,7 +150,12 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.post_no_response(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), &[tag]).await?; + self.api + .post_no_response( + ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), + &[tag], + ) + .await?; info!( task_name = %task_name, @@ -144,7 +169,12 @@ impl OrkesMetadataClient { /// Get all tags for a 
task definition pub async fn get_task_tags(&self, task_name: &str) -> Result> { let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.get(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags")).await + self.api + .get(ApiPath::templated( + &path, + "/metadata/taskdefs/{taskName}/tags", + )) + .await } /// Set tags for a task definition (replaces existing tags) @@ -156,7 +186,12 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.put_no_response(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), tags).await?; + self.api + .put_no_response( + ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), + tags, + ) + .await?; info!( task_name = %task_name, @@ -176,7 +211,12 @@ impl OrkesMetadataClient { ); let path = format!("/metadata/taskdefs/{}/tags", task_name); - self.api.delete_with_body(ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), &[tag]).await?; + self.api + .delete_with_body( + ApiPath::templated(&path, "/metadata/taskdefs/{taskName}/tags"), + &[tag], + ) + .await?; info!( task_name = %task_name, diff --git a/src/client/prompt_client.rs b/src/client/prompt_client.rs index b8ba39a..34dc80c 100644 --- a/src/client/prompt_client.rs +++ b/src/client/prompt_client.rs @@ -70,7 +70,11 @@ impl PromptClient { let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); self.api - .post_raw_with_params(ApiPath::templated(&path, "/prompts/{promptName}"), prompt_template, ¶ms_ref) + .post_raw_with_params( + ApiPath::templated(&path, "/prompts/{promptName}"), + prompt_template, + ¶ms_ref, + ) .await } @@ -102,7 +106,11 @@ impl PromptClient { let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); self.api - .put_raw_with_params(ApiPath::templated(&path, "/prompts/{promptName}/{version}"), prompt_template, ¶ms_ref) + .put_raw_with_params( + ApiPath::templated(&path, 
"/prompts/{promptName}/{version}"), + prompt_template, + ¶ms_ref, + ) .await } @@ -113,14 +121,19 @@ impl PromptClient { /// * `new_version` - If true, creates new versions for existing prompts; if false, updates existing versions pub async fn save_prompts(&self, prompts: &[PromptTemplate], new_version: bool) -> Result<()> { let path = format!("/prompts?newVersion={}", new_version); - let _: serde_json::Value = self.api.post(ApiPath::templated(&path, "/prompts"), prompts).await?; + let _: serde_json::Value = self + .api + .post(ApiPath::templated(&path, "/prompts"), prompts) + .await?; Ok(()) } /// Retrieves the latest version of a prompt template by name pub async fn get_prompt(&self, prompt_name: &str) -> Result { let path = format!("/prompts/{}", prompt_name); - self.api.get(ApiPath::templated(&path, "/prompts/{promptName}")).await + self.api + .get(ApiPath::templated(&path, "/prompts/{promptName}")) + .await } /// Retrieves a specific version of a prompt template @@ -134,7 +147,9 @@ impl PromptClient { version: i32, ) -> Result { let path = format!("/prompts/{}/{}", prompt_name, version); - self.api.get(ApiPath::templated(&path, "/prompts/{promptName}/{version}")).await + self.api + .get(ApiPath::templated(&path, "/prompts/{promptName}/{version}")) + .await } /// Retrieves all versions of a specific prompt template @@ -146,7 +161,9 @@ impl PromptClient { /// List of all versions of the prompt template, ordered by version number pub async fn get_all_prompt_versions(&self, prompt_name: &str) -> Result> { let path = format!("/prompts/{}/versions", prompt_name); - self.api.get(ApiPath::templated(&path, "/prompts/{promptName}/versions")).await + self.api + .get(ApiPath::templated(&path, "/prompts/{promptName}/versions")) + .await } /// Retrieves all prompt templates (latest versions only) @@ -157,7 +174,9 @@ impl PromptClient { /// Deletes all versions of a prompt template pub async fn delete_prompt(&self, prompt_name: &str) -> Result<()> { let path = 
format!("/prompts/{}", prompt_name); - self.api.delete_no_content(ApiPath::templated(&path, "/prompts/{promptName}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/prompts/{promptName}")) + .await } /// Deletes a specific version of a prompt template @@ -167,7 +186,9 @@ impl PromptClient { /// * `version` - The version number to delete pub async fn delete_prompt_version(&self, prompt_name: &str, version: i32) -> Result<()> { let path = format!("/prompts/{}/{}", prompt_name, version); - self.api.delete_no_content(ApiPath::templated(&path, "/prompts/{promptName}/{version}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/prompts/{promptName}/{version}")) + .await } // ==================== Tag management ==================== @@ -178,7 +199,9 @@ impl PromptClient { prompt_name: &str, ) -> Result> { let path = format!("/prompts/{}/tags", prompt_name); - self.api.get(ApiPath::templated(&path, "/prompts/{promptName}/tags")).await + self.api + .get(ApiPath::templated(&path, "/prompts/{promptName}/tags")) + .await } /// Adds or updates tags for a prompt template @@ -188,7 +211,12 @@ impl PromptClient { tags: &[MetadataTag], ) -> Result<()> { let path = format!("/prompts/{}/tags", prompt_name); - self.api.put_no_response(ApiPath::templated(&path, "/prompts/{promptName}/tags"), tags).await + self.api + .put_no_response( + ApiPath::templated(&path, "/prompts/{promptName}/tags"), + tags, + ) + .await } /// Deletes specific tags from a prompt template @@ -198,7 +226,12 @@ impl PromptClient { tags: &[MetadataTag], ) -> Result<()> { let path = format!("/prompts/{}/tags", prompt_name); - self.api.delete_with_body(ApiPath::templated(&path, "/prompts/{promptName}/tags"), tags).await + self.api + .delete_with_body( + ApiPath::templated(&path, "/prompts/{promptName}/tags"), + tags, + ) + .await } // ==================== Testing ==================== diff --git a/src/client/scheduler_client.rs b/src/client/scheduler_client.rs index d9a7fb9..03d9e07 
100644 --- a/src/client/scheduler_client.rs +++ b/src/client/scheduler_client.rs @@ -21,13 +21,17 @@ impl SchedulerClient { /// Save (create or update) a schedule pub async fn save_schedule(&self, request: &SaveScheduleRequest) -> Result<()> { - self.api.post_no_response("/scheduler/schedules", request).await + self.api + .post_no_response("/scheduler/schedules", request) + .await } /// Get a schedule by name pub async fn get_schedule(&self, name: &str) -> Result { let path = format!("/scheduler/schedules/{}", name); - self.api.get(ApiPath::templated(&path, "/scheduler/schedules/{name}")).await + self.api + .get(ApiPath::templated(&path, "/scheduler/schedules/{name}")) + .await } /// Get all schedules, optionally filtered by workflow name @@ -65,19 +69,28 @@ impl SchedulerClient { } let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.get_with_params("/scheduler/nextFewSchedules", ¶ms_ref).await + self.api + .get_with_params("/scheduler/nextFewSchedules", ¶ms_ref) + .await } /// Delete a schedule pub async fn delete_schedule(&self, name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}", name); - self.api.delete_no_content(ApiPath::templated(&path, "/scheduler/schedules/{name}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/scheduler/schedules/{name}")) + .await } /// Pause a schedule pub async fn pause_schedule(&self, name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/pause", name); - self.api.get_no_response(ApiPath::templated(&path, "/scheduler/schedules/{name}/pause")).await + self.api + .get_no_response(ApiPath::templated( + &path, + "/scheduler/schedules/{name}/pause", + )) + .await } /// Pause all schedules @@ -88,7 +101,12 @@ impl SchedulerClient { /// Resume a schedule pub async fn resume_schedule(&self, name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/resume", name); - self.api.get_no_response(ApiPath::templated(&path, 
"/scheduler/schedules/{name}/resume")).await + self.api + .get_no_response(ApiPath::templated( + &path, + "/scheduler/schedules/{name}/resume", + )) + .await } /// Resume all schedules @@ -124,7 +142,9 @@ impl SchedulerClient { } let params_ref: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api.get_with_params("/scheduler/search/executions", ¶ms_ref).await + self.api + .get_with_params("/scheduler/search/executions", ¶ms_ref) + .await } /// Requeue all execution records @@ -135,19 +155,34 @@ impl SchedulerClient { /// Set tags for a schedule pub async fn set_scheduler_tags(&self, tags: &[MetadataTag], name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/tags", name); - self.api.put_no_response(ApiPath::templated(&path, "/scheduler/schedules/{name}/tags"), tags).await + self.api + .put_no_response( + ApiPath::templated(&path, "/scheduler/schedules/{name}/tags"), + tags, + ) + .await } /// Get tags for a schedule pub async fn get_scheduler_tags(&self, name: &str) -> Result> { let path = format!("/scheduler/schedules/{}/tags", name); - self.api.get(ApiPath::templated(&path, "/scheduler/schedules/{name}/tags")).await + self.api + .get(ApiPath::templated( + &path, + "/scheduler/schedules/{name}/tags", + )) + .await } /// Delete tags from a schedule pub async fn delete_scheduler_tags(&self, tags: &[MetadataTag], name: &str) -> Result<()> { let path = format!("/scheduler/schedules/{}/tags", name); - self.api.delete_with_body(ApiPath::templated(&path, "/scheduler/schedules/{name}/tags"), tags).await + self.api + .delete_with_body( + ApiPath::templated(&path, "/scheduler/schedules/{name}/tags"), + tags, + ) + .await } } diff --git a/src/client/schema_client.rs b/src/client/schema_client.rs index 197035b..9a9572b 100644 --- a/src/client/schema_client.rs +++ b/src/client/schema_client.rs @@ -26,7 +26,10 @@ impl SchemaClient { pub async fn get_schema(&self, schema_name: &str, version: i32) -> Result { let path = 
format!("/schema/{}", schema_name); self.api - .get_with_params(ApiPath::templated(&path, "/schema/{schemaName}"), &[("version", &version.to_string())]) + .get_with_params( + ApiPath::templated(&path, "/schema/{schemaName}"), + &[("version", &version.to_string())], + ) .await } @@ -39,14 +42,19 @@ impl SchemaClient { pub async fn delete_schema(&self, schema_name: &str, version: i32) -> Result<()> { let path = format!("/schema/{}", schema_name); self.api - .delete_with_params(ApiPath::templated(&path, "/schema/{schemaName}"), &[("version", &version.to_string())]) + .delete_with_params( + ApiPath::templated(&path, "/schema/{schemaName}"), + &[("version", &version.to_string())], + ) .await } /// Delete all versions of a schema by name pub async fn delete_schema_by_name(&self, schema_name: &str) -> Result<()> { let path = format!("/schema/{}/all", schema_name); - self.api.delete_no_content(ApiPath::templated(&path, "/schema/{schemaName}/all")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/schema/{schemaName}/all")) + .await } } diff --git a/src/client/secret_client.rs b/src/client/secret_client.rs index 1e57f9a..ab09244 100644 --- a/src/client/secret_client.rs +++ b/src/client/secret_client.rs @@ -21,13 +21,17 @@ impl SecretClient { /// Store a secret pub async fn put_secret(&self, key: &str, value: &str) -> Result<()> { let path = format!("/secrets/{}", key); - self.api.put_raw(ApiPath::templated(&path, "/secrets/{key}"), value).await + self.api + .put_raw(ApiPath::templated(&path, "/secrets/{key}"), value) + .await } /// Get a secret value pub async fn get_secret(&self, key: &str) -> Result { let path = format!("/secrets/{}", key); - self.api.get(ApiPath::templated(&path, "/secrets/{key}")).await + self.api + .get(ApiPath::templated(&path, "/secrets/{key}")) + .await } /// List all secret names @@ -46,31 +50,41 @@ impl SecretClient { /// Delete a secret pub async fn delete_secret(&self, key: &str) -> Result<()> { let path = format!("/secrets/{}", 
key); - self.api.delete_no_content(ApiPath::templated(&path, "/secrets/{key}")).await + self.api + .delete_no_content(ApiPath::templated(&path, "/secrets/{key}")) + .await } /// Check if a secret exists pub async fn secret_exists(&self, key: &str) -> Result { let path = format!("/secrets/{}/exists", key); - self.api.get(ApiPath::templated(&path, "/secrets/{key}/exists")).await + self.api + .get(ApiPath::templated(&path, "/secrets/{key}/exists")) + .await } /// Set tags for a secret pub async fn set_secret_tags(&self, tags: &[MetadataTag], key: &str) -> Result<()> { let path = format!("/secrets/{}/tags", key); - self.api.put_no_response(ApiPath::templated(&path, "/secrets/{key}/tags"), tags).await + self.api + .put_no_response(ApiPath::templated(&path, "/secrets/{key}/tags"), tags) + .await } /// Get tags for a secret pub async fn get_secret_tags(&self, key: &str) -> Result> { let path = format!("/secrets/{}/tags", key); - self.api.get(ApiPath::templated(&path, "/secrets/{key}/tags")).await + self.api + .get(ApiPath::templated(&path, "/secrets/{key}/tags")) + .await } /// Delete tags from a secret pub async fn delete_secret_tags(&self, tags: &[MetadataTag], key: &str) -> Result<()> { let path = format!("/secrets/{}/tags", key); - self.api.delete_with_body(ApiPath::templated(&path, "/secrets/{key}/tags"), tags).await + self.api + .delete_with_body(ApiPath::templated(&path, "/secrets/{key}/tags"), tags) + .await } } diff --git a/src/client/task_client.rs b/src/client/task_client.rs index 6bcf819..08df0c9 100644 --- a/src/client/task_client.rs +++ b/src/client/task_client.rs @@ -79,7 +79,10 @@ impl TaskClient { let tasks: Vec = self .api - .get_with_params(ApiPath::templated(&path, "/tasks/poll/batch/{taskType}"), ¶ms) + .get_with_params( + ApiPath::templated(&path, "/tasks/poll/batch/{taskType}"), + ¶ms, + ) .await?; debug!( @@ -152,7 +155,9 @@ impl TaskClient { /// Get task by ID pub async fn get_task(&self, task_id: &str) -> Result { let path = format!("/tasks/{}", 
task_id); - self.api.get(ApiPath::templated(&path, "/tasks/{taskId}")).await + self.api + .get(ApiPath::templated(&path, "/tasks/{taskId}")) + .await } /// Get tasks in progress for a task type @@ -175,14 +180,20 @@ impl TaskClient { let path = format!("/tasks/in_progress/{}", task_type); self.api - .get_with_params(ApiPath::templated(&path, "/tasks/in_progress/{taskType}"), ¶ms) + .get_with_params( + ApiPath::templated(&path, "/tasks/in_progress/{taskType}"), + ¶ms, + ) .await } /// Add a log to a task pub async fn add_task_log(&self, task_id: &str, log: &str) -> Result<()> { let path = format!("/tasks/{}/log", task_id); - let _: serde_json::Value = self.api.post(ApiPath::templated(&path, "/tasks/{taskId}/log"), &log).await?; + let _: serde_json::Value = self + .api + .post(ApiPath::templated(&path, "/tasks/{taskId}/log"), &log) + .await?; Ok(()) } @@ -192,7 +203,9 @@ impl TaskClient { task_id: &str, ) -> Result> { let path = format!("/tasks/{}/log", task_id); - self.api.get(ApiPath::templated(&path, "/tasks/{taskId}/log")).await + self.api + .get(ApiPath::templated(&path, "/tasks/{taskId}/log")) + .await } /// Get task queue sizes @@ -201,14 +214,19 @@ impl TaskClient { task_types: &[&str], ) -> Result> { let params: Vec<(&str, &str)> = task_types.iter().map(|t| ("taskType", *t)).collect(); - self.api.get_with_params("/tasks/queue/sizes", ¶ms).await + self.api + .get_with_params("/tasks/queue/sizes", ¶ms) + .await } /// Remove task from queue pub async fn remove_task_from_queue(&self, task_type: &str, task_id: &str) -> Result<()> { let path = format!("/tasks/queue/{}/{}", task_type, task_id); self.api - .delete_no_content(ApiPath::templated(&path, "/tasks/queue/{taskType}/{taskId}")) + .delete_no_content(ApiPath::templated( + &path, + "/tasks/queue/{taskType}/{taskId}", + )) .await } @@ -281,7 +299,10 @@ impl TaskClient { pub async fn get_task_poll_data(&self, task_type: &str) -> Result> { let path = format!("/tasks/queue/polldata/{}", task_type); self.api - 
.get(ApiPath::templated(&path, "/tasks/queue/polldata/{taskType}")) + .get(ApiPath::templated( + &path, + "/tasks/queue/polldata/{taskType}", + )) .await } @@ -314,9 +335,7 @@ impl TaskClient { let params: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api - .get_with_params("/tasks/search", ¶ms) - .await + self.api.get_with_params("/tasks/search", ¶ms).await } /// Search for tasks V2 (returns full task objects) @@ -338,9 +357,7 @@ impl TaskClient { let params: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api - .get_with_params("/tasks/search-v2", ¶ms) - .await + self.api.get_with_params("/tasks/search-v2", ¶ms).await } /// Requeue pending tasks diff --git a/src/client/workflow_client.rs b/src/client/workflow_client.rs index 61046c3..0914af7 100644 --- a/src/client/workflow_client.rs +++ b/src/client/workflow_client.rs @@ -124,7 +124,10 @@ impl WorkflowClient { let workflow: Workflow = self .api - .post(ApiPath::templated(&path, "/workflow/execute/{name}/{version}"), &exec_request) + .post( + ApiPath::templated(&path, "/workflow/execute/{name}/{version}"), + &exec_request, + ) .await?; info!( @@ -140,7 +143,9 @@ impl WorkflowClient { /// Get workflow by ID pub async fn get_workflow(&self, workflow_id: &str, include_tasks: bool) -> Result { let path = format!("/workflow/{}?includeTasks={}", workflow_id, include_tasks); - self.api.get(ApiPath::templated(&path, "/workflow/{workflowId}")).await + self.api + .get(ApiPath::templated(&path, "/workflow/{workflowId}")) + .await } /// Get workflow status @@ -154,7 +159,9 @@ impl WorkflowClient { "/workflow/{}/status?includeOutput={}&includeVariables={}", workflow_id, include_output, include_variables ); - self.api.get(ApiPath::templated(&path, "/workflow/{workflowId}/status")).await + self.api + .get(ApiPath::templated(&path, "/workflow/{workflowId}/status")) + .await } /// Terminate a running workflow @@ -183,7 +190,10 @@ impl WorkflowClient { let 
path = format!("/workflow/{}/pause", workflow_id); let _: serde_json::Value = self .api - .put(ApiPath::templated(&path, "/workflow/{workflowId}/pause"), &serde_json::Value::Null) + .put( + ApiPath::templated(&path, "/workflow/{workflowId}/pause"), + &serde_json::Value::Null, + ) .await?; Ok(()) } @@ -264,7 +274,10 @@ impl WorkflowClient { }; self.api - .post(ApiPath::templated(&path, "/workflow/{workflowId}/rerun"), &request) + .post( + ApiPath::templated(&path, "/workflow/{workflowId}/rerun"), + &request, + ) .await } @@ -276,7 +289,10 @@ impl WorkflowClient { ) -> Result { let path = format!("/workflow/{}/variables", workflow_id); self.api - .post(ApiPath::templated(&path, "/workflow/{workflowId}/variables"), &variables) + .post( + ApiPath::templated(&path, "/workflow/{workflowId}/variables"), + &variables, + ) .await } @@ -316,9 +332,7 @@ impl WorkflowClient { let params: Vec<(&str, &str)> = params.iter().map(|(k, v)| (*k, v.as_str())).collect(); - self.api - .get_with_params("/workflow/search", ¶ms) - .await + self.api.get_with_params("/workflow/search", ¶ms).await } /// Search for workflows V2 (returns full workflow objects) @@ -381,7 +395,10 @@ impl WorkflowClient { use_latest_def ); self.api - .post(ApiPath::templated(&path, "/workflow/bulk/restart"), workflow_ids) + .post( + ApiPath::templated(&path, "/workflow/bulk/restart"), + workflow_ids, + ) .await } @@ -404,7 +421,10 @@ impl WorkflowClient { path.push_str(&format!("?reason={}", urlencoding::encode(r))); } self.api - .post(ApiPath::templated(&path, "/workflow/bulk/terminate"), workflow_ids) + .post( + ApiPath::templated(&path, "/workflow/bulk/terminate"), + workflow_ids, + ) .await } @@ -448,7 +468,10 @@ impl WorkflowClient { } self.api - .get(ApiPath::templated(&path, "/workflow/running/{workflowName}")) + .get(ApiPath::templated( + &path, + "/workflow/running/{workflowName}", + )) .await } @@ -462,9 +485,7 @@ impl WorkflowClient { /// Test a workflow (dry run) pub async fn test_workflow(&self, 
request: &TestWorkflowRequest) -> Result { - self.api - .post("/workflow/test", request) - .await + self.api.post("/workflow/test", request).await } /// Remove/delete a workflow @@ -504,7 +525,10 @@ impl WorkflowClient { include_completed, include_tasks ); self.api - .post(ApiPath::templated(&path, "/workflow/correlated/batch"), batch_request) + .post( + ApiPath::templated(&path, "/workflow/correlated/batch"), + batch_request, + ) .await } @@ -534,7 +558,10 @@ impl WorkflowClient { } self.api - .post(ApiPath::templated(&path, "/workflow/{workflowId}/state"), update_request) + .post( + ApiPath::templated(&path, "/workflow/{workflowId}/state"), + update_request, + ) .await } diff --git a/src/http/api_client.rs b/src/http/api_client.rs index bab3d96..fddbdd9 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -215,10 +215,7 @@ impl ApiClient { } /// GET request - pub async fn get( - &self, - path: impl Into>, - ) -> Result { + pub async fn get(&self, path: impl Into>) -> Result { let p = path.into(); self.request::<(), T>(reqwest::Method::GET, p.path, p.metric_uri, None) .await @@ -237,7 +234,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.query(params); - let response = self.send_observed("GET", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("GET", p.path, p.metric_uri, request) + .await?; self.handle_response(response).await } @@ -265,7 +264,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("POST", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -287,27 +288,23 @@ impl ApiClient { } /// DELETE request - pub async fn delete( - &self, - path: impl Into>, - ) -> Result { + pub async fn delete(&self, path: impl Into>) -> Result { let p = path.into(); 
self.request::<(), T>(reqwest::Method::DELETE, p.path, p.metric_uri, None) .await } /// DELETE request with no response body - pub async fn delete_no_content( - &self, - path: impl Into>, - ) -> Result<()> { + pub async fn delete_no_content(&self, path: impl Into>) -> Result<()> { let p = path.into(); let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.delete(&url); request = self.add_auth_header(request).await?; - let response = self.send_observed("DELETE", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("DELETE", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() || status == StatusCode::NO_CONTENT { @@ -330,7 +327,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed("DELETE", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("DELETE", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() || status == StatusCode::NO_CONTENT { @@ -353,7 +352,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.query(params); - let response = self.send_observed("DELETE", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("DELETE", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() || status == StatusCode::NO_CONTENT { @@ -376,7 +377,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("POST", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -397,22 +400,23 @@ impl ApiClient { let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; - let response = 
self.send_observed("POST", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("POST", p.path, p.metric_uri, request) + .await?; self.handle_response(response).await } /// POST request with no body and no response - pub async fn post_no_body_no_response( - &self, - path: impl Into>, - ) -> Result<()> { + pub async fn post_no_body_no_response(&self, path: impl Into>) -> Result<()> { let p = path.into(); let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.post(&url); request = self.add_auth_header(request).await?; - let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("POST", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -435,7 +439,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed("PUT", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("PUT", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -446,11 +452,7 @@ impl ApiClient { } /// PUT request with raw text body - pub async fn put_raw( - &self, - path: impl Into>, - body: &str, - ) -> Result<()> { + pub async fn put_raw(&self, path: impl Into>, body: &str) -> Result<()> { let p = path.into(); let url = format!("{}{}", self.base_url, p.path); @@ -459,7 +461,9 @@ impl ApiClient { request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = self.send_observed("PUT", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("PUT", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -484,7 +488,9 @@ impl ApiClient { request = request.query(params); request = request.json(body); - let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; + 
let response = self + .send_observed("POST", p.path, p.metric_uri, request) + .await?; self.handle_response(response).await } @@ -504,7 +510,9 @@ impl ApiClient { request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = self.send_observed("POST", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("POST", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -530,7 +538,9 @@ impl ApiClient { request = request.body(body.to_string()); request = request.header("Content-Type", "text/plain"); - let response = self.send_observed("PUT", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("PUT", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -541,17 +551,16 @@ impl ApiClient { } /// GET request with no response - pub async fn get_no_response( - &self, - path: impl Into>, - ) -> Result<()> { + pub async fn get_no_response(&self, path: impl Into>) -> Result<()> { let p = path.into(); let url = format!("{}{}", self.base_url, p.path); let mut request = self.client.get(&url); request = self.add_auth_header(request).await?; - let response = self.send_observed("GET", p.path, p.metric_uri, request).await?; + let response = self + .send_observed("GET", p.path, p.metric_uri, request) + .await?; let status = response.status(); if status.is_success() { @@ -579,7 +588,9 @@ impl ApiClient { request = request.json(b); } - let response = self.send_observed(&method_str, path, metric_uri, request).await?; + let response = self + .send_observed(&method_str, path, metric_uri, request) + .await?; let status = response.status(); // If 401, try refreshing token and retry once @@ -593,7 +604,9 @@ impl ApiClient { request = request.json(b); } - let response = self.send_observed(&method_str, path, metric_uri, request).await?; + let response = self + .send_observed(&method_str, path, 
metric_uri, request) + .await?; return self.handle_response(response).await; } @@ -615,7 +628,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed(&method_str, path, metric_uri, request).await?; + let response = self + .send_observed(&method_str, path, metric_uri, request) + .await?; let status = response.status(); // If 401, try refreshing token and retry once @@ -626,7 +641,9 @@ impl ApiClient { request = self.add_auth_header(request).await?; request = request.json(body); - let response = self.send_observed(&method_str, path, metric_uri, request).await?; + let response = self + .send_observed(&method_str, path, metric_uri, request) + .await?; return self.handle_response(response).await; } @@ -827,7 +844,12 @@ impl ApiClient { }); let response = match self - .send_observed("POST", "/token", "/token", self.client.post(&url).json(&body)) + .send_observed( + "POST", + "/token", + "/token", + self.client.post(&url).json(&body), + ) .await { Ok(resp) => resp, @@ -918,7 +940,12 @@ impl ApiClient { let url = format!("{}/token", self.base_url); let body = serde_json::json!({"keyId": "probe", "keySecret": "probe"}); let is_oss = match self - .send_observed("POST", "/token", "/token", self.client.post(&url).json(&body)) + .send_observed( + "POST", + "/token", + "/token", + self.client.post(&url).json(&body), + ) .await { Ok(resp) => resp.status() == StatusCode::NOT_FOUND, From 78d65abd66a59a08cb8d6334e420dfdf5999bae4 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 18 May 2026 12:39:20 -0600 Subject: [PATCH 10/15] don't record http metrics on token endpoint --- CHANGELOG.md | 1 + src/http/api_client.rs | 72 ++++++++++++++++++++---------------------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2aa778..01572e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 - Canonical metrics: harmonized metric surface aligned with the cross-SDK catalog -- see [METRICS.md](METRICS.md) for the full catalog, configuration, and implementation details - Bounded `uri` label on `http_api_client_request_seconds`: uses path templates (e.g. `/workflow/{workflowId}`) instead of fully-resolved paths, preventing metric cardinality explosion from dynamic IDs - `WorkflowStatusProbe` in harness: opt-in probe (via `HARNESS_PROBE_RATE_PER_SEC`) that exercises UUID-bearing endpoints to validate template URI metrics +- Worker panic resilience: spawned task executions are wrapped in `catch_unwind` so that an uncaught panic is logged, publishes a `thread_uncaught_exceptions_total` metric event, and cleans up tracking state (semaphore permit, active task count) instead of silently leaking resources ### Changed diff --git a/src/http/api_client.rs b/src/http/api_client.rs index fddbdd9..9a0c8d1 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -594,20 +594,22 @@ impl ApiClient { let status = response.status(); // If 401, try refreshing token and retry once - if self.is_token_expired_error(status) && self.force_refresh_token().await.is_ok() { + if self.is_token_expired_error(status) { debug!(method = %method, url = %url, "Got 401, refreshing token and retrying"); - let mut request = self.client.request(method.clone(), &url); - request = self.add_auth_header(request).await?; + if self.force_refresh_token().await.is_ok() { + let mut request = self.client.request(method.clone(), &url); + request = self.add_auth_header(request).await?; - if let Some(b) = body { - request = request.json(b); - } + if let Some(b) = body { + request = request.json(b); + } - let response = self - .send_observed(&method_str, path, metric_uri, request) - .await?; - return self.handle_response(response).await; + let response = self + .send_observed(&method_str, path, metric_uri, request) + .await?; + return 
self.handle_response(response).await; + } } self.handle_response(response).await @@ -634,17 +636,19 @@ impl ApiClient { let status = response.status(); // If 401, try refreshing token and retry once - if self.is_token_expired_error(status) && self.force_refresh_token().await.is_ok() { + if self.is_token_expired_error(status) { debug!(method = %method, url = %url, "Got 401, refreshing token and retrying"); - let mut request = self.client.request(method.clone(), &url); - request = self.add_auth_header(request).await?; - request = request.json(body); + if self.force_refresh_token().await.is_ok() { + let mut request = self.client.request(method.clone(), &url); + request = self.add_auth_header(request).await?; + request = request.json(body); - let response = self - .send_observed(&method_str, path, metric_uri, request) - .await?; - return self.handle_response(response).await; + let response = self + .send_observed(&method_str, path, metric_uri, request) + .await?; + return self.handle_response(response).await; + } } self.handle_response(response).await @@ -843,20 +847,20 @@ impl ApiClient { "keySecret": secret }); - let response = match self - .send_observed( - "POST", - "/token", - "/token", - self.client.post(&url).json(&body), - ) - .await - { - Ok(resp) => resp, + let response = match self.client.post(&url).json(&body).send().await { + Ok(resp) => { + debug!( + method = "POST", + url = %url, + status = %resp.status(), + "Token refresh request completed" + ); + resp + } Err(e) => { *self.auth_failures.write().await += 1; error!(error = %e, "Network error during token refresh"); - return Err(e); + return Err(ConductorError::Http(e)); } }; @@ -939,15 +943,7 @@ impl ApiClient { // Probe /token let url = format!("{}/token", self.base_url); let body = serde_json::json!({"keyId": "probe", "keySecret": "probe"}); - let is_oss = match self - .send_observed( - "POST", - "/token", - "/token", - self.client.post(&url).json(&body), - ) - .await - { + let is_oss = match 
self.client.post(&url).json(&body).send().await { Ok(resp) => resp.status() == StatusCode::NOT_FOUND, Err(_) => false, }; From 19bd1bfce26dc2025faff60460c1296f0c8107f5 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 18 May 2026 12:53:37 -0600 Subject: [PATCH 11/15] adjust technical notes for metrics templated urls --- METRICS.md | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/METRICS.md b/METRICS.md index be6a3e6..e2d3bbc 100644 --- a/METRICS.md +++ b/METRICS.md @@ -200,25 +200,23 @@ same metric names and shapes that the Rust SDK emits by default. ## Detailed Technical Notes -### Path template `uri` label — cross-SDK implementation - -All Conductor SDKs preserve the API resource path template before path -parameter substitution and use it as the `uri` label on -`http_api_client_request_seconds`. This prevents cardinality explosion from -dynamic path segments (UUIDs, task type names, etc.) and excludes the server -URL's base path prefix. - -Each SDK implements this using the mechanism most natural to its HTTP stack: - -| SDK | Mechanism | Where template is captured | Where template is consumed | -|---|---|---|---| -| **Go** | `context.WithValue` with `pathTemplateKey` / `rawPathKey` | Each API resource method calls `metrics.WithPathTemplate(ctx, template)` before building the resolved URL. `executeCall` sets `WithRawPath` as fallback. | `metricsRoundTripper.RoundTrip` reads template from context; prefers template > rawPath > URL path. | -| **Java** | OkHttp `Request.tag(PathTemplateTag.class)` | `ConductorClient.buildRequest()` saves the un-substituted path as a `PathTemplateTag` on the request before replacing path params. | `ApiClientMetricsInterceptor` reads the tag at response time; falls back to `request.url().encodedPath()`. 
| -| **Python** | `metric_uri` keyword argument | `api_client.__call_api_no_retry()` saves `resource_path` before substitution and passes it as `metric_uri` through the call chain. | `CanonicalMetricsCollector.record_api_request_time()` prefers `metric_uri` over the resolved `uri`. | -| **Ruby** | `metric_uri` keyword argument | `ApiClient#call_api_no_retry` saves `resource_path` before substitution and passes it as `metric_uri:` to `RestClient#request`. | `RestClient#emit_http_event` uses `metric_uri` when present; falls back to `URI.parse(url).request_uri`. | -| **Rust** | `ApiPath` struct with `impl Into>` on public `ApiClient` methods | Static paths pass a plain `&str` (the `From<&str>` impl uses the same string for both path and metric label). Dynamic paths use `ApiPath::templated(&path, "/template/{id}")`. | `ApiClient::record_request` passes `metric_uri` directly to `HttpMetricsObserver::observe` as the `uri` label. | - -In all cases the template string is the API-relative resource path (e.g. +### Path template `uri` label — how it works + +The `uri` label on `http_api_client_request_seconds` carries a path +**template** rather than the fully-resolved request path, preventing +cardinality explosion from dynamic path segments (UUIDs, task type names, +etc.). The server URL's base path prefix (e.g. `/api`) is never included. + +The Rust SDK implements this via the `ApiPath` struct. Public `ApiClient` +methods accept `impl Into>`, which pairs a resolved request +path with a bounded-cardinality metric template. For static endpoints +(no dynamic segments), callers pass a plain `&str` — the `From<&str>` impl +uses the same string for both the request path and the metric label. For +dynamic endpoints, callers use `ApiPath::templated(&path, "/template/{id}")` +to supply both. `ApiClient::record_request` then passes the template to +`HttpMetricsObserver::observe` as the `uri` label. + +The template string is always the API-relative resource path (e.g. 
`/workflow/{workflowId}`), never the fully-qualified URL or the base-path- prefixed path. This means: From df3881789b08a3e6d67a6b1516d5dc32a134b27c Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 18 May 2026 13:52:22 -0600 Subject: [PATCH 12/15] use a None for a noop metrics and don't need to swap metrics implementation out, which reduces weird rust complexities related to basic stuff like using variables. --- src/http/api_client.rs | 42 ++++++++++------------ src/http/metrics.rs | 15 -------- src/http/mod.rs | 2 +- src/worker/task_handler.rs | 25 ++++++++------ src/worker/task_runner.rs | 71 +++++++++++++++++++++++++++++++------- 5 files changed, 92 insertions(+), 63 deletions(-) diff --git a/src/http/api_client.rs b/src/http/api_client.rs index 9a0c8d1..f2ba8fb 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -10,7 +10,7 @@ use tracing::{debug, error, info, warn}; use crate::configuration::Configuration; use crate::error::{ConductorError, Result}; -use crate::http::metrics::{HttpMetricsObserver, NoopHttpMetricsObserver}; +use crate::http::metrics::HttpMetricsObserver; /// Token response from authentication endpoint #[derive(Debug, serde::Deserialize)] @@ -93,12 +93,10 @@ pub struct ApiClient { token_refresh_lock: Arc>, /// Cached result of OSS detection (None = not yet probed) is_oss: Arc>>, - /// Shared HTTP metrics observer. Starts as a no-op; can be replaced at - /// runtime via [`ApiClient::set_http_metrics`] so that a - /// `MetricsCollector` built later (e.g. inside `TaskHandler::enable_metrics`) - /// can observe requests made by clients already vended from this - /// `ApiClient` (since clones share the same inner `Arc`). - http_metrics: Arc>>, + /// HTTP metrics observer, set once at construction time. `None` when + /// the client is created standalone (without a `TaskHandler`). + /// `Some` when created via [`ApiClient::with_http_observer`]. 
+ http_metrics: Option>, } impl ApiClient { @@ -124,7 +122,7 @@ impl ApiClient { last_refresh_attempt: Arc::new(RwLock::new(None)), token_refresh_lock: Arc::new(Mutex::new(())), is_oss: Arc::new(RwLock::new(None)), - http_metrics: Arc::new(parking_lot::RwLock::new(NoopHttpMetricsObserver::arc())), + http_metrics: None, }) } @@ -133,21 +131,16 @@ impl ApiClient { &self.base_url } - /// Install an [`HttpMetricsObserver`] that will be invoked after every - /// request completes. Replaces the previously-installed observer. + /// Create an API client with an HTTP metrics observer pre-installed. /// - /// This swap is visible to every clone of this `ApiClient` (they share the - /// same inner `Arc>`), so metrics can be enabled after service - /// clients have already been vended. - pub fn set_http_metrics(&self, observer: Arc) { - *self.http_metrics.write() = observer; - } - - /// Snapshot the current observer. Internal helper so we drop the read - /// lock before the request hot-path actually calls `observe`. - #[inline] - fn http_metrics(&self) -> Arc { - Arc::clone(&self.http_metrics.read()) + /// All clones of this client share the same observer via `Arc`. + pub fn with_http_observer( + config: Configuration, + observer: Arc, + ) -> Result { + let mut client = Self::new(config)?; + client.http_metrics = Some(observer); + Ok(client) } /// Unified post-request bookkeeping: tracing log + observer callback. @@ -173,8 +166,9 @@ impl ApiClient { duration_ms = %duration.as_millis(), "API request completed" ); - self.http_metrics() - .observe(method, metric_uri, status_str, duration); + if let Some(obs) = &self.http_metrics { + obs.observe(method, metric_uri, status_str, duration); + } } /// Convenience: call [`record_request`](Self::record_request) with a diff --git a/src/http/metrics.rs b/src/http/metrics.rs index eb11db5..a506ccf 100644 --- a/src/http/metrics.rs +++ b/src/http/metrics.rs @@ -14,7 +14,6 @@ //! rather than the interpolated request path. 
The server base-path prefix //! (e.g. `/api`) is not included. -use std::sync::Arc; use std::time::Duration; /// Observer invoked by [`ApiClient`](super::ApiClient) after every request @@ -35,17 +34,3 @@ pub trait HttpMetricsObserver: Send + Sync { /// - `duration`: wall-clock time from send to response-received (or error). fn observe(&self, method: &str, uri: &str, status: &str, duration: Duration); } - -/// No-op observer installed by default. -pub struct NoopHttpMetricsObserver; - -impl HttpMetricsObserver for NoopHttpMetricsObserver { - fn observe(&self, _method: &str, _uri: &str, _status: &str, _duration: Duration) {} -} - -impl NoopHttpMetricsObserver { - /// Return a shared no-op observer instance. - pub fn arc() -> Arc { - Arc::new(Self) - } -} diff --git a/src/http/mod.rs b/src/http/mod.rs index 6ad5180..8a4f82f 100644 --- a/src/http/mod.rs +++ b/src/http/mod.rs @@ -5,4 +5,4 @@ mod api_client; mod metrics; pub use api_client::{ApiClient, ApiPath}; -pub use metrics::{HttpMetricsObserver, NoopHttpMetricsObserver}; +pub use metrics::HttpMetricsObserver; diff --git a/src/worker/task_handler.rs b/src/worker/task_handler.rs index 026ff5b..1c7617a 100644 --- a/src/worker/task_handler.rs +++ b/src/worker/task_handler.rs @@ -66,21 +66,26 @@ impl TaskHandler { self.event_dispatcher.register(listener); } - /// Enable metrics collection. + /// Enable metrics collection and (optionally) the HTTP scrape endpoint. /// - /// Registers a [`MetricsCollector`] both as a task-runner event listener - /// (to populate task/workflow metrics) *and* as the - /// [`HttpMetricsObserver`](crate::http::HttpMetricsObserver) for the - /// underlying [`ApiClient`] (to populate - /// `http_api_client_request_seconds`). The observer swap is visible to - /// every `ApiClient` clone vended from this handler, including those - /// returned by [`conductor_client`](Self::conductor_client). 
+ /// Creates a [`MetricsCollector`] with the given settings and wires it + /// as both the [`TaskRunnerEventsListener`] (for task/workflow counters + /// and histograms) and the + /// [`HttpMetricsObserver`](crate::http::HttpMetricsObserver) (for + /// `http_api_client_request_seconds`). + /// + /// Must be called **before** [`start`](Self::start). Clients vended + /// after this call (via [`conductor_client`](Self::conductor_client), + /// [`task_client`](Self::task_client), etc.) will share the observer. pub fn enable_metrics(&mut self, settings: MetricsSettings) { let collector = Arc::new(MetricsCollector::new(settings)); self.event_dispatcher .register(collector.clone() as Arc); - self.api_client - .set_http_metrics(collector.clone() as Arc); + self.api_client = ApiClient::with_http_observer( + self.config.clone(), + collector.clone() as Arc, + ) + .expect("ApiClient creation should not fail on previously valid config"); self.metrics_collector = Some(collector); } diff --git a/src/worker/task_runner.rs b/src/worker/task_runner.rs index f1a5527..a3e3c66 100644 --- a/src/worker/task_runner.rs +++ b/src/worker/task_runner.rs @@ -23,6 +23,17 @@ use crate::models::Task; use super::{Worker, WorkerOutput}; +/// Result of a task execution attempt, including panics. +/// +/// Returned by [`TaskRunner::execute_catching_panic`] so the caller can +/// handle success, regular errors, and panics without accessing any +/// state that was inside the `AssertUnwindSafe` boundary. +enum TaskOutcome { + Ok, + Err(crate::error::ConductorError), + Panic(String), +} + /// Task runner for a single worker type pub struct TaskRunner { worker: Arc, @@ -340,38 +351,36 @@ impl TaskRunner { active_task_count.fetch_add(1, Ordering::SeqCst); running_tasks.lock().insert(task_id.clone()); - // Catch panics escaping the worker so we can publish - // `ThreadUncaughtException` and still clean up tracking state. 
- // Tokio's default panic behavior is to unwind the task; we need - // observability before the unwind reaches the reactor. - let outcome = AssertUnwindSafe(Self::execute_and_update_task( - &worker, + // `worker`, `config`, and `task` are moved into the panic-catching + // boundary and cannot be accessed in the cleanup code below. + let outcome = Self::execute_catching_panic( + worker, &task_client, &event_dispatcher, - &config, + config, task, - )) - .catch_unwind() + ) .await; - // Cleanup: remove from tracking + // Cleanup: only atomics, locks, and event_dispatcher are accessible running_tasks.lock().remove(&task_id); active_task_count.fetch_sub(1, Ordering::SeqCst); spawned_task_count.fetch_sub(1, Ordering::SeqCst); match outcome { - Ok(Ok(())) => {} - Ok(Err(e)) => { + TaskOutcome::Ok => {} + TaskOutcome::Err(e) => { error!( task_id = %task_id, error = %e, "Task execution failed" ); } - Err(_panic_payload) => { + TaskOutcome::Panic(panic_msg) => { error!( task_id = %task_id, task_type = %task_type, + panic_message = %panic_msg, "Uncaught panic in worker task" ); event_dispatcher.publish_thread_uncaught_exception( @@ -382,6 +391,42 @@ impl TaskRunner { }); } + /// Execute a task inside a panic-catching boundary. + /// + /// `worker`, `config`, and `task` are consumed so that the caller + /// cannot access them after a potential panic — only the returned + /// [`TaskOutcome`] carries the information needed for logging and + /// event publishing. 
+ async fn execute_catching_panic( + worker: Arc, + task_client: &TaskClient, + event_dispatcher: &EventDispatcher, + config: Arc, + task: Task, + ) -> TaskOutcome { + match AssertUnwindSafe(Self::execute_and_update_task( + &worker, + task_client, + event_dispatcher, + &config, + task, + )) + .catch_unwind() + .await + { + Ok(Ok(())) => TaskOutcome::Ok, + Ok(Err(e)) => TaskOutcome::Err(e), + Err(panic_payload) => { + let msg = panic_payload + .downcast_ref::() + .map(|s| s.as_str()) + .or_else(|| panic_payload.downcast_ref::<&str>().copied()) + .unwrap_or(""); + TaskOutcome::Panic(msg.to_string()) + } + } + } + /// Execute a task and update the result /// /// Takes ownership of the Task to wrap it in Arc, avoiding clones From 1522abe75724d0a61bb1427dc118b29aeac3e45d Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 18 May 2026 13:58:59 -0600 Subject: [PATCH 13/15] run cargo fmt --- src/worker/task_runner.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/worker/task_runner.rs b/src/worker/task_runner.rs index a3e3c66..d3157da 100644 --- a/src/worker/task_runner.rs +++ b/src/worker/task_runner.rs @@ -353,14 +353,9 @@ impl TaskRunner { // `worker`, `config`, and `task` are moved into the panic-catching // boundary and cannot be accessed in the cleanup code below. 
- let outcome = Self::execute_catching_panic( - worker, - &task_client, - &event_dispatcher, - config, - task, - ) - .await; + let outcome = + Self::execute_catching_panic(worker, &task_client, &event_dispatcher, config, task) + .await; // Cleanup: only atomics, locks, and event_dispatcher are accessible running_tasks.lock().remove(&task_id); From 31395f1ed1c617bc664c397d5c6a347187b11b85 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 18 May 2026 14:07:53 -0600 Subject: [PATCH 14/15] bugfix for active workers decrement from panic --- src/metrics/collector.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/metrics/collector.rs b/src/metrics/collector.rs index 93f7a17..3d09648 100644 --- a/src/metrics/collector.rs +++ b/src/metrics/collector.rs @@ -587,6 +587,12 @@ impl TaskRunnerEventsListener for MetricsCollector { self.thread_uncaught_exceptions_total .with_label_values(&[&event.exception]) .inc(); + + // A panic bypasses the normal TaskExecutionCompleted / TaskExecutionFailure + // path, so we must decrement here to keep the gauge accurate. Safe if the + // task_type was never started (get_mut returns None) or if the count is + // already 0 (clamped by max(0) inside decrement_active). 
+ self.decrement_active(&event.task_type); } fn on_workflow_started(&self, event: &WorkflowStarted) { From 0632ee3e50841e81a7b42f65d770fd7bbf1fd9b7 Mon Sep 17 00:00:00 2001 From: Chris Hagglund Date: Mon, 18 May 2026 15:19:18 -0600 Subject: [PATCH 15/15] make linter happier by streamlining how http observer is set on api client --- src/http/api_client.rs | 18 ++++++------------ src/worker/task_handler.rs | 7 ++----- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/http/api_client.rs b/src/http/api_client.rs index f2ba8fb..1a02f8b 100644 --- a/src/http/api_client.rs +++ b/src/http/api_client.rs @@ -93,9 +93,8 @@ pub struct ApiClient { token_refresh_lock: Arc>, /// Cached result of OSS detection (None = not yet probed) is_oss: Arc>>, - /// HTTP metrics observer, set once at construction time. `None` when - /// the client is created standalone (without a `TaskHandler`). - /// `Some` when created via [`ApiClient::with_http_observer`]. + /// HTTP metrics observer. `None` when the client is created standalone + /// (without a `TaskHandler`). `Some` after [`set_http_observer`](Self::set_http_observer). http_metrics: Option>, } @@ -131,16 +130,11 @@ impl ApiClient { &self.base_url } - /// Create an API client with an HTTP metrics observer pre-installed. + /// Install an HTTP metrics observer on an existing client. /// - /// All clones of this client share the same observer via `Arc`. - pub fn with_http_observer( - config: Configuration, - observer: Arc, - ) -> Result { - let mut client = Self::new(config)?; - client.http_metrics = Some(observer); - Ok(client) + /// All subsequent clones share the same observer via `Arc`. + pub fn set_http_observer(&mut self, observer: Arc) { + self.http_metrics = Some(observer); } /// Unified post-request bookkeeping: tracing log + observer callback. 
diff --git a/src/worker/task_handler.rs b/src/worker/task_handler.rs index 1c7617a..0062c8f 100644 --- a/src/worker/task_handler.rs +++ b/src/worker/task_handler.rs @@ -81,11 +81,8 @@ impl TaskHandler { let collector = Arc::new(MetricsCollector::new(settings)); self.event_dispatcher .register(collector.clone() as Arc); - self.api_client = ApiClient::with_http_observer( - self.config.clone(), - collector.clone() as Arc, - ) - .expect("ApiClient creation should not fail on previously valid config"); + self.api_client + .set_http_observer(collector.clone() as Arc); self.metrics_collector = Some(collector); }