diff --git a/atoma-proxy/src/server/handlers/chat_completions.rs b/atoma-proxy/src/server/handlers/chat_completions.rs index 2609cb06..376c684f 100644 --- a/atoma-proxy/src/server/handlers/chat_completions.rs +++ b/atoma-proxy/src/server/handlers/chat_completions.rs @@ -34,6 +34,7 @@ use openai_api::{ }; use openai_api::{CreateChatCompletionRequest, CreateChatCompletionStreamRequest}; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::Deserialize; use serde_json::Value; use sqlx::types::chrono::{DateTime, Utc}; @@ -46,9 +47,10 @@ use super::metrics::{ CHAT_COMPLETIONS_INPUT_TOKENS, CHAT_COMPLETIONS_INPUT_TOKENS_PER_USER, CHAT_COMPLETIONS_LATENCY_METRICS, CHAT_COMPLETIONS_NUM_REQUESTS, CHAT_COMPLETIONS_TOTAL_TOKENS, CHAT_COMPLETIONS_TOTAL_TOKENS_PER_USER, CHAT_COMPLETION_REQUESTS_PER_USER, - INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER, + INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_BAD_REQUESTS, + TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, + TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, TOTAL_TOO_MANY_REQUESTS, + TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER, }; use super::request_model::{ComputeUnitsEstimate, RequestModel}; use super::{ @@ -78,6 +80,12 @@ pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions"; /// The messages field in the request payload. 
const MESSAGES: &str = "messages"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + #[derive(OpenApi)] #[openapi( paths(chat_completions_create, chat_completions_create_stream), @@ -175,12 +183,32 @@ pub async fn chat_completions_create( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - TOTAL_FAILED_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CHAT_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } if let Some(stack_small_id) = metadata.selected_stack_small_id { update_state_manager( &state.state_manager_sender, diff --git a/atoma-proxy/src/server/handlers/completions.rs b/atoma-proxy/src/server/handlers/completions.rs index 9f28644e..9f2d4078 100644 --- a/atoma-proxy/src/server/handlers/completions.rs +++ b/atoma-proxy/src/server/handlers/completions.rs @@ -23,6 +23,7 @@ use openai_api_completions::{ Usage, }; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::Deserialize; 
use serde_json::Value; use sqlx::types::chrono::{DateTime, Utc}; @@ -31,11 +32,14 @@ use utoipa::OpenApi; use super::metrics::{ CHAT_COMPLETIONS_COMPLETIONS_TOKENS, CHAT_COMPLETIONS_COMPLETIONS_TOKENS_PER_USER, - CHAT_COMPLETIONS_INPUT_TOKENS, CHAT_COMPLETIONS_INPUT_TOKENS_PER_USER, - CHAT_COMPLETIONS_LATENCY_METRICS, CHAT_COMPLETIONS_NUM_REQUESTS, CHAT_COMPLETIONS_TOTAL_TOKENS, + CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS, CHAT_COMPLETIONS_INPUT_TOKENS, + CHAT_COMPLETIONS_INPUT_TOKENS_PER_USER, CHAT_COMPLETIONS_LATENCY_METRICS, + CHAT_COMPLETIONS_NUM_REQUESTS, CHAT_COMPLETIONS_TOTAL_TOKENS, CHAT_COMPLETIONS_TOTAL_TOKENS_PER_USER, CHAT_COMPLETION_REQUESTS_PER_USER, - INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, + INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_BAD_REQUESTS, + TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS, TOTAL_FAILED_CHAT_REQUESTS, + TOTAL_FAILED_REQUESTS, TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, + TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER, }; use super::request_model::{ComputeUnitsEstimate, RequestModel}; @@ -57,6 +61,12 @@ pub const CONFIDENTIAL_COMPLETIONS_PATH: &str = "/v1/confidential/completions"; /// The key for the prompt in the request. const PROMPT: &str = "prompt"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + /// The OpenAPI schema for the completions endpoint. 
#[derive(OpenApi)] #[openapi( @@ -133,12 +143,33 @@ pub async fn completions_create( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - TOTAL_FAILED_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CHAT_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } + if let Some(stack_small_id) = metadata.selected_stack_small_id { update_state_manager( &state.state_manager_sender, @@ -381,19 +412,38 @@ pub async fn confidential_completions_create( Ok(response) => { if !is_streaming { // The streaming metric is recorded in the streamer (final chunk) - TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new("model", metadata.model_name)]); + CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, metadata.model_name)]); } Ok(response) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_CHAT_REQUESTS.add(1, &[KeyValue::new("model", model_label.clone())]); - - // Record the failed request in the total failed requests metric - 
TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } if let Some(stack_small_id) = metadata.selected_stack_small_id { update_state_manager( &state.state_manager_sender, @@ -670,7 +720,7 @@ async fn handle_non_streaming_response( /// * `node_address` - The address of the node /// * `user_id` - The user id /// * `headers` - The headers of the request -/// * `payload` - The payload of the request +/// * `payload` - The payload of the request /// * `num_input_tokens` - The number of input tokens /// * `estimated_output_tokens` - The estimated output tokens /// * `price_per_million` - The price per million @@ -687,7 +737,7 @@ async fn handle_non_streaming_response( /// * `serde_json::Error` - If the request fails /// * `flume::Error` - If the request fails /// * `tokio::Error` - If the request fails -/// +/// #[instrument( level = "info", skip_all, diff --git a/atoma-proxy/src/server/handlers/embeddings.rs b/atoma-proxy/src/server/handlers/embeddings.rs index fbc1db86..2e2eba48 
100644 --- a/atoma-proxy/src/server/handlers/embeddings.rs +++ b/atoma-proxy/src/server/handlers/embeddings.rs @@ -9,6 +9,7 @@ use axum::{ Extension, Json, }; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::{Deserialize, Serialize}; use serde_json::Value; use sqlx::types::chrono::{DateTime, Utc}; @@ -28,8 +29,10 @@ use super::{ handle_status_code_error, metrics::{ EMBEDDING_TOTAL_TOKENS_PER_USER, SUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER, - TEXT_EMBEDDINGS_LATENCY_METRICS, TEXT_EMBEDDINGS_NUM_REQUESTS, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_REQUESTS, TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS, + TEXT_EMBEDDINGS_LATENCY_METRICS, TEXT_EMBEDDINGS_NUM_REQUESTS, TOTAL_BAD_REQUESTS, + TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_CONFIDENTIAL_EMBEDDING_REQUESTS, + TOTAL_FAILED_REQUESTS, TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS, TOTAL_LOCKED_REQUESTS, + TOTAL_TOO_EARLY_REQUESTS, TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER, }, request_model::{ComputeUnitsEstimate, RequestModel}, @@ -52,6 +55,12 @@ pub const EMBEDDINGS_PATH: &str = "/v1/embeddings"; /// The input field in the request payload. const INPUT: &str = "input"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + // A model representing an embeddings request payload. 
/// /// This struct encapsulates the necessary fields for processing an embeddings request @@ -224,11 +233,32 @@ pub async fn embeddings_create( Ok(Json(response).into_response()) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label.clone())]); - TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } match metadata.selected_stack_small_id { Some(stack_small_id) => { update_state_manager( @@ -374,11 +404,32 @@ pub async fn confidential_embeddings_create( Ok(Json(response).into_response()) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label.clone())]); - TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match 
e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CONFIDENTIAL_EMBEDDING_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } match metadata.selected_stack_small_id { Some(stack_small_id) => { diff --git a/atoma-proxy/src/server/handlers/image_generations.rs b/atoma-proxy/src/server/handlers/image_generations.rs index 31dbbb80..78061d74 100644 --- a/atoma-proxy/src/server/handlers/image_generations.rs +++ b/atoma-proxy/src/server/handlers/image_generations.rs @@ -6,6 +6,7 @@ use axum::response::{IntoResponse, Response}; use axum::Extension; use axum::{extract::State, http::HeaderMap, Json}; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::{Deserialize, Serialize}; use serde_json::Value; use sqlx::types::chrono::{DateTime, Utc}; @@ -19,8 +20,10 @@ use crate::server::{http_server::ProxyState, middleware::RequestMetadataExtensio use super::metrics::{ IMAGE_GENERATION_TOTAL_TOKENS_PER_USER, IMAGE_GEN_LATENCY_METRICS, IMAGE_GEN_NUM_REQUESTS, - SUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS, TOTAL_FAILED_REQUESTS, + SUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER, TOTAL_BAD_REQUESTS, TOTAL_COMPLETED_REQUESTS, + TOTAL_FAILED_CONFIDENTIAL_IMAGE_GENERATION_REQUESTS, 
TOTAL_FAILED_IMAGE_GENERATION_REQUESTS, + TOTAL_FAILED_REQUESTS, TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, + TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER, }; use super::request_model::ComputeUnitsEstimate; @@ -47,6 +50,12 @@ const N: &str = "n"; /// The size field in the request payload. const SIZE: &str = "size"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + /// A model representing the parameters for an image generation request. /// /// This struct encapsulates the required parameters for generating images through @@ -217,12 +226,32 @@ pub async fn image_generations_create( } Err(e) => { // Record the failed request in the image generations num requests metric - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS - .add(1, &[KeyValue::new("model", model_label.clone())]); + let model: String = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_IMAGE_GENERATION_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); - // Record the failed request in the total failed requests metric - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); + UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } match 
metadata.selected_stack_small_id { Some(stack_small_id) => { @@ -327,20 +356,38 @@ pub async fn confidential_image_generations_create( Ok(response) => { // NOTE: At this point, we do not need to update the stack num compute units, // because the image generation response was correctly generated by a TEE node. - TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new("model", metadata.model_name)]); + TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, metadata.model_name)]); SUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); Ok(response.into_response()) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS - .add(1, &[KeyValue::new("model", model_label.clone())]); - - // Record the failed request in the total failed requests metric - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model: String = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CONFIDENTIAL_IMAGE_GENERATION_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } 
match metadata.selected_stack_small_id { Some(stack_small_id) => { update_state_manager( diff --git a/atoma-proxy/src/server/handlers/metrics.rs b/atoma-proxy/src/server/handlers/metrics.rs index 09bfb38a..66014dfc 100644 --- a/atoma-proxy/src/server/handlers/metrics.rs +++ b/atoma-proxy/src/server/handlers/metrics.rs @@ -316,6 +316,112 @@ pub static TOTAL_FAILED_CHAT_REQUESTS: LazyLock<Counter<u64>> = LazyLock::new(|| .build() }); +/// Counter metric that tracks the total number of too many requests. +/// +/// # Metric Details +/// - Name: `atoma_total_too_many_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_TOO_MANY_REQUESTS: LazyLock<Counter<u64>> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_too_many_requests") + .with_description("Total number of too many requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of bad requests. +/// +/// # Metric Details +/// - Name: `atoma_total_bad_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_BAD_REQUESTS: LazyLock<Counter<u64>> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_bad_requests") + .with_description("Total number of bad requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of locked requests. +/// +/// # Metric Details +/// - Name: `atoma_total_locked_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_LOCKED_REQUESTS: LazyLock<Counter<u64>> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_locked_requests") + .with_description("Total number of locked requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of too early requests.
+/// +/// # Metric Details +/// - Name: `atoma_total_too_early_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_TOO_EARLY_REQUESTS: LazyLock<Counter<u64>> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_too_early_requests") + .with_description("Total number of too early requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of unauthorized requests. +/// +/// # Metric Details +/// - Name: `atoma_total_unauthorized_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_UNAUTHORIZED_REQUESTS: LazyLock<Counter<u64>> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_unauthorized_requests") + .with_description("Total number of unauthorized requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of confidential chat requests. +/// +/// # Metric Details +/// - Name: `atoma_total_confidential_chat_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS: LazyLock<Counter<u64>> = + LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_confidential_chat_requests") + .with_description("Total number of confidential chat requests") + .with_unit("requests") + .build() + }); + +/// Counter metric that tracks the total number of failed confidential chat requests. +/// +/// # Metric Details +/// - Name: `atoma_total_failed_confidential_chat_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS: LazyLock<Counter<u64>> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_failed_confidential_chat_requests") + .with_description("Total number of failed confidential chat requests") + .with_unit("requests") + .build() +}); + /// Counter metric that tracks the total number of failed image generation requests.
/// /// # Metric Details @@ -331,6 +437,22 @@ pub static TOTAL_FAILED_IMAGE_GENERATION_REQUESTS: LazyLock<Counter<u64>> = Lazy .build() }); +/// Counter metric that tracks the total number of failed confidential image generation requests. +/// +/// # Metric Details +/// - Name: `atoma_total_failed_confidential_image_generation_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_FAILED_CONFIDENTIAL_IMAGE_GENERATION_REQUESTS: LazyLock<Counter<u64>> = + LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_failed_confidential_image_generation_requests") + .with_description("Total number of failed confidential image generation requests") + .with_unit("requests") + .build() + }); + /// Counter metric that tracks the total number of failed text embedding requests. /// /// # Metric Details @@ -486,6 +608,22 @@ pub static UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER: LazyLock<Counter<u64>> .build() }); +/// Counter metric that tracks the total number of failed text embedding confidential requests. +/// +/// # Metric Details +/// - Name: `atoma_total_failed_text_embedding_confidential_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_FAILED_CONFIDENTIAL_EMBEDDING_REQUESTS: LazyLock<Counter<u64>> = + LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_failed_text_embedding_confidential_requests") + .with_description("Total number of failed text embedding confidential requests") + .with_unit("requests") + .build() + }); + /// Counter metric that tracks the total number of chat completion tokens per user. /// /// # Metric Details