Skip to content

Commit 0e67ce9

Browse files
chore: properly increment failed requests metric (#12)
* chore: systematize counter metrics * chore: properly increment err in centralized insecure crs gen
1 parent 9b8f6e1 commit 0e67ce9

File tree

14 files changed

+601
-518
lines changed

14 files changed

+601
-518
lines changed

core/service/src/engine/centralized/central_kms.rs

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -478,20 +478,13 @@ pub fn central_public_decrypt<
478478
// run the decryption of each ct in the batch in parallel
479479
cts.par_iter()
480480
.map(|ct| {
481-
let inner_timer = metrics::METRICS
481+
let mut inner_timer = metrics::METRICS
482482
.time_operation(OP_PUBLIC_DECRYPT_INNER)
483-
.map_err(|e| tracing::warn!("Failed to create metric: {}", e))
484-
.and_then(|b| {
485-
b.tags(metric_tags.clone()).map_err(|e| {
486-
tracing::warn!("Failed to a tag in party_id, key_id or request_id : {}", e)
487-
})
488-
})
489-
.map(|b| b.start())
490-
.map_err(|e| tracing::warn!("Failed to start timer: {:?}", e))
491-
.ok();
483+
.tags(metric_tags.clone())
484+
.start();
492485
let fhe_type = ct.fhe_type()?;
493486
let fhe_type_string = ct.fhe_type_string();
494-
inner_timer.map(|mut b| b.tag(TAG_TFHE_TYPE, fhe_type_string));
487+
inner_timer.tag(TAG_TFHE_TYPE, fhe_type_string);
495488
RealCentralizedKms::<PubS, PrivS>::public_decrypt(
496489
keys,
497490
&ct.ciphertext,
@@ -530,21 +523,14 @@ pub async fn async_user_decrypt<
530523

531524
let mut all_signcrypted_cts = vec![];
532525
for typed_ciphertext in typed_ciphertexts {
533-
let inner_timer = metrics::METRICS
526+
let mut inner_timer = metrics::METRICS
534527
.time_operation(OP_USER_DECRYPT_INNER)
535-
.map_err(|e| tracing::warn!("Failed to create metric: {}", e))
536-
.and_then(|b| {
537-
b.tags(metric_tags.clone()).map_err(|e| {
538-
tracing::warn!("Failed to a tag in party_id, key_id or request_id : {}", e)
539-
})
540-
})
541-
.map(|b| b.start())
542-
.map_err(|e| tracing::warn!("Failed to start timer: {:?}", e))
543-
.ok();
528+
.tags(metric_tags.clone())
529+
.start();
544530
let high_level_ct = &typed_ciphertext.ciphertext;
545531
let fhe_type = typed_ciphertext.fhe_type()?;
546532
let fhe_type_string = typed_ciphertext.fhe_type_string();
547-
inner_timer.map(|mut b| b.tag(TAG_TFHE_TYPE, fhe_type_string));
533+
inner_timer.tag(TAG_TFHE_TYPE, fhe_type_string);
548534
let ct_format = typed_ciphertext.ciphertext_format();
549535
let external_handle = typed_ciphertext.external_handle.clone();
550536
let signcrypted_ciphertext = RealCentralizedKms::<PubS, PrivS>::user_decrypt(

core/service/src/engine/centralized/endpoint.rs

Lines changed: 120 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ use crate::engine::centralized::service::{
1414
get_public_decryption_result_impl, get_user_decryption_result_impl, public_decrypt_impl,
1515
user_decrypt_impl,
1616
};
17+
use observability::{
18+
metrics::METRICS,
19+
metrics_names::{
20+
map_tonic_code_to_metric_tag, ERR_INVALID_REQUEST, OP_CRS_GEN_REQUEST, OP_CRS_GEN_RESULT,
21+
OP_CUSTODIAN_CONTEXT_RESTORE, OP_DESTROY_CUSTODIAN_CONTEXT, OP_DESTROY_KMS_CONTEXT,
22+
OP_FETCH_PK, OP_INIT, OP_KEYGEN_PREPROC_REQUEST, OP_KEYGEN_PREPROC_RESULT,
23+
OP_KEYGEN_REQUEST, OP_KEYGEN_RESULT, OP_NEW_CUSTODIAN_CONTEXT, OP_NEW_KMS_CONTEXT,
24+
OP_PUBLIC_DECRYPT_REQUEST, OP_PUBLIC_DECRYPT_RESULT, OP_USER_DECRYPT_REQUEST,
25+
OP_USER_DECRYPT_RESULT,
26+
},
27+
};
1728

1829
#[tonic::async_trait]
1930
impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'static>
@@ -30,6 +41,8 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
3041
/// * Pre-condition: -
3142
/// * Post-condition: -
3243
async fn init(&self, _request: Request<InitRequest>) -> Result<Response<Empty>, Status> {
44+
METRICS.increment_request_counter(OP_INIT);
45+
METRICS.increment_error_counter(OP_INIT, ERR_INVALID_REQUEST);
3346
tonic_some_or_err(
3447
None,
3548
"Requesting init on centralized kms is not suported".to_string(),
@@ -51,6 +64,8 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
5164
&self,
5265
_request: Request<KeyGenPreprocRequest>,
5366
) -> Result<Response<Empty>, Status> {
67+
METRICS.increment_request_counter(OP_KEYGEN_PREPROC_REQUEST);
68+
METRICS.increment_error_counter(OP_KEYGEN_PREPROC_REQUEST, ERR_INVALID_REQUEST);
5469
tonic_some_or_err(
5570
None,
5671
"Requesting preproc on centralized kms is not suported".to_string(),
@@ -71,6 +86,8 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
7186
&self,
7287
_request: Request<v1::RequestId>,
7388
) -> Result<Response<KeyGenPreprocResult>, Status> {
89+
METRICS.increment_request_counter(OP_KEYGEN_PREPROC_RESULT);
90+
METRICS.increment_error_counter(OP_KEYGEN_PREPROC_RESULT, ERR_INVALID_REQUEST);
7491
tonic_some_or_err(
7592
None,
7693
"Requesting preproc status on centralized kms is not suported".to_string(),
@@ -109,7 +126,14 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
109126
&self,
110127
request: Request<kms_grpc::kms::v1::KeyGenRequest>,
111128
) -> Result<Response<Empty>, Status> {
112-
self.key_gen(request).await
129+
METRICS.increment_request_counter(observability::metrics_names::OP_INSECURE_KEYGEN_REQUEST);
130+
self.key_gen(request).await.inspect_err(|err| {
131+
let tag = map_tonic_code_to_metric_tag(err.code());
132+
let _ = METRICS.increment_error_counter(
133+
observability::metrics_names::OP_INSECURE_KEYGEN_REQUEST,
134+
tag,
135+
);
136+
})
113137
}
114138

115139
/// WARNING: This method is insecure and should not be used in production.
@@ -142,7 +166,14 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
142166
&self,
143167
request: Request<v1::RequestId>,
144168
) -> Result<Response<kms_grpc::kms::v1::KeyGenResult>, Status> {
145-
self.get_key_gen_result(request).await
169+
METRICS.increment_request_counter(observability::metrics_names::OP_INSECURE_KEYGEN_RESULT);
170+
self.get_key_gen_result(request).await.inspect_err(|err| {
171+
let tag = map_tonic_code_to_metric_tag(err.code());
172+
let _ = METRICS.increment_error_counter(
173+
observability::metrics_names::OP_INSECURE_KEYGEN_RESULT,
174+
tag,
175+
);
176+
})
146177
}
147178

148179
/// Computes key generation.
@@ -173,7 +204,11 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
173204
&self,
174205
request: Request<kms_grpc::kms::v1::KeyGenRequest>,
175206
) -> Result<Response<Empty>, Status> {
176-
key_gen_impl(self, request).await
207+
METRICS.increment_request_counter(OP_KEYGEN_REQUEST);
208+
key_gen_impl(self, request).await.inspect_err(|err| {
209+
let tag = map_tonic_code_to_metric_tag(err.code());
210+
let _ = METRICS.increment_error_counter(OP_KEYGEN_REQUEST, tag);
211+
})
177212
}
178213

179214
/// Retrieves the result from a key generation.
@@ -205,7 +240,13 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
205240
&self,
206241
request: Request<v1::RequestId>,
207242
) -> Result<Response<kms_grpc::kms::v1::KeyGenResult>, Status> {
208-
get_key_gen_result_impl(self, request).await
243+
METRICS.increment_request_counter(OP_KEYGEN_RESULT);
244+
get_key_gen_result_impl(self, request)
245+
.await
246+
.inspect_err(|err| {
247+
let tag = map_tonic_code_to_metric_tag(err.code());
248+
let _ = METRICS.increment_error_counter(OP_KEYGEN_RESULT, tag);
249+
})
209250
}
210251

211252
/// Computes a user decryption. That is, it decrypts a ciphertext and encrypts the result under a user's public key.
@@ -236,7 +277,11 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
236277
&self,
237278
request: Request<kms_grpc::kms::v1::UserDecryptionRequest>,
238279
) -> Result<Response<Empty>, Status> {
239-
user_decrypt_impl(self, request).await
280+
METRICS.increment_request_counter(OP_USER_DECRYPT_REQUEST);
281+
user_decrypt_impl(self, request).await.inspect_err(|err| {
282+
let tag = map_tonic_code_to_metric_tag(err.code());
283+
let _ = METRICS.increment_error_counter(OP_USER_DECRYPT_REQUEST, tag);
284+
})
240285
}
241286

242287
/// Retrieves the result from a user decryption.
@@ -267,7 +312,13 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
267312
&self,
268313
request: Request<v1::RequestId>,
269314
) -> Result<Response<kms_grpc::kms::v1::UserDecryptionResponse>, Status> {
270-
get_user_decryption_result_impl(self, request).await
315+
METRICS.increment_request_counter(OP_USER_DECRYPT_RESULT);
316+
get_user_decryption_result_impl(self, request)
317+
.await
318+
.inspect_err(|err| {
319+
let tag = map_tonic_code_to_metric_tag(err.code());
320+
let _ = METRICS.increment_error_counter(OP_USER_DECRYPT_RESULT, tag);
321+
})
271322
}
272323

273324
/// Computes a public decryption. That is, it decrypts a ciphertext and returns the plaintext.
@@ -293,7 +344,11 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
293344
&self,
294345
request: Request<kms_grpc::kms::v1::PublicDecryptionRequest>,
295346
) -> Result<Response<Empty>, Status> {
296-
public_decrypt_impl(self, request).await
347+
METRICS.increment_request_counter(OP_PUBLIC_DECRYPT_REQUEST);
348+
public_decrypt_impl(self, request).await.inspect_err(|err| {
349+
let tag = map_tonic_code_to_metric_tag(err.code());
350+
let _ = METRICS.increment_error_counter(OP_PUBLIC_DECRYPT_REQUEST, tag);
351+
})
297352
}
298353

299354
/// Retrieves the result from a public decryption.
@@ -322,7 +377,13 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
322377
&self,
323378
request: Request<v1::RequestId>,
324379
) -> Result<Response<kms_grpc::kms::v1::PublicDecryptionResponse>, Status> {
325-
get_public_decryption_result_impl(self, request).await
380+
METRICS.increment_request_counter(OP_PUBLIC_DECRYPT_RESULT);
381+
get_public_decryption_result_impl(self, request)
382+
.await
383+
.inspect_err(|err| {
384+
let tag = map_tonic_code_to_metric_tag(err.code());
385+
let _ = METRICS.increment_error_counter(OP_PUBLIC_DECRYPT_RESULT, tag);
386+
})
326387
}
327388

328389
/// Computes a CRS generation. That is, it generates a common reference string (CRS) for the KMS.
@@ -350,7 +411,11 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
350411
&self,
351412
request: Request<kms_grpc::kms::v1::CrsGenRequest>,
352413
) -> Result<Response<Empty>, Status> {
353-
crs_gen_impl(self, request).await
414+
METRICS.increment_request_counter(OP_CRS_GEN_REQUEST);
415+
crs_gen_impl(self, request).await.inspect_err(|err| {
416+
let tag = map_tonic_code_to_metric_tag(err.code());
417+
let _ = METRICS.increment_error_counter(OP_CRS_GEN_REQUEST, tag);
418+
})
354419
}
355420

356421
/// Retrieves the result from a CRS generation.
@@ -381,7 +446,13 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
381446
&self,
382447
request: Request<v1::RequestId>,
383448
) -> Result<Response<kms_grpc::kms::v1::CrsGenResult>, Status> {
384-
get_crs_gen_result_impl(self, request).await
449+
METRICS.increment_request_counter(OP_CRS_GEN_RESULT);
450+
get_crs_gen_result_impl(self, request)
451+
.await
452+
.inspect_err(|err| {
453+
let tag = map_tonic_code_to_metric_tag(err.code());
454+
let _ = METRICS.increment_error_counter(OP_CRS_GEN_RESULT, tag);
455+
})
385456
}
386457

387458
/// WARNING: This method is by definition expected to be insecure and should not be used in production.
@@ -413,7 +484,15 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
413484
&self,
414485
request: Request<kms_grpc::kms::v1::CrsGenRequest>,
415486
) -> Result<Response<Empty>, Status> {
416-
self.crs_gen(request).await
487+
METRICS
488+
.increment_request_counter(observability::metrics_names::OP_INSECURE_CRS_GEN_REQUEST);
489+
self.crs_gen(request).await.inspect_err(|err| {
490+
let tag = map_tonic_code_to_metric_tag(err.code());
491+
let _ = METRICS.increment_error_counter(
492+
observability::metrics_names::OP_INSECURE_CRS_GEN_REQUEST,
493+
tag,
494+
);
495+
})
417496
}
418497

419498
/// WARNING: This method is by definition expected to be insecure and should not be used in production.
@@ -447,7 +526,14 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
447526
&self,
448527
request: Request<v1::RequestId>,
449528
) -> Result<Response<kms_grpc::kms::v1::CrsGenResult>, Status> {
450-
self.get_crs_gen_result(request).await
529+
METRICS.increment_request_counter(observability::metrics_names::OP_INSECURE_CRS_GEN_RESULT);
530+
self.get_crs_gen_result(request).await.inspect_err(|err| {
531+
let tag = map_tonic_code_to_metric_tag(err.code());
532+
let _ = METRICS.increment_error_counter(
533+
observability::metrics_names::OP_INSECURE_CRS_GEN_RESULT,
534+
tag,
535+
);
536+
})
451537
}
452538

453539
/// WARNING: This method is not implemented yet and will always return an error.
@@ -466,7 +552,13 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
466552
&self,
467553
request: Request<kms_grpc::kms::v1::NewKmsContextRequest>,
468554
) -> Result<Response<kms_grpc::kms::v1::Empty>, Status> {
469-
new_kms_context_impl(&self.crypto_storage, request).await
555+
METRICS.increment_request_counter(OP_NEW_KMS_CONTEXT);
556+
new_kms_context_impl(&self.crypto_storage, request)
557+
.await
558+
.inspect_err(|err| {
559+
let tag = map_tonic_code_to_metric_tag(err.code());
560+
let _ = METRICS.increment_error_counter(OP_NEW_KMS_CONTEXT, tag);
561+
})
470562
}
471563

472564
/// WARNING: This method is not implemented yet and will always return an error.
@@ -485,7 +577,13 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
485577
&self,
486578
request: Request<kms_grpc::kms::v1::DestroyKmsContextRequest>,
487579
) -> Result<Response<Empty>, Status> {
488-
delete_kms_context_impl(&self.crypto_storage, request).await
580+
METRICS.increment_request_counter(OP_DESTROY_KMS_CONTEXT);
581+
delete_kms_context_impl(&self.crypto_storage, request)
582+
.await
583+
.inspect_err(|err| {
584+
let tag = map_tonic_code_to_metric_tag(err.code());
585+
let _ = METRICS.increment_error_counter(OP_DESTROY_KMS_CONTEXT, tag);
586+
})
489587
}
490588

491589
/// WARNING: This method is not implemented yet and will always return an error.
@@ -504,6 +602,8 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
504602
&self,
505603
_request: Request<kms_grpc::kms::v1::NewCustodianContextRequest>,
506604
) -> Result<Response<Empty>, Status> {
605+
METRICS.increment_request_counter(OP_NEW_CUSTODIAN_CONTEXT);
606+
METRICS.increment_error_counter(OP_NEW_CUSTODIAN_CONTEXT, ERR_INVALID_REQUEST);
507607
Err(Status::unimplemented(
508608
"new_custodian_context is not implemented",
509609
))
@@ -525,6 +625,8 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
525625
&self,
526626
_request: Request<kms_grpc::kms::v1::DestroyCustodianContextRequest>,
527627
) -> Result<Response<Empty>, Status> {
628+
METRICS.increment_request_counter(OP_DESTROY_CUSTODIAN_CONTEXT);
629+
METRICS.increment_error_counter(OP_DESTROY_CUSTODIAN_CONTEXT, ERR_INVALID_REQUEST);
528630
Err(Status::unimplemented(
529631
"destroy_custodian_context is not implemented",
530632
))
@@ -550,6 +652,8 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
550652
&self,
551653
_request: Request<Empty>,
552654
) -> Result<Response<OperatorPublicKey>, Status> {
655+
METRICS.increment_request_counter(OP_FETCH_PK);
656+
METRICS.increment_error_counter(OP_FETCH_PK, ERR_INVALID_REQUEST);
553657
Err(Status::unimplemented(
554658
"get_operator_public_key is not implemented",
555659
))
@@ -571,6 +675,8 @@ impl<PubS: Storage + Sync + Send + 'static, PrivS: Storage + Sync + Send + 'stat
571675
&self,
572676
_request: Request<Empty>,
573677
) -> Result<Response<Empty>, Status> {
678+
METRICS.increment_request_counter(OP_CUSTODIAN_CONTEXT_RESTORE);
679+
METRICS.increment_error_counter(OP_CUSTODIAN_CONTEXT_RESTORE, ERR_INVALID_REQUEST);
574680
Err(Status::unimplemented(
575681
"custodian_backup_restore is not implemented",
576682
))

core/service/src/engine/centralized/service/crs_gen.rs

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use kms_grpc::kms::v1::{CrsGenRequest, CrsGenResult, Empty};
77
use kms_grpc::rpc_types::{optional_protobuf_to_alloy_domain, SignedPubDataHandleInternal};
88
use kms_grpc::RequestId;
99
use observability::metrics::METRICS;
10-
use observability::metrics_names::{ERR_CRS_GEN_FAILED, ERR_RATE_LIMIT_EXCEEDED, OP_CRS_GEN};
10+
use observability::metrics_names::{ERR_CRS_GEN_FAILED, OP_CRS_GEN_REQUEST};
1111
use threshold_fhe::execution::tfhe_internals::parameters::DKGParams;
1212
use threshold_fhe::session_id::SessionId;
1313
use tokio::sync::{OwnedSemaphorePermit, RwLock};
@@ -33,23 +33,9 @@ pub async fn crs_gen_impl<
3333
request: Request<CrsGenRequest>,
3434
) -> Result<Response<Empty>, Status> {
3535
tracing::info!("Received CRS generation request");
36-
let _timer = METRICS
37-
.time_operation(OP_CRS_GEN)
38-
.map_err(|e| Status::internal(format!("Failed to start metrics: {e}")))?
39-
.start();
40-
METRICS
41-
.increment_request_counter(OP_CRS_GEN)
42-
.map_err(|e| Status::internal(format!("Failed to increment counter: {e}")))?;
36+
let _timer = METRICS.time_operation(OP_CRS_GEN_REQUEST).start();
4337

44-
let permit = service
45-
.rate_limiter
46-
.start_crsgen()
47-
.await
48-
.inspect_err(|_e| {
49-
if let Err(e) = METRICS.increment_error_counter(OP_CRS_GEN, ERR_RATE_LIMIT_EXCEEDED) {
50-
tracing::warn!("Failed to increment error counter: {:?}", e);
51-
}
52-
})?;
38+
let permit = service.rate_limiter.start_crsgen().await?;
5339

5440
let inner = request.into_inner();
5541
let req_id = tonic_some_or_err(
@@ -90,6 +76,7 @@ pub async fn crs_gen_impl<
9076
)
9177
.await
9278
{
79+
METRICS.increment_error_counter(OP_CRS_GEN_REQUEST, ERR_CRS_GEN_FAILED);
9380
tracing::error!("CRS generation of request {} failed: {}", req_id, e);
9481
} else {
9582
tracing::info!(
@@ -160,9 +147,6 @@ pub(crate) async fn crs_gen_background<
160147
"Failed CRS generation for CRS with ID {req_id}: {e}"
161148
)),
162149
);
163-
METRICS
164-
.increment_error_counter(OP_CRS_GEN, ERR_CRS_GEN_FAILED)
165-
.ok();
166150
return Err(anyhow::anyhow!("Failed CRS generation: {}", e));
167151
}
168152
};

0 commit comments

Comments
 (0)