Skip to content

Commit 43e1c69

Browse files
committed
request latency
1 parent 8d862a7 commit 43e1c69

10 files changed

+126
-95
lines changed

Diff for: docs/metrics.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,8 @@ For [MediaPipe Graphs](./mediapipe.md) execution there are 6 generic metrics whi
241241
| counter | ovms_responses | Useful to track number of packets generated by MediaPipe graph. Keep in mind that single request may trigger production of multiple (or zero) packets, therefore tracking number of responses is complementary to tracking accepted requests. For example tracking streaming partial responses of LLM text generation graphs. |
242242
| gauge | ovms_current_graphs | Number of graphs currently in-process. For unary communication it is equal to number of currently processing requests (each request initializes separate MediaPipe graph). For streaming communication it is equal to number of active client connections. Each connection is able to reuse the graph and decide when to delete it when the connection is closed. |
243243
| counter | ovms_graph_error | Counts errors in MediaPipe graph execution phase. For example V3 LLM text generation fails in LLMCalculator due to missing prompt - calculator returns an error and graph cancels. |
244-
| histogram | ovms_graph_processing_time_us | Tracks duration of successfully stared mediapipe graphs in us. It can represent pipeline processing time for unary calls or the session length for streamed requests |
244+
| histogram | ovms_graph_processing_time_us | Tracks duration of successfully started mediapipe graphs in us. It can represent pipeline processing time for unary calls or the session length for streamed requests. |
245+
| histogram | ovms_graph_request_latency_us | Time between MediaPipe graph packet creation (request receival) and result packet generation (full or partial response). Incoming packet timestamp is used as a source of truth for receival time, therefore using manual timestamping (via client) is highly discouraged. Model server automatically stamps the packets in case there is no manual timestamping. |
245246

246247
Exposing custom metrics in calculator implementations (MediaPipe graph nodes) is not supported yet.
247248

Diff for: src/kfs_frontend/kfs_graph_executor_impl.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
//*****************************************************************************
1616
#include "kfs_graph_executor_impl.hpp"
1717

18+
#include <chrono>
1819
#include <sstream>
1920
#include <string>
2021
#include <unordered_map>
@@ -1040,6 +1041,9 @@ static Status deserializeTimestampIfAvailable(
10401041
SPDLOG_DEBUG(status.string());
10411042
return status;
10421043
}
1044+
} else {
1045+
auto now = std::chrono::system_clock::now();
1046+
timestamp = ::mediapipe::Timestamp(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
10431047
}
10441048
return StatusCode::OK;
10451049
}
@@ -1147,8 +1151,6 @@ Status createAndPushPacketsImpl(
11471151
numberOfPacketsCreated++;
11481152
}
11491153

1150-
currentTimestamp = currentTimestamp.NextAllowedInStream();
1151-
11521154
return StatusCode::OK;
11531155
}
11541156

Diff for: src/llm/http_llm_calculator.cc

+7-6
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class HttpLLMCalculator : public CalculatorBase {
6666
static const std::string OUTPUT_TAG_NAME;
6767
static const std::string LOOPBACK_TAG_NAME;
6868

69-
mediapipe::Timestamp timestamp{0};
69+
mediapipe::Timestamp iterationBeginTimestamp{0};
7070

7171
public:
7272
static absl::Status GetContract(CalculatorContract* cc) {
@@ -211,7 +211,7 @@ class HttpLLMCalculator : public CalculatorBase {
211211
RET_CHECK(generationOutputs.size() >= 1);
212212
std::string response = this->apiHandler->serializeUnaryResponse(generationOutputs);
213213
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Complete unary response: {}", response);
214-
cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, timestamp);
214+
cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, iterationBeginTimestamp);
215215
} else {
216216
OVMS_PROFILE_SCOPE("Stream generation cycle");
217217
// Streaming scenario
@@ -241,9 +241,9 @@ class HttpLLMCalculator : public CalculatorBase {
241241
if (lastTextChunk.size() > 0) {
242242
std::string response = packIntoServerSideEventMessage(this->apiHandler->serializeStreamingChunk(lastTextChunk, finishReason));
243243
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Generated subsequent streaming response: {}", response);
244-
cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, timestamp);
244+
cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, iterationBeginTimestamp);
245245
}
246-
cc->Outputs().Tag(LOOPBACK_TAG_NAME).Add(new bool{true}, timestamp);
246+
cc->Outputs().Tag(LOOPBACK_TAG_NAME).Add(new bool{true}, iterationBeginTimestamp);
247247
} else { // finish generation
248248
OVMS_PROFILE_SCOPE("Generation of last streaming response");
249249
this->streamer->end();
@@ -257,7 +257,7 @@ class HttpLLMCalculator : public CalculatorBase {
257257
response += packIntoServerSideEventMessage("[DONE]");
258258

259259
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Generated complete streaming response: {}", response);
260-
cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, timestamp);
260+
cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new OutputDataType{std::move(response)}, iterationBeginTimestamp);
261261
}
262262
}
263263
}
@@ -266,7 +266,8 @@ class HttpLLMCalculator : public CalculatorBase {
266266
} catch (...) {
267267
return absl::InvalidArgumentError("Response generation failed");
268268
}
269-
timestamp = timestamp.NextAllowedInStream();
269+
auto now = std::chrono::system_clock::now();
270+
iterationBeginTimestamp = ::mediapipe::Timestamp(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
270271

271272
return absl::OkStatus();
272273
}

Diff for: src/mediapipe_internal/mediapipegraphexecutor.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,10 @@ class MediapipeGraphExecutor {
254254
packet,
255255
serverReaderWriter),
256256
"error in send packet routine");
257+
258+
auto now = std::chrono::system_clock::now();
259+
auto currentTimestamp = ::mediapipe::Timestamp(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count());
260+
OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestLatencyMetric(executionContext), (currentTimestamp - packet.Timestamp()).Microseconds());
257261
INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getResponsesMetric(executionContext));
258262
return absl::OkStatus();
259263
} catch (...) {

Diff for: src/metric_config.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ const std::string METRIC_NAME_REQUESTS_REJECTED = "ovms_requests_rejected";
6363
const std::string METRIC_NAME_GRAPH_ERROR = "ovms_graph_error";
6464
const std::string METRIC_NAME_PROCESSING_TIME = "ovms_graph_processing_time_us";
6565

66+
const std::string METRIC_NAME_REQUEST_LATENCY = "ovms_graph_request_latency_us";
67+
6668
bool MetricConfig::validateEndpointPath(const std::string& endpoint) {
6769
std::regex valid_endpoint_regex("^/[a-zA-Z0-9]*$");
6870
return std::regex_match(endpoint, valid_endpoint_regex);

Diff for: src/metric_config.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ extern const std::string METRIC_NAME_REQUESTS_REJECTED;
4949
extern const std::string METRIC_NAME_GRAPH_ERROR;
5050
extern const std::string METRIC_NAME_PROCESSING_TIME;
5151

52+
extern const std::string METRIC_NAME_REQUEST_LATENCY;
53+
5254
class Status;
5355
/**
5456
* @brief This class represents metrics configuration

Diff for: src/model_metric_reporter.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,23 @@ MediapipeServableMetricReporter::MediapipeServableMetricReporter(const MetricCon
584584
this->buckets);
585585
THROW_IF_NULL(this->processingTimeRestV3Stream, "cannot create metric");
586586
}
587+
familyName = METRIC_NAME_REQUEST_LATENCY;
588+
if (metricConfig->isFamilyEnabled(familyName)) {
589+
auto family = registry->createFamily<MetricHistogram>(familyName,
590+
"Time difference between incoming request and output packet in mediapipe graph.");
591+
THROW_IF_NULL(family, "cannot create family");
592+
593+
// KFS
594+
this->requestLatencyGrpcModelInferStream = family->addMetric({{"name", graphName},
595+
{"method", "ModelInferStream"}},
596+
this->buckets);
597+
THROW_IF_NULL(this->requestLatencyGrpcModelInferStream, "cannot create metric");
598+
// V3
599+
this->requestLatencyRestV3Stream = family->addMetric({{"name", graphName},
600+
{"method", "Stream"}},
601+
this->buckets);
602+
THROW_IF_NULL(this->requestLatencyRestV3Stream, "cannot create metric");
603+
}
587604
}
588605

589606
} // namespace ovms

Diff for: src/model_metric_reporter.hpp

+11
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,17 @@ class MediapipeServableMetricReporter {
214214
std::unique_ptr<MetricHistogram> processingTimeRestV3Unary;
215215
std::unique_ptr<MetricHistogram> processingTimeRestV3Stream;
216216

217+
std::unique_ptr<MetricHistogram> requestLatencyGrpcModelInferStream;
218+
std::unique_ptr<MetricHistogram> requestLatencyRestV3Stream;
219+
220+
inline MetricHistogram* getRequestLatencyMetric(const ExecutionContext& context) {
221+
if (context.method == ExecutionContext::Method::ModelInferStream)
222+
return this->requestLatencyGrpcModelInferStream.get();
223+
if (context.method == ExecutionContext::Method::V3Stream)
224+
return this->requestLatencyRestV3Stream.get();
225+
return nullptr;
226+
}
227+
217228
inline MetricHistogram* getProcessingTimeMetric(const ExecutionContext& context) {
218229
if (context.method == ExecutionContext::Method::ModelInfer)
219230
return this->processingTimeGrpcModelInfer.get();

Diff for: src/test/metrics_flow_test.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,6 @@ TEST_F(MetricFlowTest, RestV3Unary) {
842842
checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_ACCEPTED, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2);
843843
// checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_REQUESTS_REJECTED, "dummy_gpt", "REST", "Unary", "V3", numberOfRejectedRequests);
844844
checkMediapipeRequestsCounter(server.collect(), METRIC_NAME_RESPONSES, "dummy_gpt", "REST", "Unary", "V3", numberOfAcceptedRequests * 2);
845-
846845
EXPECT_THAT(server.collect(), HasSubstr(METRIC_NAME_PROCESSING_TIME + std::string{"_count{method=\"Unary\",name=\""} + "dummy_gpt" + std::string{"\"} "} + std::to_string(numberOfAcceptedRequests * 2)));
847846
}
848847
#endif

0 commit comments

Comments
 (0)