You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: docs/metrics.md
+1 -1 Lines changed: 1 addition & 1 deletion
Display the source diff
Display the rich diff
Original file line number
Diff line number
Diff line change
@@ -44,7 +44,7 @@ Metrics defined by llm-d Router are in addition to Inference Gateway metrics. Fo
44
44
45
45
## Opt-in ext_proc Stream Metrics
46
46
47
-
Three metrics covering ext_proc gRPC stream lifecycle. Disabled by default; enable with `--enable-grpc-stream-metrics`. All carry the `llm_d_router_epp_` prefix.
47
+
Three metrics covering ext_proc gRPC stream lifecycle. Disabled by default; enable with `--enable-grpc-stream-metrics`. These metrics are emitted under the `llm_d_epp_` prefix (separate from `llm_d_inference_scheduler_*`).
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_prefix_indexer_hit_ratio] Ratio of prefix length matched to total prefix length in the cache lookup.", compbasemetrics.ALPHA),
53
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_prefix_indexer_hit_ratio] Ratio of prefix length matched to total prefix length in the cache lookup.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_prefix_indexer_hit_bytes] Length of the prefix match in number of bytes in the cache lookup.", compbasemetrics.ALPHA),
73
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_prefix_indexer_hit_bytes] Length of the prefix match in number of bytes in the cache lookup.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_inference_request_metric] Consolidated gauge for various inference request metrics including TTFT, TPOT, SLOs, and prediction durations.", compbasemetrics.ALPHA),
71
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_inference_request_metric] Consolidated gauge for various inference request metrics including TTFT, TPOT, SLOs, and prediction durations.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_ttft_seconds] Inference model TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
89
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_ttft_seconds] Inference model TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_predicted_ttft_seconds] Inference model Predicted TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
99
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_predicted_ttft_seconds] Inference model Predicted TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_ttft_prediction_duration_seconds] Duration taken to generate TTFT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
119
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_ttft_prediction_duration_seconds] Duration taken to generate TTFT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_tpot_seconds] Inference model TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
139
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_tpot_seconds] Inference model TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_predicted_tpot_seconds] Inference model Predicted TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
149
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_predicted_tpot_seconds] Inference model Predicted TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_tpot_prediction_duration_seconds] Duration taken to generate TPOT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
169
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_tpot_prediction_duration_seconds] Duration taken to generate TPOT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_slo_violation_total] Counter of SLO violations for each model, target model, and violation type.", compbasemetrics.ALPHA),
189
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_slo_violation_total] Counter of SLO violations for each model, target model, and violation type.", compbasemetrics.ALPHA),
Copy file name to clipboard. Expand all lines: pkg/epp/framework/plugins/scheduling/profilehandler/disagg/metrics.go
+3 -3 Lines changed: 3 additions & 3 deletions
Original file line number
Diff line number
Diff line change
@@ -47,7 +47,7 @@ var (
47
47
prometheus.CounterOpts{
48
48
Subsystem: eppmetrics.SchedulerSubsystem,
49
49
Name: "pd_decision_total",
50
-
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_pd_decision_total] Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA),
50
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_pd_decision_total] Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA),
51
51
},
52
52
[]string{"model_name", "decision_type"}, // "decode-only" or "prefill-decode"
53
53
)
@@ -65,13 +65,13 @@ var (
65
65
// SchedulerDisaggDecisionCount records disaggregation routing decisions,
66
66
// covering all stages: decode-only, prefill-decode, encode-decode, encode-prefill-decode.
67
67
//
68
-
// Deprecated: Use llm_d_router_epp_disagg_decision_total instead.
68
+
// Deprecated: Use llm_d_epp_disagg_decision_total instead.
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_disagg_decision_total] Total number of disaggregation routing decisions made", compbasemetrics.ALPHA),
74
+
Help: metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_disagg_decision_total] Total number of disaggregation routing decisions made", compbasemetrics.ALPHA),
# HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] [Deprecated: Use llm_d_router_epp_pd_decision_total] Total number of P/D disaggregation decisions made
37
+
# HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] [Deprecated: Use llm_d_epp_pd_decision_total] Total number of P/D disaggregation decisions made
38
38
# TYPE llm_d_inference_scheduler_pd_decision_total counter
# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_router_epp_disagg_decision_total] Total number of disaggregation routing decisions made
76
+
# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_epp_disagg_decision_total] Total number of disaggregation routing decisions made
77
77
# TYPE llm_d_inference_scheduler_disagg_decision_total counter
# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_router_epp_disagg_decision_total] Total number of disaggregation routing decisions made
111
+
# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_epp_disagg_decision_total] Total number of disaggregation routing decisions made
112
112
# TYPE llm_d_inference_scheduler_disagg_decision_total counter
0 commit comments