opendatahub-io
diff --git a/‎deploy/grafana/inference_gateway.json‎
Lines changed: 35 additions & 35 deletions b/‎deploy/grafana/inference_gateway.json‎
Lines changed: 35 additions & 35 deletions
diff --git a/‎docs/metrics.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/metrics.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/epp/flowcontrol/integration_test.go‎
Lines changed: 2 additions & 2 deletions b/‎pkg/epp/flowcontrol/integration_test.go‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pkg/epp/framework/plugins/flowcontrol/fairness/program-aware/README.md‎
Lines changed: 1 addition & 1 deletion b/‎pkg/epp/framework/plugins/flowcontrol/fairness/program-aware/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/epp/framework/plugins/requestcontrol/dataproducer/approximateprefix/metrics.go‎
Lines changed: 3 additions & 3 deletions b/‎pkg/epp/framework/plugins/requestcontrol/dataproducer/approximateprefix/metrics.go‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎pkg/epp/framework/plugins/requestcontrol/dataproducer/predictedlatency/metrics.go‎
Lines changed: 8 additions & 8 deletions b/‎pkg/epp/framework/plugins/requestcontrol/dataproducer/predictedlatency/metrics.go‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎pkg/epp/framework/plugins/scheduling/profilehandler/disagg/metrics.go‎
Lines changed: 3 additions & 3 deletions b/‎pkg/epp/framework/plugins/scheduling/profilehandler/disagg/metrics.go‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎pkg/epp/framework/plugins/scheduling/profilehandler/disagg/metrics_test.go‎
Lines changed: 19 additions & 19 deletions b/‎pkg/epp/framework/plugins/scheduling/profilehandler/disagg/metrics_test.go‎
Lines changed: 19 additions & 19 deletions
diff --git a/‎pkg/epp/handlers/response_test.go‎
Lines changed: 1 addition & 1 deletion b/‎pkg/epp/handlers/response_test.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/epp/metrics/collectors/inference_pool_test.go‎
Lines changed: 4 additions & 4 deletions b/‎pkg/epp/metrics/collectors/inference_pool_test.go‎
Lines changed: 4 additions & 4 deletions
@@ -44,7 +44,7 @@ Metrics defined by llm-d Router are in addition to Inference Gateway metrics. Fo
 
 ## Opt-in ext_proc Stream Metrics
 
-Three metrics covering ext_proc gRPC stream lifecycle. Disabled by default; enable with `--enable-grpc-stream-metrics`. All carry the `llm_d_router_epp_` prefix.
+Three metrics cover the ext_proc gRPC stream lifecycle. They are disabled by default; enable them with `--enable-grpc-stream-metrics`. These metrics are emitted under the `llm_d_epp_` prefix (separate from `llm_d_inference_scheduler_*`).
 
 ### `extproc_streams_inflight`
 
 
@@ -1122,15 +1122,15 @@ func TestFlowControlMetricsEmitted(t *testing.T) {
 	var queueSizeWhileQueued float64
 	var foundQueueSize bool
 	for _, f := range families {
-		if f.GetName() == "llm_d_router_epp_flow_control_queue_size" {
+		if f.GetName() == "llm_d_epp_flow_control_queue_size" {
 			foundQueueSize = true
 			for _, m := range f.GetMetric() {
 				queueSizeWhileQueued += m.GetGauge().GetValue()
 			}
 		}
 	}
 	require.True(t, foundQueueSize,
-		"llm_d_router_epp_flow_control_queue_size metric should exist")
+		"llm_d_epp_flow_control_queue_size metric should exist")
 	require.Greater(t, queueSizeWhileQueued, 0.0,
 		"queue_size should be > 0 while a request is actively queued")
 
 
@@ -63,7 +63,7 @@ A complete sample is shipped at [`deploy/config/sim-program-aware-config.yaml`](
 
 ## Observability
 
-The plugin exports two shared collectors and one strategy-owned collector under the `llm_d_router_epp` Prometheus subsystem:
+The plugin exports two shared collectors and one strategy-owned collector under the `llm_d_epp` Prometheus subsystem:
 
 | Metric | Type | Labels | Description |
 |---|---|---|---|
 
@@ -32,7 +32,7 @@ var (
 		prometheus.GaugeOpts{
 			Subsystem: eppmetrics.InferenceExtensionSubsystem,
 			Name:      "prefix_indexer_size",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_prefix_indexer_size] Size of the prefix indexer.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_prefix_indexer_size] Size of the prefix indexer.", compbasemetrics.ALPHA),
 		},
 		[]string{},
 	)
@@ -50,7 +50,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceExtensionSubsystem,
 			Name:      "prefix_indexer_hit_ratio",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_prefix_indexer_hit_ratio] Ratio of prefix length matched to total prefix length in the cache lookup.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_prefix_indexer_hit_ratio] Ratio of prefix length matched to total prefix length in the cache lookup.", compbasemetrics.ALPHA),
 			Buckets:   []float64{0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
 		},
 		[]string{},
@@ -70,7 +70,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceExtensionSubsystem,
 			Name:      "prefix_indexer_hit_bytes",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_prefix_indexer_hit_bytes] Length of the prefix match in number of bytes in the cache lookup.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_prefix_indexer_hit_bytes] Length of the prefix match in number of bytes in the cache lookup.", compbasemetrics.ALPHA),
 			Buckets:   []float64{0, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536},
 		},
 		[]string{},
 
@@ -68,7 +68,7 @@ var (
 		prometheus.GaugeOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "inference_request_metric",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_inference_request_metric] Consolidated gauge for various inference request metrics including TTFT, TPOT, SLOs, and prediction durations.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_inference_request_metric] Consolidated gauge for various inference request metrics including TTFT, TPOT, SLOs, and prediction durations.", compbasemetrics.ALPHA),
 		},
 		modelTypeLabels,
 	)
@@ -86,7 +86,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "request_ttft_seconds",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_ttft_seconds] Inference model TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_ttft_seconds] Inference model TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets:   generalLatencyBuckets,
 		},
 		modelLabels,
@@ -96,7 +96,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "request_predicted_ttft_seconds",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_predicted_ttft_seconds] Inference model Predicted TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_predicted_ttft_seconds] Inference model Predicted TTFT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets:   generalLatencyBuckets,
 		},
 		modelLabels,
@@ -116,7 +116,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "request_ttft_prediction_duration_seconds",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_ttft_prediction_duration_seconds] Duration taken to generate TTFT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_ttft_prediction_duration_seconds] Duration taken to generate TTFT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets:   predictionLatencyBuckets,
 		},
 		modelLabels,
@@ -136,7 +136,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "request_tpot_seconds",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_tpot_seconds] Inference model TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_tpot_seconds] Inference model TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets:   tpotBuckets,
 		},
 		modelLabels,
@@ -146,7 +146,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "request_predicted_tpot_seconds",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_predicted_tpot_seconds] Inference model Predicted TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_predicted_tpot_seconds] Inference model Predicted TPOT distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets:   tpotBuckets,
 		},
 		modelLabels,
@@ -166,7 +166,7 @@ var (
 		prometheus.HistogramOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "request_tpot_prediction_duration_seconds",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_tpot_prediction_duration_seconds] Duration taken to generate TPOT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_tpot_prediction_duration_seconds] Duration taken to generate TPOT predictions in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets:   predictionLatencyBuckets,
 		},
 		modelLabels,
@@ -186,7 +186,7 @@ var (
 		prometheus.CounterOpts{
 			Subsystem: eppmetrics.InferenceObjectiveSubsystem,
 			Name:      "request_slo_violation_total",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_request_slo_violation_total] Counter of SLO violations for each model, target model, and violation type.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_request_slo_violation_total] Counter of SLO violations for each model, target model, and violation type.", compbasemetrics.ALPHA),
 		},
 		modelTypeLabels,
 	)
 
@@ -47,7 +47,7 @@ var (
 		prometheus.CounterOpts{
 			Subsystem: eppmetrics.SchedulerSubsystem,
 			Name:      "pd_decision_total",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_pd_decision_total] Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_pd_decision_total] Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name", "decision_type"}, // "decode-only" or "prefill-decode"
 	)
@@ -65,13 +65,13 @@ var (
 	// SchedulerDisaggDecisionCount records disaggregation routing decisions,
 	// covering all stages: decode-only, prefill-decode, encode-decode, encode-prefill-decode.
 	//
-	// Deprecated: Use llm_d_router_epp_disagg_decision_total instead.
+	// Deprecated: Use llm_d_epp_disagg_decision_total instead.
 	// Tracked in: https://github.com/llm-d/llm-d-inference-scheduler/issues/1070
 	SchedulerDisaggDecisionCount = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Subsystem: eppmetrics.SchedulerSubsystem,
 			Name:      "disagg_decision_total",
-			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_router_epp_disagg_decision_total] Total number of disaggregation routing decisions made", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("[Deprecated: Use llm_d_epp_disagg_decision_total] Total number of disaggregation routing decisions made", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name", "decision_type"},
 	)
 
@@ -34,7 +34,7 @@ func TestSchedulerPDDecisionCount(t *testing.T) {
 	RecordPDDecision("test-plugin", "test-type", model, DecisionTypePrefillDecode)
 
 	expected := `
-		# HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] [Deprecated: Use llm_d_router_epp_pd_decision_total] Total number of P/D disaggregation decisions made
+		# HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] [Deprecated: Use llm_d_epp_pd_decision_total] Total number of P/D disaggregation decisions made
 		# TYPE llm_d_inference_scheduler_pd_decision_total counter
 		llm_d_inference_scheduler_pd_decision_total{decision_type="decode-only",model_name="test-model"} 1
 		llm_d_inference_scheduler_pd_decision_total{decision_type="prefill-decode",model_name="test-model"} 2
@@ -46,14 +46,14 @@ func TestSchedulerPDDecisionCount(t *testing.T) {
 	}
 
 	expectedNew := `
-		# HELP llm_d_router_epp_pd_decision_total [ALPHA] Total number of P/D disaggregation decisions made
-		# TYPE llm_d_router_epp_pd_decision_total counter
-		llm_d_router_epp_pd_decision_total{decision_type="decode-only",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 1
-		llm_d_router_epp_pd_decision_total{decision_type="prefill-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 2
+		# HELP llm_d_epp_pd_decision_total [ALPHA] Total number of P/D disaggregation decisions made
+		# TYPE llm_d_epp_pd_decision_total counter
+		llm_d_epp_pd_decision_total{decision_type="decode-only",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 1
+		llm_d_epp_pd_decision_total{decision_type="prefill-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 2
 	`
 
 	if err := testutil.CollectAndCompare(LlmdPDDecisionCount, strings.NewReader(expectedNew),
-		"llm_d_router_epp_pd_decision_total"); err != nil {
+		"llm_d_epp_pd_decision_total"); err != nil {
 		t.Errorf("RecordPDDecision() new failed: %v", err)
 	}
 }
@@ -73,7 +73,7 @@ func TestRecordDisaggDecision(t *testing.T) {
 	RecordDisaggDecision("test-plugin", "test-type", model, DecisionTypeEncodePrefillDecode)
 
 	expected := `
-		# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_router_epp_disagg_decision_total] Total number of disaggregation routing decisions made
+		# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_epp_disagg_decision_total] Total number of disaggregation routing decisions made
 		# TYPE llm_d_inference_scheduler_disagg_decision_total counter
 		llm_d_inference_scheduler_disagg_decision_total{decision_type="decode-only",model_name="test-model"} 1
 		llm_d_inference_scheduler_disagg_decision_total{decision_type="encode-decode",model_name="test-model"} 1
@@ -87,16 +87,16 @@ func TestRecordDisaggDecision(t *testing.T) {
 	}
 
 	expectedNew := `
-		# HELP llm_d_router_epp_disagg_decision_total [ALPHA] Total number of disaggregation routing decisions made
-		# TYPE llm_d_router_epp_disagg_decision_total counter
-		llm_d_router_epp_disagg_decision_total{decision_type="decode-only",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 1
-		llm_d_router_epp_disagg_decision_total{decision_type="encode-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 1
-		llm_d_router_epp_disagg_decision_total{decision_type="encode-prefill-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 3
-		llm_d_router_epp_disagg_decision_total{decision_type="prefill-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 2
+		# HELP llm_d_epp_disagg_decision_total [ALPHA] Total number of disaggregation routing decisions made
+		# TYPE llm_d_epp_disagg_decision_total counter
+		llm_d_epp_disagg_decision_total{decision_type="decode-only",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 1
+		llm_d_epp_disagg_decision_total{decision_type="encode-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 1
+		llm_d_epp_disagg_decision_total{decision_type="encode-prefill-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 3
+		llm_d_epp_disagg_decision_total{decision_type="prefill-decode",model_name="test-model",plugin_name="test-plugin",plugin_type="test-type"} 2
 	`
 
 	if err := testutil.CollectAndCompare(LlmdDisaggDecisionCount, strings.NewReader(expectedNew),
-		"llm_d_router_epp_disagg_decision_total"); err != nil {
+		"llm_d_epp_disagg_decision_total"); err != nil {
 		t.Errorf("RecordDisaggDecision() new failed: %v", err)
 	}
 }
@@ -108,7 +108,7 @@ func TestRecordDisaggDecisionEmptyModel(t *testing.T) {
 	RecordDisaggDecision("test-plugin", "test-type", "", DecisionTypeDecodeOnly)
 
 	expected := `
-		# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_router_epp_disagg_decision_total] Total number of disaggregation routing decisions made
+		# HELP llm_d_inference_scheduler_disagg_decision_total [ALPHA] [Deprecated: Use llm_d_epp_disagg_decision_total] Total number of disaggregation routing decisions made
 		# TYPE llm_d_inference_scheduler_disagg_decision_total counter
 		llm_d_inference_scheduler_disagg_decision_total{decision_type="decode-only",model_name="unknown"} 1
 	`
@@ -119,13 +119,13 @@ func TestRecordDisaggDecisionEmptyModel(t *testing.T) {
 	}
 
 	expectedNew := `
-		# HELP llm_d_router_epp_disagg_decision_total [ALPHA] Total number of disaggregation routing decisions made
-		# TYPE llm_d_router_epp_disagg_decision_total counter
-		llm_d_router_epp_disagg_decision_total{decision_type="decode-only",model_name="unknown",plugin_name="test-plugin",plugin_type="test-type"} 1
+		# HELP llm_d_epp_disagg_decision_total [ALPHA] Total number of disaggregation routing decisions made
+		# TYPE llm_d_epp_disagg_decision_total counter
+		llm_d_epp_disagg_decision_total{decision_type="decode-only",model_name="unknown",plugin_name="test-plugin",plugin_type="test-type"} 1
 	`
 
 	if err := testutil.CollectAndCompare(LlmdDisaggDecisionCount, strings.NewReader(expectedNew),
-		"llm_d_router_epp_disagg_decision_total"); err != nil {
+		"llm_d_epp_disagg_decision_total"); err != nil {
 		t.Errorf("RecordDisaggDecision() new empty model failed: %v", err)
 	}
 }
 
@@ -300,7 +300,7 @@ func TestHandleResponseBodyWithoutSchedulingRequest(t *testing.T) {
 		server.HandleResponseBody(ctx, reqCtx, []byte(body), true)
 	})
 
-	histogram := findHistogramMetric(t, "llm_d_router_epp_request_ntpot_seconds", map[string]string{
+	histogram := findHistogramMetric(t, "llm_d_epp_request_ntpot_seconds", map[string]string{
 		"model_name":        "incoming-model",
 		"target_model_name": "target-model",
 		"fairness_id":       metadata.DefaultFairnessID,
 
@@ -118,10 +118,10 @@ func TestMetricsCollected(t *testing.T) {
 		}
 
 		errNew := promtestutil.CollectAndCompare(collector, strings.NewReader(`
-		# HELP llm_d_router_epp_per_endpoint_queue_size [ALPHA] The total number of requests pending in the model server queue for each underlying endpoint.
-		# TYPE llm_d_router_epp_per_endpoint_queue_size gauge
-		llm_d_router_epp_per_endpoint_queue_size{model_server_endpoint="pod1-rank-0",name="test-pool"} 100
-`), "llm_d_router_epp_per_endpoint_queue_size")
+		# HELP llm_d_epp_per_endpoint_queue_size [ALPHA] The total number of requests pending in the model server queue for each underlying endpoint.
+		# TYPE llm_d_epp_per_endpoint_queue_size gauge
+		llm_d_epp_per_endpoint_queue_size{model_server_endpoint="pod1-rank-0",name="test-pool"} 100
+`), "llm_d_epp_per_endpoint_queue_size")
 		if errNew != nil {
 			t.Fatal(errNew)
 		}
Original file line number	Diff line number	Diff line change
`@@ -1122,15 +1122,15 @@ func TestFlowControlMetricsEmitted(t *testing.T) {`
`1122`	`1122`	`var queueSizeWhileQueued float64`
`1123`	`1123`	`var foundQueueSize bool`
`1124`	`1124`	`for _, f := range families {`
`1125`		`- if f.GetName() == "llm_d_router_epp_flow_control_queue_size" {`
	`1125`	`+ if f.GetName() == "llm_d_epp_flow_control_queue_size" {`
`1126`	`1126`	`foundQueueSize = true`
`1127`	`1127`	`for _, m := range f.GetMetric() {`
`1128`	`1128`	`queueSizeWhileQueued += m.GetGauge().GetValue()`
`1129`	`1129`	`}`
`1130`	`1130`	`}`
`1131`	`1131`	`}`
`1132`	`1132`	`require.True(t, foundQueueSize,`
`1133`		`- "llm_d_router_epp_flow_control_queue_size metric should exist")`
	`1133`	`+ "llm_d_epp_flow_control_queue_size metric should exist")`
`1134`	`1134`	`require.Greater(t, queueSizeWhileQueued, 0.0,`
`1135`	`1135`	`"queue_size should be > 0 while a request is actively queued")`
`1136`	`1136`
Original file line number	Diff line number	Diff line change
`@@ -118,10 +118,10 @@ func TestMetricsCollected(t *testing.T) {`
`118`	`118`	`}`
`119`	`119`
`120`	`120`	errNew := promtestutil.CollectAndCompare(collector, strings.NewReader(`
`121`		`- # HELP llm_d_router_epp_per_endpoint_queue_size [ALPHA] The total number of requests pending in the model server queue for each underlying endpoint.`
`122`		`- # TYPE llm_d_router_epp_per_endpoint_queue_size gauge`
`123`		`- llm_d_router_epp_per_endpoint_queue_size{model_server_endpoint="pod1-rank-0",name="test-pool"} 100`
`124`		-`), "llm_d_router_epp_per_endpoint_queue_size")
	`121`	`+ # HELP llm_d_epp_per_endpoint_queue_size [ALPHA] The total number of requests pending in the model server queue for each underlying endpoint.`
	`122`	`+ # TYPE llm_d_epp_per_endpoint_queue_size gauge`
	`123`	`+ llm_d_epp_per_endpoint_queue_size{model_server_endpoint="pod1-rank-0",name="test-pool"} 100`
	`124`	+`), "llm_d_epp_per_endpoint_queue_size")
`125`	`125`	`if errNew != nil {`
`126`	`126`	`t.Fatal(errNew)`
`127`	`127`	`}`