@@ -2,8 +2,10 @@ package e2e
22
33import (
44 "fmt"
5+ "io"
56 "net/http"
67 "strconv"
8+ "strings"
79 "time"
810
911 "github.com/onsi/ginkgo/v2"
@@ -76,6 +78,13 @@ var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() {
7678
7779 epp := createEndPointPicker (pdConfig )
7880
81+ metricsURL := fmt .Sprintf ("http://localhost:%s/metrics" , metricsPort )
82+
83+ if k8sContext != "" {
84+ // Use port-forward to access the EPP pod's metrics endpoint.
85+ startEPPMetricsPortForward ()
86+ }
87+
7988 prefillPods , decodePods := getModelServerPods (podSelector , prefillSelector , decodeSelector )
8089 gomega .Expect (prefillPods ).Should (gomega .HaveLen (prefillReplicas ))
8190 gomega .Expect (decodePods ).Should (gomega .HaveLen (decodeReplicas ))
@@ -110,6 +119,16 @@ var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() {
110119 gomega .Expect (podHdr ).Should (gomega .BeElementOf (decodePods ))
111120 gomega .Expect (podHdr ).Should (gomega .Equal (podHdrChat ))
112121
122+ // Metrics Validation
123+ labelFilter := fmt .Sprintf (`decision_type="prefill-decode",model_name="%s"` , modelName )
124+ prefillDecodeCount := getCounterMetric (metricsURL , "llm_d_inference_scheduler_pd_decision_total" , labelFilter )
125+
126+ labelFilter2 := fmt .Sprintf (`decision_type="decode-only",model_name="%s"` , modelName )
127+ decodeOnlyCount := getCounterMetric (metricsURL , "llm_d_inference_scheduler_pd_decision_total" , labelFilter2 )
128+
129+ gomega .Expect (prefillDecodeCount ).Should (gomega .Equal (6 ))
130+ gomega .Expect (decodeOnlyCount ).Should (gomega .Equal (0 ))
131+
113132 testutils .DeleteObjects (testConfig , epp )
114133 testutils .DeleteObjects (testConfig , modelServers )
115134 })
@@ -383,6 +402,33 @@ func runChatCompletion(prompt string) (string, string, string) {
383402 return namespaceHeader , podHeader , podPort
384403}
385404
405+ // getCounterMetric fetches the current value of a Prometheus counter metric from the given metrics URL.
406+ func getCounterMetric (metricsURL , metricName , labelMatch string ) int {
407+ resp , err := http .Get (metricsURL )
408+ gomega .Expect (err ).ShouldNot (gomega .HaveOccurred ())
409+ defer func () {
410+ err = resp .Body .Close ()
411+ gomega .Expect (err ).ToNot (gomega .HaveOccurred ())
412+ }()
413+ gomega .Expect (resp .StatusCode ).Should (gomega .Equal (http .StatusOK ))
414+
415+ body , err := io .ReadAll (resp .Body )
416+ gomega .Expect (err ).ShouldNot (gomega .HaveOccurred ())
417+
418+ metricsText := string (body )
419+ for _ , line := range strings .Split (metricsText , "\n " ) {
420+ if strings .HasPrefix (line , metricName ) && strings .Contains (line , labelMatch ) {
421+ fields := strings .Fields (line )
422+ if len (fields ) >= 2 {
423+ valFloat , err := strconv .ParseFloat (fields [len (fields )- 1 ], 64 )
424+ gomega .Expect (err ).ShouldNot (gomega .HaveOccurred ())
425+ return int (valFloat )
426+ }
427+ }
428+ }
429+ return 0
430+ }
431+
386432// Simple EPP configuration for running without P/D
387433const simpleConfig = `apiVersion: inference.networking.x-k8s.io/v1alpha1
388434kind: EndpointPickerConfig
0 commit comments