@@ -17,6 +17,7 @@ package conformance
1717import (
1818 "encoding/json"
1919 "fmt"
20+ "strings"
2021
2122 "github.com/NVIDIA/aicr/pkg/errors"
2223 "github.com/NVIDIA/aicr/pkg/validator/checks"
@@ -60,16 +61,20 @@ func checkAIServiceMetricsWithURL(ctx *checks.ValidationContext, promBaseURL str
6061 }
6162
6263 var promResp struct {
63- Data struct {
64+ Status string `json:"status"`
65+ Data struct {
6466 Result []json.RawMessage `json:"result"`
6567 } `json:"data"`
6668 }
6769 if err := json .Unmarshal (body , & promResp ); err != nil {
6870 return errors .Wrap (errors .ErrCodeInternal , "failed to parse Prometheus response" , err )
6971 }
7072
71- recordArtifact (ctx , "Prometheus Query: DCGM_FI_DEV_GPU_UTIL" ,
72- fmt .Sprintf ("Endpoint: %s\n Time series count: %d" , queryURL , len (promResp .Data .Result )))
73+ recordRawTextArtifact (ctx , "Prometheus Query: DCGM_FI_DEV_GPU_UTIL" ,
74+ fmt .Sprintf ("curl -sf '%s'" , queryURL ),
75+ fmt .Sprintf ("Status: %s\n Time series count: %d" , valueOrUnknown (promResp .Status ), len (promResp .Data .Result )))
76+ recordChunkedTextArtifact (ctx , "Prometheus query response (GPU util)" ,
77+ fmt .Sprintf ("curl -sf '%s'" , queryURL ), string (body ))
7378
7479 if len (promResp .Data .Result ) == 0 {
7580 return errors .New (errors .ErrCodeNotFound ,
@@ -83,34 +88,42 @@ func checkAIServiceMetricsWithURL(ctx *checks.ValidationContext, promBaseURL str
8388 return errors .New (errors .ErrCodeInternal , "discovery REST client is not available" )
8489 }
8590 result := restClient .Get ().AbsPath (rawURL ).Do (ctx .Context )
86- var statusCode int
87- result .StatusCode (& statusCode )
8891 if cmErr := result .Error (); cmErr != nil {
89- recordArtifact (ctx , "Custom Metrics API" ,
90- fmt . Sprintf ( "Endpoint: %s \n HTTP Status: %d \n Status: unavailable \n Error: %v " ,
91- rawURL , statusCode , cmErr ))
92+ recordRawTextArtifact (ctx , "Custom Metrics API" ,
93+ "kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1 " ,
94+ fmt . Sprintf ( "Status: unavailable \n Error: %v" , cmErr ))
9295 return errors .Wrap (errors .ErrCodeNotFound ,
9396 "custom metrics API not available" , cmErr )
9497 }
95-
96- groupVersion := "unknown"
97- resourceCount := 0
98- discoveryBody , rawErr := result .Raw ()
99- if rawErr == nil {
100- var discovery struct {
101- GroupVersion string `json:"groupVersion"`
102- Resources []json.RawMessage `json:"resources"`
103- }
104- if json .Unmarshal (discoveryBody , & discovery ) == nil {
105- if discovery .GroupVersion != "" {
106- groupVersion = discovery .GroupVersion
107- }
108- resourceCount = len (discovery .Resources )
109- }
98+ var statusCode int
99+ result .StatusCode (& statusCode )
100+ rawBody , rawErr := result .Raw ()
101+ if rawErr != nil {
102+ return errors .Wrap (errors .ErrCodeInternal , "failed to read custom metrics API response" , rawErr )
103+ }
104+ var customMetricsResp struct {
105+ GroupVersion string `json:"groupVersion"`
106+ Resources []struct {
107+ Name string `json:"name"`
108+ Namespaced bool `json:"namespaced"`
109+ } `json:"resources"`
110+ }
111+ if err := json .Unmarshal (rawBody , & customMetricsResp ); err != nil {
112+ return errors .Wrap (errors .ErrCodeInternal , "failed to parse custom metrics API response" , err )
113+ }
114+ var resources strings.Builder
115+ limit := len (customMetricsResp .Resources )
116+ if limit > 20 {
117+ limit = 20
118+ }
119+ for i := 0 ; i < limit ; i ++ {
120+ r := customMetricsResp .Resources [i ]
121+ fmt .Fprintf (& resources , "- %s (namespaced=%t)\n " , r .Name , r .Namespaced )
110122 }
111- recordArtifact (ctx , "Custom Metrics API" ,
112- fmt .Sprintf ("Endpoint: %s\n HTTP Status: %d\n GroupVersion: %s\n API Resources: %d\n Status: available" ,
113- rawURL , statusCode , groupVersion , resourceCount ))
123+ recordRawTextArtifact (ctx , "Custom Metrics API" ,
124+ "kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1" ,
125+ fmt .Sprintf ("HTTP Status: %d\n GroupVersion: %s\n Resource count: %d\n \n Resources:\n %s" ,
126+ statusCode , valueOrUnknown (customMetricsResp .GroupVersion ), len (customMetricsResp .Resources ), resources .String ()))
114127
115128 return nil
116129}
0 commit comments