@@ -33,67 +33,68 @@ import (
3333
3434 "github.com/nuclio/errors"
3535 "github.com/nuclio/logger"
36- "github.com/prometheus/client_golang/api"
37- v1 "github.com/prometheus/client_golang/api/prometheus/v1"
36+ prometheusapi "github.com/prometheus/client_golang/api"
37+ prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
3838 "github.com/prometheus/common/model"
3939)
4040
41- type PrometheusClient struct {
// Prometheus label names that are looked up on a metric sample to find the name
// of the resource the sample belongs to.
const (
	// functionLabelName is for nuclio functions (nuclio_processor_handled_events_total);
	// it maps to the nuclio function resource.
	functionLabelName = "function"

	// serviceLabelName is for deployments (num_of_requests, jupyter_kernel_busyness);
	// it maps to the deployment resource.
	serviceLabelName = "service_name"

	// podLabelName is for pod-based metrics (DCGM_FI_DEV_GPU_UTIL); it maps to the
	// pod resource.
	podLabelName = "pod"
)
46+
47+ type PrometheusMetricsClient struct {
4248 logger logger.Logger
43- apiClient v1 .API
49+ apiClient prometheusv1 .API
4450 namespace string
4551 queryTemplates map [string ]* template.Template
4652}
4753
48- func NewPrometheusClient (parentLogger logger.Logger , prometheusURL , namespace string , templates []scalertypes.QueryTemplate ) (* PrometheusClient , error ) {
54+ func NewPrometheusClient (parentLogger logger.Logger , prometheusURL , namespace string , templates []scalertypes.QueryTemplate ) (* PrometheusMetricsClient , error ) {
4955 if len (templates ) == 0 {
50- return nil , errors .New ("Failed to created Prometheus client: query template cannot be empty" )
56+ return nil , errors .New ("query template cannot be empty" )
5157 }
5258
5359 if prometheusURL == "" {
54- return nil , errors .New ("Failed to created Prometheus client: prometheus URL cannot be empty" )
60+ return nil , errors .New ("prometheus URL cannot be empty" )
5561 }
5662
5763 if namespace == "" {
58- return nil , errors .New ("Failed to created Prometheus client: namespace cannot be empty" )
64+ return nil , errors .New ("namespace cannot be empty" )
5965 }
6066
61- client , err := api .NewClient (api .Config {
67+ client , err := prometheusapi .NewClient (prometheusapi .Config {
6268 Address : prometheusURL ,
6369 })
6470 if err != nil {
65- return nil , errors .Wrap (err , "Failed to created Prometheus client: failed to create prometheus API client" )
71+ return nil , errors .Wrap (err , "failed to create prometheus API client" )
6672 }
6773
6874 queryTemplates := make (map [string ]* template.Template )
6975 for _ , queryTemplate := range templates {
70- if queryTemplate .Name == "" {
71- return nil , errors .New ("Failed to created Prometheus client: template name cannot be empty" )
72- }
73- if queryTemplate .Template == "" {
74- return nil , errors .New ("Failed to created Prometheus client: query template cannot be empty" )
75- }
76- tmpl , err := template .New (queryTemplate .Name ).Parse (queryTemplate .Template )
76+ tmpl , err := queryTemplate .CreateQueryTemplate ()
7777 if err != nil {
78- return nil , errors .Wrap (err , "Failed to created Prometheus client: failed to parse template in prometheus client " )
78+ return nil , errors .Wrap (err , "failed to create query template " )
7979 }
8080 queryTemplates [queryTemplate .Name ] = tmpl
8181 }
8282
8383 childLogger := parentLogger .GetChild ("prometheus-client" )
84- childLogger .Info ("Creating prometheus client" )
84+ childLogger .Info ("Creating prometheus metrics client" )
8585
86- return & PrometheusClient {
86+ return & PrometheusMetricsClient {
8787 logger : childLogger ,
88- apiClient : v1 .NewAPI (client ),
88+ apiClient : prometheusv1 .NewAPI (client ),
8989 namespace : namespace ,
9090 queryTemplates : queryTemplates ,
9191 }, nil
9292}
9393
9494// GetResourceMetrics retrieves metrics for multiple resources
95- func (pc * PrometheusClient ) GetResourceMetrics (resources []scalertypes.Resource ) (map [string ]map [string ]int , error ) {
95+ func (pc * PrometheusMetricsClient ) GetResourceMetrics (resources []scalertypes.Resource ) (map [string ]map [string ]int , error ) {
9696 metricsByResource := make (map [string ]map [string ]int )
97+ resourceNameRegex := pc .buildResourceNameRegex (resources )
9798
9899 for metricName , queryTemplate := range pc .queryTemplates {
99100 windowSizes := pc .extractWindowSizesForMetric (resources , metricName )
@@ -103,8 +104,6 @@ func (pc *PrometheusClient) GetResourceMetrics(resources []scalertypes.Resource)
103104 continue
104105 }
105106
106- resourceNameRegex := pc .buildResourceNameRegex (resources )
107-
108107 for windowSize := range windowSizes {
109108 fullMetricName , err := pc .resolveFullMetricName (resources , metricName , windowSize )
110109 if err != nil {
@@ -122,39 +121,25 @@ func (pc *PrometheusClient) GetResourceMetrics(resources []scalertypes.Resource)
122121
123122 rawResult , warnings , err := pc .apiClient .Query (context .Background (), query , time .Now ())
124123 if err != nil {
125- pc .logger .WarnWith ("Failed to execute Prometheus query" ,
126- "metricName" , metricName ,
127- "windowSize" , windowSize ,
128- "error" , err )
129- continue
124+ return nil , errors .Wrapf (err , "failed to execute Prometheus query for metricName=%s, windowSize=%s" , metricName , windowSize )
130125 }
131126
132127 if len (warnings ) > 0 {
133- pc .logger .DebugWith ("Prometheus query warnings" ,
128+ pc .logger .WarnWith ("Prometheus query warnings" ,
134129 "metricName" , metricName ,
135130 "windowSize" , windowSize ,
136131 "warnings" , warnings )
137132 }
138133
139134 metricSamples , ok := rawResult .(model.Vector )
140135 if ! ok {
141- pc .logger .WarnWith ("Unexpected Prometheus result type" ,
142- "metricName" , metricName ,
143- "windowSize" , windowSize ,
144- "expectedType" , "model.Vector" ,
145- "actualType" , fmt .Sprintf ("%T" , rawResult ))
146- continue
136+ return nil , errors .Wrapf (err , "unexpected Prometheus result type for metricName=%s, windowSize=%s" , metricName , windowSize )
147137 }
148138
149139 for _ , metricSample := range metricSamples {
150140 resourceName , err := pc .extractResourceName (metricSample .Metric )
151141 if err != nil {
152- pc .logger .WarnWith ("Failed to extract resource name from the Prometheus metric's labels" ,
153- "metricName" , metricName ,
154- "windowSize" , windowSize ,
155- "labels" , metricSample .Metric .String (),
156- "error" , err )
157- continue
142+ return nil , errors .Wrapf (err , "failed to extract resource name from the prometheus metric's labels. metricName=%s, windowSize=%s" , metricName , windowSize )
158143 }
159144
160145 // Round up values to ensure any fractional value > 0 becomes at least 1
@@ -182,7 +167,7 @@ func (pc *PrometheusClient) GetResourceMetrics(resources []scalertypes.Resource)
182167}
183168
184169// renderQuery renders the Prometheus query template
185- func (pc * PrometheusClient ) renderQuery (queryTemplate * template.Template , windowSize , resourceNameRegex string ) (string , error ) {
170+ func (pc * PrometheusMetricsClient ) renderQuery (queryTemplate * template.Template , windowSize , resourceNameRegex string ) (string , error ) {
186171 templateData := make (map [string ]string )
187172 templateData ["Namespace" ] = pc .namespace
188173 templateData ["WindowSize" ] = windowSize
@@ -197,7 +182,7 @@ func (pc *PrometheusClient) renderQuery(queryTemplate *template.Template, window
197182}
198183
199184// extractWindowSizesForMetric extracts unique window sizes from resources' ScaleResources for a specific metric name.
200- func (pc * PrometheusClient ) extractWindowSizesForMetric (resources []scalertypes.Resource , metricName string ) map [string ]bool {
185+ func (pc * PrometheusMetricsClient ) extractWindowSizesForMetric (resources []scalertypes.Resource , metricName string ) map [string ]bool {
201186 windowSizes := make (map [string ]bool )
202187 for _ , resource := range resources {
203188 for _ , scaleResource := range resource .ScaleResources {
@@ -211,7 +196,7 @@ func (pc *PrometheusClient) extractWindowSizesForMetric(resources []scalertypes.
211196}
212197
213198// buildResourceNameRegex creates a Prometheus regex pattern for query filtering
214- func (pc * PrometheusClient ) buildResourceNameRegex (resources []scalertypes.Resource ) string {
199+ func (pc * PrometheusMetricsClient ) buildResourceNameRegex (resources []scalertypes.Resource ) string {
215200 resourceNames := make ([]string , len (resources ))
216201 for i , resource := range resources {
217202 resourceNames [i ] = resource .Name
@@ -223,7 +208,7 @@ func (pc *PrometheusClient) buildResourceNameRegex(resources []scalertypes.Resou
223208// resolveFullMetricName resolves the full metric name because the same resource can have multiple
224209// metrics with the same base name but different window sizes (e.g., "metric_name_per_1m" vs "metric_name_per_5m"),
225210// and we need unique keys in our internal metrics map to store them separately.
226- func (pc * PrometheusClient ) resolveFullMetricName (resources []scalertypes.Resource , metricName , windowSize string ) (string , error ) {
211+ func (pc * PrometheusMetricsClient ) resolveFullMetricName (resources []scalertypes.Resource , metricName , windowSize string ) (string , error ) {
227212 for _ , resource := range resources {
228213 for _ , scaleResource := range resource .ScaleResources {
229214 if scaleResource .MetricName == metricName {
@@ -238,11 +223,11 @@ func (pc *PrometheusClient) resolveFullMetricName(resources []scalertypes.Resour
238223}
239224
240225// extractResourceName extracts the resource name from Prometheus metric labels.
241- func (pc * PrometheusClient ) extractResourceName (labels model.Metric ) (string , error ) {
226+ func (pc * PrometheusMetricsClient ) extractResourceName (labels model.Metric ) (string , error ) {
242227 labelNames := []model.LabelName {
243- "function" , // For nuclio functions (nuclio_processor_handled_events_total) - maps to nucliofunction resource
244- "service_name" , // For deployments (num_of_requests, jupyter_kernel_busyness) - maps to deployment resource
245- "pod" , // For pod-based metrics (DCGM_FI_DEV_GPU_UTIL) - maps to pod resource
228+ functionLabelName ,
229+ serviceLabelName ,
230+ podLabelName ,
246231 }
247232 for _ , labelName := range labelNames {
248233 if value , ok := labels [labelName ]; ok {
0 commit comments