Skip to content

Commit 20b194e

Browse files
committed
add metric to track started actions
1 parent 1e4cb42 commit 20b194e

File tree

2 files changed

+37
-32
lines changed

2 files changed

+37
-32
lines changed

internal/castai/client_test.go

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ func TestConvertPrometheusMetricFamilies(t *testing.T) {
250250
metricName := "test_histogram"
251251
sampleCount := uint64(100)
252252
sampleSum := 250.5
253-
253+
254254
// Create histogram buckets
255255
buckets := []*dto.Bucket{
256256
{
@@ -334,20 +334,20 @@ func TestConvertPrometheusMetricFamilies(t *testing.T) {
334334

335335
// Verify bucket timeseries (4 explicit buckets + 1 +Inf bucket)
336336
r.Len(bucketTimeseries, 5)
337-
337+
338338
// Verify each expected bucket exists
339339
expectedBuckets := map[string]float64{
340-
"1.000000": 10,
341-
"5.000000": 30,
340+
"1.000000": 10,
341+
"5.000000": 30,
342342
"10.000000": 80,
343343
"25.000000": 100,
344-
"+Inf": 100,
344+
"+Inf": 100,
345345
}
346346

347347
foundBuckets := make(map[string]bool)
348348
for _, ts := range bucketTimeseries {
349349
assertLabelPresent(t, ts.Labels, "__name__", metricName+"_bucket")
350-
350+
351351
// Find the 'le' label value
352352
var leValue string
353353
for _, label := range ts.Labels {
@@ -356,38 +356,42 @@ func TestConvertPrometheusMetricFamilies(t *testing.T) {
356356
break
357357
}
358358
}
359-
359+
360360
r.NotEmpty(leValue, "Bucket timeseries should have 'le' label")
361-
361+
362362
expectedCount, exists := expectedBuckets[leValue]
363363
r.True(exists, "Unexpected bucket with le=%s", leValue)
364364
r.Equal(expectedCount, ts.Samples[0].Value)
365365
foundBuckets[leValue] = true
366366
}
367-
367+
368368
// Ensure all expected buckets were found
369369
r.Len(foundBuckets, len(expectedBuckets))
370370

371371
// Verify _sum timeseries
372372
r.NotNil(sumTimeseries)
373373
assertLabelPresent(t, sumTimeseries.Labels, "__name__", metricName+"_sum")
374374
r.Equal(sampleSum, sumTimeseries.Samples[0].Value)
375-
375+
376376
// Verify _sum doesn't have 'le' label
377377
for _, label := range sumTimeseries.Labels {
378378
r.NotEqual("le", label.Name, "_sum should not have 'le' label")
379379
}
380380

381-
// Verify _count timeseries
381+
// Verify _count timeseries
382382
r.NotNil(countTimeseries)
383383
assertLabelPresent(t, countTimeseries.Labels, "__name__", metricName+"_count")
384384
r.Equal(float64(sampleCount), countTimeseries.Samples[0].Value)
385-
385+
386386
// Verify _count doesn't have 'le' label
387387
for _, label := range countTimeseries.Labels {
388388
r.NotEqual("le", label.Name, "_count should not have 'le' label")
389389
}
390390
})
391+
392+
t.Run("summary", func(t *testing.T) {
393+
// TODO: unit test for summary metrics
394+
})
391395
}
392396

393397
func assertLabelPresent(t *testing.T, labels []PrometheusLabel, name, value string) {

internal/metrics/custom_metrics.go

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,41 @@ import (
77
"github.com/prometheus/client_golang/prometheus"
88
)
99

10-
// actionCounter tracks actions executed by the cluster controller.
11-
var actionCounter = prometheus.NewCounterVec(
10+
var actionStartedCounter = prometheus.NewCounterVec(
1211
prometheus.CounterOpts{
13-
Name: "action_executed_total",
14-
Help: "Count of successful and unsuccessful actions executed by type.",
12+
Name: "action_started_total",
13+
Help: "Count of actions started by type.",
1514
},
16-
[]string{"success", "type"},
15+
[]string{"type"},
1716
)
1817

19-
// actionDuration tracks the duration of actions executed by the cluster controller.
20-
var actionExecutedDuration = prometheus.NewHistogramVec(
21-
prometheus.HistogramOpts{
22-
Name: "action_executed_duration_seconds",
23-
Help: "Duration of action handle execution in seconds.",
24-
Buckets: prometheus.ExponentialBucketsRange(0.01, 30, 7),
18+
// actionExecutedCounter tracks actions executed by the cluster controller.
19+
var actionExecutedCounter = prometheus.NewCounterVec(
20+
prometheus.CounterOpts{
21+
Name: "action_executed_total",
22+
Help: "Count of successful and unsuccessful actions executed by type.",
2523
},
26-
[]string{"type"},
24+
[]string{"success", "type"},
2725
)
2826

29-
var actionExecutedDurationSummary = prometheus.NewSummaryVec(
27+
var actionExecutedDuration = prometheus.NewSummaryVec(
3028
prometheus.SummaryOpts{
31-
Name: "action_executed_duration_summary_seconds",
32-
Help: "Duration of action handle execution in seconds.",
29+
Name: "action_executed_duration_seconds",
30+
Help: "Duration of actions executed by type.",
3331
Objectives: map[float64]float64{
34-
0.5: 0.05, // 50th percentile (median) with 5% error
35-
0.9: 0.01, // 90th percentile with 1% error
36-
0.99: 0.001, // 99th percentile with 0.1% error
32+
0.5: 0.05,
33+
0.9: 0.01,
34+
0.99: 0.001,
3735
},
3836
},
3937
[]string{"type"},
4038
)
4139

40+
func ActionStarted(actionType string) {
41+
actionStartedCounter.With(prometheus.Labels{"type": actionType}).Inc()
42+
}
43+
4244
func ActionFinished(actionType string, success bool, duration time.Duration) {
43-
actionCounter.With(prometheus.Labels{"success": strconv.FormatBool(success), "type": actionType}).Inc()
45+
actionExecutedCounter.With(prometheus.Labels{"success": strconv.FormatBool(success), "type": actionType}).Inc()
4446
actionExecutedDuration.With(prometheus.Labels{"type": actionType}).Observe(duration.Seconds())
45-
actionExecutedDurationSummary.With(prometheus.Labels{"type": actionType}).Observe(duration.Seconds())
4647
}

0 commit comments

Comments
 (0)