Skip to content

Commit 78dd453

Browse files
committed
add ipamd readiness latency metrics
1 parent a1d8f06 commit 78dd453

2 files changed

Lines changed: 36 additions & 1 deletion

File tree

cmd/aws-k8s-agent/main.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package main
1717
import (
1818
"context"
1919
"os"
20+
"strconv"
2021
"time"
2122

2223
"github.com/aws/amazon-vpc-cni-k8s/pkg/ipamd"
@@ -90,6 +91,9 @@ func startBackgroundAPIServerCheck(ipamContext *ipamd.IPAMContext) {
9091
}
9192

9293
func _main() int {
94+
// Start measuring full startup duration
95+
startupStartTime := time.Now()
96+
9397
// Do not add anything before initializing logger
9498
log := logger.Get()
9599

@@ -105,6 +109,8 @@ func _main() int {
105109
log.Info("SGP, custom networking or pod annotation feature is in use, waiting for API server connectivity to start IPAMD")
106110
if err := k8sapi.CheckAPIServerConnectivity(); err != nil {
107111
log.Errorf("Failed to check API server connectivity: %s", err)
112+
// Record failed startup
113+
metrics.IpamdStartupDuration.WithLabelValues("false", strconv.FormatBool(withApiServer), "api_server_connectivity").Observe(time.Since(startupStartTime).Seconds())
108114
return 1
109115
} else {
110116
log.Info("API server connectivity established.")
@@ -131,12 +137,22 @@ func _main() int {
131137
log.Errorf("Failed to create event recorder: %s", err)
132138
log.Warn("Skipping event recorder initialization")
133139
}
140+
// Measure AWS initialization duration
141+
awsStartTime := time.Now()
134142
ipamContext, err := ipamd.New(k8sClient, withApiServer)
143+
awsDuration := time.Since(awsStartTime).Seconds()
144+
135145
if err != nil {
136146
log.Errorf("Initialization failure: %v", err)
147+
// Record failed AWS initialization and failed startup
148+
metrics.IpamdStartupAwsDuration.WithLabelValues("false").Observe(awsDuration)
149+
metrics.IpamdStartupDuration.WithLabelValues("false", strconv.FormatBool(withApiServer), "aws_initialization").Observe(time.Since(startupStartTime).Seconds())
137150
return 1
138151
}
139152

153+
// Record successful AWS initialization
154+
metrics.IpamdStartupAwsDuration.WithLabelValues("true").Observe(awsDuration)
155+
140156
// If not connected to API server yet, start background checks
141157
if !withApiServer {
142158
startBackgroundAPIServerCheck(ipamContext)
@@ -155,7 +171,10 @@ func _main() int {
155171
go ipamContext.ServeIntrospection()
156172
}
157173

158-
// Start the RPC listener
174+
// Record successful startup duration before the blocking RPC handler call
175+
metrics.IpamdStartupDuration.WithLabelValues("true", strconv.FormatBool(withApiServer), "").Observe(time.Since(startupStartTime).Seconds())
176+
177+
// Start the RPC listener (this is a blocking call)
159178
err = ipamContext.RunRPCHandler(version.Version)
160179
if err != nil {
161180
log.Errorf("Failed to set up gRPC handler: %v", err)

utils/prometheusmetrics/prometheusmetrics.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,20 @@ var (
172172
},
173173
[]string{"eni"},
174174
)
175+
IpamdStartupDuration = prometheus.NewHistogramVec(
176+
prometheus.HistogramOpts{
177+
Name: "awscni_ipamd_startup_duration_seconds",
178+
Help: "The duration of IPAMD startup from process start to ready to serve CNI requests",
179+
},
180+
[]string{"success", "with_api_server", "failure_reason"},
181+
)
182+
IpamdStartupAwsDuration = prometheus.NewHistogramVec(
183+
prometheus.HistogramOpts{
184+
Name: "awscni_ipamd_startup_aws_duration_seconds",
185+
Help: "The duration of AWS initialization during IPAMD startup",
186+
},
187+
[]string{"success"},
188+
)
175189
)
176190

177191
// ServeMetrics sets up ipamd metrics and introspection endpoints
@@ -225,6 +239,8 @@ func PrometheusRegister() {
225239
prometheus.MustRegister(IpsPerCidr)
226240
prometheus.MustRegister(NoAvailableIPAddrs)
227241
prometheus.MustRegister(EniIPsInUse)
242+
prometheus.MustRegister(IpamdStartupDuration)
243+
prometheus.MustRegister(IpamdStartupAwsDuration)
228244
}
229245

230246
// This can be enhanced to get it programmatically.

0 commit comments

Comments
 (0)