Skip to content

Commit 57ebc3c

Browse files
committed
Add pod_completed_total
1 parent eafab75 commit 57ebc3c

File tree

2 files changed

+39
-4
lines changed

2 files changed

+39
-4
lines changed

audit-policy.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ rules:
1111
resources:
1212
- pods
1313
- pods/binding
14+
- pods/status
1415
- group: batch
1516
resources:
1617
- jobs

exporter/metrics.go

+38-4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ var (
2323
Help: "Total number of pods deleted",
2424
}, []string{"cluster", "namespace", "user", "phase"})
2525

26+
podCompletedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
27+
Name: "pod_completed_total",
28+
Help: "Total number of pods transitioned to completed status",
29+
}, []string{"cluster", "namespace", "user", "phase"})
30+
2631
podSchedulingLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{
2732
Name: "pod_scheduling_latency_seconds",
2833
Help: "Duration from pod creation to scheduled on node in seconds",
@@ -42,6 +47,7 @@ func init() {
4247
podSchedulingLatency,
4348
podDeletedTotal,
4449
batchJobCompleteLatency,
50+
podCompletedTotal,
4551
)
4652
}
4753

@@ -118,16 +124,17 @@ func (p *Exporter) updateMetrics(clusterLabel string, event auditv1.Event) {
118124
} else {
119125
p.podCreationTimes[target] = nil
120126
}
121-
} else if event.Verb == "delete" {
122-
delete(p.podCreationTimes, buildTarget(event.ObjectRef))
123-
124-
if event.ResponseObject != nil {
127+
} else if event.Verb == "delete" && event.ResponseObject != nil {
128+
target := buildTarget(event.ObjectRef)
129+
_, ok := p.podCreationTimes[target]
130+
if ok {
125131
var pod Pod
126132
if err := json.Unmarshal(event.ResponseObject.Raw, &pod); err != nil {
127133
slog.Error("failed to unmarshal pod during delete", "err", err)
128134
return
129135
}
130136

137+
delete(p.podCreationTimes, target)
131138
user := extractUserAgent(event.UserAgent)
132139
podDeletedTotal.WithLabelValues(
133140
clusterLabel,
@@ -136,6 +143,31 @@ func (p *Exporter) updateMetrics(clusterLabel string, event auditv1.Event) {
136143
pod.Status.Phase,
137144
).Inc()
138145
}
146+
} else if (event.Verb == "update" || event.Verb == "patch") &&
147+
event.ObjectRef.Subresource == "status" &&
148+
event.ResponseObject != nil {
149+
150+
target := buildTarget(event.ObjectRef)
151+
t, ok := p.podCreationTimes[target]
152+
if ok && t == nil {
153+
var pod Pod
154+
if err := json.Unmarshal(event.ResponseObject.Raw, &pod); err != nil {
155+
slog.Error("failed to unmarshal new pod during update", "err", err)
156+
return
157+
}
158+
159+
phase := pod.Status.Phase
160+
if podCompletedPhases[phase] {
161+
delete(p.podCreationTimes, target)
162+
user := extractUserAgent(event.UserAgent)
163+
podCompletedTotal.WithLabelValues(
164+
clusterLabel,
165+
ns,
166+
user,
167+
phase,
168+
).Inc()
169+
}
170+
}
139171
}
140172
}
141173

@@ -180,3 +212,5 @@ func (p *Exporter) updateMetrics(clusterLabel string, event auditv1.Event) {
180212
}
181213
}
182214
}
215+
216+
// podCompletedPhases is a lookup set of pod status phases that count as
// completed. Indexing it yields true for "Succeeded" and "Failed" and false
// (the zero value) for every other phase string.
var podCompletedPhases = map[string]bool{
	"Failed":    true,
	"Succeeded": true,
}

0 commit comments

Comments
 (0)