23
23
Help : "Total number of pods deleted" ,
24
24
}, []string {"cluster" , "namespace" , "user" , "phase" })
25
25
26
+ // podCompletedTotal counts pods whose status update/patch reported a phase listed
+ // in podCompletedPhases. Label values are supplied by updateMetrics in the order
+ // cluster, namespace, user, phase; the user value comes from
+ // extractUserAgent(event.UserAgent).
+ podCompletedTotal = prometheus .NewCounterVec (prometheus.CounterOpts {
27
+ Name : "pod_completed_total" ,
28
+ Help : "Total number of pods transitioned to completed status" ,
29
+ }, []string {"cluster" , "namespace" , "user" , "phase" })
30
+
26
31
podSchedulingLatency = prometheus .NewHistogramVec (prometheus.HistogramOpts {
27
32
Name : "pod_scheduling_latency_seconds" ,
28
33
Help : "Duration from pod creation to scheduled on node in seconds" ,
@@ -42,6 +47,7 @@ func init() {
42
47
podSchedulingLatency ,
43
48
podDeletedTotal ,
44
49
batchJobCompleteLatency ,
50
+ podCompletedTotal ,
45
51
)
46
52
}
47
53
@@ -118,16 +124,17 @@ func (p *Exporter) updateMetrics(clusterLabel string, event auditv1.Event) {
118
124
} else {
119
125
p .podCreationTimes [target ] = nil
120
126
}
121
- } else if event .Verb == "delete" {
122
- delete ( p . podCreationTimes , buildTarget (event .ObjectRef ) )
123
-
124
- if event . ResponseObject != nil {
127
+ } else if event .Verb == "delete" && event . ResponseObject != nil {
128
+ target := buildTarget (event .ObjectRef )
129
+ _ , ok := p . podCreationTimes [ target ]
130
+ if ok {
125
131
var pod Pod
126
132
if err := json .Unmarshal (event .ResponseObject .Raw , & pod ); err != nil {
127
133
slog .Error ("failed to unmarshal pod during delete" , "err" , err )
128
134
return
129
135
}
130
136
137
+ delete (p .podCreationTimes , target )
131
138
user := extractUserAgent (event .UserAgent )
132
139
podDeletedTotal .WithLabelValues (
133
140
clusterLabel ,
@@ -136,6 +143,31 @@ func (p *Exporter) updateMetrics(clusterLabel string, event auditv1.Event) {
136
143
pod .Status .Phase ,
137
144
).Inc ()
138
145
}
146
+ } else if (event .Verb == "update" || event .Verb == "patch" ) &&
147
+ event .ObjectRef .Subresource == "status" &&
148
+ event .ResponseObject != nil {
149
+
150
+ target := buildTarget (event .ObjectRef )
151
+ t , ok := p .podCreationTimes [target ]
152
+ if ok && t == nil {
153
+ var pod Pod
154
+ if err := json .Unmarshal (event .ResponseObject .Raw , & pod ); err != nil {
155
+ slog .Error ("failed to unmarshal new pod during update" , "err" , err )
156
+ return
157
+ }
158
+
159
+ phase := pod .Status .Phase
160
+ if podCompletedPhases [phase ] {
161
+ delete (p .podCreationTimes , target )
162
+ user := extractUserAgent (event .UserAgent )
163
+ podCompletedTotal .WithLabelValues (
164
+ clusterLabel ,
165
+ ns ,
166
+ user ,
167
+ phase ,
168
+ ).Inc ()
169
+ }
170
+ }
139
171
}
140
172
}
141
173
@@ -180,3 +212,5 @@ func (p *Exporter) updateMetrics(clusterLabel string, event auditv1.Event) {
180
212
}
181
213
}
182
214
}
215
+
216
+ // podCompletedPhases is the set of pod status phases that updateMetrics treats as
+ // completed: a status update/patch whose response object reports one of these
+ // phases increments podCompletedTotal and drops the pod from podCreationTimes.
+ var podCompletedPhases = map [string ]bool {"Succeeded" : true , "Failed" : true }
0 commit comments