Skip to content

Commit e4d5281

Browse files
sambhav-jain-16 authored and dims committed
add cpu burst metrics
1 parent cca9bd3 commit e4d5281

7 files changed

Lines changed: 83 additions & 0 deletions

File tree

container/libcontainer/handler.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -773,6 +773,8 @@ func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
773773
ret.Cpu.CFS.Periods = s.CpuStats.ThrottlingData.Periods
774774
ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods
775775
ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime
776+
ret.Cpu.CFS.BurstsPeriods = s.CpuStats.BurstData.BurstsPeriods
777+
ret.Cpu.CFS.BurstTime = s.CpuStats.BurstData.BurstTime
776778
setPSIStats(s.CpuStats.PSI, &ret.Cpu.PSI)
777779

778780
if !withPerCPU {

info/v1/container.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -329,6 +329,13 @@ type CpuCFS struct {
329329
// Total time duration for which tasks in the cgroup have been throttled.
330330
// Unit: nanoseconds.
331331
ThrottledTime uint64 `json:"throttled_time"`
332+
333+
// Total number of periods when CPU burst occurs.
334+
BurstsPeriods uint64 `json:"bursts_periods"`
335+
336+
// Total time duration when CPU burst occurs.
337+
// Unit: nanoseconds.
338+
BurstTime uint64 `json:"burst_time"`
332339
}
333340

334341
// Cpu Aggregated scheduler statistics

metrics/prometheus.go

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -234,6 +234,30 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
234234
timestamp: s.Timestamp,
235235
}}
236236
},
237+
}, {
238+
name: "container_cpu_cfs_burst_periods_total",
239+
help: "Number of periods when burst occurs.",
240+
valueType: prometheus.CounterValue,
241+
condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
242+
getValues: func(s *info.ContainerStats) metricValues {
243+
return metricValues{
244+
{
245+
value: float64(s.Cpu.CFS.BurstsPeriods),
246+
timestamp: s.Timestamp,
247+
}}
248+
},
249+
}, {
250+
name: "container_cpu_cfs_burst_seconds_total",
251+
help: "Total time duration the container has been bursted.",
252+
valueType: prometheus.CounterValue,
253+
condition: func(s info.ContainerSpec) bool { return s.Cpu.Quota != 0 },
254+
getValues: func(s *info.ContainerStats) metricValues {
255+
return metricValues{
256+
{
257+
value: float64(s.Cpu.CFS.BurstTime) / float64(time.Second),
258+
timestamp: s.Timestamp,
259+
}}
260+
},
237261
},
238262
}...)
239263
}

metrics/prometheus_fake.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,8 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
320320
Periods: 723,
321321
ThrottledPeriods: 18,
322322
ThrottledTime: 1724314000,
323+
BurstsPeriods: 25,
324+
BurstTime: 500000000,
323325
},
324326
Schedstat: info.CpuSchedstat{
325327
RunTime: 53643567,

metrics/prometheus_test.go

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -428,3 +428,39 @@ func TestIOCostMetrics(t *testing.T) {
428428
})
429429
}
430430
}
431+
432+
// TestCPUBurstMetrics checks the value conversions applied to the CFS burst
// fields of info.ContainerStats: the burst period count is reported as-is and
// the burst time is divided by time.Second before being compared.
//
// NOTE(review): the getValue closures below re-state the conversion
// expressions and read the fixture directly; they do not call the Prometheus
// collector. Collector output is presumably covered by the golden files under
// metrics/testdata — confirm.
func TestCPUBurstMetrics(t *testing.T) {
433+
// Fixture with only the burst-related CFS fields populated.
containerStats := &info.ContainerStats{
434+
Timestamp: time.Unix(1395066363, 0),
435+
Cpu: info.CpuStats{
436+
CFS: info.CpuCFS{
437+
BurstsPeriods: 25,
438+
BurstTime: 500000000,
439+
},
440+
},
441+
}
442+
443+
// Each case pairs a conversion closure with the float64 it should yield.
testCases := []struct {
444+
name string
445+
getValue func() float64
446+
expectedValue float64
447+
}{
448+
{
449+
name: "BurstsPeriods",
450+
getValue: func() float64 { return float64(containerStats.Cpu.CFS.BurstsPeriods) },
451+
expectedValue: 25.0,
452+
},
453+
{
454+
name: "BurstTime",
455+
// 500000000 divided by float64(time.Second) (1e9) gives 0.5.
getValue: func() float64 { return float64(containerStats.Cpu.CFS.BurstTime) / float64(time.Second) },
456+
expectedValue: 0.5,
457+
},
458+
}
459+
460+
// Run every case as a named subtest so failures identify the field.
for _, tc := range testCases {
461+
t.Run(tc.name, func(t *testing.T) {
462+
result := tc.getValue()
463+
assert.Equal(t, tc.expectedValue, result)
464+
})
465+
}
466+
}

metrics/testdata/prometheus_metrics

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,12 @@ container_cpu_cfs_throttled_periods_total{container_env_foo_env="prod",container
1818
# HELP container_cpu_cfs_throttled_seconds_total Total time duration the container has been throttled.
1919
# TYPE container_cpu_cfs_throttled_seconds_total counter
2020
container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.724314 1395066363000
21+
# HELP container_cpu_cfs_burst_periods_total Number of periods when burst occurs.
22+
# TYPE container_cpu_cfs_burst_periods_total counter
23+
container_cpu_cfs_burst_periods_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 25 1395066363000
24+
# HELP container_cpu_cfs_burst_seconds_total Total time duration the container has been bursted.
25+
# TYPE container_cpu_cfs_burst_seconds_total counter
26+
container_cpu_cfs_burst_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000
2127
# HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds.
2228
# TYPE container_cpu_load_average_10s gauge
2329
container_cpu_load_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000

metrics/testdata/prometheus_metrics_whitelist_filtered

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,12 @@ container_cpu_cfs_throttled_periods_total{container_env_foo_env="prod",id="testc
1818
# HELP container_cpu_cfs_throttled_seconds_total Total time duration the container has been throttled.
1919
# TYPE container_cpu_cfs_throttled_seconds_total counter
2020
container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.724314 1395066363000
21+
# HELP container_cpu_cfs_burst_periods_total Number of periods when burst occurs.
22+
# TYPE container_cpu_cfs_burst_periods_total counter
23+
container_cpu_cfs_burst_periods_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 25 1395066363000
24+
# HELP container_cpu_cfs_burst_seconds_total Total time duration the container has been bursted.
25+
# TYPE container_cpu_cfs_burst_seconds_total counter
26+
container_cpu_cfs_burst_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000
2127
# HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds.
2228
# TYPE container_cpu_load_average_10s gauge
2329
container_cpu_load_average_10s{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000

0 commit comments

Comments (0)