This repository was archived by the owner on Sep 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Expand file tree
/
Copy pathprovisioning.go
More file actions
147 lines (130 loc) · 7.23 KB
/
provisioning.go
File metadata and controls
147 lines (130 loc) · 7.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package shared
import (
"fmt"
"strings"
"time"
"github.com/sourcegraph/sourcegraph/monitoring/monitoring"
)
// Provisioning indicator overviews - these provide long-term overviews of container
// resource usage. The goal of these observables is to provide guidance on whether or not
// a service requires more or fewer resources.
//
// These observables should only use cAdvisor metrics, and are thus only available on
// Kubernetes and docker-compose deployments.

// TitleResourceUtilization is the title NewProvisioningIndicatorsGroup gives its group
// when no CustomTitle is provided in the options.
const TitleResourceUtilization = "Resource utilization (not available on server)"
var (
ProvisioningCPUUsageLongTerm sharedObservable = func(containerName string, owner monitoring.ObservableOwner) Observable {
return Observable{
Name: "provisioning_container_cpu_usage_long_term",
Description: "container cpu usage total (90th percentile over 1d) across all cores by instance",
Query: fmt.Sprintf(`quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{%s}[1d])`, CadvisorContainerNameMatcher(containerName)),
Warning: monitoring.Alert().GreaterOrEqual(80).For(14 * 24 * time.Hour),
Panel: monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Percentage).Max(100).Min(0),
Owner: owner,
NextSteps: strings.ReplaceAll(`
- **Kubernetes:** Consider increasing CPU limits in the 'Deployment.yaml' for the {{CONTAINER_NAME}} service.
- **Docker Compose:** Consider increasing 'cpus:' of the {{CONTAINER_NAME}} container in 'docker-compose.yml'.
`, "{{CONTAINER_NAME}}", containerName),
}
}
ProvisioningMemoryUsageLongTerm sharedObservable = func(containerName string, owner monitoring.ObservableOwner) Observable {
return Observable{
Name: "provisioning_container_memory_usage_long_term",
Description: "container memory usage (1d maximum) by instance",
Query: fmt.Sprintf(`max_over_time(cadvisor_container_memory_usage_percentage_total{%s}[1d])`, CadvisorContainerNameMatcher(containerName)),
Warning: monitoring.Alert().GreaterOrEqual(80).For(14 * 24 * time.Hour),
Panel: monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Percentage).Max(100).Min(0),
Owner: owner,
NextSteps: strings.ReplaceAll(`
- **Kubernetes:** Consider increasing memory limits in the 'Deployment.yaml' for the {{CONTAINER_NAME}} service.
- **Docker Compose:** Consider increasing 'memory:' of the {{CONTAINER_NAME}} container in 'docker-compose.yml'.
`, "{{CONTAINER_NAME}}", containerName),
}
}
// ProvisioningCPUUsageShortTerm builds the short-term CPU usage observable for a
// container: the 5m maximum of cadvisor_container_cpu_usage_percentage_total, with a
// warning configured as GreaterOrEqual(90) and a 30-minute For duration.
//
// containerName is rendered inside the metric selector braces of the query and is
// substituted for {{CONTAINER_NAME}} in the NextSteps text; owner is stored on the
// returned Observable unchanged.
ProvisioningCPUUsageShortTerm sharedObservable = func(containerName string, owner monitoring.ObservableOwner) Observable {
	return Observable{
		Name:        "provisioning_container_cpu_usage_short_term",
		Description: "container cpu usage total (5m maximum) across all cores by instance",
		Query:       fmt.Sprintf(`max_over_time(cadvisor_container_cpu_usage_percentage_total{%s}[5m])`, CadvisorContainerNameMatcher(containerName)),
		Warning:     monitoring.Alert().GreaterOrEqual(90).For(30 * time.Minute),
		Panel:       monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Percentage).Interval(100).Max(100).Min(0),
		Owner:       owner,
		NextSteps: strings.ReplaceAll(`
- **Kubernetes:** Consider increasing CPU limits in the relevant 'Deployment.yaml'.
- **Docker Compose:** Consider increasing 'cpus:' of the {{CONTAINER_NAME}} container in 'docker-compose.yml'.
`, "{{CONTAINER_NAME}}", containerName),
	}
}
ProvisioningMemoryUsageShortTerm sharedObservable = func(containerName string, owner monitoring.ObservableOwner) Observable {
return Observable{
Name: "provisioning_container_memory_usage_short_term",
Description: "container memory usage (5m maximum) by instance",
Query: fmt.Sprintf(`max_over_time(cadvisor_container_memory_usage_percentage_total{%s}[5m])`, CadvisorContainerNameMatcher(containerName)),
Warning: monitoring.Alert().GreaterOrEqual(90),
Panel: monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Percentage).Interval(100).Max(100).Min(0),
Owner: owner,
NextSteps: strings.ReplaceAll(`
- **Kubernetes:** Consider increasing memory limit in relevant 'Deployment.yaml'.
- **Docker Compose:** Consider increasing 'memory:' of {{CONTAINER_NAME}} container in 'docker-compose.yml'.
`, "{{CONTAINER_NAME}}", containerName),
}
}
ContainerOOMKILLEvents sharedObservable = func(containerName string, owner monitoring.ObservableOwner) Observable {
return Observable{
Name: "container_oomkill_events_total",
Description: "container OOMKILL events total by instance",
Query: fmt.Sprintf(`max by (name) (container_oom_events_total{%s})`, CadvisorContainerNameMatcher(containerName)),
Warning: monitoring.Alert().GreaterOrEqual(1),
Panel: monitoring.Panel().LegendFormat("{{name}}"),
Owner: owner,
Interpretation: `
This value indicates the total number of times the container main process or child processes were terminated by OOM killer.
When it occurs frequently, it is an indicator of underprovisioning.
`,
NextSteps: strings.ReplaceAll(`
- **Kubernetes:** Consider increasing memory limit in relevant 'Deployment.yaml'.
- **Docker Compose:** Consider increasing 'memory:' of {{CONTAINER_NAME}} container in 'docker-compose.yml'.
`, "{{CONTAINER_NAME}}", containerName),
}
}
)
// ContainerProvisioningIndicatorsGroupOptions customizes the group created by
// NewProvisioningIndicatorsGroup: one ObservableOption per panel, plus an optional
// custom group title.
type ContainerProvisioningIndicatorsGroupOptions struct {
// LongTermCPUUsage transforms the default observable used to construct the long-term CPU usage panel.
LongTermCPUUsage ObservableOption
// LongTermMemoryUsage transforms the default observable used to construct the long-term memory usage panel.
LongTermMemoryUsage ObservableOption
// ShortTermCPUUsage transforms the default observable used to construct the short-term CPU usage panel.
ShortTermCPUUsage ObservableOption
// ShortTermMemoryUsage transforms the default observable used to construct the short-term memory usage panel.
ShortTermMemoryUsage ObservableOption
// OOMKILLEvents transforms the default observable used to construct the container OOMKILL events panel.
OOMKILLEvents ObservableOption
// CustomTitle, if provided, provides a custom title for this provisioning group that will be displayed in Grafana.
CustomTitle string
}
// NewProvisioningIndicatorsGroup builds the provisioning indicators group for the given
// container. The returned group has Hidden set and contains two rows: long-term CPU and
// memory usage, followed by short-term CPU and memory usage together with OOMKILL events.
//
// options may be nil, in which case a zero-value options struct is used and the title
// falls back to TitleResourceUtilization. A non-empty options.CustomTitle replaces
// that title.
func NewProvisioningIndicatorsGroup(containerName string, owner monitoring.ObservableOwner, options *ContainerProvisioningIndicatorsGroupOptions) monitoring.Group {
	// Work on a value so that a nil pointer and an empty struct are handled the same way.
	var opts ContainerProvisioningIndicatorsGroupOptions
	if options != nil {
		opts = *options
	}

	groupTitle := opts.CustomTitle
	if groupTitle == "" {
		groupTitle = TitleResourceUtilization
	}

	// Each default observable is passed through its per-panel option before being
	// converted for use in a row.
	longTermRow := monitoring.Row{
		opts.LongTermCPUUsage.safeApply(ProvisioningCPUUsageLongTerm(containerName, owner)).Observable(),
		opts.LongTermMemoryUsage.safeApply(ProvisioningMemoryUsageLongTerm(containerName, owner)).Observable(),
	}
	shortTermRow := monitoring.Row{
		opts.ShortTermCPUUsage.safeApply(ProvisioningCPUUsageShortTerm(containerName, owner)).Observable(),
		opts.ShortTermMemoryUsage.safeApply(ProvisioningMemoryUsageShortTerm(containerName, owner)).Observable(),
		opts.OOMKILLEvents.safeApply(ContainerOOMKILLEvents(containerName, owner)).Observable(),
	}

	return monitoring.Group{
		Title:  groupTitle,
		Hidden: true,
		Rows:   []monitoring.Row{longTermRow, shortTermRow},
	}
}