Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ae2c346
Update charts to provide optional operator metrics
jbiers Jan 15, 2025
a8f216f
Update BRO code to optionally export Backup metrics
jbiers Jan 17, 2025
34e656b
Make metrics conditional to metricsServerEnabled
jbiers Jan 17, 2025
872de73
Set debug back to false
jbiers Jan 22, 2025
918a6a8
Refactor metrics and create separate loop for metadata metrics
jbiers Feb 6, 2025
883c1bf
Fix wrong logrus.Error reference
jbiers Feb 6, 2025
a18241c
Fix logs
jbiers Feb 6, 2025
69a3026
Fix formatting
jbiers Feb 6, 2025
9d1a835
Add function for collecting time-related metrics and some fixes
jbiers Feb 6, 2025
7823810
Add function for emitting metrics on processed backups
jbiers Feb 6, 2025
2cdff68
Move metrics collection and bump histogram buckets
jbiers Feb 7, 2025
1172069
Rebase with main and fix go.sum
jbiers Feb 7, 2025
13ad4b1
Add values to hull tests
jbiers Feb 7, 2025
734ed78
Improve hull tests
jbiers Feb 7, 2025
958ad20
Add relabelings to hull tests
jbiers Feb 7, 2025
166853e
More hull fixes
jbiers Feb 7, 2025
f3922a6
Backup metrics testing
jbiers Feb 7, 2025
0bb6c98
Restore metrics testing
jbiers Feb 11, 2025
c732b03
Use early-return in settings backups_failed metric
jbiers Feb 11, 2025
dd28984
Make histogram buckets less granular
jbiers Feb 11, 2025
cb7ab1b
Add very basic unit tests for metrics that are complex to predict in e2e
mallardduck Feb 11, 2025
5cf1221
Merge pull request #1 from mallardduck/prom-metrics-unit
jbiers Feb 11, 2025
8d6b866
Add recurring backup unit test for time-sensitive metrics
jbiers Feb 11, 2025
cfa6b13
Fixes from feedback
jbiers Feb 12, 2025
63a912c
Update tests to match new buckets
jbiers Feb 12, 2025
fc2604f
Replace unnecessary Sprintf functions
jbiers Feb 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions charts/rancher-backup/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ spec:
annotations:
checksum/s3: {{ include (print $.Template.BasePath "/s3-secret.yaml") . | sha256sum }}
checksum/pvc: {{ include (print $.Template.BasePath "/pvc.yaml") . | sha256sum }}
{{- if .Values.monitoring.metrics.enabled }}
prometheus.io/port: "metrics"
prometheus.io/scrape: "true"
{{ end }}
spec:
serviceAccountName: {{ include "backupRestore.serviceAccountName" . }}
{{- if .Values.imagePullSecrets }}
Expand All @@ -32,6 +36,8 @@ spec:
- name: {{ .Chart.Name }}
image: {{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}
imagePullPolicy: {{ default "Always" .Values.imagePullPolicy }}
ports:
- containerPort: 8080
args:
{{- if .Values.debug }}
- "--debug"
Expand All @@ -54,6 +60,10 @@ spec:
- name: NO_PROXY
value: {{ .Values.noProxy }}
{{- end }}
{{- if .Values.monitoring.metrics.enabled }}
- name: METRICS_SERVER
value: "true"
{{ end }}
{{- if .Values.persistence.enabled }}
- name: DEFAULT_PERSISTENCE_ENABLED
value: "persistence-enabled"
Expand Down
2 changes: 2 additions & 0 deletions charts/rancher-backup/templates/hardened.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ metadata:
namespace: {{ .Release.Namespace }}
spec:
podSelector: {}
ingress:
- {}
egress:
- {}
policyTypes:
Expand Down
30 changes: 30 additions & 0 deletions charts/rancher-backup/templates/service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{{ if and (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1") .Values.monitoring.serviceMonitor.enabled }}
# ServiceMonitor for the operator's metrics endpoint.
# Rendered only when the cluster serves the monitoring.coreos.com/v1 API and
# monitoring.serviceMonitor.enabled is set in the chart values.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "backupRestore.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "backupRestore.labels" . | nindent 4 }}
    {{- with .Values.monitoring.serviceMonitor.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  # Select Services that carry the chart's standard label set.
  selector:
    matchLabels:
      {{- include "backupRestore.labels" . | nindent 6 }}
  endpoints:
  - port: http
    path: /metrics
    {{- with .Values.monitoring.serviceMonitor.metricRelabelings }}
    metricRelabelings:
      {{- toYaml . | nindent 6 }}
    {{- end }}
    {{- with .Values.monitoring.serviceMonitor.relabelings }}
    # Indented the same way as metricRelabelings so the list always nests
    # under this endpoint entry.
    relabelings:
      {{- toYaml . | nindent 6 }}
    {{- end }}
  # Only look for matching Services in the release namespace.
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace }}
{{- end }}
24 changes: 24 additions & 0 deletions charts/rancher-backup/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{{ if .Values.monitoring.metrics.enabled }}
# Headless Service (clusterIP: None) exposing the operator's metrics port.
# Rendered only when monitoring.metrics.enabled is set in the chart values.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "backupRestore.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "backupRestore.labels" . | nindent 4 }}
  annotations:
    prometheus.io/path: /metrics
    # Must be the bare port number as a string; it matches the port below.
    prometheus.io/port: "8080"
    prometheus.io/scrape: "true"
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - port: 8080
    targetPort: 8080
    protocol: TCP
    name: http
  # NOTE(review): both selector labels are set to the release name; confirm the
  # pod template's app.kubernetes.io/name label really equals the release name,
  # otherwise this Service selects no pods.
  selector:
    app.kubernetes.io/name: {{ .Release.Name }}
    app.kubernetes.io/instance: {{ .Release.Name }}
{{ end }}
11 changes: 11 additions & 0 deletions charts/rancher-backup/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,14 @@ imagePullPolicy: "Always"
## Optional array of imagePullSecrets containing private registry credentials
## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []

## Monitoring settings for the operator
monitoring:
## When enabled, the chart sets the METRICS_SERVER env var on the operator
## container and renders the metrics Service (port 8080, path /metrics)
metrics:
enabled: false

## When enabled, and the cluster serves the monitoring.coreos.com/v1 API,
## the chart renders a ServiceMonitor in the release namespace
serviceMonitor:
enabled: false

## Extra labels added to the ServiceMonitor metadata
additionalLabels: {}
## Rendered as-is into the ServiceMonitor endpoint's metricRelabelings
metricRelabelings: []
## Rendered as-is into the ServiceMonitor endpoint's relabelings
relabelings: []
5 changes: 5 additions & 0 deletions cmd/operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ var (
LocalBackupStorageLocation = "/var/lib/backups" // local within the pod, this is the mountPath for PVC
KubeConfig string
OperatorPVEnabled string
MetricsServerEnabled string
OperatorS3BackupStorageLocation string
ChartNamespace string
Debug bool
Expand All @@ -36,6 +37,7 @@ func init() {
OperatorPVEnabled = os.Getenv("DEFAULT_PERSISTENCE_ENABLED")
OperatorS3BackupStorageLocation = os.Getenv("DEFAULT_S3_BACKUP_STORAGE_LOCATION")
ChartNamespace = os.Getenv("CHART_NAMESPACE")
MetricsServerEnabled = os.Getenv("METRICS_SERVER")
}

func main() {
Expand All @@ -60,6 +62,9 @@ func main() {
backuputil.SetDevMode(dm != "")
runOptions := operator.RunOptions{
OperatorPVCEnabled: OperatorPVEnabled != "",
MetricsServerEnabled: MetricsServerEnabled != "",
MetricsPort: 8080,
MetricsIntervalSeconds: 60,
OperatorS3BackupStorageLocation: OperatorS3BackupStorageLocation,
ChartNamespace: ChartNamespace,
LocalDriverPath: "",
Expand Down
Loading