Skip to content

Commit 29fc06f

Browse files
authored
Merge pull request #1027 from k8up-io/fix/false-positive-backups
Fail if the backup command in the annotation exits != 0
2 parents 88f8de7 + 194e331 commit 29fc06f

File tree

10 files changed

+116
-8
lines changed

10 files changed

+116
-8
lines changed

cmd/operator/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ var (
4141
&cli.StringFlag{Destination: &cfg.Config.FileExtensionAnnotation, Name: "fileextensionannotation", EnvVars: []string{"BACKUP_FILEEXTENSIONANNOTATION"}, Value: "k8up.io/file-extension", Usage: "set the annotation name where the file extension is stored for backup commands"},
4242

4343
&cli.IntFlag{Destination: &cfg.Config.GlobalKeepJobs, Hidden: true, Name: "globalkeepjobs", EnvVars: []string{"BACKUP_GLOBALKEEPJOBS"}, Value: -1, DefaultText: "unlimited", Usage: "set the number of old jobs to keep when cleaning up, applies to all job types"},
44+
&cli.IntFlag{Destination: &cfg.Config.GlobalBackoffLimit, Name: "global-backoff-limit", EnvVars: []string{"BACKUP_GLOBAL_BACKOFF_LIMIT"}, Value: 6, Usage: "set the backoff limit for all backup jobs"},
4445
&cli.IntFlag{Destination: &cfg.Config.GlobalFailedJobsHistoryLimit, Name: "global-failed-jobs-history-limit", EnvVars: []string{"BACKUP_GLOBAL_FAILED_JOBS_HISTORY_LIMIT"}, Value: 3, Usage: "set the number of old, failed jobs to keep when cleaning up, applies to all job types"},
4546
&cli.IntFlag{Destination: &cfg.Config.GlobalSuccessfulJobsHistoryLimit, Name: "global-successful-jobs-history-limit", EnvVars: []string{"BACKUP_GLOBAL_SUCCESSFUL_JOBS_HISTORY_LIMIT"}, Value: 3, Usage: "set the number of old, successful jobs to keep when cleaning up, applies to all job types"},
4647
&cli.IntFlag{Destination: &cfg.Config.GlobalConcurrentArchiveJobsLimit, Name: "global-concurrent-archive-jobs-limit", EnvVars: []string{"BACKUP_GLOBAL_CONCURRENT_ARCHIVE_JOBS_LIMIT"}, DefaultText: "unlimited", Usage: "set the limit of concurrent archive jobs"},
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
---
2+
apiVersion: apps/v1
3+
kind: Deployment
4+
metadata:
5+
name: annotated-subject-deployment
6+
namespace: k8up-e2e-subject
7+
spec:
8+
replicas: 1
9+
selector:
10+
matchLabels:
11+
app: subject
12+
template:
13+
metadata:
14+
labels:
15+
app: subject
16+
annotations:
17+
k8up.io/backupcommand: 'invalid'
18+
k8up.io/backupcommand-container: subject-container
19+
spec:
20+
containers:
21+
- image: busybox
22+
imagePullPolicy: IfNotPresent
23+
name: dummy-container-blocking-first-position
24+
command:
25+
- "/bin/sh"
26+
- "-c"
27+
- "sleep infinity"
28+
- name: subject-container
29+
image: quay.io/prometheus/busybox:latest
30+
imagePullPolicy: IfNotPresent
31+
args:
32+
- sh
33+
- -c
34+
- |
35+
sleep infinity
36+
securityContext:
37+
runAsUser: $ID
38+
volumeMounts:
39+
- name: volume
40+
mountPath: /data
41+
env:
42+
- name: BACKUP_FILE_CONTENT
43+
value: ""
44+
- name: BACKUP_FILE_NAME
45+
value: ""
46+
volumes:
47+
- name: volume
48+
persistentVolumeClaim:
49+
claimName: subject-pvc

e2e/definitions/operator/deploy.yaml

Lines changed: 0 additions & 4 deletions
This file was deleted.

e2e/definitions/operator/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ k8up:
99
envVars:
1010
- name: K8UP_DEBUG
1111
value: "true"
12+
- name: BACKUP_GLOBAL_BACKOFF_LIMIT
13+
value: "2"

e2e/lib/k8up.bash

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,15 @@ given_an_annotated_subject() {
170170
echo "✅ The annotated subject is ready"
171171
}
172172

173+
given_a_broken_annotated_subject() {
174+
require_args 2 ${#}
175+
176+
kubectl apply -f definitions/pv/pvc.yaml
177+
yq e '.spec.template.spec.containers[1].securityContext.runAsUser='$(id -u)' ' definitions/annotated-subject/deployment-error.yaml | kubectl apply -f -
178+
179+
echo "✅ The annotated subject is ready"
180+
}
181+
173182
given_an_annotated_subject_pod() {
174183
require_args 2 ${#}
175184

@@ -457,6 +466,20 @@ wait_until() {
457466
kubectl -n "${ns}" wait --timeout 5m --for "condition=${condition}" "${object}"
458467
}
459468

469+
wait_for_until_jsonpath() {
470+
require_args 3 ${#}
471+
472+
local object condition ns
473+
object=${1}
474+
until=${2}
475+
jsonpath=${3}
476+
ns=${NAMESPACE=${DETIK_CLIENT_NAMESPACE}}
477+
478+
echo "Waiting for '${object}' in namespace '${ns}' to become '${condition}' ..."
479+
kubectl -n "${ns}" wait --timeout "${until}" --for "${jsonpath}" "${object}"
480+
}
481+
482+
460483
expect_file_in_container() {
461484
require_args 4 ${#}
462485

e2e/test-12-annotated-failure.bats

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/usr/bin/env bats
2+
3+
load "lib/utils"
4+
load "lib/detik"
5+
load "lib/k8up"
6+
7+
# shellcheck disable=SC2034
8+
DETIK_CLIENT_NAME="kubectl"
9+
# shellcheck disable=SC2034
10+
DETIK_CLIENT_NAMESPACE="k8up-e2e-subject"
11+
# shellcheck disable=SC2034
12+
DEBUG_DETIK="true"
13+
14+
@test "Annotated app, When creating a backup, Then expect Error" {
15+
expected_content="expected content: $(timestamp)"
16+
expected_filename="expected_filename.txt"
17+
18+
given_a_running_operator
19+
given_a_clean_ns
20+
given_s3_storage
21+
given_a_broken_annotated_subject "${expected_filename}" "${expected_content}"
22+
23+
kubectl apply -f definitions/secrets
24+
yq e '.spec.podSecurityContext.runAsUser='$(id -u)'' definitions/backup/backup.yaml | kubectl apply -f -
25+
26+
try "at most 10 times every 5s to get backup named 'k8up-backup' and verify that '.status.started' is 'true'"
27+
verify_object_value_by_label job 'k8up.io/owned-by=backup_k8up-backup' '.status.active' 1 true
28+
29+
wait_for_until_jsonpath backup/k8up-backup 2m 'jsonpath={.status.conditions[?(@.type=="Completed")].reason}=Failed'
30+
31+
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module github.com/k8up-io/k8up/v2
22

3-
go 1.21
3+
go 1.23
44

55
require (
66
github.com/firepear/qsplit/v2 v2.5.0

operator/backupcontroller/executor.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
corev1 "k8s.io/api/core/v1"
1919
"k8s.io/apimachinery/pkg/labels"
2020
"k8s.io/apimachinery/pkg/types"
21+
"k8s.io/utils/ptr"
2122
controllerruntime "sigs.k8s.io/controller-runtime"
2223
"sigs.k8s.io/controller-runtime/pkg/client"
2324
)
@@ -236,7 +237,7 @@ func (b *BackupExecutor) startBackup(ctx context.Context) error {
236237
}
237238

238239
index := 0
239-
for _, batchJob := range backupJobs {
240+
for name, batchJob := range backupJobs {
240241
_, err = controllerruntime.CreateOrUpdate(ctx, b.Generic.Config.Client, batchJob.job, func() error {
241242
mutateErr := job.MutateBatchJob(ctx, batchJob.job, b.backup, b.Generic.Config, b.Client)
242243
if mutateErr != nil {
@@ -262,16 +263,17 @@ func (b *BackupExecutor) startBackup(ctx context.Context) error {
262263
}
263264
// each job sleeps for index seconds to avoid concurrent restic repository creation. Not the prettiest way but it works and a repository
264265
// is created only once usually.
265-
if index > 0 {
266+
if name == "prebackup" || index != 0 {
266267
batchJob.job.Spec.Template.Spec.Containers[0].Env = append(batchJob.job.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{
267268
Name: "SLEEP_DURATION",
268-
Value: (time.Duration(index) * time.Second).String(),
269+
Value: (3 * time.Second).String(),
269270
})
270271
}
271272
b.backup.Spec.AppendEnvFromToContainer(&batchJob.job.Spec.Template.Spec.Containers[0])
272273
batchJob.job.Spec.Template.Spec.Volumes = append(batchJob.job.Spec.Template.Spec.Volumes, batchJob.volumes...)
273274
batchJob.job.Spec.Template.Spec.Volumes = append(batchJob.job.Spec.Template.Spec.Volumes, utils.AttachTLSVolumes(b.backup.Spec.Volumes)...)
274275
batchJob.job.Spec.Template.Spec.Containers[0].VolumeMounts = append(b.newVolumeMounts(batchJob.volumes), b.attachTLSVolumeMounts()...)
276+
batchJob.job.Spec.BackoffLimit = ptr.To(int32(cfg.Config.GlobalBackoffLimit))
275277

276278
batchJob.job.Spec.Template.Spec.Containers[0].Args = b.setupArgs()
277279

operator/cfg/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ type Configuration struct {
5151
GlobalKeepJobs int
5252
GlobalFailedJobsHistoryLimit int
5353
GlobalSuccessfulJobsHistoryLimit int
54+
GlobalBackoffLimit int
5455
GlobalRepoPassword string
5556
GlobalRestoreS3AccessKey string
5657
GlobalRestoreS3Bucket string

restic/kubernetes/pod_exec.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"io"
7+
"os"
78
"strings"
89

910
"github.com/firepear/qsplit/v2"
@@ -73,6 +74,8 @@ func PodExec(pod BackupPod, log logr.Logger) (*ExecData, error) {
7374

7475
if err != nil {
7576
execLogger.Error(err, "streaming data failed", "namespace", pod.Namespace, "pod", pod.PodName)
77+
// we just completely hard fail the whole backup pod
78+
os.Exit(1)
7679
return
7780
}
7881
}()

0 commit comments

Comments
 (0)