Skip to content

Commit 8904ec0

Browse files
chris-rockjpaodev
andauthored
🐛 preserve active jobs when cleaning up completed scan jobs (#1377)
* fix: preserve active jobs when cleaning up completed scan jobs Previously the controller would delete all jobs when updating CronJob specs, including active/running jobs. This could interrupt in-progress scans. The new DeleteCompletedJobs function only deletes jobs that have completed (succeeded or failed), preserving any active jobs. Includes tests for the new behavior. * 🧹 update license header * improve DeleteCompletedJobs robustness and test coverage - Handle NotFound errors on delete (race condition with TTL controller) - Change log level to V(1) to reduce noise during normal operation - Remove unused expectedDeleted field from tests - Add test for jobs with non-matching labels being preserved --------- Co-authored-by: jpaodev <hello@jpao.dev>
1 parent 187d055 commit 8904ec0

2 files changed

Lines changed: 286 additions & 1 deletion

File tree

pkg/utils/k8s/cron_job.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,16 @@
33

44
package k8s
55

6-
import batchv1 "k8s.io/api/batch/v1"
6+
import (
7+
"context"
8+
9+
"github.com/go-logr/logr"
10+
batchv1 "k8s.io/api/batch/v1"
11+
apierrors "k8s.io/apimachinery/pkg/api/errors"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/apimachinery/pkg/labels"
14+
"sigs.k8s.io/controller-runtime/pkg/client"
15+
)
716

817
// AreCronJobsSuccessful returns true if the latest runs of all of the provided CronJobs has been
918
// successful.
@@ -17,3 +26,39 @@ func AreCronJobsSuccessful(cs []batchv1.CronJob) bool {
1726
}
1827
return true
1928
}
29+
30+
// DeleteCompletedJobs deletes only completed or failed jobs matching the given labels.
31+
// Active/running jobs are preserved to avoid killing in-progress scans.
32+
func DeleteCompletedJobs(ctx context.Context, kubeClient client.Client, namespace string, jobLabels map[string]string, log logr.Logger) error {
33+
jobList := &batchv1.JobList{}
34+
listOpts := &client.ListOptions{
35+
Namespace: namespace,
36+
LabelSelector: labels.SelectorFromSet(jobLabels),
37+
}
38+
39+
if err := kubeClient.List(ctx, jobList, listOpts); err != nil {
40+
log.Error(err, "Failed to list Jobs", "namespace", namespace)
41+
return err
42+
}
43+
44+
for _, job := range jobList.Items {
45+
// Skip active jobs - only delete completed or failed ones
46+
if job.Status.Active > 0 {
47+
log.V(1).Info("Skipping deletion of active job", "namespace", job.Namespace, "name", job.Name)
48+
continue
49+
}
50+
51+
// Delete the job with foreground propagation to also delete its pods
52+
if err := kubeClient.Delete(ctx, &job, client.PropagationPolicy(metav1.DeletePropagationForeground)); err != nil {
53+
// Ignore NotFound errors - job may have been deleted by TTL controller or another reconcile
54+
if apierrors.IsNotFound(err) {
55+
continue
56+
}
57+
log.Error(err, "Failed to delete completed job", "namespace", job.Namespace, "name", job.Name)
58+
return err
59+
}
60+
log.V(1).Info("Deleted completed job", "namespace", job.Namespace, "name", job.Name)
61+
}
62+
63+
return nil
64+
}

pkg/utils/k8s/cron_job_test.go

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
// Copyright Mondoo, Inc. 2026
2+
// SPDX-License-Identifier: BUSL-1.1
3+
4+
package k8s
5+
6+
import (
7+
"context"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
12+
batchv1 "k8s.io/api/batch/v1"
13+
corev1 "k8s.io/api/core/v1"
14+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15+
"k8s.io/apimachinery/pkg/runtime"
16+
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
17+
ctrl "sigs.k8s.io/controller-runtime"
18+
"sigs.k8s.io/controller-runtime/pkg/client/fake"
19+
)
20+
21+
func TestAreCronJobsSuccessful(t *testing.T) {
22+
tests := []struct {
23+
name string
24+
cronJobs []batchv1.CronJob
25+
expected bool
26+
}{
27+
{
28+
name: "empty list is successful",
29+
cronJobs: []batchv1.CronJob{},
30+
expected: true,
31+
},
32+
{
33+
name: "active job is successful",
34+
cronJobs: []batchv1.CronJob{
35+
{
36+
Status: batchv1.CronJobStatus{
37+
Active: []corev1.ObjectReference{{Name: "job-1"}},
38+
},
39+
},
40+
},
41+
expected: true,
42+
},
43+
}
44+
45+
for _, tt := range tests {
46+
t.Run(tt.name, func(t *testing.T) {
47+
result := AreCronJobsSuccessful(tt.cronJobs)
48+
assert.Equal(t, tt.expected, result)
49+
})
50+
}
51+
}
52+
53+
func TestDeleteCompletedJobs(t *testing.T) {
54+
scheme := runtime.NewScheme()
55+
require.NoError(t, clientgoscheme.AddToScheme(scheme))
56+
57+
labels := map[string]string{"app": "test-scan", "mondoo_cr": "test"}
58+
namespace := "test-ns"
59+
60+
tests := []struct {
61+
name string
62+
existingJobs []batchv1.Job
63+
expectedRemaining int
64+
}{
65+
{
66+
name: "no jobs to delete",
67+
existingJobs: []batchv1.Job{},
68+
expectedRemaining: 0,
69+
},
70+
{
71+
name: "delete completed job",
72+
existingJobs: []batchv1.Job{
73+
{
74+
ObjectMeta: metav1.ObjectMeta{
75+
Name: "completed-job",
76+
Namespace: namespace,
77+
Labels: labels,
78+
},
79+
Status: batchv1.JobStatus{
80+
Active: 0,
81+
Succeeded: 1,
82+
},
83+
},
84+
},
85+
expectedRemaining: 0,
86+
},
87+
{
88+
name: "delete failed job",
89+
existingJobs: []batchv1.Job{
90+
{
91+
ObjectMeta: metav1.ObjectMeta{
92+
Name: "failed-job",
93+
Namespace: namespace,
94+
Labels: labels,
95+
},
96+
Status: batchv1.JobStatus{
97+
Active: 0,
98+
Failed: 1,
99+
},
100+
},
101+
},
102+
expectedRemaining: 0,
103+
},
104+
{
105+
name: "preserve active job",
106+
existingJobs: []batchv1.Job{
107+
{
108+
ObjectMeta: metav1.ObjectMeta{
109+
Name: "active-job",
110+
Namespace: namespace,
111+
Labels: labels,
112+
},
113+
Status: batchv1.JobStatus{
114+
Active: 1,
115+
},
116+
},
117+
},
118+
expectedRemaining: 1,
119+
},
120+
{
121+
name: "mixed jobs - delete completed, preserve active",
122+
existingJobs: []batchv1.Job{
123+
{
124+
ObjectMeta: metav1.ObjectMeta{
125+
Name: "active-job",
126+
Namespace: namespace,
127+
Labels: labels,
128+
},
129+
Status: batchv1.JobStatus{
130+
Active: 1,
131+
},
132+
},
133+
{
134+
ObjectMeta: metav1.ObjectMeta{
135+
Name: "completed-job",
136+
Namespace: namespace,
137+
Labels: labels,
138+
},
139+
Status: batchv1.JobStatus{
140+
Active: 0,
141+
Succeeded: 1,
142+
},
143+
},
144+
{
145+
ObjectMeta: metav1.ObjectMeta{
146+
Name: "failed-job",
147+
Namespace: namespace,
148+
Labels: labels,
149+
},
150+
Status: batchv1.JobStatus{
151+
Active: 0,
152+
Failed: 1,
153+
},
154+
},
155+
},
156+
expectedRemaining: 1,
157+
},
158+
}
159+
160+
for _, tt := range tests {
161+
t.Run(tt.name, func(t *testing.T) {
162+
// Build fake client with existing jobs
163+
objs := make([]runtime.Object, len(tt.existingJobs))
164+
for i := range tt.existingJobs {
165+
objs[i] = &tt.existingJobs[i]
166+
}
167+
fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...).Build()
168+
169+
log := ctrl.Log.WithName("test")
170+
171+
// Call DeleteCompletedJobs
172+
err := DeleteCompletedJobs(context.Background(), fakeClient, namespace, labels, log)
173+
require.NoError(t, err)
174+
175+
// Verify remaining jobs
176+
jobList := &batchv1.JobList{}
177+
err = fakeClient.List(context.Background(), jobList)
178+
require.NoError(t, err)
179+
180+
assert.Equal(t, tt.expectedRemaining, len(jobList.Items), "unexpected number of remaining jobs")
181+
182+
// Verify that remaining jobs are all active
183+
for _, job := range jobList.Items {
184+
assert.Greater(t, job.Status.Active, int32(0), "remaining job should be active: %s", job.Name)
185+
}
186+
})
187+
}
188+
}
189+
190+
func TestDeleteCompletedJobs_PreservesJobsWithDifferentLabels(t *testing.T) {
191+
scheme := runtime.NewScheme()
192+
require.NoError(t, clientgoscheme.AddToScheme(scheme))
193+
194+
targetLabels := map[string]string{"app": "test-scan", "mondoo_cr": "test"}
195+
otherLabels := map[string]string{"app": "other-scan", "mondoo_cr": "other"}
196+
namespace := "test-ns"
197+
198+
// Create jobs: one matching labels (completed), one with different labels (completed)
199+
matchingJob := batchv1.Job{
200+
ObjectMeta: metav1.ObjectMeta{
201+
Name: "matching-completed-job",
202+
Namespace: namespace,
203+
Labels: targetLabels,
204+
},
205+
Status: batchv1.JobStatus{
206+
Active: 0,
207+
Succeeded: 1,
208+
},
209+
}
210+
nonMatchingJob := batchv1.Job{
211+
ObjectMeta: metav1.ObjectMeta{
212+
Name: "other-completed-job",
213+
Namespace: namespace,
214+
Labels: otherLabels,
215+
},
216+
Status: batchv1.JobStatus{
217+
Active: 0,
218+
Succeeded: 1,
219+
},
220+
}
221+
222+
fakeClient := fake.NewClientBuilder().
223+
WithScheme(scheme).
224+
WithRuntimeObjects(&matchingJob, &nonMatchingJob).
225+
Build()
226+
227+
log := ctrl.Log.WithName("test")
228+
229+
// Delete jobs with targetLabels
230+
err := DeleteCompletedJobs(context.Background(), fakeClient, namespace, targetLabels, log)
231+
require.NoError(t, err)
232+
233+
// Verify only matching job was deleted
234+
jobList := &batchv1.JobList{}
235+
err = fakeClient.List(context.Background(), jobList)
236+
require.NoError(t, err)
237+
238+
require.Len(t, jobList.Items, 1, "should have exactly one job remaining")
239+
assert.Equal(t, "other-completed-job", jobList.Items[0].Name, "non-matching job should be preserved")
240+
}

0 commit comments

Comments
 (0)