Skip to content

Commit 8259711

Browse files
fix(health): app missing health only when all resources are missing (#23995) (#25962)
Signed-off-by: Alexandre Gaudreault <alexandre_gaudreault@intuit.com> Co-authored-by: Peter Jiang <35584807+pjiang-dev@users.noreply.github.com>
1 parent fded82a commit 8259711

7 files changed

Lines changed: 156 additions & 43 deletions

File tree

controller/health.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import (
88
"github.com/argoproj/gitops-engine/pkg/sync/ignore"
99
kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube"
1010
log "github.com/sirupsen/logrus"
11-
"k8s.io/apimachinery/pkg/runtime/schema"
1211

1312
"github.com/argoproj/argo-cd/v3/common"
1413
"github.com/argoproj/argo-cd/v3/pkg/apis/application"
@@ -21,27 +20,35 @@ import (
2120
func setApplicationHealth(resources []managedResource, statuses []appv1.ResourceStatus, resourceOverrides map[string]appv1.ResourceOverride, app *appv1.Application, persistResourceHealth bool) (health.HealthStatusCode, error) {
2221
var savedErr error
2322
var errCount uint
23+
var containsResources, containsLiveResources bool
2424

2525
appHealthStatus := health.HealthStatusHealthy
2626
for i, res := range resources {
2727
if res.Target != nil && hookutil.Skip(res.Target) {
2828
continue
2929
}
30-
if res.Live != nil && res.Live.GetAnnotations() != nil && res.Live.GetAnnotations()[common.AnnotationIgnoreHealthCheck] == "true" {
30+
if res.Live != nil && (hookutil.IsHook(res.Live) || ignore.Ignore(res.Live)) {
3131
continue
3232
}
33-
if res.Live != nil && (hookutil.IsHook(res.Live) || ignore.Ignore(res.Live)) {
33+
34+
// Contains actual resources that are not hooks
35+
containsResources = true
36+
if res.Live != nil {
37+
containsLiveResources = true
38+
}
39+
40+
// Do not aggregate the health of the resource if the annotation to ignore health check is set to true
41+
if res.Live != nil && res.Live.GetAnnotations() != nil && res.Live.GetAnnotations()[common.AnnotationIgnoreHealthCheck] == "true" {
3442
continue
3543
}
3644

3745
var healthStatus *health.HealthStatus
3846
var err error
3947
healthOverrides := lua.ResourceHealthOverrides(resourceOverrides)
40-
gvk := schema.GroupVersionKind{Group: res.Group, Version: res.Version, Kind: res.Kind}
4148
if res.Live == nil {
4249
healthStatus = &health.HealthStatus{Status: health.HealthStatusMissing}
4350
} else {
44-
// App the manages itself should not affect own health
51+
// App that manages itself should not affect own health
4552
if isSelfReferencedApp(app, kubeutil.GetObjectRef(res.Live)) {
4653
continue
4754
}
@@ -65,8 +72,8 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
6572
statuses[i].Health = nil
6673
}
6774

68-
// Is health status is missing but resource has not built-in/custom health check then it should not affect parent app health
69-
if _, hasOverride := healthOverrides[lua.GetConfigMapKey(gvk)]; healthStatus.Status == health.HealthStatusMissing && !hasOverride && health.GetHealthCheckFunc(gvk) == nil {
75+
// Missing resources should not affect parent app health - the OutOfSync status already indicates resources are missing
76+
if res.Live == nil && healthStatus.Status == health.HealthStatusMissing {
7077
continue
7178
}
7279

@@ -79,6 +86,12 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
7986
appHealthStatus = healthStatus.Status
8087
}
8188
}
89+
90+
// If the app is expected to have resources but does not contain any live resources, set the app health to missing
91+
if containsResources && !containsLiveResources && health.IsWorse(appHealthStatus, health.HealthStatusMissing) {
92+
appHealthStatus = health.HealthStatusMissing
93+
}
94+
8295
if persistResourceHealth {
8396
app.Status.ResourceHealthSource = appv1.ResourceHealthLocationInline
8497
} else {

controller/health_test.go

Lines changed: 109 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"k8s.io/apimachinery/pkg/runtime/schema"
1717
"sigs.k8s.io/yaml"
1818

19+
"github.com/argoproj/argo-cd/v3/common"
1920
"github.com/argoproj/argo-cd/v3/pkg/apis/application"
2021
appv1 "github.com/argoproj/argo-cd/v3/pkg/apis/application/v1alpha1"
2122
"github.com/argoproj/argo-cd/v3/util/lua"
@@ -103,12 +104,103 @@ func TestSetApplicationHealth_ResourceHealthNotPersisted(t *testing.T) {
103104
assert.Nil(t, resourceStatuses[0].Health)
104105
}
105106

107+
func TestSetApplicationHealth_NoResource(t *testing.T) {
108+
resources := []managedResource{}
109+
resourceStatuses := initStatuses(resources)
110+
111+
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
112+
require.NoError(t, err)
113+
assert.Equal(t, health.HealthStatusHealthy, healthStatus)
114+
}
115+
116+
func TestSetApplicationHealth_OnlyHooks(t *testing.T) {
117+
pod := resourceFromFile("./testdata/pod-running-restart-always.yaml")
118+
pod.SetAnnotations(map[string]string{synccommon.AnnotationKeyHook: string(synccommon.HookTypeSync)})
119+
120+
resources := []managedResource{{
121+
Group: "", Version: "v1", Kind: "Pod", Target: &pod, Live: &pod,
122+
}}
123+
resourceStatuses := initStatuses(resources)
124+
125+
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
126+
require.NoError(t, err)
127+
assert.Equal(t, health.HealthStatusHealthy, healthStatus)
128+
}
129+
106130
func TestSetApplicationHealth_MissingResource(t *testing.T) {
107131
pod := resourceFromFile("./testdata/pod-running-restart-always.yaml")
132+
pod2 := pod.DeepCopy()
133+
pod2.SetName("pod2")
134+
135+
resources := []managedResource{
136+
{Group: "", Version: "v1", Kind: "Pod", Target: &pod},
137+
{Group: "", Version: "v1", Kind: "Pod", Target: pod2, Live: pod2},
138+
}
139+
resourceStatuses := initStatuses(resources)
140+
141+
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
142+
require.NoError(t, err)
143+
assert.Equal(t, health.HealthStatusHealthy, healthStatus)
144+
}
145+
146+
func TestSetApplicationHealth_MissingResource_WithIgnoreHealthcheck(t *testing.T) {
147+
pod := resourceFromFile("./testdata/pod-running-restart-always.yaml")
148+
pod2 := pod.DeepCopy()
149+
pod2.SetName("pod2")
150+
pod2.SetAnnotations(map[string]string{common.AnnotationIgnoreHealthCheck: "true"})
151+
152+
resources := []managedResource{
153+
{Group: "", Version: "v1", Kind: "Pod", Target: &pod},
154+
{Group: "", Version: "v1", Kind: "Pod", Target: pod2, Live: pod2},
155+
}
156+
resourceStatuses := initStatuses(resources)
157+
158+
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
159+
require.NoError(t, err)
160+
assert.Equal(t, health.HealthStatusHealthy, healthStatus)
161+
}
162+
163+
func TestSetApplicationHealth_MissingResource_WithChildApp(t *testing.T) {
164+
childApp := newAppLiveObj(health.HealthStatusUnknown)
165+
pod := resourceFromFile("./testdata/pod-running-restart-always.yaml")
166+
resources := []managedResource{
167+
{Group: application.Group, Version: "v1alpha1", Kind: application.ApplicationKind, Target: childApp, Live: childApp},
168+
{Group: "", Version: "v1", Kind: "Pod", Target: &pod},
169+
}
170+
resourceStatuses := initStatuses(resources)
171+
172+
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
173+
require.NoError(t, err)
174+
assert.Equal(t, health.HealthStatusHealthy, healthStatus)
175+
}
176+
177+
func TestSetApplicationHealth_AllMissingResources(t *testing.T) {
178+
pod := resourceFromFile("./testdata/pod-running-restart-always.yaml")
179+
pod2 := pod.DeepCopy()
180+
pod2.SetName("pod2")
181+
182+
resources := []managedResource{
183+
{Group: "", Version: "v1", Kind: "Pod", Target: &pod},
184+
{Group: "", Version: "v1", Kind: "Pod", Target: pod2},
185+
}
186+
resourceStatuses := initStatuses(resources)
187+
188+
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
189+
require.NoError(t, err)
190+
assert.Equal(t, health.HealthStatusMissing, healthStatus)
191+
}
192+
193+
func TestSetApplicationHealth_AllMissingResources_WithHooks(t *testing.T) {
194+
pod := resourceFromFile("./testdata/pod-running-restart-always.yaml")
195+
pod2 := pod.DeepCopy()
196+
pod2.SetName("pod2")
197+
pod2.SetAnnotations(map[string]string{synccommon.AnnotationKeyHook: string(synccommon.HookTypeSync)})
108198

109199
resources := []managedResource{{
110200
Group: "", Version: "v1", Kind: "Pod", Target: &pod,
111-
}, {}}
201+
}, {
202+
Group: "", Version: "v1", Kind: "Pod", Target: pod2, Live: pod2,
203+
}}
112204
resourceStatuses := initStatuses(resources)
113205

114206
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
@@ -149,32 +241,6 @@ func TestSetApplicationHealth_HealthImproves(t *testing.T) {
149241
}
150242
}
151243

152-
func TestSetApplicationHealth_MissingResourceNoBuiltHealthCheck(t *testing.T) {
153-
cm := resourceFromFile("./testdata/configmap.yaml")
154-
155-
resources := []managedResource{{
156-
Group: "", Version: "v1", Kind: "ConfigMap", Target: &cm,
157-
}}
158-
resourceStatuses := initStatuses(resources)
159-
160-
t.Run("NoOverride", func(t *testing.T) {
161-
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{}, app, true)
162-
require.NoError(t, err)
163-
assert.Equal(t, health.HealthStatusHealthy, healthStatus)
164-
assert.Equal(t, health.HealthStatusMissing, resourceStatuses[0].Health.Status)
165-
})
166-
167-
t.Run("HasOverride", func(t *testing.T) {
168-
healthStatus, err := setApplicationHealth(resources, resourceStatuses, lua.ResourceHealthOverrides{
169-
lua.GetConfigMapKey(schema.GroupVersionKind{Version: "v1", Kind: "ConfigMap"}): appv1.ResourceOverride{
170-
HealthLua: "some health check",
171-
},
172-
}, app, true)
173-
require.NoError(t, err)
174-
assert.Equal(t, health.HealthStatusMissing, healthStatus)
175-
})
176-
}
177-
178244
func newAppLiveObj(status health.HealthStatusCode) *unstructured.Unstructured {
179245
app := appv1.Application{
180246
ObjectMeta: metav1.ObjectMeta{
@@ -214,9 +280,9 @@ return hs`,
214280
}
215281

216282
t.Run("ChildAppDegraded", func(t *testing.T) {
217-
degradedApp := newAppLiveObj(health.HealthStatusDegraded)
283+
childApp := newAppLiveObj(health.HealthStatusDegraded)
218284
resources := []managedResource{{
219-
Group: application.Group, Version: "v1alpha1", Kind: application.ApplicationKind, Live: degradedApp,
285+
Group: application.Group, Version: "v1alpha1", Kind: application.ApplicationKind, Live: childApp,
220286
}, {}}
221287
resourceStatuses := initStatuses(resources)
222288

@@ -226,9 +292,21 @@ return hs`,
226292
})
227293

228294
t.Run("ChildAppMissing", func(t *testing.T) {
229-
degradedApp := newAppLiveObj(health.HealthStatusMissing)
295+
childApp := newAppLiveObj(health.HealthStatusMissing)
296+
resources := []managedResource{{
297+
Group: application.Group, Version: "v1alpha1", Kind: application.ApplicationKind, Live: childApp,
298+
}, {}}
299+
resourceStatuses := initStatuses(resources)
300+
301+
healthStatus, err := setApplicationHealth(resources, resourceStatuses, overrides, app, true)
302+
require.NoError(t, err)
303+
assert.Equal(t, health.HealthStatusHealthy, healthStatus)
304+
})
305+
306+
t.Run("ChildAppUnknown", func(t *testing.T) {
307+
childApp := newAppLiveObj(health.HealthStatusUnknown)
230308
resources := []managedResource{{
231-
Group: application.Group, Version: "v1alpha1", Kind: application.ApplicationKind, Live: degradedApp,
309+
Group: application.Group, Version: "v1alpha1", Kind: application.ApplicationKind, Live: childApp,
232310
}, {}}
233311
resourceStatuses := initStatuses(resources)
234312

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# v3.3 to 3.4
2+
3+
## Applications with `Missing` health status
4+
5+
The behavior of Application health status has changed to be more consistent and informative. Previously, Applications would show `Missing` health status inconsistently depending on whether missing resources had built-in or custom health checks defined.
6+
7+
**New behavior:**
8+
9+
- Applications now show `Missing` health **only when ALL resources are missing** (e.g., before the first sync)
10+
- Individual missing resources no longer affect the Application's overall health status
11+
- The health status now reflects the aggregated health of **existing resources**
12+
- The `OutOfSync` status already indicates when resources are missing, making the health status redundant for individual missing resources
13+
- If the defined resource health check is explicitly returning `Missing` for an existing resource, that will still be reflected in the overall Application health
14+
15+
**Impact:**
16+
17+
- Applications with some missing resources will now show the health of their existing resources (e.g., `Healthy`, `Progressing`, `Degraded`) instead of `Missing`
18+
- Automation relying on the Application Health status to detect missing resources should now check the Sync status for `OutOfSync` instead, and optionally inspect individual resource health if needed.
19+
- Users can now distinguish between an Application that has never been synced (all resources missing = `Missing` health) vs. an Application with some resources deleted (shows health of remaining resources)

docs/operator-manual/upgrading/overview.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ kubectl apply -n argocd --server-side --force-conflicts -f https://raw.githubuse
3939
4040
<hr/>
4141

42+
- [v3.3 to v3.4](./3.3-3.4.md)
4243
- [v3.2 to v3.3](./3.2-3.3.md)
4344
- [v3.1 to v3.2](./3.1-3.2.md)
4445
- [v3.0 to v3.1](./3.0-3.1.md)

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ nav:
135135
- operator-manual/server-commands/additional-configuration-method.md
136136
- Upgrading:
137137
- operator-manual/upgrading/overview.md
138+
- operator-manual/upgrading/3.3-3.4.md
138139
- operator-manual/upgrading/3.2-3.3.md
139140
- operator-manual/upgrading/3.1-3.2.md
140141
- operator-manual/upgrading/3.0-3.1.md

test/e2e/app_management_ns_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,24 +291,24 @@ func TestNamespacedAppCreationWithoutForceUpdate(t *testing.T) {
291291

292292
func TestNamespacedDeleteAppResource(t *testing.T) {
293293
ctx := Given(t)
294-
295-
ctx.
296-
Path(guestbookPath).
294+
ctx.Path(guestbookPath).
297295
SetTrackingMethod("annotation").
298296
SetAppNamespace(fixture.AppNamespace()).
299297
When().
300298
CreateApp().
301299
Sync().
302300
Then().
303301
Expect(SyncStatusIs(SyncStatusCodeSynced)).
302+
Expect(HealthIs(health.HealthStatusHealthy)).
304303
And(func(_ *Application) {
305304
// app should be listed
306305
if _, err := fixture.RunCli("app", "delete-resource", ctx.AppQualifiedName(), "--kind", "Service", "--resource-name", "guestbook-ui"); err != nil {
307306
require.NoError(t, err)
308307
}
309308
}).
310309
Expect(SyncStatusIs(SyncStatusCodeOutOfSync)).
311-
Expect(HealthIs(health.HealthStatusMissing))
310+
Expect(HealthIs(health.HealthStatusHealthy)).
311+
Expect(ResourceHealthIs("Service", "guestbook-ui", health.HealthStatusMissing))
312312
}
313313

314314
// demonstrate that we cannot use a standard sync when an immutable field is changed, we must use "force"

test/e2e/app_management_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,8 @@ func TestDeleteAppResource(t *testing.T) {
487487
}
488488
}).
489489
Expect(SyncStatusIs(SyncStatusCodeOutOfSync)).
490-
Expect(HealthIs(health.HealthStatusMissing))
490+
Expect(HealthIs(health.HealthStatusHealthy)).
491+
Expect(ResourceHealthIs("Service", "guestbook-ui", health.HealthStatusMissing))
491492
}
492493

493494
// Fix for issue #2677, support PATCH in HTTP service

0 commit comments

Comments
 (0)