fix: manual ExpectedResources take precedence over HealthCheckAsserts

xdu31 · xdu31 · commit 54c326dbf7bc · 2026-02-24T15:10:12.000-08:00
diff --git a/pkg/validator/checks/deployment/expected_resources_check.go b/pkg/validator/checks/deployment/expected_resources_check.go
@@ -56,7 +56,10 @@ func validateExpectedResources(ctx *checks.ValidationContext) error {
 	var failures []string
 
 	for _, ref := range ctx.Recipe.ComponentRefs {
-		if ref.HealthCheckAsserts != "" {
+		// Manual expectedResources take precedence over Chainsaw health check asserts.
+		// This allows users to override the registry's healthCheck.assertFile by
+		// declaring expectedResources explicitly in the recipe.
+		if ref.HealthCheckAsserts != "" && len(ref.ExpectedResources) == 0 {
 			chainsawAsserts = append(chainsawAsserts, chainsaw.ComponentAssert{
 				Name:       ref.Name,
 				AssertYAML: ref.HealthCheckAsserts,
diff --git a/pkg/validator/checks/deployment/expected_resources_check_unit_test.go b/pkg/validator/checks/deployment/expected_resources_check_unit_test.go
@@ -440,12 +440,10 @@ func TestValidateExpectedResources_ChainsawBranch(t *testing.T) {
 		errContains string
 	}{
 		{
-			name: "component with HealthCheckAsserts skips typed client checks",
+			name: "manual expectedResources take precedence over HealthCheckAsserts",
 			setup: func() *checks.ValidationContext {
-				// No K8s objects — if the typed client path ran, it would fail.
-				// But since HealthCheckAsserts is set, it should go to chainsaw path.
-				// Use a nonexistent deployment name so chainsaw always fails even if
-				// the binary is installed and a local cluster is accessible.
+				// No K8s objects — typed client check will fail for the missing Deployment.
+				// Even though HealthCheckAsserts is set, manual expectedResources take precedence.
 				//nolint:staticcheck // SA1019: fake.NewSimpleClientset is sufficient for tests
 				clientset := fake.NewSimpleClientset()
 				return &checks.ValidationContext{
@@ -454,10 +452,9 @@ func TestValidateExpectedResources_ChainsawBranch(t *testing.T) {
 					Recipe: &recipe.RecipeResult{
 						ComponentRefs: []recipe.ComponentRef{
 							{
-								Name:               "test-chainsaw-component",
+								Name:               "test-component",
 								Type:               "Helm",
-								HealthCheckAsserts: "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: nonexistent-chainsaw-test\n  namespace: nonexistent-ns\n",
-								// These expectedResources should NOT be checked (chainsaw path)
+								HealthCheckAsserts: "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: something\n",
 								ExpectedResources: []recipe.ExpectedResource{
 									{Kind: "Deployment", Name: "gpu-operator", Namespace: "gpu-operator"},
 								},
@@ -466,6 +463,30 @@ func TestValidateExpectedResources_ChainsawBranch(t *testing.T) {
 					},
 				}
 			},
+			// Typed client fails because the Deployment doesn't exist in the fake clientset
+			wantErr:     true,
+			errContains: "not found",
+		},
+		{
+			name: "HealthCheckAsserts used when no manual expectedResources",
+			setup: func() *checks.ValidationContext {
+				// No manual expectedResources → Chainsaw path activates.
+				//nolint:staticcheck // SA1019: fake.NewSimpleClientset is sufficient for tests
+				clientset := fake.NewSimpleClientset()
+				return &checks.ValidationContext{
+					Context:   context.Background(),
+					Clientset: clientset,
+					Recipe: &recipe.RecipeResult{
+						ComponentRefs: []recipe.ComponentRef{
+							{
+								Name:               "test-chainsaw-component",
+								Type:               "Helm",
+								HealthCheckAsserts: "apiVersion: apps/v1\nkind: Deployment\nmetadata:\n  name: nonexistent-chainsaw-test\n  namespace: nonexistent-ns\n",
+							},
+						},
+					},
+				}
+			},
 			// Chainsaw fails: either binary not available or assertion doesn't match
 			wantErr:     true,
 			errContains: "chainsaw health check failed",
diff --git a/pkg/validator/resource_discovery.go b/pkg/validator/resource_discovery.go
@@ -114,9 +114,9 @@ func resolveExpectedResources(ctx context.Context, recipeResult *recipe.RecipeRe
 
 		ref := &recipeResult.ComponentRefs[i]
 
-		// Skip auto-discovery for components with Chainsaw health check asserts.
-		// These components use Chainsaw CLI assertions instead of typed replica checks.
-		if ref.HealthCheckAsserts != "" {
+		// Skip auto-discovery for components with Chainsaw health check asserts,
+		// unless the recipe already has manual expectedResources (which take precedence).
+		if ref.HealthCheckAsserts != "" && len(ref.ExpectedResources) == 0 {
 			slog.Debug("skipping auto-discovery for component with chainsaw health check",
 				"component", ref.Name)
 			continue
diff --git a/tests/e2e/run.sh b/tests/e2e/run.sh
@@ -1115,68 +1115,7 @@ YAML
   # Wait for deployment to be available
   kubectl wait --for=condition=available deployment/gpu-operator -n gpu-operator --timeout=60s 2>&1 || true
 
-  # Test 1: Validate expected-resources with passing check (resource exists)
-  msg "--- Test: Expected resources check (should pass) ---"
-  local recipe_file="${validate_dir}/recipe-expected-resources.yaml"
-  cat > "$recipe_file" <<RECIPE
-kind: RecipeResult
-apiVersion: aicr.nvidia.com/v1alpha1
-metadata:
-  version: dev
-componentRefs:
-  - name: fake-gpu-operator
-    type: Helm
-    namespace: gpu-operator
-    expectedResources:
-      - kind: Deployment
-        name: gpu-operator
-        namespace: gpu-operator
-validation:
-  deployment:
-    checks:
-      - expected-resources
-RECIPE
-
-  echo -e "${DIM}  \$ aicr validate --phase deployment --recipe recipe.yaml${NC}"
-  local result_file="${validate_dir}/result-pass.yaml"
-  local result_output
-  result_output=$("${AICR_BIN}" validate \
-    --recipe "$recipe_file" \
-    --snapshot "cm://${SNAPSHOT_NAMESPACE}/${SNAPSHOT_CM}" \
-    --phase deployment \
-    --image "${AICR_VALIDATOR_IMAGE}" \
-    --output "$result_file" 2>&1) || true
-
-  # DEBUG: Print captured output to see what's happening
-  detail "Captured validation output:"
-  echo "$result_output" | sed 's/^/    /'
-
-  # Check the output file for expected-resources check results
-  if [ -f "$result_file" ]; then
-    detail "Validation output file created: $result_file"
-  else
-    detail "Validation output file NOT created: $result_file"
-  fi
-
-  if [ -f "$result_file" ] && \
-     grep -q "TestCheckExpectedResources" "$result_file"; then
-    if grep -A1 "name: TestCheckExpectedResources" "$result_file" | grep -q "status: pass"; then
-      detail "Expected-resources check: PASS (gpu-operator deployment found)"
-      pass "validate/expected-resources-pass"
-    elif grep -q "summary:" "$result_file" && grep -q "status: pass" "$result_file"; then
-      # Fallback: check summary status
-      detail "Expected-resources check: PASS (from summary status)"
-      pass "validate/expected-resources-pass"
-    else
-      detail "Check found but status unclear. Showing check section:"
-      grep -A5 "TestCheckExpectedResources" "$result_file" | sed 's/^/    /' || true
-      fail "validate/expected-resources-pass" "Check did not pass"
-    fi
-  else
-    fail "validate/expected-resources-pass" "TestCheckExpectedResources not found in output"
-  fi
-
-  # Test 2: Validate expected-resources with failing check (resource missing)
+  # Test 1: Validate expected-resources with failing check (resource missing)
   msg "--- Test: Expected resources check (should fail - missing resource) ---"
   local recipe_file_fail="${validate_dir}/recipe-expected-resources-fail.yaml"
   cat > "$recipe_file_fail" <<RECIPE