NVIDIA
diff --git a/‎docs/conformance/cncf/README.md‎
Lines changed: 24 additions & 17 deletions b/‎docs/conformance/cncf/README.md‎
Lines changed: 24 additions & 17 deletions
diff --git a/‎pkg/cli/doc.go‎
Lines changed: 3 additions & 2 deletions b/‎pkg/cli/doc.go‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎pkg/cli/evidence.go‎
Lines changed: 152 additions & 0 deletions b/‎pkg/cli/evidence.go‎
Lines changed: 152 additions & 0 deletions
diff --git a/‎pkg/cli/root.go‎
Lines changed: 1 addition & 0 deletions b/‎pkg/cli/root.go‎
Lines changed: 1 addition & 0 deletions
@@ -19,11 +19,9 @@ recipe meets the Must-have requirements for Kubernetes v1.34.
 ```
 docs/conformance/cncf/
 ├── README.md
-├── collect-evidence.sh
-├── manifests/
-│   ├── dra-gpu-test.yaml
-│   ├── gang-scheduling-test.yaml
-│   └── hpa-gpu-test.yaml
+├── submission/
+│   ├── PRODUCT.yaml
+│   └── README.md
 └── evidence/
     ├── index.md
     ├── dra-support.md
@@ -34,6 +32,13 @@ docs/conformance/cncf/
     ├── robust-operator.md
     ├── pod-autoscaling.md
     └── cluster-autoscaling.md
+
+pkg/evidence/scripts/             # Evidence collection script + test manifests
+├── collect-evidence.sh
+└── manifests/
+    ├── dra-gpu-test.yaml
+    ├── gang-scheduling-test.yaml
+    └── hpa-gpu-test.yaml
 ```
 
 ## Usage
@@ -58,23 +63,25 @@ aicr validate -r recipe.yaml -s snapshot.yaml \
 
 ### Step 2: Behavioral Test Evidence
 
-`collect-evidence.sh` deploys test workloads and collects behavioral evidence
+`aicr evidence` deploys test workloads and collects behavioral evidence
 (DRA GPU allocation, gang scheduling, HPA autoscaling, etc.) that requires
 running actual GPU workloads on the cluster:
 
 ```bash
 # Collect all behavioral evidence
-./docs/conformance/cncf/collect-evidence.sh all
-
-# Collect evidence for a single feature
-./docs/conformance/cncf/collect-evidence.sh dra
-./docs/conformance/cncf/collect-evidence.sh gang
-./docs/conformance/cncf/collect-evidence.sh secure
-./docs/conformance/cncf/collect-evidence.sh metrics
-./docs/conformance/cncf/collect-evidence.sh gateway
-./docs/conformance/cncf/collect-evidence.sh operator
-./docs/conformance/cncf/collect-evidence.sh hpa
-./docs/conformance/cncf/collect-evidence.sh cluster-autoscaling
+aicr evidence -o ./evidence
+
+# Collect specific features
+aicr evidence -o ./evidence -f dra -f hpa
+
+# List available features
+aicr evidence --list
+```
+
+Alternatively, run the script directly:
+```bash
+./pkg/evidence/scripts/collect-evidence.sh all
+./pkg/evidence/scripts/collect-evidence.sh dra
 ```
 
 > **Note:** The HPA test (`hpa`) deploys a GPU stress workload (nbody) and waits
 
@@ -16,8 +16,9 @@
 //
 // # Overview
 //
-// The aicr CLI provides commands for the four-stage workflow: capturing system snapshots,
-// generating configuration recipes, validating constraints, and creating deployment bundles.
+// The aicr CLI provides commands for the five-stage workflow: capturing system snapshots,
+// generating configuration recipes, validating constraints, creating deployment bundles,
+// and collecting CNCF AI Conformance evidence.
 // It is designed for cluster administrators and SREs managing NVIDIA GPU infrastructure.
 //
 // # Commands
 
@@ -0,0 +1,152 @@
+// Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cli
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"strings"
+	"time"
+
+	"github.com/NVIDIA/aicr/pkg/errors"
+	"github.com/NVIDIA/aicr/pkg/evidence"
+	"github.com/urfave/cli/v3"
+)
+
+func evidenceCmd() *cli.Command {
+	return &cli.Command{
+		Name:                  "evidence",
+		Category:              functionalCategoryName,
+		EnableShellCompletion: true,
+		Usage:                 "Collect CNCF AI Conformance evidence from a live cluster.",
+		Description: `Deploy test workloads and collect behavioral evidence for CNCF AI
+Conformance submission. This captures detailed command outputs, workload logs,
+and metric queries that demonstrate conformance requirements are met.
+
+This is separate from 'aicr validate --phase conformance' which performs
+structural pass/fail checks for CI. Evidence collection captures rich,
+human-reviewable proof for CNCF submission.
+
+Requires GPU hardware on the target cluster.
+
+Examples:
+
+Collect all evidence:
+  aicr evidence -o ./evidence
+
+Collect specific features:
+  aicr evidence -o ./evidence -f dra -f hpa
+
+List available features:
+  aicr evidence --list
+`,
+		Flags: []cli.Flag{
+			&cli.StringFlag{
+				Name:    "output-dir",
+				Aliases: []string{"o"},
+				Usage:   "Evidence output directory",
+			},
+			&cli.StringSliceFlag{
+				Name:    "feature",
+				Aliases: []string{"f"},
+				Usage:   "Feature to collect evidence for (repeatable, default: all)",
+			},
+			&cli.BoolFlag{
+				Name:  "list",
+				Usage: "List available evidence features",
+			},
+			&cli.BoolFlag{
+				Name:  "no-cleanup",
+				Usage: "Skip test namespace cleanup after collection",
+			},
+			&cli.DurationFlag{
+				Name:  "timeout",
+				Usage: "Overall timeout for evidence collection",
+				Value: 20 * time.Minute,
+			},
+		},
+		Action: runEvidence,
+	}
+}
+
+// runEvidence is the action behind the "evidence" command.
+//
+// With --list it prints every entry of evidence.ValidFeatures next to its
+// evidence.FeatureDescriptions text and returns nil without reading any other
+// flag. Otherwise it requires --output-dir, validates each --feature value and
+// the --timeout, and runs an evidence collector under a context bounded by
+// that timeout.
+//
+// It returns an ErrCodeInvalidRequest error when --output-dir is empty, a
+// feature is unknown, or --timeout is not positive, and an ErrCodeInternal
+// error wrapping whatever the collector's Run returns.
+func runEvidence(ctx context.Context, cmd *cli.Command) error {
+	// Handle --list flag.
+	if cmd.Bool("list") {
+		fmt.Println("Available evidence features:")
+		fmt.Println()
+		fmt.Printf("  %-24s %s\n", "Feature", "Description")
+		fmt.Printf("  %-24s %s\n", strings.Repeat("─", 24), strings.Repeat("─", 45))
+		for _, f := range evidence.ValidFeatures {
+			fmt.Printf("  %-24s %s\n", f, evidence.FeatureDescriptions[f])
+		}
+		fmt.Println()
+		fmt.Println("Use -f/--feature to select specific features, or omit for all.")
+		return nil
+	}
+
+	// Require --output-dir.
+	outputDir := cmd.String("output-dir")
+	if outputDir == "" {
+		return errors.New(errors.ErrCodeInvalidRequest,
+			"--output-dir is required (use -o ./evidence)")
+	}
+
+	// Validate features.
+	features := cmd.StringSlice("feature")
+	if err := validateFeatures(features); err != nil {
+		return err
+	}
+
+	// Reject a non-positive timeout as a usage error. context.WithTimeout
+	// would otherwise return a context whose deadline has already passed, and
+	// the collector would start with a context that is already done.
+	timeout := cmd.Duration("timeout")
+	if timeout <= 0 {
+		return errors.New(errors.ErrCodeInvalidRequest,
+			fmt.Sprintf("--timeout must be greater than zero, got %s", timeout))
+	}
+
+	// Apply timeout.
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	slog.Info("starting evidence collection",
+		"outputDir", outputDir,
+		"features", features,
+		"timeout", timeout)
+
+	// Run collector.
+	collector := evidence.NewCollector(outputDir,
+		evidence.WithFeatures(features),
+		evidence.WithNoCleanup(cmd.Bool("no-cleanup")),
+	)
+
+	if err := collector.Run(ctx); err != nil {
+		return errors.Wrap(errors.ErrCodeInternal, "evidence collection failed", err)
+	}
+
+	slog.Info("evidence collection complete", "outputDir", outputDir)
+	return nil
+}
+
+// validateFeatures checks that all specified features are valid.
+func validateFeatures(features []string) error {
+	valid := make(map[string]bool, len(evidence.ValidFeatures)+1)
+	for _, f := range evidence.ValidFeatures {
+		valid[f] = true
+	}
+	valid["all"] = true
+	for _, f := range features {
+		if !valid[f] {
+			return errors.New(errors.ErrCodeInvalidRequest,
+				fmt.Sprintf("unknown feature %q, valid features: %s",
+					f, strings.Join(evidence.ValidFeatures, ", ")))
+		}
+	}
+	return nil
+}
@@ -133,6 +133,7 @@ func Execute() {
 			recipeCmd(),
 			bundleCmd(),
 			validateCmd(),
+			evidenceCmd(),
 		},
 		ShellComplete: commandLister,
 	}
Original file line number	Diff line number	Diff line change
`@@ -133,6 +133,7 @@ func Execute() {`
`133`	`133`	`recipeCmd(),`
`134`	`134`	`bundleCmd(),`
`135`	`135`	`validateCmd(),`
	`136`	`+ evidenceCmd(),`
`136`	`137`	`},`
`137`	`138`	`ShellComplete: commandLister,`
`138`	`139`	`}`