NVIDIA
diff --git a/‎docs/conformance/cncf/README.md‎
Lines changed: 18 additions & 22 deletions b/‎docs/conformance/cncf/README.md‎
Lines changed: 18 additions & 22 deletions
diff --git a/‎docs/conformance/cncf/evidence/dra-support.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/conformance/cncf/evidence/dra-support.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/conformance/cncf/evidence/gang-scheduling.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/conformance/cncf/evidence/gang-scheduling.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/conformance/cncf/evidence/pod-autoscaling.md‎
Lines changed: 2 additions & 2 deletions b/‎docs/conformance/cncf/evidence/pod-autoscaling.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pkg/cli/validate.go‎
Lines changed: 28 additions & 1 deletion b/‎pkg/cli/validate.go‎
Lines changed: 28 additions & 1 deletion
diff --git a/‎pkg/evidence/collector.go‎
Lines changed: 188 additions & 0 deletions b/‎pkg/evidence/collector.go‎
Lines changed: 188 additions & 0 deletions
@@ -19,11 +19,9 @@ recipe meets the Must-have requirements for Kubernetes v1.34.
 ```
 docs/conformance/cncf/
 ├── README.md
-├── collect-evidence.sh
-├── manifests/
-│   ├── dra-gpu-test.yaml
-│   ├── gang-scheduling-test.yaml
-│   └── hpa-gpu-test.yaml
+├── submission/
+│   ├── PRODUCT.yaml
+│   └── README.md
 └── evidence/
     ├── index.md
     ├── dra-support.md
@@ -34,6 +32,13 @@ docs/conformance/cncf/
     ├── robust-operator.md
     ├── pod-autoscaling.md
     └── cluster-autoscaling.md
+
+pkg/evidence/scripts/             # Evidence collection script + test manifests
+├── collect-evidence.sh
+└── manifests/
+    ├── dra-gpu-test.yaml
+    ├── gang-scheduling-test.yaml
+    └── hpa-gpu-test.yaml
 ```
 
 ## Usage
@@ -56,25 +61,16 @@ aicr validate -r recipe.yaml -s snapshot.yaml \
   --result validation-result.yaml
 ```
 
-### Step 2: Behavioral Test Evidence
-
-`collect-evidence.sh` deploys test workloads and collects behavioral evidence
-(DRA GPU allocation, gang scheduling, HPA autoscaling, etc.) that requires
-running actual GPU workloads on the cluster:
+When `--evidence-dir` is specified, `aicr validate` runs behavioral evidence
+collection (DRA GPU allocation, gang scheduling, HPA autoscaling, etc.) in
+place of the structural checks. The behavioral tests deploy GPU workloads on
+the cluster and capture detailed command outputs, workload logs, and
+Prometheus queries. Use `--feature`/`-f` (repeatable) to limit collection to
+specific features; by default all features are collected.
 
+Alternatively, run the evidence collection script directly, passing `all` or a
+single feature name:
+
 ```bash
-# Collect all behavioral evidence
-./docs/conformance/cncf/collect-evidence.sh all
-
-# Collect evidence for a single feature
-./docs/conformance/cncf/collect-evidence.sh dra
-./docs/conformance/cncf/collect-evidence.sh gang
-./docs/conformance/cncf/collect-evidence.sh secure
-./docs/conformance/cncf/collect-evidence.sh metrics
-./docs/conformance/cncf/collect-evidence.sh gateway
-./docs/conformance/cncf/collect-evidence.sh operator
-./docs/conformance/cncf/collect-evidence.sh hpa
-./docs/conformance/cncf/collect-evidence.sh cluster-autoscaling
+./pkg/evidence/scripts/collect-evidence.sh all
+./pkg/evidence/scripts/collect-evidence.sh dra
 ```
 
 > **Note:** The HPA test (`hpa`) deploys a GPU stress workload (nbody) and waits
 
@@ -47,7 +47,7 @@ ip-100-64-171-120.ec2.internal-gpu.nvidia.com-75xvv              ip-100-64-171-1
 
 Deploy a test pod that requests 1 GPU via ResourceClaim and verifies device access.
 
-**Test manifest:** `docs/conformance/cncf/manifests/dra-gpu-test.yaml`
+**Test manifest:** `pkg/evidence/scripts/manifests/dra-gpu-test.yaml`
 
 ```yaml
 ---
@@ -99,7 +99,7 @@ spec:
 
 **Apply test manifest**
 ```
-$ kubectl apply -f docs/conformance/cncf/manifests/dra-gpu-test.yaml
+$ kubectl apply -f pkg/evidence/scripts/manifests/dra-gpu-test.yaml
 namespace/dra-test created
 resourceclaim.resource.k8s.io/gpu-claim created
 pod/dra-gpu-test created
 
@@ -52,7 +52,7 @@ podgroups.scheduling.run.ai   2026-02-12T20:42:05Z
 Deploy a PodGroup with minMember=2 and two GPU pods. KAI scheduler ensures both
 pods are scheduled atomically.
 
-**Test manifest:** `docs/conformance/cncf/manifests/gang-scheduling-test.yaml`
+**Test manifest:** `pkg/evidence/scripts/manifests/gang-scheduling-test.yaml`
 
 ```yaml
 ---
@@ -149,7 +149,7 @@ spec:
 
 **Apply test manifest**
 ```
-$ kubectl apply -f docs/conformance/cncf/manifests/gang-scheduling-test.yaml
+$ kubectl apply -f pkg/evidence/scripts/manifests/gang-scheduling-test.yaml
 namespace/gang-scheduling-test created
 podgroup.scheduling.run.ai/gang-test-group created
 pod/gang-worker-0 created
 
@@ -56,7 +56,7 @@ pods/gpu_utilization
 Deploy a GPU workload running CUDA N-Body Simulation to generate sustained GPU utilization,
 then create an HPA targeting `gpu_utilization` to demonstrate autoscaling.
 
-**Test manifest:** `docs/conformance/cncf/manifests/hpa-gpu-test.yaml`
+**Test manifest:** `pkg/evidence/scripts/manifests/hpa-gpu-test.yaml`
 
 ```yaml
 ---
@@ -123,7 +123,7 @@ spec:
 
 **Apply test manifest**
 ```
-$ kubectl apply -f docs/conformance/cncf/manifests/hpa-gpu-test.yaml
+$ kubectl apply -f pkg/evidence/scripts/manifests/hpa-gpu-test.yaml
 namespace/hpa-test created
 deployment.apps/gpu-workload created
 horizontalpodautoscaler.autoscaling/gpu-workload-hpa created
 
@@ -361,7 +361,12 @@ func validateCmdFlags() []cli.Flag {
 		},
 		&cli.StringFlag{
 			Name:  "evidence-dir",
-			Usage: "Write CNCF conformance evidence markdown to this directory. Requires --phase conformance.",
+			Usage: "Collect CNCF conformance evidence to this directory. When set, runs behavioral evidence collection (GPU workload tests, HPA scaling, Prometheus queries) instead of structural Go checks. Requires --phase conformance.",
+		},
+		&cli.StringSliceFlag{
+			Name:    "feature",
+			Aliases: []string{"f"},
+			Usage:   "Evidence feature to collect (repeatable, default: all). Use -f all to run all features (cannot be combined with other features). Only used with --evidence-dir.",
 		},
 		&cli.StringFlag{
 			Name:  "result",
@@ -462,6 +467,28 @@ Use a saved result file for evidence instead of the live run:
 				return errors.New(errors.ErrCodeInvalidRequest, "--result requires --evidence-dir")
 			}
 
+			// When --evidence-dir is set, run behavioral evidence collection
+			// instead of structural Go checks. This deploys GPU workloads and
+			// captures detailed outputs for CNCF submission.
+			if evidenceDir != "" {
+				features := cmd.StringSlice("feature")
+				slog.Info("collecting behavioral conformance evidence",
+					"dir", evidenceDir, "features", features)
+
+				evidenceTimeout := cmd.Duration("timeout")
+				evidenceCtx, evidenceCancel := context.WithTimeout(ctx, evidenceTimeout)
+				defer evidenceCancel()
+
+				collector := evidence.NewCollector(evidenceDir,
+					evidence.WithFeatures(features),
+				)
+				if err := collector.Run(evidenceCtx); err != nil {
+					return errors.Wrap(errors.ErrCodeInternal, "evidence collection failed", err)
+				}
+				slog.Info("conformance evidence written", "dir", evidenceDir)
+				return nil
+			}
+
 			recipeFilePath := cmd.String("recipe")
 			snapshotFilePath := cmd.String("snapshot")
 			kubeconfig := cmd.String("kubeconfig")
 
@@ -0,0 +1,188 @@
+// Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package evidence
+
+import (
+	"context"
+	"embed"
+	"io/fs"
+	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
+
+	"github.com/NVIDIA/aicr/pkg/errors"
+)
+
+// collectScript holds the contents of scripts/collect-evidence.sh, embedded at
+// build time. Run writes it to a temporary directory before executing it.
+//
+//go:embed scripts/collect-evidence.sh
+var collectScript []byte
+
+// manifestsFS holds the scripts/manifests directory tree, embedded at build
+// time. writeEmbeddedManifests copies it into a "manifests" directory next to
+// the extracted script.
+//
+//go:embed scripts/manifests
+var manifestsFS embed.FS
+
+// ValidFeatures lists all supported evidence collection features.
+//
+// NOTE(review): nothing in this file checks requested features against this
+// list; Run forwards the configured feature names to the script unchanged.
+var ValidFeatures = []string{
+	"dra",
+	"gang",
+	"secure",
+	"metrics",
+	"gateway",
+	"operator",
+	"hpa",
+	"cluster-autoscaling",
+}
+
+// FeatureDescriptions maps feature names to human-readable descriptions.
+// It has one entry for each name in ValidFeatures.
+var FeatureDescriptions = map[string]string{
+	"dra":                 "DRA GPU allocation test",
+	"gang":                "Gang scheduling co-scheduling test",
+	"secure":              "Secure accelerator access verification",
+	"metrics":             "Accelerator & AI service metrics",
+	"gateway":             "Inference API gateway conditions",
+	"operator":            "Robust AI operator + webhook test",
+	"hpa":                 "HPA pod autoscaling (scale-up + scale-down)",
+	"cluster-autoscaling": "Cluster autoscaling (ASG configuration)",
+}
+
+// CollectorOption configures the Collector. Options are passed to NewCollector,
+// which applies them in order to the Collector it is building.
+type CollectorOption func(*Collector)
+
+// Collector orchestrates behavioral evidence collection by invoking the
+// embedded collect-evidence.sh script against a live Kubernetes cluster.
+type Collector struct {
+	// outputDir is created by Run and handed to the script as EVIDENCE_DIR.
+	outputDir string
+	// features are the script sections to run; empty, or any entry equal to
+	// "all", makes Run invoke the script once with "all".
+	features  []string
+	// noCleanup, when true, exports NO_CLEANUP=true to the script.
+	noCleanup bool
+}
+
+// NewCollector builds a Collector that writes evidence to outputDir, then
+// applies each supplied option to it in the order given.
+func NewCollector(outputDir string, opts ...CollectorOption) *Collector {
+	collector := new(Collector)
+	collector.outputDir = outputDir
+	for i := range opts {
+		opts[i](collector)
+	}
+	return collector
+}
+
+// WithFeatures sets which features to collect evidence for.
+// If empty, all features are collected.
+//
+// The slice is copied when the option is created, so later changes the caller
+// makes to features do not alter the Collector's configuration.
+func WithFeatures(features []string) CollectorOption {
+	selected := append([]string(nil), features...)
+	return func(c *Collector) {
+		c.features = selected
+	}
+}
+
+// WithNoCleanup controls whether the test namespaces are left in place after
+// collection: passing true skips cleanup.
+func WithNoCleanup(noCleanup bool) CollectorOption {
+	apply := func(collector *Collector) {
+		collector.noCleanup = noCleanup
+	}
+	return apply
+}
+
+// Run executes evidence collection for the configured features.
+//
+// The embedded collect-evidence.sh script and its manifests are extracted to a
+// temporary directory (removed on return), the output directory is created,
+// and the script is invoked once per requested section. If no features are
+// configured, or any of them is "all", the script is invoked once with "all".
+//
+// A failing section does not stop the remaining ones. Every failure is logged,
+// and the returned error names all failed sections and wraps the most recent
+// underlying error. Collection stops early once ctx is canceled or times out.
+func (c *Collector) Run(ctx context.Context) error {
+	// Write embedded script and manifests to temp directory.
+	tmpDir, err := os.MkdirTemp("", "aicr-evidence-")
+	if err != nil {
+		return errors.Wrap(errors.ErrCodeInternal, "failed to create temp directory", err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	scriptPath := filepath.Join(tmpDir, "collect-evidence.sh")
+	if err := os.WriteFile(scriptPath, collectScript, 0o700); err != nil { //nolint:gosec // script needs execute permission
+		return errors.Wrap(errors.ErrCodeInternal, "failed to write evidence script", err)
+	}
+
+	manifestDir := filepath.Join(tmpDir, "manifests")
+	if err := writeEmbeddedManifests(manifestDir); err != nil {
+		return errors.Wrap(errors.ErrCodeInternal, "failed to write manifests", err)
+	}
+
+	// Create output directory.
+	if err := os.MkdirAll(c.outputDir, 0o755); err != nil {
+		return errors.Wrap(errors.ErrCodeInternal, "failed to create output directory", err)
+	}
+
+	// Determine sections to run. "all" or empty means run everything.
+	sections := c.features
+	if len(sections) == 0 {
+		sections = []string{"all"}
+	}
+	for _, s := range sections {
+		if s == "all" {
+			sections = []string{"all"}
+			break
+		}
+	}
+
+	// Track every failed section so the final error can name all of them
+	// rather than only surfacing whichever one happened to fail last.
+	var (
+		lastErr error
+		failed  string
+	)
+	recordFailure := func(section string, err error) {
+		if failed != "" {
+			failed += ", "
+		}
+		failed += section
+		lastErr = err
+	}
+
+	// Run each feature.
+	for _, section := range sections {
+		// Once the context is done, every further script launch would fail
+		// immediately; stop instead of logging one failure per section.
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			slog.Warn("evidence collection aborted",
+				"feature", section, "error", ctxErr)
+			recordFailure(section, ctxErr)
+			break
+		}
+
+		slog.Info("collecting evidence", "feature", section)
+		if err := c.runSection(ctx, scriptPath, tmpDir, section); err != nil {
+			slog.Warn("evidence collection failed for feature",
+				"feature", section, "error", err)
+			recordFailure(section, err)
+			// Continue with remaining features.
+		}
+	}
+
+	if lastErr != nil {
+		return errors.Wrap(errors.ErrCodeInternal,
+			"one or more evidence sections failed: "+failed, lastErr)
+	}
+	return nil
+}
+
+// runSection executes the evidence script for a single section.
+//
+// The script is run through bash with scriptDir as its working directory, the
+// current process environment plus EVIDENCE_DIR and SCRIPT_DIR (and
+// NO_CLEANUP=true when cleanup is disabled), and the parent's stdout/stderr.
+//
+// The output directory is resolved to an absolute path before it is exported:
+// because the script's working directory is scriptDir rather than the caller's,
+// a relative output directory would otherwise name a different location for
+// the script than for this process.
+func (c *Collector) runSection(ctx context.Context, scriptPath, scriptDir, section string) error {
+	outputDir, err := filepath.Abs(c.outputDir)
+	if err != nil {
+		return errors.Wrap(errors.ErrCodeInternal, "failed to resolve output directory", err)
+	}
+
+	cmd := exec.CommandContext(ctx, "bash", scriptPath, section)
+	cmd.Dir = scriptDir
+	cmd.Env = append(os.Environ(),
+		"EVIDENCE_DIR="+outputDir,
+		"SCRIPT_DIR="+scriptDir,
+	)
+	if c.noCleanup {
+		cmd.Env = append(cmd.Env, "NO_CLEANUP=true")
+	}
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	return cmd.Run()
+}
+
+// writeEmbeddedManifests extracts the embedded manifests to the target directory.
+//
+// It walks the embedded "scripts/manifests" tree, recreating each directory
+// under targetDir (mode 0o755) and writing each file (mode 0o600) at the same
+// relative location. The first error encountered aborts the walk and is
+// returned.
+func writeEmbeddedManifests(targetDir string) error {
+	const root = "scripts/manifests"
+
+	return fs.WalkDir(manifestsFS, root, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Compute the path relative to the embedded root. A failure here would
+		// otherwise make the entry land directly on targetDir, so surface it.
+		relPath, err := filepath.Rel(root, path)
+		if err != nil {
+			return err
+		}
+		targetPath := filepath.Join(targetDir, relPath)
+
+		if d.IsDir() {
+			return os.MkdirAll(targetPath, 0o755)
+		}
+
+		data, err := manifestsFS.ReadFile(path)
+		if err != nil {
+			return err
+		}
+		return os.WriteFile(targetPath, data, 0o600)
+	})
+}