Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 28 additions & 13 deletions pkg/cli/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ func parseSnapshotTemplateOptions(cmd *cli.Command, outFormat serializer.Format)
}, nil
}

// createSnapshotSerializer picks the output serializer for a snapshot run.
// An empty templatePath selects the standard format writer; otherwise the
// template file writer is used. Both receive outputPath as their destination.
func createSnapshotSerializer(tmplOpts *snapshotTemplateOptions) (serializer.Serializer, error) {
	if tmplOpts.templatePath == "" {
		return serializer.NewFileWriterOrStdout(tmplOpts.format, tmplOpts.outputPath)
	}
	return serializer.NewTemplateFileWriter(tmplOpts.templatePath, tmplOpts.outputPath)
}

func snapshotCmd() *cli.Command {
return &cli.Command{
Name: "snapshot",
Expand Down Expand Up @@ -177,6 +185,14 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
Name: "template",
Usage: "Path to Go template file for custom output formatting (requires YAML format)",
},
&cli.StringSliceFlag{
Name: "helm-namespaces",
Usage: "Namespaces for Helm release collection (creates scoped RBAC for secrets access). Mutually exclusive with --helm-all-namespaces.",
},
&cli.BoolFlag{
Name: "helm-all-namespaces",
Usage: "Grant cluster-wide secrets access for Helm release collection. Mutually exclusive with --helm-namespaces.",
},
outputFlag,
formatFlag,
kubeconfigFlag,
Expand All @@ -203,19 +219,9 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
factory := collector.NewDefaultFactory()

// Create output serializer
var ser serializer.Serializer
if tmplOpts.templatePath != "" {
// Use template writer
ser, err = serializer.NewTemplateFileWriter(tmplOpts.templatePath, tmplOpts.outputPath)
if err != nil {
return errors.Wrap(errors.ErrCodeInternal, "failed to create template writer", err)
}
} else {
// Use standard format writer
ser, err = serializer.NewFileWriterOrStdout(tmplOpts.format, tmplOpts.outputPath)
if err != nil {
return errors.Wrap(errors.ErrCodeInternal, "failed to create output writer", err)
}
ser, err := createSnapshotSerializer(tmplOpts)
if err != nil {
return errors.Wrap(errors.ErrCodeInternal, "failed to create output serializer", err)
}

// Build snapshotter configuration
Expand All @@ -238,6 +244,13 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
return errors.Wrap(errors.ErrCodeInvalidRequest, "invalid toleration", err)
}

// Validate mutual exclusivity of helm flags
helmNamespaces := cmd.StringSlice("helm-namespaces")
helmAllNamespaces := cmd.Bool("helm-all-namespaces")
if len(helmNamespaces) > 0 && helmAllNamespaces {
return errors.New(errors.ErrCodeInvalidRequest, "--helm-namespaces and --helm-all-namespaces are mutually exclusive")
}

// When running inside an agent Job, collect locally instead of
// deploying another agent (prevents infinite nesting).
if os.Getenv("AICR_AGENT_MODE") == "true" {
Expand All @@ -261,6 +274,8 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
Privileged: cmd.Bool("privileged"),
RequireGPU: cmd.Bool("require-gpu"),
TemplatePath: tmplOpts.templatePath,
HelmNamespaces: helmNamespaces,
HelmAllNamespaces: helmAllNamespaces,
}

return ns.Measure(ctx)
Expand Down
64 changes: 64 additions & 0 deletions pkg/cli/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"fmt"
"log/slog"
"sort"
"time"

"github.com/urfave/cli/v3"
Expand Down Expand Up @@ -47,6 +48,8 @@ type validateAgentConfig struct {
debug bool
privileged bool
requireGPU bool
helmNamespaces []string
helmAllNamespaces bool
}

// parseValidateAgentConfig parses agent deployment flags from the command.
Expand Down Expand Up @@ -75,6 +78,8 @@ func parseValidateAgentConfig(cmd *cli.Command) (*validateAgentConfig, error) {
debug: cmd.Bool("debug"),
privileged: cmd.Bool("privileged"),
requireGPU: cmd.Bool("require-gpu"),
helmNamespaces: cmd.StringSlice("helm-namespaces"),
helmAllNamespaces: cmd.Bool("helm-all-namespaces"),
}, nil
}

Expand Down Expand Up @@ -132,6 +137,8 @@ func deployAgentForValidation(ctx context.Context, cfg *validateAgentConfig) (*s
Debug: cfg.debug,
Privileged: cfg.privileged,
RequireGPU: cfg.requireGPU,
HelmNamespaces: cfg.helmNamespaces,
HelmAllNamespaces: cfg.helmAllNamespaces,
}

snap, err := snapshotter.DeployAndGetSnapshot(ctx, agentConfig)
Expand Down Expand Up @@ -263,6 +270,25 @@ func runValidation(
return nil
}

// helmNamespacesFromRecipe extracts the unique namespaces of the Helm
// ComponentRefs in rec, sorted ascending so the result is deterministic.
// Refs of any other type and Helm refs with an empty namespace are ignored.
// It returns nil when rec is nil or when no namespace qualifies.
func helmNamespacesFromRecipe(rec *recipe.RecipeResult) []string {
	// A nil recipe has nothing to derive; return early instead of panicking
	// on the field access below.
	if rec == nil {
		return nil
	}

	seen := make(map[string]struct{})
	for _, ref := range rec.ComponentRefs {
		if ref.Type == recipe.ComponentTypeHelm && ref.Namespace != "" {
			seen[ref.Namespace] = struct{}{}
		}
	}
	if len(seen) == 0 {
		return nil
	}

	namespaces := make([]string, 0, len(seen))
	for ns := range seen {
		namespaces = append(namespaces, ns)
	}
	// Map iteration order is random; sort so callers see a stable order.
	sort.Strings(namespaces)
	return namespaces
}

func validateCmdFlags() []cli.Flag {
return []cli.Flag{
&cli.StringFlag{
Expand Down Expand Up @@ -367,6 +393,18 @@ func validateCmdFlags() []cli.Flag {
Name: "result",
Usage: "Use a saved validation result file as the source for evidence rendering (live validation still runs). Note: saved results do not include diagnostic artifacts captured during live runs. Requires --phase conformance and --evidence-dir.",
},
&cli.BoolFlag{
Name: "skip-helm-check",
Usage: "Skip Helm values deployment check and don't create secrets RBAC",
},
&cli.StringSliceFlag{
Name: "helm-namespaces",
Usage: "Override namespaces for Helm release collection (creates scoped RBAC). Mutually exclusive with --helm-all-namespaces.",
},
&cli.BoolFlag{
Name: "helm-all-namespaces",
Usage: "Grant cluster-wide secrets access for Helm release collection. Mutually exclusive with --helm-namespaces.",
},
outputFlag,
formatFlag,
kubeconfigFlag,
Expand Down Expand Up @@ -494,6 +532,28 @@ Use a saved result file for evidence instead of the live run:
return errors.Wrap(errors.ErrCodeInternal, fmt.Sprintf("failed to load recipe from %q", recipeFilePath), err)
}

// Resolve helm namespace config for agent RBAC
skipHelmCheck := cmd.Bool("skip-helm-check")
helmNamespaces := cmd.StringSlice("helm-namespaces")
helmAllNamespaces := cmd.Bool("helm-all-namespaces")

if len(helmNamespaces) > 0 && helmAllNamespaces {
return errors.New(errors.ErrCodeInvalidRequest, "--helm-namespaces and --helm-all-namespaces are mutually exclusive")
}

if !skipHelmCheck && !cmd.IsSet("helm-namespaces") && !helmAllNamespaces {
// Auto-derive from recipe ComponentRefs
helmNamespaces = helmNamespacesFromRecipe(rec)
if len(helmNamespaces) > 0 {
slog.Info("auto-derived helm namespaces from recipe", "namespaces", helmNamespaces)
}
}

if skipHelmCheck {
helmNamespaces = nil
helmAllNamespaces = false
}

// Get snapshot - either from file or by deploying an agent
var snap *snapshotter.Snapshot
var snapshotSource string
Expand All @@ -515,6 +575,10 @@ Use a saved result file for evidence instead of the live run:
return cfgErr
}

// Apply resolved helm namespace config
agentCfg.helmNamespaces = helmNamespaces
agentCfg.helmAllNamespaces = helmAllNamespaces

var deployErr error
snap, snapshotSource, deployErr = deployAgentForValidation(ctx, agentCfg)
if deployErr != nil {
Expand Down
79 changes: 79 additions & 0 deletions pkg/cli/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"strings"
"testing"

"github.com/NVIDIA/aicr/pkg/recipe"
"github.com/NVIDIA/aicr/pkg/validator"
)

Expand Down Expand Up @@ -213,6 +214,84 @@ func TestValidateCmd_AgentFlags(t *testing.T) {
}
}

// TestHelmNamespacesFromRecipe checks that helmNamespacesFromRecipe returns
// the sorted, de-duplicated namespaces of Helm components only, and nil when
// nothing qualifies.
func TestHelmNamespacesFromRecipe(t *testing.T) {
	tests := []struct {
		name     string
		rec      *recipe.RecipeResult
		expected []string
	}{
		{
			// A non-nil recipe that carries no component refs at all.
			name:     "empty recipe",
			rec:      &recipe.RecipeResult{},
			expected: nil,
		},
		{
			name: "no helm components",
			rec: &recipe.RecipeResult{
				ComponentRefs: []recipe.ComponentRef{
					{Name: "app", Type: recipe.ComponentTypeKustomize, Namespace: "default"},
				},
			},
			expected: nil,
		},
		{
			name: "helm components with namespaces",
			rec: &recipe.RecipeResult{
				ComponentRefs: []recipe.ComponentRef{
					{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
					{Name: "network-operator", Type: recipe.ComponentTypeHelm, Namespace: "network-operator"},
				},
			},
			expected: []string{"gpu-operator", "network-operator"},
		},
		{
			name: "deduplicates namespaces",
			rec: &recipe.RecipeResult{
				ComponentRefs: []recipe.ComponentRef{
					{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
					{Name: "gpu-feature-discovery", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
				},
			},
			expected: []string{"gpu-operator"},
		},
		{
			name: "skips helm without namespace",
			rec: &recipe.RecipeResult{
				ComponentRefs: []recipe.ComponentRef{
					{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
					{Name: "orphan", Type: recipe.ComponentTypeHelm, Namespace: ""},
				},
			},
			expected: []string{"gpu-operator"},
		},
		{
			name: "mixed helm and kustomize",
			rec: &recipe.RecipeResult{
				ComponentRefs: []recipe.ComponentRef{
					{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
					{Name: "kustomize-app", Type: recipe.ComponentTypeKustomize, Namespace: "default"},
					{Name: "network-operator", Type: recipe.ComponentTypeHelm, Namespace: "network-operator"},
				},
			},
			expected: []string{"gpu-operator", "network-operator"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := helmNamespacesFromRecipe(tt.rec)
			// slices.Equal treats nil and empty slices as equal, so the
			// "expected: nil" cases accept either form.
			if !slices.Equal(got, tt.expected) {
				t.Errorf("helmNamespacesFromRecipe() = %v, want %v", got, tt.expected)
			}
		})
	}
}

// hasFlag checks if a cli.Flag has the given name
func hasFlag(flag interface{ Names() []string }, name string) bool {
return slices.Contains(flag.Names(), name)
Expand Down
13 changes: 12 additions & 1 deletion pkg/collector/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,19 @@ func WithSystemDServices(services []string) Option {
}
}

// WithHelmNamespaces returns an Option that sets the factory's HelmNamespaces,
// which selects the namespaces used for Helm release collection:
//   - nil or empty: skip Helm release collection
//   - ["*"]: all namespaces
//   - ["ns1", "ns2"]: only the listed namespaces
func WithHelmNamespaces(namespaces []string) Option {
	apply := func(factory *DefaultFactory) {
		factory.HelmNamespaces = namespaces
	}
	return apply
}

// DefaultFactory is the standard implementation of Factory that creates collectors
// with production dependencies. It configures default systemd services to monitor.
type DefaultFactory struct {
// SystemDServices lists the systemd services to monitor.
SystemDServices []string
// HelmNamespaces selects the namespaces for Helm release collection:
// nil/empty = skip, ["*"] = all namespaces, ["ns1","ns2"] = scoped.
// Set via WithHelmNamespaces.
HelmNamespaces []string
}

// NewDefaultFactory creates a new DefaultFactory with default configuration.
Expand Down Expand Up @@ -86,5 +95,7 @@ func (f *DefaultFactory) CreateOSCollector() Collector {

// CreateKubernetesCollector creates a Kubernetes API collector that carries
// the factory's HelmNamespaces setting.
func (f *DefaultFactory) CreateKubernetesCollector() Collector {
	return &k8s.Collector{
		HelmNamespaces: f.HelmNamespaces,
	}
}
45 changes: 45 additions & 0 deletions pkg/collector/factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"testing"

"github.com/NVIDIA/aicr/pkg/collector/k8s"
"github.com/NVIDIA/aicr/pkg/collector/systemd"
)

Expand Down Expand Up @@ -87,6 +88,50 @@ func TestWithSystemDServices(t *testing.T) {
}
}

// TestWithHelmNamespaces verifies that the WithHelmNamespaces option stores
// the namespaces on the factory and that the Kubernetes collector built by
// that factory carries the same number of namespaces.
func TestWithHelmNamespaces(t *testing.T) {
	type testCase struct {
		name       string
		namespaces []string
		wantLen    int
	}
	cases := []testCase{
		{name: "nil namespaces", namespaces: nil, wantLen: 0},
		{name: "all namespaces", namespaces: []string{"*"}, wantLen: 1},
		{name: "scoped namespaces", namespaces: []string{"gpu-operator", "network-operator"}, wantLen: 2},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			f := NewDefaultFactory(WithHelmNamespaces(tc.namespaces))

			if got := len(f.HelmNamespaces); got != tc.wantLen {
				t.Errorf("expected %d namespaces, got %d", tc.wantLen, got)
			}

			// The collector must be the concrete k8s type so its
			// HelmNamespaces field can be inspected.
			k8sCol, ok := f.CreateKubernetesCollector().(*k8s.Collector)
			if !ok {
				t.Fatal("expected *k8s.Collector")
			}
			if got := len(k8sCol.HelmNamespaces); got != tc.wantLen {
				t.Errorf("K8s collector expected %d namespaces, got %d", tc.wantLen, got)
			}
		})
	}
}

func TestNewDefaultFactory_Defaults(t *testing.T) {
factory := NewDefaultFactory()

Expand Down
Loading
Loading