Skip to content

Commit dfa9da9

Browse files
committed
feat(agent): scope secrets RBAC to specific namespaces
Remove cluster-wide secrets access from the default ClusterRole. Add --helm-namespaces and --helm-all-namespaces flags to the snapshot and validate commands; the two flags are mutually exclusive. When neither flag is set, validate auto-derives the namespaces from the recipe's Helm ComponentRefs. Add --skip-helm-check to suppress the helm-values deployment check entirely and to skip creating secrets RBAC. RBAC modes: no-helm (default for snapshot), scoped per-namespace Roles (default for validate), or cluster-wide (--helm-all-namespaces). Config flows to the collector via the AICR_HELM_NAMESPACES env var. Fixes #211
1 parent 37ebab4 commit dfa9da9

File tree

16 files changed

+960
-81
lines changed

16 files changed

+960
-81
lines changed

pkg/cli/snapshot.go

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ func parseSnapshotTemplateOptions(cmd *cli.Command, outFormat serializer.Format)
6262
}, nil
6363
}
6464

65+
// createSnapshotSerializer creates the output serializer based on template options.
66+
func createSnapshotSerializer(tmplOpts *snapshotTemplateOptions) (serializer.Serializer, error) {
67+
if tmplOpts.templatePath != "" {
68+
return serializer.NewTemplateFileWriter(tmplOpts.templatePath, tmplOpts.outputPath)
69+
}
70+
return serializer.NewFileWriterOrStdout(tmplOpts.format, tmplOpts.outputPath)
71+
}
72+
6573
func snapshotCmd() *cli.Command {
6674
return &cli.Command{
6775
Name: "snapshot",
@@ -177,6 +185,14 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
177185
Name: "template",
178186
Usage: "Path to Go template file for custom output formatting (requires YAML format)",
179187
},
188+
&cli.StringSliceFlag{
189+
Name: "helm-namespaces",
190+
Usage: "Namespaces for Helm release collection (creates scoped RBAC for secrets access). Mutually exclusive with --helm-all-namespaces.",
191+
},
192+
&cli.BoolFlag{
193+
Name: "helm-all-namespaces",
194+
Usage: "Grant cluster-wide secrets access for Helm release collection. Mutually exclusive with --helm-namespaces.",
195+
},
180196
outputFlag,
181197
formatFlag,
182198
kubeconfigFlag,
@@ -203,19 +219,9 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
203219
factory := collector.NewDefaultFactory()
204220

205221
// Create output serializer
206-
var ser serializer.Serializer
207-
if tmplOpts.templatePath != "" {
208-
// Use template writer
209-
ser, err = serializer.NewTemplateFileWriter(tmplOpts.templatePath, tmplOpts.outputPath)
210-
if err != nil {
211-
return errors.Wrap(errors.ErrCodeInternal, "failed to create template writer", err)
212-
}
213-
} else {
214-
// Use standard format writer
215-
ser, err = serializer.NewFileWriterOrStdout(tmplOpts.format, tmplOpts.outputPath)
216-
if err != nil {
217-
return errors.Wrap(errors.ErrCodeInternal, "failed to create output writer", err)
218-
}
222+
ser, err := createSnapshotSerializer(tmplOpts)
223+
if err != nil {
224+
return errors.Wrap(errors.ErrCodeInternal, "failed to create output serializer", err)
219225
}
220226

221227
// Build snapshotter configuration
@@ -238,6 +244,13 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
238244
return errors.Wrap(errors.ErrCodeInvalidRequest, "invalid toleration", err)
239245
}
240246

247+
// Validate mutual exclusivity of helm flags
248+
helmNamespaces := cmd.StringSlice("helm-namespaces")
249+
helmAllNamespaces := cmd.Bool("helm-all-namespaces")
250+
if len(helmNamespaces) > 0 && helmAllNamespaces {
251+
return errors.New(errors.ErrCodeInvalidRequest, "--helm-namespaces and --helm-all-namespaces are mutually exclusive")
252+
}
253+
241254
// When running inside an agent Job, collect locally instead of
242255
// deploying another agent (prevents infinite nesting).
243256
if os.Getenv("AICR_AGENT_MODE") == "true" {
@@ -261,6 +274,8 @@ See examples/templates/snapshot-template.md.tmpl for a sample template.
261274
Privileged: cmd.Bool("privileged"),
262275
RequireGPU: cmd.Bool("require-gpu"),
263276
TemplatePath: tmplOpts.templatePath,
277+
HelmNamespaces: helmNamespaces,
278+
HelmAllNamespaces: helmAllNamespaces,
264279
}
265280

266281
return ns.Measure(ctx)

pkg/cli/validate.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"fmt"
2020
"log/slog"
21+
"sort"
2122
"time"
2223

2324
"github.com/urfave/cli/v3"
@@ -47,6 +48,8 @@ type validateAgentConfig struct {
4748
debug bool
4849
privileged bool
4950
requireGPU bool
51+
helmNamespaces []string
52+
helmAllNamespaces bool
5053
}
5154

5255
// parseValidateAgentConfig parses agent deployment flags from the command.
@@ -75,6 +78,8 @@ func parseValidateAgentConfig(cmd *cli.Command) (*validateAgentConfig, error) {
7578
debug: cmd.Bool("debug"),
7679
privileged: cmd.Bool("privileged"),
7780
requireGPU: cmd.Bool("require-gpu"),
81+
helmNamespaces: cmd.StringSlice("helm-namespaces"),
82+
helmAllNamespaces: cmd.Bool("helm-all-namespaces"),
7883
}, nil
7984
}
8085

@@ -132,6 +137,8 @@ func deployAgentForValidation(ctx context.Context, cfg *validateAgentConfig) (*s
132137
Debug: cfg.debug,
133138
Privileged: cfg.privileged,
134139
RequireGPU: cfg.requireGPU,
140+
HelmNamespaces: cfg.helmNamespaces,
141+
HelmAllNamespaces: cfg.helmAllNamespaces,
135142
}
136143

137144
snap, err := snapshotter.DeployAndGetSnapshot(ctx, agentConfig)
@@ -263,6 +270,25 @@ func runValidation(
263270
return nil
264271
}
265272

273+
// helmNamespacesFromRecipe extracts unique namespaces from Helm ComponentRefs.
274+
func helmNamespacesFromRecipe(rec *recipe.RecipeResult) []string {
275+
seen := make(map[string]bool)
276+
for _, ref := range rec.ComponentRefs {
277+
if ref.Type == recipe.ComponentTypeHelm && ref.Namespace != "" {
278+
seen[ref.Namespace] = true
279+
}
280+
}
281+
if len(seen) == 0 {
282+
return nil
283+
}
284+
namespaces := make([]string, 0, len(seen))
285+
for ns := range seen {
286+
namespaces = append(namespaces, ns)
287+
}
288+
sort.Strings(namespaces)
289+
return namespaces
290+
}
291+
266292
func validateCmdFlags() []cli.Flag {
267293
return []cli.Flag{
268294
&cli.StringFlag{
@@ -367,6 +393,18 @@ func validateCmdFlags() []cli.Flag {
367393
Name: "result",
368394
Usage: "Use a saved validation result file as the source for evidence rendering (live validation still runs). Note: saved results do not include diagnostic artifacts captured during live runs. Requires --phase conformance and --evidence-dir.",
369395
},
396+
&cli.BoolFlag{
397+
Name: "skip-helm-check",
398+
Usage: "Skip Helm values deployment check and don't create secrets RBAC",
399+
},
400+
&cli.StringSliceFlag{
401+
Name: "helm-namespaces",
402+
Usage: "Override namespaces for Helm release collection (creates scoped RBAC). Mutually exclusive with --helm-all-namespaces.",
403+
},
404+
&cli.BoolFlag{
405+
Name: "helm-all-namespaces",
406+
Usage: "Grant cluster-wide secrets access for Helm release collection. Mutually exclusive with --helm-namespaces.",
407+
},
370408
outputFlag,
371409
formatFlag,
372410
kubeconfigFlag,
@@ -494,6 +532,28 @@ Use a saved result file for evidence instead of the live run:
494532
return errors.Wrap(errors.ErrCodeInternal, fmt.Sprintf("failed to load recipe from %q", recipeFilePath), err)
495533
}
496534

535+
// Resolve helm namespace config for agent RBAC
536+
skipHelmCheck := cmd.Bool("skip-helm-check")
537+
helmNamespaces := cmd.StringSlice("helm-namespaces")
538+
helmAllNamespaces := cmd.Bool("helm-all-namespaces")
539+
540+
if len(helmNamespaces) > 0 && helmAllNamespaces {
541+
return errors.New(errors.ErrCodeInvalidRequest, "--helm-namespaces and --helm-all-namespaces are mutually exclusive")
542+
}
543+
544+
if !skipHelmCheck && !cmd.IsSet("helm-namespaces") && !helmAllNamespaces {
545+
// Auto-derive from recipe ComponentRefs
546+
helmNamespaces = helmNamespacesFromRecipe(rec)
547+
if len(helmNamespaces) > 0 {
548+
slog.Info("auto-derived helm namespaces from recipe", "namespaces", helmNamespaces)
549+
}
550+
}
551+
552+
if skipHelmCheck {
553+
helmNamespaces = nil
554+
helmAllNamespaces = false
555+
}
556+
497557
// Get snapshot - either from file or by deploying an agent
498558
var snap *snapshotter.Snapshot
499559
var snapshotSource string
@@ -515,6 +575,10 @@ Use a saved result file for evidence instead of the live run:
515575
return cfgErr
516576
}
517577

578+
// Apply resolved helm namespace config
579+
agentCfg.helmNamespaces = helmNamespaces
580+
agentCfg.helmAllNamespaces = helmAllNamespaces
581+
518582
var deployErr error
519583
snap, snapshotSource, deployErr = deployAgentForValidation(ctx, agentCfg)
520584
if deployErr != nil {

pkg/cli/validate_test.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"strings"
2020
"testing"
2121

22+
"github.com/NVIDIA/aicr/pkg/recipe"
2223
"github.com/NVIDIA/aicr/pkg/validator"
2324
)
2425

@@ -213,6 +214,84 @@ func TestValidateCmd_AgentFlags(t *testing.T) {
213214
}
214215
}
215216

217+
func TestHelmNamespacesFromRecipe(t *testing.T) {
218+
tests := []struct {
219+
name string
220+
rec *recipe.RecipeResult
221+
expected []string
222+
}{
223+
{
224+
name: "nil recipe",
225+
rec: &recipe.RecipeResult{},
226+
expected: nil,
227+
},
228+
{
229+
name: "no helm components",
230+
rec: &recipe.RecipeResult{
231+
ComponentRefs: []recipe.ComponentRef{
232+
{Name: "app", Type: recipe.ComponentTypeKustomize, Namespace: "default"},
233+
},
234+
},
235+
expected: nil,
236+
},
237+
{
238+
name: "helm components with namespaces",
239+
rec: &recipe.RecipeResult{
240+
ComponentRefs: []recipe.ComponentRef{
241+
{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
242+
{Name: "network-operator", Type: recipe.ComponentTypeHelm, Namespace: "network-operator"},
243+
},
244+
},
245+
expected: []string{"gpu-operator", "network-operator"},
246+
},
247+
{
248+
name: "deduplicates namespaces",
249+
rec: &recipe.RecipeResult{
250+
ComponentRefs: []recipe.ComponentRef{
251+
{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
252+
{Name: "gpu-feature-discovery", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
253+
},
254+
},
255+
expected: []string{"gpu-operator"},
256+
},
257+
{
258+
name: "skips helm without namespace",
259+
rec: &recipe.RecipeResult{
260+
ComponentRefs: []recipe.ComponentRef{
261+
{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
262+
{Name: "orphan", Type: recipe.ComponentTypeHelm, Namespace: ""},
263+
},
264+
},
265+
expected: []string{"gpu-operator"},
266+
},
267+
{
268+
name: "mixed helm and kustomize",
269+
rec: &recipe.RecipeResult{
270+
ComponentRefs: []recipe.ComponentRef{
271+
{Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Namespace: "gpu-operator"},
272+
{Name: "kustomize-app", Type: recipe.ComponentTypeKustomize, Namespace: "default"},
273+
{Name: "network-operator", Type: recipe.ComponentTypeHelm, Namespace: "network-operator"},
274+
},
275+
},
276+
expected: []string{"gpu-operator", "network-operator"},
277+
},
278+
}
279+
280+
for _, tt := range tests {
281+
t.Run(tt.name, func(t *testing.T) {
282+
got := helmNamespacesFromRecipe(tt.rec)
283+
if len(got) != len(tt.expected) {
284+
t.Fatalf("got %d namespaces, want %d: %v", len(got), len(tt.expected), got)
285+
}
286+
for i, ns := range got {
287+
if ns != tt.expected[i] {
288+
t.Errorf("namespace[%d] = %q, want %q", i, ns, tt.expected[i])
289+
}
290+
}
291+
})
292+
}
293+
}
294+
216295
// hasFlag checks if a cli.Flag has the given name
217296
func hasFlag(flag interface{ Names() []string }, name string) bool {
218297
return slices.Contains(flag.Names(), name)

pkg/collector/factory.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,19 @@ func WithSystemDServices(services []string) Option {
4141
}
4242
}
4343

44+
// WithHelmNamespaces configures the namespaces for Helm release collection.
45+
// nil/empty = skip, ["*"] = all namespaces, ["ns1","ns2"] = scoped.
46+
func WithHelmNamespaces(namespaces []string) Option {
47+
return func(f *DefaultFactory) {
48+
f.HelmNamespaces = namespaces
49+
}
50+
}
51+
4452
// DefaultFactory is the standard Factory implementation; it builds collectors
// wired with production dependencies.
type DefaultFactory struct {
	// SystemDServices lists the systemd services to monitor.
	SystemDServices []string
	// HelmNamespaces is forwarded to the Kubernetes collector and selects the
	// namespaces used for Helm release collection.
	HelmNamespaces []string
}
4958

5059
// NewDefaultFactory creates a new DefaultFactory with default configuration.
@@ -86,5 +95,7 @@ func (f *DefaultFactory) CreateOSCollector() Collector {
8695

8796
// CreateKubernetesCollector creates a Kubernetes API collector.
8897
func (f *DefaultFactory) CreateKubernetesCollector() Collector {
89-
return &k8s.Collector{}
98+
return &k8s.Collector{
99+
HelmNamespaces: f.HelmNamespaces,
100+
}
90101
}

pkg/collector/factory_test.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"testing"
2020

21+
"github.com/NVIDIA/aicr/pkg/collector/k8s"
2122
"github.com/NVIDIA/aicr/pkg/collector/systemd"
2223
)
2324

@@ -87,6 +88,50 @@ func TestWithSystemDServices(t *testing.T) {
8788
}
8889
}
8990

91+
func TestWithHelmNamespaces(t *testing.T) {
92+
tests := []struct {
93+
name string
94+
namespaces []string
95+
wantLen int
96+
}{
97+
{
98+
name: "nil namespaces",
99+
namespaces: nil,
100+
wantLen: 0,
101+
},
102+
{
103+
name: "all namespaces",
104+
namespaces: []string{"*"},
105+
wantLen: 1,
106+
},
107+
{
108+
name: "scoped namespaces",
109+
namespaces: []string{"gpu-operator", "network-operator"},
110+
wantLen: 2,
111+
},
112+
}
113+
114+
for _, tt := range tests {
115+
t.Run(tt.name, func(t *testing.T) {
116+
factory := NewDefaultFactory(WithHelmNamespaces(tt.namespaces))
117+
118+
if len(factory.HelmNamespaces) != tt.wantLen {
119+
t.Errorf("expected %d namespaces, got %d", tt.wantLen, len(factory.HelmNamespaces))
120+
}
121+
122+
// Verify K8s collector gets the namespaces
123+
col := factory.CreateKubernetesCollector()
124+
k8sCol, ok := col.(*k8s.Collector)
125+
if !ok {
126+
t.Fatal("expected *k8s.Collector")
127+
}
128+
if len(k8sCol.HelmNamespaces) != tt.wantLen {
129+
t.Errorf("K8s collector expected %d namespaces, got %d", tt.wantLen, len(k8sCol.HelmNamespaces))
130+
}
131+
})
132+
}
133+
}
134+
90135
func TestNewDefaultFactory_Defaults(t *testing.T) {
91136
factory := NewDefaultFactory()
92137

0 commit comments

Comments
 (0)