Skip to content

Commit 88af95b

Browse files
yuanchen8911lockwobrmchmarny
authored
fix(bundler): re-enable aws-ebs-csi-driver by default and support --set disable (#397)
Signed-off-by: Yuan Chen <yuanchen97@gmail.com> Co-authored-by: Brian Lockwood <lockwobr@gmail.com> Co-authored-by: Mark Chmarny <mchmarny@users.noreply.github.com>
1 parent 482dc78 commit 88af95b

File tree

5 files changed

+202
-11
lines changed

5 files changed

+202
-11
lines changed

docs/user/cli-reference.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ aicr bundle [flags]
632632
| `--output` | `-o` | string | Output directory (default: current dir) |
633633
| `--deployer` | | string | Deployment method: helm (default), argocd |
634634
| `--repo` | | string | Git repository URL for ArgoCD applications (only used with `--deployer argocd`) |
635-
| `--set` | | string[] | Override values in bundle files (repeatable) |
635+
| `--set` | | string[] | Override values in bundle files (repeatable). Use `enabled` key to include/exclude components (e.g., `--set awsebscsidriver:enabled=false`) |
636636
| `--data` | | string | External data directory to overlay on embedded data (see [External Data](#external-data-directory)) |
637637
| `--system-node-selector` | | string[] | Node selector for system components (format: key=value, repeatable) |
638638
| `--system-node-toleration` | | string[] | Toleration for system components (format: key=value:effect, repeatable) |
@@ -717,6 +717,7 @@ Override any value in the generated bundle files using dot notation:
717717
- **Duplicate keys**: When the same `bundler:path` is specified multiple times, the **last value wins**
718718
- **Array values**: Individual array elements cannot be overridden (no `[0]` index syntax). Arrays can only be replaced entirely via recipe overrides, not via `--set` flags. Use recipe-level overrides in `componentRefs[].overrides` if you need to replace an entire array.
719719
- **Type conversion**: String values are automatically converted to appropriate types (`true`/`false` → bool, numeric strings → numbers)
720+
- **Component enable/disable**: The special `enabled` key controls whether a component is included in the bundle. `--set <component>:enabled=false` excludes the component; `--set <component>:enabled=true` re-enables a recipe-disabled component. The `enabled` key is consumed by the bundler and not passed to Helm chart values.
720721

721722
**Examples:**
722723
```shell
@@ -748,6 +749,11 @@ aicr bundle -r recipe.yaml \
748749
--set skyhook-operator:manager.resources.memory.limit=256Mi \
749750
-o ./bundles
750751

752+
# Disable a component at bundle time (e.g., EBS CSI already installed as EKS addon)
753+
aicr bundle -r recipe.yaml \
754+
--set awsebscsidriver:enabled=false \
755+
-o ./bundles
756+
751757
# Schedule system components on specific node pool
752758
aicr bundle -r recipe.yaml \
753759
--system-node-selector nodeGroup=system-pool \

pkg/bundler/bundler.go

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,21 @@ func (b *DefaultBundler) Make(ctx context.Context, input recipe.RecipeInput, dir
188188
"recipe must contain at least one component reference")
189189
}
190190

191-
// Filter out disabled components (overrides.enabled: false)
191+
// Filter out disabled components.
192+
// Check order: --set overrides take precedence over recipe overrides.
193+
// This allows users to enable/disable components at bundle time:
194+
// --set awsebscsidriver:enabled=false (disable)
195+
// --set awsebscsidriver:enabled=true (re-enable)
192196
enabledRefs := make([]recipe.ComponentRef, 0, len(recipeResult.ComponentRefs))
193197
enabledSet := make(map[string]struct{})
194198
for _, ref := range recipeResult.ComponentRefs {
195-
if !ref.IsEnabled() {
199+
if setEnabled, ok := b.getSetEnabledOverride(ref.Name); ok {
200+
if !setEnabled {
201+
slog.Info("skipping component disabled via --set", "component", ref.Name)
202+
continue
203+
}
204+
// --set enabled=true overrides recipe-level disabled
205+
} else if !ref.IsEnabled() {
196206
slog.Info("skipping disabled component", "component", ref.Name)
197207
continue
198208
}
@@ -441,8 +451,19 @@ func (b *DefaultBundler) extractComponentValues(ctx context.Context, recipeResul
441451
values = make(map[string]any)
442452
}
443453

444-
// Apply user value overrides from --set flags
454+
// Apply user value overrides from --set flags.
455+
// Strip "enabled" key — it controls component inclusion, not Helm chart values.
445456
if overrides := b.getValueOverridesForComponent(ref.Name); len(overrides) > 0 {
457+
if _, has := overrides["enabled"]; has {
458+
filtered := make(map[string]string, len(overrides)-1)
459+
for k, v := range overrides {
460+
if k == "enabled" {
461+
continue
462+
}
463+
filtered[k] = v
464+
}
465+
overrides = filtered
466+
}
446467
if applyErr := component.ApplyMapOverrides(values, overrides); applyErr != nil {
447468
slog.Warn("failed to apply some value overrides",
448469
"component", ref.Name,
@@ -506,6 +527,27 @@ func (b *DefaultBundler) getValueOverridesForComponent(componentName string) map
506527
return nil
507528
}
508529

530+
// getSetEnabledOverride checks if --set overrides contain an "enabled" key
531+
// for the given component. Returns (value, true) if found, (false, false) otherwise.
532+
// This allows --set awsebscsidriver:enabled=false to disable a component at bundle time.
533+
func (b *DefaultBundler) getSetEnabledOverride(componentName string) (bool, bool) {
534+
overrides := b.getValueOverridesForComponent(componentName)
535+
if overrides == nil {
536+
return false, false
537+
}
538+
val, ok := overrides["enabled"]
539+
if !ok {
540+
return false, false
541+
}
542+
parsed, parseErr := strconv.ParseBool(val)
543+
if parseErr != nil {
544+
slog.Warn("invalid --set enabled value, ignoring override",
545+
"component", componentName, "value", val, "error", parseErr)
546+
return false, false
547+
}
548+
return parsed, true
549+
}
550+
509551
// applyNodeSchedulingOverrides applies node selectors and tolerations to component values.
510552
// Uses the component registry to determine the correct paths for each component.
511553
func (b *DefaultBundler) applyNodeSchedulingOverrides(componentName string, values map[string]any) {

pkg/bundler/bundler_test.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,150 @@ func TestMake_DisabledComponentsFiltered(t *testing.T) {
335335
}
336336
}
337337

338+
func TestMake_SetEnabledOverridesPrecedence(t *testing.T) {
339+
t.Parallel()
340+
341+
tests := []struct {
342+
name string
343+
recipeEnabled *bool // nil = no override, true/false = overrides.enabled
344+
setEnabled string
345+
expectIncluded bool
346+
}{
347+
{
348+
name: "recipe disabled + --set enabled=true => included",
349+
recipeEnabled: boolPtr(false),
350+
setEnabled: "true",
351+
expectIncluded: true,
352+
},
353+
{
354+
name: "recipe enabled + --set enabled=false => excluded",
355+
recipeEnabled: nil,
356+
setEnabled: "false",
357+
expectIncluded: false,
358+
},
359+
{
360+
name: "recipe disabled + no --set => excluded",
361+
recipeEnabled: boolPtr(false),
362+
setEnabled: "",
363+
expectIncluded: false,
364+
},
365+
{
366+
name: "recipe enabled (default) + no --set => included",
367+
recipeEnabled: nil,
368+
setEnabled: "",
369+
expectIncluded: true,
370+
},
371+
{
372+
name: "invalid --set value ignored => falls back to recipe",
373+
recipeEnabled: boolPtr(false),
374+
setEnabled: "ture",
375+
expectIncluded: false,
376+
},
377+
}
378+
379+
for _, tt := range tests {
380+
t.Run(tt.name, func(t *testing.T) {
381+
t.Parallel()
382+
383+
var bundlerOpts []Option
384+
if tt.setEnabled != "" {
385+
cfg := config.NewConfig(
386+
config.WithValueOverrides(map[string]map[string]string{
387+
"awsebscsidriver": {"enabled": tt.setEnabled},
388+
}),
389+
)
390+
bundlerOpts = append(bundlerOpts, WithConfig(cfg))
391+
}
392+
393+
bundler, err := New(bundlerOpts...)
394+
if err != nil {
395+
t.Fatalf("New() error = %v", err)
396+
}
397+
398+
overrides := map[string]any{}
399+
if tt.recipeEnabled != nil {
400+
overrides["enabled"] = *tt.recipeEnabled
401+
}
402+
403+
recipeResult := &recipe.RecipeResult{
404+
APIVersion: "aicr.nvidia.com/v1alpha1",
405+
Kind: "Recipe",
406+
Criteria: &recipe.Criteria{Service: "eks", Accelerator: "h100", Intent: "training"},
407+
ComponentRefs: []recipe.ComponentRef{
408+
{Name: "gpu-operator", Version: "v25.3.3", Type: "helm", Source: "https://helm.ngc.nvidia.com/nvidia"},
409+
{Name: "aws-ebs-csi-driver", Version: "2.55.0", Type: "helm", Source: "https://kubernetes-sigs.github.io/aws-ebs-csi-driver", Overrides: overrides},
410+
},
411+
DeploymentOrder: []string{"gpu-operator", "aws-ebs-csi-driver"},
412+
}
413+
414+
ctx := context.Background()
415+
tmpDir := t.TempDir()
416+
_, makeErr := bundler.Make(ctx, recipeResult, tmpDir)
417+
if makeErr != nil {
418+
t.Fatalf("Make() error = %v", makeErr)
419+
}
420+
421+
_, statErr := os.Stat(filepath.Join(tmpDir, "aws-ebs-csi-driver"))
422+
included := !os.IsNotExist(statErr)
423+
424+
if included != tt.expectIncluded {
425+
t.Errorf("aws-ebs-csi-driver included=%v, want %v", included, tt.expectIncluded)
426+
}
427+
})
428+
}
429+
}
430+
431+
func TestMake_SetEnabledNotLeakedToHelmValues(t *testing.T) {
432+
t.Parallel()
433+
434+
cfg := config.NewConfig(
435+
config.WithValueOverrides(map[string]map[string]string{
436+
"awsebscsidriver": {"enabled": "true", "controller.replicaCount": "2"},
437+
}),
438+
)
439+
bundler, err := New(WithConfig(cfg))
440+
if err != nil {
441+
t.Fatalf("New() error = %v", err)
442+
}
443+
444+
recipeResult := &recipe.RecipeResult{
445+
APIVersion: "aicr.nvidia.com/v1alpha1",
446+
Kind: "Recipe",
447+
Criteria: &recipe.Criteria{Service: "eks", Accelerator: "h100", Intent: "training"},
448+
ComponentRefs: []recipe.ComponentRef{
449+
{Name: "gpu-operator", Version: "v25.3.3", Type: "helm", Source: "https://helm.ngc.nvidia.com/nvidia"},
450+
{Name: "aws-ebs-csi-driver", Version: "2.55.0", Type: "helm", Source: "https://kubernetes-sigs.github.io/aws-ebs-csi-driver"},
451+
},
452+
DeploymentOrder: []string{"gpu-operator", "aws-ebs-csi-driver"},
453+
}
454+
455+
ctx := context.Background()
456+
tmpDir := t.TempDir()
457+
_, makeErr := bundler.Make(ctx, recipeResult, tmpDir)
458+
if makeErr != nil {
459+
t.Fatalf("Make() error = %v", makeErr)
460+
}
461+
462+
valuesPath := filepath.Join(tmpDir, "aws-ebs-csi-driver", "values.yaml")
463+
valuesData, readErr := os.ReadFile(valuesPath)
464+
if readErr != nil {
465+
t.Fatalf("failed to read values.yaml: %v", readErr)
466+
}
467+
468+
// "enabled" must not appear as a top-level key in the values file
469+
valuesStr := string(valuesData)
470+
if strings.Contains(valuesStr, "enabled: true") {
471+
t.Errorf("enabled key leaked into Helm values:\n%s", valuesStr)
472+
}
473+
474+
// Other overrides should still be applied
475+
if !strings.Contains(valuesStr, "replicaCount") {
476+
t.Errorf("expected controller.replicaCount override in values, got:\n%s", valuesStr)
477+
}
478+
}
479+
480+
func boolPtr(b bool) *bool { return &b }
481+
338482
func TestMake_WithValueOverrides(t *testing.T) {
339483
cfg := config.NewConfig(
340484
config.WithValueOverrides(map[string]map[string]string{

pkg/cli/bundle.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,9 @@ Package with explicit tag (overrides CLI version):
258258
&cli.StringSliceFlag{
259259
Name: "set",
260260
Usage: `Override values in generated bundle files
261-
(format: bundler:path.to.field=value, e.g., --set gpuoperator:gds.enabled=true)`,
261+
(format: component:path.to.field=value, e.g., --set gpuoperator:gds.enabled=true).
262+
Use the special 'enabled' key to include/exclude components at bundle time
263+
(e.g., --set awsebscsidriver:enabled=false to skip aws-ebs-csi-driver)`,
262264
Category: "Deployment",
263265
},
264266
&cli.StringSliceFlag{

recipes/overlays/eks.yaml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,14 @@ spec:
3232

3333
# EKS-specific components
3434
componentRefs:
35-
# Disabled by default: EKS clusters typically have EBS CSI driver
36-
# installed as a managed addon. Deploying it again causes ClusterRole
37-
# conflicts ("managed-by: EKS" vs "managed-by: Helm").
38-
# Users can re-enable with: --set aws-ebs-csi-driver.enabled=true
35+
# EBS CSI driver for persistent volume provisioning on EKS.
36+
# Enabled by default since EKS does not include it as a managed addon.
37+
# Disable with: --set awsebscsidriver:enabled=false (if installed as EKS addon)
3938
- name: aws-ebs-csi-driver
4039
type: Helm
4140
source: https://kubernetes-sigs.github.io/aws-ebs-csi-driver
4241
version: 2.55.0
4342
valuesFile: components/aws-ebs-csi-driver/values.yaml
44-
overrides:
45-
enabled: false
4643

4744
# Enable Prometheus persistent storage for EKS (requires EBS CSI driver)
4845
- name: kube-prometheus-stack

0 commit comments

Comments
 (0)