Skip to content

Commit 1d181d5

Browse files
committed
Add opt-in Helm debug output (--helm-debug CLI flag, HELM_DEBUG make variable)
Signed-off-by: Zoe Blevins <zblevins@nvidia.com>
1 parent fd3c61b commit 1d181d5

5 files changed

Lines changed: 92 additions & 2 deletions

File tree

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,8 @@ local-up:
718718
# Uses a pre-built config profile (default: local-superpod.yaml).
719719
# Override INSTALL_CONFIG with a copied installer config when needed.
720720
HELM_TIMEOUT ?= 15m
721+
# Set HELM_DEBUG to any non-empty value to pass --helm-debug to the installer.
# NOTE: $(if ...) only tests for a non-empty string, so HELM_DEBUG=0 or
# HELM_DEBUG=false also enable it; leave the variable unset/empty to disable.
HELM_DEBUG ?=
722+
HELM_DEBUG_FLAG = $(if $(HELM_DEBUG),--helm-debug,)
721723

722724
kind-up:
723725
@echo "🚀 Deploying NVIDIA Config Manager with installer to Kind (config: $(INSTALL_CONFIG))..."
@@ -733,7 +735,7 @@ kind-up:
733735
--install-envoy-gateway \
734736
--install-cnpg-operator \
735737
--install-cert-manager \
736-
--helm-timeout $(HELM_TIMEOUT)
738+
$(HELM_DEBUG_FLAG) --helm-timeout $(HELM_TIMEOUT)
737739

738740
# Create Kind cluster, deploy NVIDIA Config Manager, and populate with mock topology.
739741
# The topology job is declared in the config profile's content.run_after_deploy,

installer/src/nv_config_manager_installer/cli.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ def generate_values(
180180
@click.option("--install-cert-manager", is_flag=True, help="Install cert-manager.")
181181
@click.option("--install-cnpg-operator", is_flag=True, help="Install CNPG operator.")
182182
@click.option("--helm-timeout", default="15m", help="Helm install/upgrade timeout.")
183+
@click.option("--helm-debug", is_flag=True, help="Enable Helm debug output during install/upgrade.")
183184
@click.option("--recreate-secrets", is_flag=True, help="Recreate existing K8s secrets.")
184185
@click.option("--dry-run", is_flag=True, help="Generate values only, skip helm install.")
185186
def deploy(
@@ -194,6 +195,7 @@ def deploy(
194195
install_cert_manager: bool,
195196
install_cnpg_operator: bool,
196197
helm_timeout: str,
198+
helm_debug: bool,
197199
recreate_secrets: bool,
198200
dry_run: bool,
199201
) -> None:
@@ -222,6 +224,7 @@ def deploy(
222224
install_cert_manager=install_cert_manager,
223225
install_cnpg_operator=install_cnpg_operator,
224226
helm_timeout=helm_timeout,
227+
helm_debug=helm_debug,
225228
recreate_secrets=recreate_secrets,
226229
dry_run=dry_run,
227230
)

installer/src/nv_config_manager_installer/deployer.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ class DeployOptions:
154154
install_cert_manager: bool = False
155155
install_cnpg_operator: bool = False
156156
helm_timeout: str = "15m"
157+
helm_debug: bool = False
157158
recreate_secrets: bool = False
158159
run_tests: bool = False
159160
dry_run: bool = False
@@ -2052,6 +2053,9 @@ def _helm_install(self) -> None:
20522053
self.options.helm_timeout,
20532054
]
20542055

2056+
if self.options.helm_debug:
2057+
helm_args.append("--debug")
2058+
20552059
if size_values.exists():
20562060
helm_args.extend(["-f", str(size_values)])
20572061

@@ -2069,7 +2073,8 @@ def _helm_install(self) -> None:
20692073
f"WARNING: observability enabled but {observability_values} not found"
20702074
)
20712075

2072-
self.callback.on_log(f"Running: helm upgrade --install {release} ...")
2076+
debug_suffix = " --debug" if "--debug" in helm_args else ""
2077+
self.callback.on_log(f"Running: helm upgrade --install {release}{debug_suffix} ...")
20732078
_run_logged(helm_args, step, self.callback, timeout=1200)
20742079
self._finish_step(step)
20752080

installer/tests/test_cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,4 @@ def test_deploy_help(self):
110110
assert "--load-kind" in result.output
111111
assert "--dry-run" in result.output
112112
assert "--helm-timeout" in result.output
113+
assert "--helm-debug" in result.output

installer/tests/test_deployer.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,17 +366,20 @@ def test_defaults(self):
366366
assert opts.build_images is False
367367
assert opts.load_kind is False
368368
assert opts.helm_timeout == "15m"
369+
assert opts.helm_debug is False
369370
assert opts.dry_run is False
370371

371372
def test_custom_options(self):
372373
opts = DeployOptions(
373374
build_images=True,
374375
load_kind=True,
375376
kind_cluster="test-cluster",
377+
helm_debug=True,
376378
dry_run=True,
377379
)
378380
assert opts.build_images is True
379381
assert opts.kind_cluster == "test-cluster"
382+
assert opts.helm_debug is True
380383

381384

382385
class TestImageBuilds:
@@ -544,6 +547,82 @@ def fake_run_logged(cmd, step, callback, **kwargs):
544547
] in logged_commands
545548

546549

550+
class TestHelmInstall:
    """Check when ``--debug`` is added to the ``helm upgrade --install`` command."""

    @staticmethod
    def _capture_helm_command(monkeypatch, tmp_path, options) -> list[str]:
        """Run ``Deployer._helm_install`` with *options* and return the helm command.

        ``_run_logged`` is replaced with a recorder so no process is spawned;
        every command the deployer would have executed is collected instead.

        Args:
            monkeypatch: pytest fixture used to stub ``_run_logged``.
            tmp_path: pytest temporary directory that holds the generated
                values file.
            options: ``DeployOptions`` instance under test.

        Returns:
            The first recorded command that starts with
            ``helm upgrade --install``.
        """
        logged_commands: list[list[str]] = []

        def fake_run_logged(cmd, step, callback, **kwargs):
            logged_commands.append(cmd)
            return MagicMock(returncode=0, stdout="", stderr="")

        monkeypatch.setattr(
            "nv_config_manager_installer.deployer._run_logged", fake_run_logged
        )

        config = _make_config()
        # NOTE(review): presumably airgapped mode avoids extra setup commands
        # in _helm_install - confirm against the deployer.
        config.cluster.airgapped = True
        deployer = Deployer(config, options, RecordingCallback())
        deployer._values_file = tmp_path / "values-generated.yaml"
        deployer._values_file.write_text("global: {}\n")

        deployer._helm_install()

        helm_commands = [
            cmd for cmd in logged_commands if cmd[:3] == ["helm", "upgrade", "--install"]
        ]
        # Fail with a readable message instead of a bare StopIteration when
        # the deployer never issued the helm command.
        assert helm_commands, (
            f"no 'helm upgrade --install' command was run; saw: {logged_commands!r}"
        )
        return helm_commands[0]

    def test_kind_deploy_does_not_enable_helm_debug_without_flag(self, monkeypatch, tmp_path):
        """Loading images into Kind must not implicitly turn on helm debug."""
        helm_cmd = self._capture_helm_command(
            monkeypatch,
            tmp_path,
            DeployOptions(load_kind=True, chart_dir="deploy/helm"),
        )
        assert "--debug" not in helm_cmd

    def test_helm_debug_can_be_enabled_without_kind(self, monkeypatch, tmp_path):
        """``helm_debug=True`` adds ``--debug`` even when ``load_kind`` is unset."""
        helm_cmd = self._capture_helm_command(
            monkeypatch,
            tmp_path,
            DeployOptions(helm_debug=True, chart_dir="deploy/helm"),
        )
        assert "--debug" in helm_cmd

    def test_helm_debug_stays_off_by_default(self, monkeypatch, tmp_path):
        """With default options the helm command carries no ``--debug``."""
        helm_cmd = self._capture_helm_command(
            monkeypatch,
            tmp_path,
            DeployOptions(chart_dir="deploy/helm"),
        )
        assert "--debug" not in helm_cmd
624+
625+
547626
class TestContentHashing:
548627
def test_deterministic_hash(self):
549628
with tempfile.TemporaryDirectory() as d:

0 commit comments

Comments
 (0)