diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..2ea1426 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,7 @@ +# Repository Instructions + +## AIR Simulation Demo Credentials + +The AIR simulation intentionally uses hard-coded public demo credentials for ephemeral NVIDIA DSX Air demo nodes and generated demo services. These values are centralized in `installer/src/nv_config_manager_installer/air_sim/constants.py`, including `NVCM_BOX_PASSWORD`, `NVCM_SECRETS`, `NVCM_NETWORK_SECRETS`, and Nautobot demo user defaults. + +Do not report those AIR demo credentials as leaked production secrets. Continue to flag user/API credentials, private keys, NGC API keys, Git tokens, and any credentials outside the intentional AIR demo constants. diff --git a/.gitignore b/.gitignore index d705a5e..4c8c3bd 100644 --- a/.gitignore +++ b/.gitignore @@ -87,6 +87,10 @@ local_dev.ini values-generated.yaml nv-config-manager-install.yaml installer/nv-config-manager-install.yaml +.nvcm-air-sim.yaml +installer/.nvcm-air-sim.yaml +nvcm-air-sim.yaml +installer/nvcm-air-sim.yaml # Spyder project settings .spyderproject diff --git a/Makefile b/Makefile index 8338a70..1441ca3 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .PHONY: help install dev test lint format clean docker-build docker-push ui-install ui-dev ui-build \ local-up local-down local-destroy local-status local-logs deploy kind-up kind-down topology install-cert \ - openapi openapi-check docs-assets docs-assets-check docs-format docs-lint docs-lint-fern docs-live docs-preview docs-publish docs-publish-in-ci docs-screenshots \ + openapi openapi-check docs-assets docs-assets-check docs-format docs-lint docs-lint-fern docs-live docs-preview docs-publish docs-publish-in-ci docs-screenshots docs-air-sim-screenshots \ obs-grafana obs-prometheus obs-loki obs-alloy obs-port-forward obs-port-forward-stop # Configuration @@ -97,6 +97,7 @@ help: @echo " make 
docs-preview - Generate a Fern docs preview" @echo " make docs-publish - Publish the Fern docs" @echo " make docs-screenshots - Regenerate installer TUI screenshots for docs" + @echo " make docs-air-sim-screenshots - Regenerate AIR sim TUI screenshots for docs" @echo "" @echo "Observability (local-dev stack only — requires observability to be enabled in installer config):" @echo " make obs-grafana - Port-forward Grafana -> http://localhost:3000 (admin/admin)" @@ -324,6 +325,9 @@ docs-publish-in-ci: docs-screenshots: cd installer && uv run python scripts/screenshot_tui.py +docs-air-sim-screenshots: + cd installer && uv run python scripts/screenshot_air_sim_tui.py + clean: find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true diff --git a/THIRD_PARTY_LICENSES.md b/THIRD_PARTY_LICENSES.md index 0021ded..d56a3e2 100644 --- a/THIRD_PARTY_LICENSES.md +++ b/THIRD_PARTY_LICENSES.md @@ -63,7 +63,7 @@ The following Python packages are dependencies of NVIDIA Config Manager. 
See `py | requests-aws4auth | MIT | https://github.com/tedder/requests-aws4auth | | brotli | MIT | https://github.com/google/brotli | | slack-sdk | MIT | https://github.com/slackapi/python-slack-sdk | -| air-sdk | MIT | https://pypi.org/project/air-sdk/ | +| nv-air-sdk | MIT | https://pypi.org/project/nv-air-sdk/ | | python-json-logger | BSD-2-Clause | https://github.com/madzak/python-json-logger | | py-markdown-table | MIT | https://github.com/hvalev/py-markdown-table | | mkdocs | BSD-2-Clause | https://github.com/mkdocs/mkdocs | diff --git a/components/nautobot/nautobot-nv-config-manager/nv_config_manager/management/__init__.py b/components/nautobot/nautobot-nv-config-manager/nv_config_manager/management/__init__.py index e69de29..bd904e8 100644 --- a/components/nautobot/nautobot-nv-config-manager/nv_config_manager/management/__init__.py +++ b/components/nautobot/nautobot-nv-config-manager/nv_config_manager/management/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/components/nautobot/nautobot-nv-config-manager/nv_config_manager/migrations/__init__.py b/components/nautobot/nautobot-nv-config-manager/nv_config_manager/migrations/__init__.py index e69de29..bd904e8 100644 --- a/components/nautobot/nautobot-nv-config-manager/nv_config_manager/migrations/__init__.py +++ b/components/nautobot/nautobot-nv-config-manager/nv_config_manager/migrations/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/components/nautobot/nv_config_manager_jobs/bootstrap/load_bootstrap_data.py b/components/nautobot/nv_config_manager_jobs/bootstrap/load_bootstrap_data.py index c9b6d7b..41b4029 100644 --- a/components/nautobot/nv_config_manager_jobs/bootstrap/load_bootstrap_data.py +++ b/components/nautobot/nv_config_manager_jobs/bootstrap/load_bootstrap_data.py @@ -106,6 +106,12 @@ def get_content_types(self, content_type_strings): self.logger.warning(f"Could not find content type: {ct_string}") return content_types + def add_content_types(self, obj, content_type_strings): + """Add content type memberships without removing existing ones; does nothing if none are resolved.""" + content_types = self.get_content_types(content_type_strings) + if content_types: + obj.content_types.add(*content_types) + def run(self): """Execute the job to load bootstrap data.
@@ -314,10 +320,10 @@ def load_roles(self): }, ) - # Set content_types for both new and existing roles + # Add content_types for both new and existing roles without + # removing memberships created by other jobs. if "content_types" in role_data: - content_types = self.get_content_types(role_data["content_types"]) - role.content_types.set(content_types) + self.add_content_types(role, role_data["content_types"]) role.validated_save() if created: @@ -380,10 +386,9 @@ def load_tags(self): }, ) - # Set content_types for both new and existing tags + # Add content_types without removing memberships created by other jobs. if "content_types" in tag_data: - content_types = self.get_content_types(tag_data["content_types"]) - tag.content_types.set(content_types) + self.add_content_types(tag, tag_data["content_types"]) if created: self.logger.success( @@ -585,10 +590,9 @@ def load_location_types(self): }, ) - # Set content_types for both new and existing location types + # Add content_types without removing memberships created by other jobs. if "content_types" in lt_data: - content_types = self.get_content_types(lt_data["content_types"]) - lt.content_types.set(content_types) + self.add_content_types(lt, lt_data["content_types"]) if created: self.logger.success( @@ -709,10 +713,9 @@ def load_statuses(self): }, ) - # Set content_types for both new and existing statuses + # Add content_types without removing memberships created by other jobs. 
if "content_types" in status_data: - content_types = self.get_content_types(status_data["content_types"]) - status.content_types.set(content_types) + self.add_content_types(status, status_data["content_types"]) if created: self.logger.success( diff --git a/components/nautobot/nv_config_manager_jobs/data/config_contexts.yaml b/components/nautobot/nv_config_manager_jobs/data/config_contexts.yaml index 818009c..4e10e91 100644 --- a/components/nautobot/nv_config_manager_jobs/data/config_contexts.yaml +++ b/components/nautobot/nv_config_manager_jobs/data/config_contexts.yaml @@ -20,17 +20,6 @@ role: system-admin deployment_types: [all] -- name: Cumulus Linux Firmware 5.14.0 - description: Intended firmware version for Cumulus Linux switches - weight: 1000 - is_active: true - platforms: - - Cumulus Linux - data: - intended-firmware: - version: "5.14.0" - deployment_types: [all] - - name: NVIDIA Config Manager DHCP Custom Options description: Custom DHCP option definitions for KEA (cumulus-provision-url) weight: 1000 diff --git a/components/nautobot/tests/test_load_bootstrap_data.py b/components/nautobot/tests/test_load_bootstrap_data.py index e3254c4..cc859ca 100644 --- a/components/nautobot/tests/test_load_bootstrap_data.py +++ b/components/nautobot/tests/test_load_bootstrap_data.py @@ -361,7 +361,8 @@ def test_loads_role_with_content_types(self, tmp_path): job.load_roles() Role.objects.get_or_create.assert_called_once() - mock_role.content_types.set.assert_called_once() + mock_role.content_types.add.assert_called_once() + mock_role.content_types.set.assert_not_called() job.logger.success.assert_called() def test_missing_file(self, tmp_path): @@ -395,6 +396,25 @@ def test_creates_tag(self, tmp_path): Tag.objects.update_or_create.assert_called_once() job.logger.success.assert_called() + def test_adds_tag_content_types_without_replacing_existing(self, tmp_path): + mod = _import_module() + from django.contrib.contenttypes.models import ContentType + from 
nautobot.extras.models import Tag + + mock_tag = MagicMock() + mock_ct = MagicMock() + Tag.objects.update_or_create.return_value = (mock_tag, False) + ContentType.objects.get.return_value = mock_ct + + data = [{"name": "dhcp-subnet", "content_types": ["ipam.prefix"]}] + _write_yaml(tmp_path / "tags.yaml", data) + + job = _make_job(mod, tmp_path) + job.load_tags() + + mock_tag.content_types.add.assert_called_once_with(mock_ct) + mock_tag.content_types.set.assert_not_called() + # --------------------------------------------------------------------------- # load_platforms @@ -504,6 +524,51 @@ def test_creates_status(self, tmp_path): Status.objects.update_or_create.assert_called_once() + def test_adds_status_content_types_without_replacing_existing(self, tmp_path): + mod = _import_module() + from django.contrib.contenttypes.models import ContentType + from nautobot.extras.models import Status + + mock_status = MagicMock() + mock_ct = MagicMock() + Status.objects.update_or_create.return_value = (mock_status, False) + ContentType.objects.get.return_value = mock_ct + + data = [{"name": "Active", "content_types": ["ipam.prefix"]}] + _write_yaml(tmp_path / "statuses.yaml", data) + + job = _make_job(mod, tmp_path) + job.load_statuses() + + mock_status.content_types.add.assert_called_once_with(mock_ct) + mock_status.content_types.set.assert_not_called() + + +# --------------------------------------------------------------------------- +# load_location_types +# --------------------------------------------------------------------------- + + +class TestLoadLocationTypes: + def test_adds_location_type_content_types_without_replacing_existing(self, tmp_path): + mod = _import_module() + from django.contrib.contenttypes.models import ContentType + from nautobot.dcim.models import LocationType + + mock_location_type = MagicMock() + mock_ct = MagicMock() + LocationType.objects.get_or_create.return_value = (mock_location_type, False) + ContentType.objects.get.return_value = mock_ct + + 
data = [{"name": "Site", "content_types": ["dcim.device"]}] + _write_yaml(tmp_path / "location_types.yaml", data) + + job = _make_job(mod, tmp_path) + job.load_location_types() + + mock_location_type.content_types.add.assert_called_once_with(mock_ct) + mock_location_type.content_types.set.assert_not_called() + # --------------------------------------------------------------------------- # load_config_context_schemas @@ -534,6 +599,20 @@ def test_creates_schema(self, tmp_path): class TestLoadConfigContexts: + def test_bootstrap_config_contexts_do_not_set_intended_firmware(self): + data_path = Path(__file__).resolve().parents[1] / "nv_config_manager_jobs/data/config_contexts.yaml" + + with data_path.open(encoding="utf-8") as f: + config_contexts = yaml.safe_load(f) or [] # an empty YAML document loads as None + + firmware_contexts = [ + config_context.get("name") + for config_context in config_contexts + if "intended-firmware" in (config_context.get("data") or {}) # tolerate an explicit "data: null" + ] + + assert firmware_contexts == [] + def test_creates_config_context_with_roles_and_platforms(self, tmp_path): mod = _import_module() from nautobot.dcim.models import Platform diff --git a/deploy/airgapped/README.md b/deploy/airgapped/README.md index d154b30..016c3d1 100644 --- a/deploy/airgapped/README.md +++ b/deploy/airgapped/README.md @@ -56,8 +56,8 @@ cd nv-config-manager-airgapped-v1.0.0-amd64 --username '' \ --password-stdin ./installer/install.sh -./installer/nv-config-manager-installer init --config install.yaml -./installer/nv-config-manager-installer deploy install.yaml --chart-dir helm --image-source registry +./installer/nvcm-installer init --config install.yaml +./installer/nvcm-installer deploy install.yaml --chart-dir helm --image-source registry ``` The upload helper uses bundled Skopeo when present, then system Skopeo, then Docker in `--mode auto`. It uploads the packaged chart with `helm push` and writes `image-map.tsv` for image source-to-target mapping. Use `--plain-http` only for local HTTP registries such as `registry:2` test containers.
When using Docker mode with an architecture-specific bundle, pass `--platform linux/amd64` or `--platform linux/arm64` so Docker pushes a single-platform manifest. The installer uses local dependency charts and manifests when `cluster.airgapped` is enabled in the config. diff --git a/deploy/airgapped/create-airgapped.sh b/deploy/airgapped/create-airgapped.sh index d695ac7..e7b937e 100755 --- a/deploy/airgapped/create-airgapped.sh +++ b/deploy/airgapped/create-airgapped.sh @@ -1399,14 +1399,15 @@ echo "Installing nv-config-manager-installer from vendored wheels (offline)..." "$SCRIPT_DIR"/nv_config_manager_installer-*.whl ln -sf "$VENV_DIR/bin/nv-config-manager-installer" "$SCRIPT_DIR/nv-config-manager-installer" +ln -sf "$VENV_DIR/bin/nvcm-installer" "$SCRIPT_DIR/nvcm-installer" echo "" -echo "nv-config-manager-installer is ready." +echo "nvcm-installer is ready. The longer nv-config-manager-installer command is also available." echo "" echo "Usage:" -echo " $SCRIPT_DIR/nv-config-manager-installer init --config install.yaml" -echo " $SCRIPT_DIR/nv-config-manager-installer validate install.yaml" -echo " $SCRIPT_DIR/nv-config-manager-installer deploy install.yaml --chart-dir ../helm --image-source registry" +echo " $SCRIPT_DIR/nvcm-installer init --config install.yaml" +echo " $SCRIPT_DIR/nvcm-installer validate install.yaml" +echo " $SCRIPT_DIR/nvcm-installer deploy install.yaml --chart-dir ../helm --image-source registry" echo "" echo "Or add to PATH:" echo " export PATH=\"$VENV_DIR/bin:\$PATH\"" @@ -1497,8 +1498,8 @@ The helper uploads images from images/image-list.txt and the packaged Helm chart ## Install From Bundle ./installer/install.sh - ./installer/nv-config-manager-installer init --config install.yaml - ./installer/nv-config-manager-installer deploy install.yaml --chart-dir helm --image-source registry + ./installer/nvcm-installer init --config install.yaml + ./installer/nvcm-installer deploy install.yaml --chart-dir helm --image-source registry Configure 
install.yaml image settings to point at the registry image paths written in image-map.tsv. If the target environment preloads node runtimes instead of using a registry, use manifests/load-airgapped-images.sh before deploying. BUNDLE_README @@ -1544,7 +1545,7 @@ print_summary() { echo " 3. cd nv-config-manager-airgapped-${VERSION}-/" echo " 4. Upload images and chart: ./upload-to-registry.sh --registry registry.example.com/nv-config-manager --chart-registry registry.example.com/nv-config-manager/charts --username '' --password-stdin" echo " 5. Install CLI: ./installer/install.sh" - echo " 6. Configure/deploy with ./installer/nv-config-manager-installer" + echo " 6. Configure/deploy with ./installer/nvcm-installer" echo "" } diff --git a/deploy/airgapped/manifests/load-airgapped-images.sh b/deploy/airgapped/manifests/load-airgapped-images.sh index 0271ee6..849262b 100644 --- a/deploy/airgapped/manifests/load-airgapped-images.sh +++ b/deploy/airgapped/manifests/load-airgapped-images.sh @@ -463,7 +463,7 @@ if [[ "$USE_DAEMONSET" == "true" ]]; then echo "Next steps:" echo " Deploy with --airgapped flag:" echo "" - echo " ./installer/nv-config-manager-installer deploy install.yaml --chart-dir helm --image-source registry \\" + echo " ./installer/nvcm-installer deploy install.yaml --chart-dir helm --image-source registry \\" echo " --airgapped \\" echo " --auto-generate-secrets --yes" echo "" @@ -656,7 +656,7 @@ echo "" echo "Next steps:" echo " Deploy with --airgapped flag (images are already loaded, no DaemonSet needed):" echo "" -echo " ./installer/nv-config-manager-installer deploy install.yaml --chart-dir helm --image-source registry \\" +echo " ./installer/nvcm-installer deploy install.yaml --chart-dir helm --image-source registry \\" echo " --airgapped \\" echo " --auto-generate-secrets --yes" echo "" diff --git a/deploy/helm/sample-eso-config.yaml b/deploy/helm/sample-eso-config.yaml index be2122f..5b17a8f 100644 --- a/deploy/helm/sample-eso-config.yaml +++ 
b/deploy/helm/sample-eso-config.yaml @@ -124,13 +124,6 @@ secrets: keys: password: "password" - # AIR service credentials - air: - path: "prod/air" - keys: - ssaClientId: "ssa_client_id" - ssaClientSecret: "ssa_client_secret" - # Jira service account credentials (used by DiagnosticsWorkflow) jira: path: "prod/jira" diff --git a/deploy/helm/sample-nv-config-manager.ini b/deploy/helm/sample-nv-config-manager.ini index 41bca48..6ec5ba7 100644 --- a/deploy/helm/sample-nv-config-manager.ini +++ b/deploy/helm/sample-nv-config-manager.ini @@ -122,13 +122,6 @@ local = false server = https://elasticsearch.example.com domain = nv-config-manager -[temporal.air] -ssa_client_id = -ssa_client_secret = -org_id = org-12345 -air_api_url = https://air.nvidia.com/api -air_node_user = nv-config-manager - # ----------------------------------------------------------------- # Temporal API Configuration (REST API for workflow operations) # ----------------------------------------------------------------- @@ -136,7 +129,6 @@ air_node_user = nv-config-manager # CORS origins allowed to make cross-origin requests with credentials # Comma-separated list of allowed origins (e.g., "https://config-manager.example.com") cors_origins = https://config-manager.example.com -air_node_password = # ============================================================================= # DEVICE / NETWORK CREDENTIALS diff --git a/deploy/helm/templates/_vault-agent.tpl b/deploy/helm/templates/_vault-agent.tpl index 1c894fc..c79448f 100644 --- a/deploy/helm/templates/_vault-agent.tpl +++ b/deploy/helm/templates/_vault-agent.tpl @@ -219,10 +219,6 @@ Consul-template prelude: declare $secret vars for nv-config-manager.ini (same KV {{- printf "{{- $network := secret %q -}}\n" (printf "%s/data/%s" $m $np) -}} {{- $rf := include "nv-config-manager.vault.secretPath" (dict "root" $root "secret" "redfish") -}} {{- printf "{{- $redfish := secret %q -}}\n" (printf "%s/data/%s" $m $rf) -}} -{{- if $root.Values.temporal.air.orgId 
-}} -{{- $ap := include "nv-config-manager.vault.secretPath" (dict "root" $root "secret" "air") -}} -{{- printf "{{- $air := secret %q -}}\n" (printf "%s/data/%s" $m $ap) -}} -{{- end -}} {{- end -}} {{- if $root.Values.networkDhcp.enabled -}} {{- printf "{{- $leasedb := secret %q -}}\n" (printf "%s/data/%s" $m $pg) -}} @@ -372,16 +368,6 @@ nv-config-manager.ini body (consul-template): must stay in sync with vault-secre domain = {{ $root.Values.externalServices.elasticsearch.domain }} {{- end }} - {{- if $root.Values.temporal.air.orgId }} - [temporal.air] - ssa_client_id = {{ include "nv-config-manager.vaultAgent.ctKv2Key" (dict "var" "air" "key" (include "nv-config-manager.vault.keyName" (dict "root" $root "secret" "air" "key" "ssaClientId"))) }} - ssa_client_secret = {{ include "nv-config-manager.vaultAgent.ctKv2Key" (dict "var" "air" "key" (include "nv-config-manager.vault.keyName" (dict "root" $root "secret" "air" "key" "ssaClientSecret"))) }} - org_id = {{ $root.Values.temporal.air.orgId }} - air_api_url = {{ $root.Values.temporal.air.airApiUrl }} - air_node_user = {{ $root.Values.temporal.air.airNodeUser }} - air_node_password = {{ $root.Values.temporal.air.airNodePassword }} - {{- end }} - # ----------------------------------------------------------------- # Temporal API Configuration (REST API for workflow operations) # ----------------------------------------------------------------- diff --git a/deploy/helm/templates/kubernetes-secrets.yaml b/deploy/helm/templates/kubernetes-secrets.yaml index aa7333c..5584b20 100644 --- a/deploy/helm/templates/kubernetes-secrets.yaml +++ b/deploy/helm/templates/kubernetes-secrets.yaml @@ -37,9 +37,6 @@ # lenovo-nv-config-manager-password, bluefield-default-user, # bluefield-default-password, bluefield-nv-config-manager-password # -# Optional: -# - air-creds: ssa-client-id, ssa-client-secret (if temporal.air.orgId set) -# # KEDA Autoscaling (if renderService.autoscaling.prometheus.tls.enabled): # - keda-prometheus-certs: 
ca.crt, tls.crt, tls.key # (Standalone secret, not assembled into nv-config-manager.ini. @@ -204,16 +201,6 @@ spec: domain = {{ .Values.externalServices.elasticsearch.domain }} {{- end }} - {{- if .Values.temporal.air.orgId }} - [temporal.air] - ssa_client_id = $(cat /secrets/air-creds/ssa-client-id) - ssa_client_secret = $(cat /secrets/air-creds/ssa-client-secret) - org_id = {{ .Values.temporal.air.orgId }} - air_api_url = {{ .Values.temporal.air.airApiUrl }} - air_node_user = {{ .Values.temporal.air.airNodeUser }} - air_node_password = {{ .Values.temporal.air.airNodePassword }} - {{- end }} - # ----------------------------------------------------------------- # Temporal API Configuration (REST API for workflow operations) # ----------------------------------------------------------------- @@ -390,11 +377,6 @@ spec: mountPath: /secrets/redfish-creds readOnly: true {{- end }} - {{- if .Values.temporal.air.orgId }} - - name: air-creds - mountPath: /secrets/air-creds - readOnly: true - {{- end }} {{- end }} {{- if .Values.networkDhcp.enabled }} # DHCP secrets @@ -440,11 +422,6 @@ spec: secret: secretName: redfish-creds {{- end }} - {{- if .Values.temporal.air.orgId }} - - name: air-creds - secret: - secretName: air-creds - {{- end }} {{- end }} {{- if .Values.networkDhcp.enabled }} # DHCP secrets diff --git a/deploy/helm/templates/vault-secrets.yaml b/deploy/helm/templates/vault-secrets.yaml index 257c953..26badaf 100644 --- a/deploy/helm/templates/vault-secrets.yaml +++ b/deploy/helm/templates/vault-secrets.yaml @@ -356,32 +356,6 @@ spec: property: {{ include "nv-config-manager.vault.keyName" (dict "root" . 
"secret" "network" "key" "password") }} refreshInterval: 300s -# AIR credentials (optional) -{{- if .Values.temporal.air.orgId }} ---- -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: air-creds-eso - namespace: {{ .Values.global.namespace }} -spec: - secretStoreRef: - name: {{ $vaultSecretStoreName }} - kind: SecretStore - target: - name: air-creds - creationPolicy: Owner - data: - - secretKey: ssa-client-id - remoteRef: - key: "{{ include "nv-config-manager.vault.secretPath" (dict "root" . "secret" "air") }}" - property: {{ include "nv-config-manager.vault.keyName" (dict "root" . "secret" "air" "key" "ssaClientId") }} - - secretKey: ssa-client-secret - remoteRef: - key: "{{ include "nv-config-manager.vault.secretPath" (dict "root" . "secret" "air") }}" - property: {{ include "nv-config-manager.vault.keyName" (dict "root" . "secret" "air" "key" "ssaClientSecret") }} - refreshInterval: 300s -{{- end }} {{- end }} # Redfish/BMC credentials (optional) @@ -868,16 +842,6 @@ spec: domain = {{ .Values.externalServices.elasticsearch.domain }} {{- end }} - {{- if .Values.temporal.air.orgId }} - [temporal.air] - ssa_client_id = {{ "{{ index (fromJson .air_data) \"" }}{{ include "nv-config-manager.vault.keyName" (dict "root" . "secret" "air" "key" "ssaClientId") }}{{ "\" }}" }} - ssa_client_secret = {{ "{{ index (fromJson .air_data) \"" }}{{ include "nv-config-manager.vault.keyName" (dict "root" . 
"secret" "air" "key" "ssaClientSecret") }}{{ "\" }}" }} - org_id = {{ .Values.temporal.air.orgId }} - air_api_url = {{ .Values.temporal.air.airApiUrl }} - air_node_user = {{ .Values.temporal.air.airNodeUser }} - air_node_password = {{ .Values.temporal.air.airNodePassword }} - {{- end }} - # ----------------------------------------------------------------- # Temporal API Configuration (REST API for workflow operations) # ----------------------------------------------------------------- @@ -1043,12 +1007,6 @@ spec: remoteRef: key: "{{ include "nv-config-manager.vault.secretPath" (dict "root" . "secret" "network") }}" - # Fetch air secrets (if configured) - {{- if .Values.temporal.air.orgId }} - - secretKey: air_data - remoteRef: - key: "{{ include "nv-config-manager.vault.secretPath" (dict "root" . "secret" "air") }}" - {{- end }} {{- end }} {{- if .Values.temporal.redfish.enabled }} diff --git a/deploy/helm/values-rbac-open.yaml b/deploy/helm/values-rbac-open.yaml index f61bddb..92a79d7 100644 --- a/deploy/helm/values-rbac-open.yaml +++ b/deploy/helm/values-rbac-open.yaml @@ -114,26 +114,6 @@ rbac: - all execute_roles: - all - - name: AIRCreateSimulationWorkflow - read_roles: - - all - execute_roles: - - all - - name: AIRCreateBlueprintSimulationWorkflow - read_roles: - - all - execute_roles: - - all - - name: AIRDeleteSimulationWorkflow - read_roles: - - all - execute_roles: - - all - - name: AIRValidateSiteWorkflow - read_roles: - - all - execute_roles: - - all - name: ValidateHardwareWorkflow read_roles: - all @@ -179,4 +159,3 @@ rbac: - all execute_roles: - all - diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml index 1af7adc..7377776 100644 --- a/deploy/helm/values.yaml +++ b/deploy/helm/values.yaml @@ -221,7 +221,6 @@ secrets: # redfish-creds → lenovo-default-user, lenovo-default-password, # lenovo-nv-config-manager-password, bluefield-default-user, # bluefield-default-password, bluefield-nv-config-manager-password - # air-creds → ssa-client-id, 
ssa-client-secret # bmc-creds → bmc-creds.json (file body; ESO pulls bmc_data then templates) # -network-secrets → config-secrets.ini (single key; helper networkSecretsName) # -ini → nv-config-manager.ini (single key; helper iniSecretName) @@ -378,11 +377,6 @@ secrets: keys: token: "token" # Note: DHCP lease database password comes from postgres path (dhcpPassword key) - air: - path: "" # Required: e.g., "my-org/nv-config-manager/dev/air" - keys: - ssaClientId: "ssa_client_id" - ssaClientSecret: "ssa_client_secret" nautobotApp: path: "" # Required: e.g., "my-org/nv-config-manager/dev/nautobot-app" keys: @@ -1759,12 +1753,6 @@ temporal: # value: true # constraints: [] - # AIR integration - air: - orgId: "" - airApiUrl: "" - airNodeUser: "" - airNodePassword: "" # Redfish / BMC workflows # Enable only if Redfish provisioning workflows are in use. # When disabled, redfish-creds secret is not required. diff --git a/deploy/scripts/setup-vm-prereqs.sh b/deploy/scripts/setup-vm-prereqs.sh index d807011..af9dac5 100755 --- a/deploy/scripts/setup-vm-prereqs.sh +++ b/deploy/scripts/setup-vm-prereqs.sh @@ -1666,13 +1666,6 @@ secrets: keys: password: "password" - # AIR service credentials - air: - path: ${ENVIRONMENT_PATH}/air - keys: - ssaClientId: "ssa_client_id" - ssaClientSecret: "ssa_client_secret" - # UFM API credentials ufm: path: ${ENVIRONMENT_PATH}/ufm diff --git a/development/air_sim/configs/air_trial.yaml b/development/air_sim/configs/air_trial.yaml new file mode 100644 index 0000000..a78f196 --- /dev/null +++ b/development/air_sim/configs/air_trial.yaml @@ -0,0 +1,28 @@ +topology_path: '' +mock_blueprint: air_trial +deployment_name: demo +simulation_name: nv-config-manager-air-trial-demo +oob_server_name: oob-mgmt-server +server_mode: use-existing +attach_switch: '' +attach_interface: '' +auto_configure: true +git_token: '' +config_manager_repo: https://github.com/NVIDIA/nv-config-manager +size: small +config_manager_ref: main +cumulus_version: '' +deploy: true 
+run_mock_topology_job: true +mock_topology_path: development/mock_topology +template_plugin_paths: +- development/air_sim/template_plugins/superpod-template-plugin +extra_job_paths: [] +extra_run_after_deploy: [] +use_internal: false +org_id: '' +ngc_api_key: '' +wait_timeout: 1800 +deploy_timeout: 3600 +no_aggressive_dhcp: false +no_reset_before_dhcp: false diff --git a/development/air_sim/template_plugins/superpod-template-plugin/README.md b/development/air_sim/template_plugins/superpod-template-plugin/README.md new file mode 100644 index 0000000..0789f53 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/README.md @@ -0,0 +1,10 @@ +# AIR Demo Template Plugin + +This sample plugin provides template entrypoints for the public AIR demo role names: +`OOB-HLEAF`, `OOB-MLEAF`, `TAN-BLEAF`, `TAN-HLEAF`, `TAN-SLEAF`, +`OOB-SPINE`, `TAN-SPINE`, `CIN-LEAF`, and `CIN-SPINE`. + +Each role has a dedicated Cumulus Linux 5.16.1 entrypoint for the demo +topology. The templates inherit only from `cumulus-linux/role_common` in +`nv-config-manager` and keep demo-specific interface, bridge, router, and +VRF content inside this plugin. 
diff --git a/development/air_sim/template_plugins/superpod-template-plugin/pyproject.toml b/development/air_sim/template_plugins/superpod-template-plugin/pyproject.toml new file mode 100644 index 0000000..ee5d05d --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "nv-config-manager-superpod-templates" +version = "0.0.1" +description = "Dedicated templates for the public SuperPOD demo topology" +requires-python = ">=3.11" +dependencies = [] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points."nv_config_manager_templates.plugins"] +nv_config_manager_superpod_templates = "nv_config_manager_superpod_templates" diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/__init__.py b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/__init__.py new file mode 100644 index 0000000..fdaef8e --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/__init__.py @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Template plugin for the public SuperPOD demo topology.""" + +from pathlib import Path + + +def get_template_paths() -> list[Path]: + """Return additional template search paths.""" + return [Path(__file__).parent / "templates"] + + +def get_custom_filters() -> dict: + """Return custom Jinja2 filters.""" + return {} + + +def get_graphql_queries() -> dict: + """Return additional GraphQL queries.""" + return {} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-leaf/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-leaf/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-leaf/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends "cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-leaf/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-leaf/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..c488967 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-leaf/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = true %} +{% set superpod_vtep = true %} +{% set superpod_cin_leaf = true %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include 
"cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + +{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-spine/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-spine/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-spine/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends "cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-spine/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-spine/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..3341a2a --- /dev/null +++ 
b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/cin-spine/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = true %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include "cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + +{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-hleaf/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-hleaf/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-hleaf/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends 
"cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-hleaf/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-hleaf/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..df9d724 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-hleaf/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = false %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include "cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + +{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git 
a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-mleaf/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-mleaf/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-mleaf/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends "cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-mleaf/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-mleaf/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..e043895 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-mleaf/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = false %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include "cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + 
+{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/superpod-demo-common/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-spine/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-spine/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-spine/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends "cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-spine/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-spine/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..df9d724 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/oob-spine/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = false %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% 
endblock bridge %} + +{% block evpn %} +{% include "cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + +{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/bridge.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/bridge.j2 new file mode 100644 index 0000000..94d18ac --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/bridge.j2 @@ -0,0 +1,26 @@ +{% set vlan_interfaces = device_data|interfaces(prefix="vlan") %} +{% if vlan_interfaces %} + bridge: + domain: + br_default: + stp: + priority: 28672 + state: + up: {} + vlan: +{% set vxlans = device_data.data.vxlans|default([]) %} +{% for vlan_intf in vlan_interfaces %} +{% set mapped_vxlans = [] %} +{% for vxlan in vxlans %} +{% if vxlan.vni_type|default("")|lower == "l2" and vxlan.vlan and vxlan.vlan.vid == vlan_intf.vlan_number %} +{% set _ = mapped_vxlans.append(vxlan) %} +{% 
endif %} +{% endfor %} + '{{ vlan_intf.vlan_number }}':{% if not mapped_vxlans %} {}{% else %} + vni: +{% for vxlan in mapped_vxlans %} + '{{ vxlan.vnid }}': {} +{% endfor %} +{% endif %} +{% endfor %} +{% endif %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/evpn.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/evpn.j2 new file mode 100644 index 0000000..de2b507 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/evpn.j2 @@ -0,0 +1,31 @@ +{% set spectrumx = superpod_spectrumx|default(false) %} +{% set cin_leaf = superpod_cin_leaf|default(false) %} +{% if spectrumx %} + evpn: + state: enabled +{% if cin_leaf %} +{% set attached_vrfs = device_data|attached_vrfs|rejectattr("vni", "none")|sort(attribute="vni") %} +{% if attached_vrfs %} + vni: +{% for vrf in attached_vrfs %} + '{{ vrf.vni }}': + rd: {{ device_data|router_id }}:{{ vrf.vni }} +{% if vrf.export_targets or vrf.import_targets %} + route-target: +{% if vrf.export_targets %} + export: +{% for target in vrf.export_targets %} + {{ target }}: {} +{% endfor %} +{% endif %} +{% if vrf.import_targets %} + import: +{% for target in vrf.import_targets %} + {{ target }}: {} +{% endfor %} +{% endif %} +{% endif %} +{% endfor %} +{% endif %} +{% endif %} +{% endif %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/interface.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/interface.j2 new file mode 100644 index 0000000..366e220 --- 
/dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/interface.j2 @@ -0,0 +1,97 @@ + interface: +{% set spectrumx = superpod_spectrumx|default(false) %} +{% set eth0 = device_data|interface_by_name("eth0", false) %} +{% if eth0 %} + eth0: +{% if eth0.description %} + description: {{ eth0.description }} +{% endif %} +{% if eth0.primary_ipv4 %} + ipv4: + address: + {{ eth0.primary_ipv4 }}: {} + gateway: + {{ eth0.primary_ipv4|gateway }}: {} +{% endif %} + type: eth + vrf: mgmt +{% endif %} +{% set lo = device_data|interface_by_name("lo", false) %} +{% if lo %} + lo: +{% if lo.description %} + description: {{ lo.description }} +{% endif %} +{% if lo.primary_ipv4 %} + ipv4: + address: + {{ lo.primary_ipv4 }}: {} +{% endif %} + type: loopback +{% endif %} +{% for intf in device_data|interfaces(prefix="swp") %} +{% set breakout = device_data|breakout_count(intf.name) if not intf.parent and not intf.primary_ipv4 and not intf.untagged_vlan else 0 %} + {{ intf.name }}: +{% if intf.description %} + description: {{ intf.description }} +{% endif %} +{% if intf.primary_ipv4 %} + ipv4: + address: + {{ intf.primary_ipv4 }}: {} +{% endif %} +{% if intf.primary_ipv6 %} + ipv6: + address: + {{ intf.primary_ipv6 }}: {} +{% endif %} +{% if intf.vrf != "default" %} + vrf: {{ intf.vrf }} +{% endif %} +{% if intf.untagged_vlan %} + bridge: + domain: + br_default: + access: {{ intf.untagged_vlan }} +{% endif %} +{% if spectrumx %} + qos: + pfc-watchdog: + state: enable +{% endif %} + link: +{% if breakout %} + breakout: + {{ breakout }}x: {} +{% else %} + mtu: {{ intf.mtu or 9216 }} + state: +{% if intf.enabled %} + up: {} +{% else %} + down: {} +{% endif %} +{% endif %} +{% if spectrumx and intf.connected_interface and intf.connected_interface.device.peer_group in ["CIN-LEAF", "CIN-SPINE"] %} + router: + adaptive-routing: + state: enabled +{% endif %} + type: swp +{% 
endfor %} +{% for vlan in device_data|interfaces(prefix="vlan") %} + {{ vlan.name }}: +{% if vlan.description %} + description: {{ vlan.description }} +{% endif %} +{% if vlan.primary_ipv4 %} + ipv4: + address: + {{ vlan.primary_ipv4 }}: {} +{% endif %} +{% if vlan.vrf != "default" %} + vrf: {{ vlan.vrf }} +{% endif %} + type: svi + vlan: {{ vlan.vlan_number }} +{% endfor %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/nve.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/nve.j2 new file mode 100644 index 0000000..77fd496 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/nve.j2 @@ -0,0 +1,14 @@ +{% if superpod_vtep|default(false) %} + nve: + vxlan: + arp-nd-suppress: enabled + decapsulation: + dscp: + action: preserve + encapsulation: + dscp: + action: copy + source: + address: {{ device_data|router_id }} + state: enabled +{% endif %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/qos.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/qos.j2 new file mode 100644 index 0000000..b1a19e1 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/qos.j2 @@ -0,0 +1,18 @@ +{% if superpod_spectrumx|default(false) %} + qos: + pfc: + default-global: + port-buffer: 363000 + switch-priority: + '3': {} + xoff-threshold: 75000 + xon-threshold: 75000 + roce: + state: enabled + mode: 
lossless + traffic-pool: + default-lossy: + memory-percent: 10 + roce-lossless: + memory-percent: 90 +{% endif %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/router.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/router.j2 new file mode 100644 index 0000000..478a4cd --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/router.j2 @@ -0,0 +1,126 @@ +{% set spectrumx = superpod_spectrumx|default(false) %} +{% set routed_swp = [] %} +{% for intf in device_data|interfaces(prefix="swp") %} +{% if intf.primary_ipv4 %} +{% set _ = routed_swp.append(intf.name) %} +{% endif %} +{% endfor %} +{% if routed_swp %} + router: + bfd: + state: enabled + profile: + default-bfd: + detect-multiplier: 3 + min-rx-interval: 300 + min-tx-interval: 300 + default-bfd-multihop: + detect-multiplier: 3 + min-rx-interval: 1000 + min-tx-interval: 1000 +{% if spectrumx %} + adaptive-routing: + state: enabled + profile: profile-custom +{% endif %} + bgp: + autonomous-system: {{ device_data|asn|asplain }} + graceful-restart: + mode: full +{% if spectrumx %} + path-selection-deferral-time: 180 + restart-time: 180 + stale-routes-time: 180 +{% endif %} + router-id: {{ device_data|router_id }} + state: enabled +{% if spectrumx %} + policy: + prefix-list: + SUPERPOD-CIN-HOST-LINKS: + rule: +{% set host_ns = namespace(index=0) %} +{% for intf in device_data|interfaces(prefix="swp") %} +{% if intf.vrf != "default" and intf.primary_ipv4 %} +{% set host_ns.index = host_ns.index + 1 %} + '{{ host_ns.index * 10 }}': + action: permit + match: + {{ intf.primary_ipv4|network_address }}: + max-prefix-len: 31 + min-prefix-len: 31 +{% endif %} +{% endfor %} + 
'9999': + action: deny + match: + any: {} + type: ipv4 + SUPERPOD-CIN-LOOPBACK: + rule: + '10': + action: permit + match: + {{ device_data|router_id }}/32: + max-prefix-len: 32 + min-prefix-len: 32 + '9999': + action: deny + match: + any: {} + type: ipv4 + route-map: + SUPERPOD-CIN-HOSTS: + rule: + '10': + action: + permit: {} + match: + ip-prefix-list: SUPERPOD-CIN-HOST-LINKS + type: ipv4 + '9999': + action: + deny: {} + match: + type: ipv4 + SUPERPOD-CIN-UNDERLAY: + rule: + '10': + action: + permit: {} + match: + ip-prefix-list: SUPERPOD-CIN-LOOPBACK + type: ipv4 + '9999': + action: + deny: {} + match: + type: ipv4 + SUPERPOD-WECMP-CUMULATIVE: + rule: + '10': + action: + permit: {} + set: + ext-community-bw: cumulative + '9999': + action: + deny: {} + match: + type: ipv4 + SUPERPOD-WECMP-ORIGIN: + rule: + '10': + action: + permit: {} + set: + ext-community-bw: multipaths + '9999': + action: + deny: {} + match: + type: ipv4 +{% else %} + policy: {} +{% endif %} +{% endif %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/service.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/service.j2 new file mode 100644 index 0000000..3337818 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/service.j2 @@ -0,0 +1,38 @@ +{% extends "cumulus-linux/role_common/5.16.1/include/service.j2" %} + +{% block dhcp_relay %} +{% set dhcp_helpers = device_data|dhcp_servers("nvcm", true) %} +{% set downstream = [] %} +{% set upstream = [] %} +{% for intf in device_data|interfaces(prefix="vlan") %} +{% if intf.primary_ipv4 %} +{% set _ = downstream.append(intf.name) %} +{% endif %} +{% endfor %} +{% for intf in 
device_data|interfaces(prefix="swp") %} +{% if intf.primary_ipv4 %} +{% set _ = upstream.append(intf.name) %} +{% endif %} +{% endfor %} +{% if dhcp_helpers and downstream and upstream %} + dhcp-relay: + default: + downstream-interface: +{% for intf_name in downstream|sort %} + {{ intf_name }}: + server-group-name: group_1 +{% endfor %} + server-group: + group_1: + server: +{% for helper_ip in dhcp_helpers %} + {{ helper_ip }}: {} +{% endfor %} + upstream-interface: +{% for intf_name in upstream|sort %} + {{ intf_name }}: {} +{% endfor %} +{% else %} + dhcp-relay: {} +{% endif %} +{% endblock dhcp_relay %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/system.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/system.j2 new file mode 100644 index 0000000..118cdc0 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/system.j2 @@ -0,0 +1,7 @@ +{% extends "cumulus-linux/role_common/5.16.1/include/system.j2" %} + +{% block config_snippet %} +{% if superpod_spectrumx|default(false) %} +{% include "cumulus-linux/role_common/5.16.1/include/ar_profile_snippet.j2" %} +{% endif %} +{% endblock config_snippet %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/vrf.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/vrf.j2 new file mode 100644 index 0000000..aad4e14 --- /dev/null +++ 
b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/superpod-demo-common/include/vrf.j2 @@ -0,0 +1,145 @@ +{% set spectrumx = superpod_spectrumx|default(false) %} +{% set cin_leaf = superpod_cin_leaf|default(false) %} +{% set routed_swp = [] %} +{% set management_prefixes = device_data|management_prefixes %} +{% set management_next_hops = [] %} +{% for intf in device_data|interfaces(prefix="swp") %} +{% if intf.primary_ipv4 %} +{% set _ = routed_swp.append(intf.name) %} +{% endif %} +{% if intf.connected_interface and intf.connected_interface.device.role == "OOB-Server" and intf.connected_interface.device.peer_ipv4 and intf.connected_interface.device.peer_ipv4 not in management_next_hops %} +{% set _ = management_next_hops.append(intf.connected_interface.device.peer_ipv4) %} +{% endif %} +{% endfor %} + vrf: +{% if routed_swp or spectrumx or (management_prefixes and management_next_hops) %} + default: +{% if spectrumx %} + evpn: + state: enabled +{% endif %} +{% if routed_swp or (management_prefixes and management_next_hops) %} + router: +{% if routed_swp %} + bgp: + address-family: + ipv4-unicast: + state: enabled + redistribute: + connected: + state: enabled +{% if spectrumx %} + route-map: SUPERPOD-CIN-UNDERLAY + l2vpn-evpn: + state: enabled +{% endif %} + state: enabled +{% set ns = namespace(has_neighbors=false) %} +{% for intf in device_data|interfaces(prefix="swp") %} +{% if intf.vrf == "default" and intf.connected_interface and intf.connected_interface.device.peer_ipv4 and intf.connected_interface.device.asn %} +{% set ns.has_neighbors = true %} +{% endif %} +{% endfor %} +{% if ns.has_neighbors %} + neighbor: +{% for intf in device_data|interfaces(prefix="swp") %} +{% if intf.vrf == "default" and intf.connected_interface and intf.connected_interface.device.peer_ipv4 and intf.connected_interface.device.asn %} +{% set neighbor = intf.connected_interface.device %} + {{ 
neighbor.peer_ipv4 }}: + description: {{ neighbor.name }} +{% if spectrumx %} + peer-group: {{ neighbor.peer_group }} +{% endif %} + remote-as: {{ neighbor.asn|asplain }} + type: numbered +{% endif %} +{% endfor %} +{% endif %} + path-selection: + multipath: + aspath-ignore: enabled +{% if spectrumx and ns.has_neighbors %} + peer-group: +{% set peer_groups = [] %} +{% for intf in device_data|interfaces(prefix="swp") %} +{% if intf.vrf == "default" and intf.connected_interface and intf.connected_interface.device.peer_ipv4 and intf.connected_interface.device.asn and intf.connected_interface.device.peer_group not in peer_groups %} +{% set _ = peer_groups.append(intf.connected_interface.device.peer_group) %} +{% endif %} +{% endfor %} +{% for peer_group in peer_groups|sort %} + {{ peer_group }}: + address-family: + ipv4-unicast: + community-advertise: {} +{% if peer_group == "CIN-LEAF" %} + policy: + outbound: + route-map: SUPERPOD-WECMP-CUMULATIVE +{% elif peer_group == "CIN-SPINE" %} + policy: + outbound: + route-map: SUPERPOD-WECMP-ORIGIN +{% endif %} + soft-reconfiguration: enabled + l2vpn-evpn: + state: enabled + bfd: + profile: default-bfd +{% endfor %} +{% endif %} +{% endif %} +{% if management_prefixes and management_next_hops %} + static: +{% for prefix in management_prefixes|sort %} + {{ prefix }}: + address-family: ipv4-unicast + via: +{% for next_hop in management_next_hops|sort %} + {{ next_hop }}: + type: ipv4-address +{% endfor %} +{% endfor %} +{% endif %} +{% endif %} +{% endif %} +{% set eth0 = device_data|interface_by_name("eth0", false) %} +{% if eth0 and eth0.primary_ipv4 %} + mgmt: + router: + static: + 0.0.0.0/0: + address-family: ipv4-unicast + via: + {{ eth0.primary_ipv4|gateway }}: + type: ipv4-address +{% endif %} +{% if cin_leaf %} +{% for vrf in device_data|attached_vrfs %} + {{ vrf.name }}: + evpn: + state: enabled + vni: + '{{ vrf.vni }}': {} + router: + bgp: + address-family: + ipv4-unicast: + state: enabled + redistribute: + connected: 
+ state: enabled + route-map: SUPERPOD-CIN-HOSTS + route-export: + to-evpn: + state: enabled + l2vpn-evpn: + state: enabled + autonomous-system: {{ device_data|asn|asplain }} + path-selection: + multipath: + aspath-ignore: enabled + rd: {{ device_data|router_id }}:{{ vrf.vni }} + router-id: {{ device_data|router_id }} + state: enabled +{% endfor %} +{% endif %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-bleaf/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-bleaf/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-bleaf/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends "cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-bleaf/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-bleaf/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..df9d724 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-bleaf/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = false %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include 
"cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + +{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-hleaf/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-hleaf/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-hleaf/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends "cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-hleaf/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-hleaf/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..df9d724 --- /dev/null +++ 
b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-hleaf/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = false %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include "cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + +{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-sleaf/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-sleaf/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-sleaf/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends 
"cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-sleaf/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-sleaf/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..df9d724 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-sleaf/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = false %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include "cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + +{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git 
a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-spine/5.16.1/entrypoint/boot-script.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-spine/5.16.1/entrypoint/boot-script.j2 new file mode 100644 index 0000000..d5530f3 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-spine/5.16.1/entrypoint/boot-script.j2 @@ -0,0 +1 @@ +{% extends "cumulus-linux/role_common/base/boot-script.j2" %} diff --git a/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-spine/5.16.1/entrypoint/startup.yaml.j2 b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-spine/5.16.1/entrypoint/startup.yaml.j2 new file mode 100644 index 0000000..df9d724 --- /dev/null +++ b/development/air_sim/template_plugins/superpod-template-plugin/src/nv_config_manager_superpod_templates/templates/cumulus-linux/tan-spine/5.16.1/entrypoint/startup.yaml.j2 @@ -0,0 +1,40 @@ +{% set superpod_spectrumx = false %} +{% set superpod_vtep = false %} +{% set superpod_cin_leaf = false %} +{% extends "cumulus-linux/role_common/base/startup.yaml.j2" %} + +{% block bridge %} +{% include "cumulus-linux/superpod-demo-common/include/bridge.j2" %} +{% endblock bridge %} + +{% block evpn %} +{% include "cumulus-linux/superpod-demo-common/include/evpn.j2" %} +{% endblock evpn %} + +{% block interfaces %} +{% include "cumulus-linux/superpod-demo-common/include/interface.j2" %} +{% endblock interfaces %} + +{% block nve %} +{% include "cumulus-linux/superpod-demo-common/include/nve.j2" %} +{% endblock nve %} + +{% block qos %} +{% include "cumulus-linux/superpod-demo-common/include/qos.j2" %} +{% endblock qos %} + 
+{% block router %} +{% include "cumulus-linux/superpod-demo-common/include/router.j2" %} +{% endblock router %} + +{% block service %} +{% include "cumulus-linux/role_common/5.16.1/include/service.j2" %} +{% endblock service %} + +{% block system %} +{% include "cumulus-linux/superpod-demo-common/include/system.j2" %} +{% endblock system %} + +{% block vrf %} +{% include "cumulus-linux/superpod-demo-common/include/vrf.j2" %} +{% endblock vrf %} diff --git a/development/mock_topology/context/__init__.py b/development/mock_topology/context/__init__.py index 2fe66a0..0314235 100644 --- a/development/mock_topology/context/__init__.py +++ b/development/mock_topology/context/__init__.py @@ -79,6 +79,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self._load_device_types() self._load_aggregate_prefixes() self._load_prefixes() + self._assign_ip_address_parent_prefixes() self._load_vrfs() self._load_vlans() self._load_overlays() @@ -102,7 +103,8 @@ def _load_devices(self) -> None: device = data.get("device") if not device: continue - self._ensure_mock_device_serial(device) + self._ensure_mock_device_serial(device, json_file) + self._require_cumulus_eth0_mac(device, json_file) self.json["devices"].append(device) self.json["overlay_payloads"].append( { @@ -115,13 +117,42 @@ def _load_devices(self) -> None: print(f"Warning: Could not load {json_file}: {e}") @staticmethod - def _ensure_mock_device_serial(device: dict[str, Any]) -> None: - """Give mock devices stable serials so ZTP/DHCP can identify them.""" + def _ensure_mock_device_serial(device: dict[str, Any], source: Path) -> None: + """Require Cumulus serials and give other mock devices stable serials.""" if device.get("serial"): return + platform_name = (device.get("platform") or {}).get("name", "") + if "Cumulus" in platform_name: + device_name = device.get("name") or source.name + raise ValueError( + f"Mock Cumulus device {device_name} in {source} must define " + "serial for ZTP validation" + ) device_name 
= device.get("name") or str(device.get("id") or "device") device["serial"] = f"MOCK-{slugify(device_name).upper().replace('-', '')}" + @staticmethod + def _require_cumulus_eth0_mac(device: dict[str, Any], source: Path) -> None: + """Require explicit eth0 MACs for Cumulus mock devices.""" + platform_name = (device.get("platform") or {}).get("name", "") + if "Cumulus" not in platform_name: + return + eth0 = next( + ( + interface + for interface in device.get("interfaces", []) + if interface.get("name") == "eth0" + ), + None, + ) + if eth0 and eth0.get("mac_address"): + return + device_name = device.get("name") or source.name + raise ValueError( + f"Mock Cumulus device {device_name} in {source} must define " + "interfaces[].name=eth0 mac_address for DHCP/ZTP reservations" + ) + def _prune_dangling_connected_interfaces(self) -> None: """Remove interfaces connected to devices that are not in this mock sample.""" device_names = {device["name"] for device in self.json["devices"] if device.get("name")} @@ -273,6 +304,40 @@ def _load_prefixes(self) -> None: self.json["prefixes"] = prefix_list + def _assign_ip_address_parent_prefixes(self) -> None: + """Annotate interface IPs with their most specific containing prefix.""" + prefixes = [] + for prefix in self.json.get("prefixes", []): + try: + prefixes.append( + ( + ipaddress.ip_network(prefix["prefix"], strict=False), + prefix["prefix"], + ) + ) + except ValueError: + continue + + for device in self.json["devices"]: + for interface in device.get("interfaces", []): + for address in interface.get("ip_addresses", []): + ip_address = address.get("address") + if not ip_address: + continue + try: + host = ipaddress.ip_interface(ip_address).ip + except ValueError: + continue + + containing_prefixes = [ + (prefix.prefixlen, prefix_str) + for prefix, prefix_str in prefixes + if host in prefix + ] + if containing_prefixes: + containing_prefixes.sort(reverse=True) + address["parent_prefix"] = containing_prefixes[0][1] + def 
_load_vrfs(self) -> None: """Load VRF data referenced by devices and interfaces.""" vrfs = {} @@ -555,24 +620,28 @@ def _load_roles(self) -> None: Excludes roles that are managed by the bootstrap job to avoid conflicts. """ - # Get roles from devices and interfaces - roles = self._extract_content_type_data("role") - # Filter out bootstrap-managed roles - roles = [r for r in roles if r["name"] not in self.BOOTSTRAP_MANAGED_ROLES] - role_names = {r["name"] for r in roles} + role_content_types = { + role["name"]: set(role["content_types"]) + for role in self._extract_content_type_data("role") + } - # Add roles from aggregate prefixes (excluding bootstrap-managed ones) + # Add prefix role membership from aggregate prefixes. This can extend an + # existing role such as Loopback from dcim.interface to ipam.prefix. for agg in self.json.get("aggregate_prefixes", []): role_name = agg.get("role") - if ( - role_name - and role_name not in role_names - and role_name not in self.BOOTSTRAP_MANAGED_ROLES - ): - roles.append({"name": role_name, "content_types": ["ipam.prefix"]}) - role_names.add(role_name) + if role_name: + role_content_types.setdefault(role_name, set()).add("ipam.prefix") - self.json["roles"] = roles + self.json["roles"] = [ + {"name": name, "content_types": sorted(content_types)} + for name, content_types in sorted(role_content_types.items()) + if name not in self.BOOTSTRAP_MANAGED_ROLES + ] + self.json["role_content_type_extensions"] = [ + {"name": name, "content_types": sorted(content_types)} + for name, content_types in sorted(role_content_types.items()) + if name in self.BOOTSTRAP_MANAGED_ROLES + ] # Tags managed by bootstrap job - skip creating these in designs BOOTSTRAP_MANAGED_TAGS = { @@ -632,10 +701,37 @@ def _load_config_contexts(self) -> None: try: with open(locations_file) as f: data = yaml.safe_load(f) - self.json["config_contexts"] = data.get("config_contexts", []) + self.json["config_contexts"] = [ + 
self._render_config_context_metadata(config_context) + for config_context in data.get("config_contexts", []) + ] except (OSError, yaml.YAMLError) as e: print(f"Warning: Could not load config contexts from {locations_file}: {e}") + def _render_config_context_metadata(self, config_context: dict[str, Any]) -> dict[str, Any]: + """Render deployment-scoped config context lookups without touching data.""" + rendered = dict(config_context) + + for key in ("name", "description"): + if isinstance(rendered.get(key), str): + rendered[key] = self._render_deployment_name(rendered[key]) + + for key in ("locations", "roles", "tenants", "tags"): + if isinstance(rendered.get(key), list): + rendered[key] = [ + self._render_deployment_name(value) if isinstance(value, str) else value + for value in rendered[key] + ] + + return rendered + + def _render_deployment_name(self, value: str) -> str: + """Render the deployment name placeholder used by static context YAML.""" + deployment_name = getattr(self, "deployment_name", "") + return value.replace("{{ deployment_name }}", deployment_name).replace( + "{{deployment_name}}", deployment_name + ) + def get_device_ref(self, manufacturer: str, model: str) -> str: """Get the device reference for a given manufacturer and model.""" return slugify(f"{manufacturer}_{model}") diff --git a/development/mock_topology/context/air_superpod/README.md b/development/mock_topology/context/air_superpod/README.md new file mode 100644 index 0000000..3781b12 --- /dev/null +++ b/development/mock_topology/context/air_superpod/README.md @@ -0,0 +1,6 @@ +# AIR SuperPOD Mock Topology Context + +This context is the source of truth for the AIR SuperPOD demo topology and the +Nautobot mock data loaded by the Design Builder mock topology job. The AIR sim +generates its temporary AIR topology YAML from these device JSON files, so there +is no separate maintained topology export for the built-in SuperPOD demo. 
diff --git a/development/mock_topology/context/air_superpod/devices/oob-mgmt-server.json b/development/mock_topology/context/air_superpod/devices/oob-mgmt-server.json new file mode 100644 index 0000000..8ae6f7c --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/oob-mgmt-server.json @@ -0,0 +1,81 @@ +{ + "data": { + "device": { + "id": "air-superpod-oob-mgmt-server", + "name": "oob-mgmt-server", + "tags": [], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-Server" + }, + "device_type": { + "manufacturer": { + "name": "Generic" + }, + "model": "Generic Server" + }, + "status": { + "name": "Active" + }, + "serial": "auto", + "config_context": {}, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "exit" + }, + { + "name": "eth1", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.0.0/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-mleaf01:swp1", + "role": { + "name": "OOB-Uplink" + }, + "mac_address": "44:38:39:01:00:01", + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + } + } + ], + "platform": { + "name": "Ubuntu" + }, + "_air": { + "os": "generic/ubuntu2404", + "cpu": 16, + "memory": 32768, + "storage": 100 + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-cin-spine01.json b/development/mock_topology/context/air_superpod/devices/su00-cin-spine01.json new file mode 100644 index 0000000..35f837d --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-cin-spine01.json @@ -0,0 +1,158 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-cin-spine01", + "name": "su00-cin-spine01", + 
"tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "CIN-SPINE" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:02", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65301 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.15/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.25/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:02", + "connected_interface": { + "name": "swp6", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf01:swp6", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.103.0.1/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-cin-leaf-r01:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-cin-leaf-r01", + "role": { + "name": "CIN-LEAF" + } + } + } + }, + { + "name": "swp2", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.103.0.5/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-cin-leaf-r02:swp49", + 
"mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-cin-leaf-r02", + "role": { + "name": "CIN-LEAF" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-cin-spine02.json b/development/mock_topology/context/air_superpod/devices/su00-cin-spine02.json new file mode 100644 index 0000000..bd3acea --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-cin-spine02.json @@ -0,0 +1,158 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-cin-spine02", + "name": "su00-cin-spine02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "CIN-SPINE" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:03", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65302 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.16/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.149/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:03", + "connected_interface": { + "name": "swp6", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf02:swp6", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + 
"name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.103.0.3/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-cin-leaf-r01:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-cin-leaf-r01", + "role": { + "name": "CIN-LEAF" + } + } + } + }, + { + "name": "swp2", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.103.0.7/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-cin-leaf-r02:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-cin-leaf-r02", + "role": { + "name": "CIN-LEAF" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-control01.json b/development/mock_topology/context/air_superpod/devices/su00-control01.json new file mode 100644 index 0000000..93473d6 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-control01.json @@ -0,0 +1,87 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-control01", + "name": "su00-control01", + "tags": [], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "Control-Server" + }, + "device_type": { + "manufacturer": { + "name": "Generic" + }, + "model": "Generic Server" + }, + "status": { + "name": "Active" + }, + "serial": "auto", + "config_context": {}, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "bmc0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-oob-mleaf01:swp10", + "mtu": 9216, + "role": { + "name": "Management" + }, + "connected_interface": { + "name": "swp10", + "device": { + 
"name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + } + }, + { + "name": "ens1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-bleaf01:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-tan-bleaf01", + "role": { + "name": "TAN-BLEAF" + } + } + } + } + ], + "platform": { + "name": "Ubuntu" + }, + "_air": { + "os": "generic/ubuntu2404", + "cpu": 2, + "memory": 4096 + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-control02.json b/development/mock_topology/context/air_superpod/devices/su00-control02.json new file mode 100644 index 0000000..6e6796f --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-control02.json @@ -0,0 +1,87 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-control02", + "name": "su00-control02", + "tags": [], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "Control-Server" + }, + "device_type": { + "manufacturer": { + "name": "Generic" + }, + "model": "Generic Server" + }, + "status": { + "name": "Active" + }, + "serial": "auto", + "config_context": {}, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "bmc0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-oob-mleaf02:swp10", + "mtu": 9216, + "role": { + "name": "Management" + }, + "connected_interface": { + "name": "swp10", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + } + }, + { + "name": "ens1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-bleaf02:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + 
"connected_interface": { + "name": "swp1", + "device": { + "name": "su01-tan-bleaf02", + "role": { + "name": "TAN-BLEAF" + } + } + } + } + ], + "platform": { + "name": "Ubuntu" + }, + "_air": { + "os": "generic/ubuntu2404", + "cpu": 2, + "memory": 4096 + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-oob-spine01.json b/development/mock_topology/context/air_superpod/devices/su00-oob-spine01.json new file mode 100644 index 0000000..31a6dc8 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-oob-spine01.json @@ -0,0 +1,198 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-oob-spine01", + "name": "su00-oob-spine01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-SPINE" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:06", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65121 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.5/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "ip_addresses": [], + "mac_address": "44:38:39:00:00:06" + }, + { + "name": "swp1", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.1/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-mleaf01:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": 
"swp49", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + } + }, + { + "name": "swp2", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.5/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-mleaf02:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + } + }, + { + "name": "swp3", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.9/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-hleaf01:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-oob-hleaf01", + "role": { + "name": "OOB-HLEAF" + } + } + } + }, + { + "name": "swp4", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.13/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-hleaf02:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-oob-hleaf02", + "role": { + "name": "OOB-HLEAF" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-oob-spine02.json b/development/mock_topology/context/air_superpod/devices/su00-oob-spine02.json new file mode 100644 index 0000000..7e5d0c9 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-oob-spine02.json @@ -0,0 +1,198 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-oob-spine02", + "name": 
"su00-oob-spine02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-SPINE" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:07", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65122 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.6/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "ip_addresses": [], + "mac_address": "44:38:39:00:00:07" + }, + { + "name": "swp1", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.3/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-mleaf01:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + } + }, + { + "name": "swp2", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.7/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-mleaf02:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + } + }, + { + "name": "swp3", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": 
true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.11/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-hleaf01:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-oob-hleaf01", + "role": { + "name": "OOB-HLEAF" + } + } + } + }, + { + "name": "swp4", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.15/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-oob-hleaf02:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-oob-hleaf02", + "role": { + "name": "OOB-HLEAF" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-tan-spine01.json b/development/mock_topology/context/air_superpod/devices/su00-tan-spine01.json new file mode 100644 index 0000000..2114c0d --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-tan-spine01.json @@ -0,0 +1,278 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-tan-spine01", + "name": "su00-tan-spine01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-SPINE" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:08", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65201 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.7/32", + "mask_length": 
32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.17/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:08", + "connected_interface": { + "name": "swp2", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf01:swp2", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.1/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-hleaf01:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-tan-hleaf01", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp2", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.5/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-hleaf02:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-tan-hleaf02", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp3", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.9/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-sleaf01:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-tan-sleaf01", + "role": { + "name": "TAN-SLEAF" + } + } + } + }, + { + 
"name": "swp4", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.13/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-sleaf02:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-tan-sleaf02", + "role": { + "name": "TAN-SLEAF" + } + } + } + }, + { + "name": "swp5", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.17/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-bleaf01:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-tan-bleaf01", + "role": { + "name": "TAN-BLEAF" + } + } + } + }, + { + "name": "swp6", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.21/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-bleaf02:swp49", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp49", + "device": { + "name": "su01-tan-bleaf02", + "role": { + "name": "TAN-BLEAF" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su00-tan-spine02.json b/development/mock_topology/context/air_superpod/devices/su00-tan-spine02.json new file mode 100644 index 0000000..f2b34ee --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su00-tan-spine02.json @@ -0,0 +1,278 @@ +{ + "data": { + "device": { + "id": "air-superpod-su00-tan-spine02", + "name": "su00-tan-spine02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public 
Demo" + }, + "role": { + "name": "TAN-SPINE" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:09", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65202 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.8/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.145/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:09", + "connected_interface": { + "name": "swp2", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf02:swp2", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.3/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-hleaf01:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-tan-hleaf01", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp2", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.7/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-hleaf02:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + 
"device": { + "name": "su01-tan-hleaf02", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp3", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.11/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-sleaf01:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-tan-sleaf01", + "role": { + "name": "TAN-SLEAF" + } + } + } + }, + { + "name": "swp4", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.15/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-sleaf02:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-tan-sleaf02", + "role": { + "name": "TAN-SLEAF" + } + } + } + }, + { + "name": "swp5", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.19/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-bleaf01:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-tan-bleaf01", + "role": { + "name": "TAN-BLEAF" + } + } + } + }, + { + "name": "swp6", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.23/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-tan-bleaf02:swp50", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "swp50", + "device": { + "name": "su01-tan-bleaf02", + "role": { + "name": "TAN-BLEAF" + } + } + } + } + ], + "platform": 
{ + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-cin-leaf-r01.json b/development/mock_topology/context/air_superpod/devices/su01-cin-leaf-r01.json new file mode 100644 index 0000000..94b2533 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-cin-leaf-r01.json @@ -0,0 +1,188 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-cin-leaf-r01", + "name": "su01-cin-leaf-r01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "CIN-LEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:0a", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65311 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.17/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.27/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:0a", + "connected_interface": { + "name": "swp7", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf01:swp7", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "172.16.0.1/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": 
"su01-compute-tray-r01:ens3", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens3", + "device": { + "name": "su01-compute-tray-r01", + "role": { + "name": "Compute-Tray" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.103.0.0/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-cin-spine01:swp1", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su00-cin-spine01", + "role": { + "name": "CIN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.103.0.2/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-cin-spine02:swp1", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su00-cin-spine02", + "role": { + "name": "CIN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-cin-leaf-r02.json b/development/mock_topology/context/air_superpod/devices/su01-cin-leaf-r02.json new file mode 100644 index 0000000..31d8cd9 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-cin-leaf-r02.json @@ -0,0 +1,188 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-cin-leaf-r02", + "name": "su01-cin-leaf-r02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "CIN-LEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:0b", + "config_context": { + 
"intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65312 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.18/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.150/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:0b", + "connected_interface": { + "name": "swp7", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf02:swp7", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "172.16.0.129/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-compute-tray-r02:ens3", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens3", + "device": { + "name": "su01-compute-tray-r02", + "role": { + "name": "Compute-Tray" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.103.0.4/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-cin-spine01:swp2", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp2", + "device": { + "name": "su00-cin-spine01", + "role": { + "name": "CIN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + 
"ip_addresses": [ + { + "address": "10.103.0.6/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-cin-spine02:swp2", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp2", + "device": { + "name": "su00-cin-spine02", + "role": { + "name": "CIN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-compute-tray-r01.json b/development/mock_topology/context/air_superpod/devices/su01-compute-tray-r01.json new file mode 100644 index 0000000..97f7c37 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-compute-tray-r01.json @@ -0,0 +1,117 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-compute-tray-r01", + "name": "su01-compute-tray-r01", + "tags": [], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "Compute-Tray" + }, + "device_type": { + "manufacturer": { + "name": "Generic" + }, + "model": "Generic Server" + }, + "status": { + "name": "Active" + }, + "serial": "auto", + "config_context": {}, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "bmc0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-oob-hleaf01:swp1", + "mtu": 9216, + "role": { + "name": "Management" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-oob-hleaf01", + "role": { + "name": "OOB-HLEAF" + } + } + } + }, + { + "name": "ens1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-hleaf01:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-tan-hleaf01", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "ens3", + "type": 
"400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "172.16.0.0/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-cin-leaf-r01:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-cin-leaf-r01", + "role": { + "name": "CIN-LEAF" + } + } + } + } + ], + "platform": { + "name": "Ubuntu" + }, + "_air": { + "os": "generic/ubuntu2404", + "cpu": 2, + "memory": 4096 + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-compute-tray-r02.json b/development/mock_topology/context/air_superpod/devices/su01-compute-tray-r02.json new file mode 100644 index 0000000..ec8ccc7 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-compute-tray-r02.json @@ -0,0 +1,117 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-compute-tray-r02", + "name": "su01-compute-tray-r02", + "tags": [], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "Compute-Tray" + }, + "device_type": { + "manufacturer": { + "name": "Generic" + }, + "model": "Generic Server" + }, + "status": { + "name": "Active" + }, + "serial": "auto", + "config_context": {}, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "bmc0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-oob-hleaf02:swp1", + "mtu": 9216, + "role": { + "name": "Management" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-oob-hleaf02", + "role": { + "name": "OOB-HLEAF" + } + } + } + }, + { + "name": "ens1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-hleaf02:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + 
"connected_interface": { + "name": "swp1", + "device": { + "name": "su01-tan-hleaf02", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "ens3", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "172.16.0.128/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su01-cin-leaf-r02:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-cin-leaf-r02", + "role": { + "name": "CIN-LEAF" + } + } + } + } + ], + "platform": { + "name": "Ubuntu" + }, + "_air": { + "os": "generic/ubuntu2404", + "cpu": 2, + "memory": 4096 + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-oob-hleaf01.json b/development/mock_topology/context/air_superpod/devices/su01-oob-hleaf01.json new file mode 100644 index 0000000..130a440 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-oob-hleaf01.json @@ -0,0 +1,162 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-oob-hleaf01", + "name": "su01-oob-hleaf01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN2201" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:0e", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65111 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.3/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + 
"enabled": true, + "mgmt_only": true, + "ip_addresses": [], + "mac_address": "44:38:39:00:00:0e" + }, + { + "name": "swp49", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.8/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-oob-spine01:swp3", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp3", + "device": { + "name": "su00-oob-spine01", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.10/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-oob-spine02:swp3", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp3", + "device": { + "name": "su00-oob-spine02", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp1", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-compute-tray-r01:bmc0", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "bmc0", + "device": { + "name": "su01-compute-tray-r01", + "role": { + "name": "Compute-Tray" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-oob-hleaf02.json b/development/mock_topology/context/air_superpod/devices/su01-oob-hleaf02.json new file mode 100644 index 0000000..0fde1ed --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-oob-hleaf02.json @@ -0,0 +1,162 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-oob-hleaf02", + "name": "su01-oob-hleaf02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + 
"tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN2201" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:0f", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65112 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.4/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "ip_addresses": [], + "mac_address": "44:38:39:00:00:0f" + }, + { + "name": "swp49", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.12/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-oob-spine01:swp4", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp4", + "device": { + "name": "su00-oob-spine01", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.14/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-oob-spine02:swp4", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp4", + "device": { + "name": "su00-oob-spine02", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp1", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-compute-tray-r02:bmc0", + 
"mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "bmc0", + "device": { + "name": "su01-compute-tray-r02", + "role": { + "name": "Compute-Tray" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-oob-mleaf01.json b/development/mock_topology/context/air_superpod/devices/su01-oob-mleaf01.json new file mode 100644 index 0000000..c2afa64 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-oob-mleaf01.json @@ -0,0 +1,414 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-oob-mleaf01", + "name": "su01-oob-mleaf01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-MLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN2201" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:10", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65101 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.1/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "ip_addresses": [], + "mac_address": "44:38:39:00:00:10" + }, + { + "name": "Vlan100", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.1/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "description": "OOB management gateway for devices cabled to su01-oob-mleaf01", + "mode": "access", + "role": { + "name": "Management" + }, + 
"untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + } + }, + { + "name": "swp1", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.0.1/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "oob-mgmt-server:eth1", + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "eth1", + "device": { + "name": "oob-mgmt-server", + "role": { + "name": "OOB-Server" + } + } + } + }, + { + "name": "swp2", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su00-tan-spine01:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su00-tan-spine01", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp3", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-bleaf01:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-tan-bleaf01", + "role": { + "name": "TAN-BLEAF" + } + } + } + }, + { + "name": "swp4", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-hleaf01:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-tan-hleaf01", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp5", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": 
true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-sleaf01:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-tan-sleaf01", + "role": { + "name": "TAN-SLEAF" + } + } + } + }, + { + "name": "swp6", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su00-cin-spine01:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su00-cin-spine01", + "role": { + "name": "CIN-SPINE" + } + } + } + }, + { + "name": "swp7", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-cin-leaf-r01:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-cin-leaf-r01", + "role": { + "name": "CIN-LEAF" + } + } + } + }, + { + "name": "swp49", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.0/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-oob-spine01:swp1", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su00-oob-spine01", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.2/31", + "mask_length": 31, + "ip_version": 4 + } + ], + 
"description": "su00-oob-spine02:swp1", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su00-oob-spine02", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp10", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su00-control01:bmc0", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "bmc0", + "device": { + "name": "su00-control01", + "role": { + "name": "Control-Server" + } + } + } + }, + { + "name": "swp11", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-storage01:bmc0", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "bmc0", + "device": { + "name": "su01-storage01", + "role": { + "name": "Storage-Server" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-oob-mleaf02.json b/development/mock_topology/context/air_superpod/devices/su01-oob-mleaf02.json new file mode 100644 index 0000000..e9e52db --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-oob-mleaf02.json @@ -0,0 +1,361 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-oob-mleaf02", + "name": "su01-oob-mleaf02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-MLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN2201" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:11", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65102 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": 
"virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.2/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "ip_addresses": [], + "mac_address": "44:38:39:00:00:11" + }, + { + "name": "Vlan100", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.129/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "description": "OOB management gateway for devices cabled to su01-oob-mleaf02", + "mode": "access", + "role": { + "name": "Management" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + } + }, + { + "name": "swp2", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su00-tan-spine02:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su00-tan-spine02", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp3", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-bleaf02:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-tan-bleaf02", + "role": { + "name": "TAN-BLEAF" + } + } + } + }, + { + "name": "swp4", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-hleaf02:eth0", + "mode": "access", + 
"mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-tan-hleaf02", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp5", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-sleaf02:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-tan-sleaf02", + "role": { + "name": "TAN-SLEAF" + } + } + } + }, + { + "name": "swp6", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su00-cin-spine02:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su00-cin-spine02", + "role": { + "name": "CIN-SPINE" + } + } + } + }, + { + "name": "swp7", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-cin-leaf-r02:eth0", + "mode": "access", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "connected_interface": { + "name": "eth0", + "device": { + "name": "su01-cin-leaf-r02", + "role": { + "name": "CIN-LEAF" + } + } + } + }, + { + "name": "swp49", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.4/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-oob-spine01:swp2", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + 
"name": "swp2", + "device": { + "name": "su00-oob-spine01", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "100gbase-x-qsfp28", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.101.0.6/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-oob-spine02:swp2", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp2", + "device": { + "name": "su00-oob-spine02", + "role": { + "name": "OOB-SPINE" + } + } + } + }, + { + "name": "swp10", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su00-control02:bmc0", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "bmc0", + "device": { + "name": "su00-control02", + "role": { + "name": "Control-Server" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-storage01.json b/development/mock_topology/context/air_superpod/devices/su01-storage01.json new file mode 100644 index 0000000..5f8236c --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-storage01.json @@ -0,0 +1,111 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-storage01", + "name": "su01-storage01", + "tags": [], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "Storage-Server" + }, + "device_type": { + "manufacturer": { + "name": "Generic" + }, + "model": "Generic Server" + }, + "status": { + "name": "Active" + }, + "serial": "auto", + "config_context": {}, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "bmc0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-oob-mleaf01:swp11", + "mtu": 9216, + "role": { + 
"name": "Management" + }, + "connected_interface": { + "name": "swp11", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + } + }, + { + "name": "ens1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-sleaf01:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-tan-sleaf01", + "role": { + "name": "TAN-SLEAF" + } + } + } + }, + { + "name": "ens2", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-tan-sleaf02:swp1", + "mtu": 9216, + "role": { + "name": "Data" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su01-tan-sleaf02", + "role": { + "name": "TAN-SLEAF" + } + } + } + } + ], + "platform": { + "name": "Ubuntu" + }, + "_air": { + "os": "generic/ubuntu2404", + "cpu": 4, + "memory": 8192 + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-tan-bleaf01.json b/development/mock_topology/context/air_superpod/devices/su01-tan-bleaf01.json new file mode 100644 index 0000000..bc42db3 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-tan-bleaf01.json @@ -0,0 +1,182 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-tan-bleaf01", + "name": "su01-tan-bleaf01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-BLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:13", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65211 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + 
"type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.9/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.19/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:13", + "connected_interface": { + "name": "swp3", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf01:swp3", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su00-control01:ens1", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens1", + "device": { + "name": "su00-control01", + "role": { + "name": "Control-Server" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.16/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine01:swp5", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp5", + "device": { + "name": "su00-tan-spine01", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.18/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine02:swp5", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp5", + "device": { + 
"name": "su00-tan-spine02", + "role": { + "name": "TAN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-tan-bleaf02.json b/development/mock_topology/context/air_superpod/devices/su01-tan-bleaf02.json new file mode 100644 index 0000000..2b736eb --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-tan-bleaf02.json @@ -0,0 +1,182 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-tan-bleaf02", + "name": "su01-tan-bleaf02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-BLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:14", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65212 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.10/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.146/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:14", + "connected_interface": { + "name": "swp3", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf02:swp3", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": 
"su00-control02:ens1", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens1", + "device": { + "name": "su00-control02", + "role": { + "name": "Control-Server" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.20/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine01:swp6", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp6", + "device": { + "name": "su00-tan-spine01", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.22/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine02:swp6", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp6", + "device": { + "name": "su00-tan-spine02", + "role": { + "name": "TAN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-tan-hleaf01.json b/development/mock_topology/context/air_superpod/devices/su01-tan-hleaf01.json new file mode 100644 index 0000000..dc5e697 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-tan-hleaf01.json @@ -0,0 +1,182 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-tan-hleaf01", + "name": "su01-tan-hleaf01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:15", + "config_context": { + 
"intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65221 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.11/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.21/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:15", + "connected_interface": { + "name": "swp4", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf01:swp4", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-compute-tray-r01:ens1", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens1", + "device": { + "name": "su01-compute-tray-r01", + "role": { + "name": "Compute-Tray" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.0/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine01:swp1", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su00-tan-spine01", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.2/31", + "mask_length": 31, + 
"ip_version": 4 + } + ], + "description": "su00-tan-spine02:swp1", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp1", + "device": { + "name": "su00-tan-spine02", + "role": { + "name": "TAN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-tan-hleaf02.json b/development/mock_topology/context/air_superpod/devices/su01-tan-hleaf02.json new file mode 100644 index 0000000..8bf7a34 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-tan-hleaf02.json @@ -0,0 +1,182 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-tan-hleaf02", + "name": "su01-tan-hleaf02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:16", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65222 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.12/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.147/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:16", + "connected_interface": { + "name": "swp4", + "device": { + "name": "su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf02:swp4", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { 
+ "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-compute-tray-r02:ens1", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens1", + "device": { + "name": "su01-compute-tray-r02", + "role": { + "name": "Compute-Tray" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.4/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine01:swp2", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp2", + "device": { + "name": "su00-tan-spine01", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.6/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine02:swp2", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp2", + "device": { + "name": "su00-tan-spine02", + "role": { + "name": "TAN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-tan-sleaf01.json b/development/mock_topology/context/air_superpod/devices/su01-tan-sleaf01.json new file mode 100644 index 0000000..73388a4 --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-tan-sleaf01.json @@ -0,0 +1,182 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-tan-sleaf01", + "name": "su01-tan-sleaf01", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-SLEAF" + }, + "device_type": { + 
"manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:17", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65231 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.13/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.23/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:17", + "connected_interface": { + "name": "swp5", + "device": { + "name": "su01-oob-mleaf01", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf01:swp5", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-storage01:ens1", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens1", + "device": { + "name": "su01-storage01", + "role": { + "name": "Storage-Server" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.8/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine01:swp3", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp3", + "device": { + "name": "su00-tan-spine01", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + 
"status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.10/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine02:swp3", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp3", + "device": { + "name": "su00-tan-spine02", + "role": { + "name": "TAN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/devices/su01-tan-sleaf02.json b/development/mock_topology/context/air_superpod/devices/su01-tan-sleaf02.json new file mode 100644 index 0000000..04637be --- /dev/null +++ b/development/mock_topology/context/air_superpod/devices/su01-tan-sleaf02.json @@ -0,0 +1,182 @@ +{ + "data": { + "device": { + "id": "air-superpod-su01-tan-sleaf02", + "name": "su01-tan-sleaf02", + "tags": [ + { + "name": "demo" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-SLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:18", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65232 + } + }, + "location": { + "name": "SPO01" + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.255.0.14/32", + "mask_length": 32, + "ip_version": 4 + } + ], + "role": { + "name": "Loopback" + } + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.100.1.148/25", + "mask_length": 25, + "ip_version": 4 + } + ], + "mac_address": "44:38:39:00:00:18", + "connected_interface": { + "name": "swp5", + "device": { + "name": 
"su01-oob-mleaf02", + "role": { + "name": "OOB-MLEAF" + } + } + }, + "description": "su01-oob-mleaf02:swp5", + "mtu": 9216, + "role": { + "name": "Management" + } + }, + { + "name": "swp1", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [], + "description": "su01-storage01:ens2", + "mtu": 9216, + "role": { + "name": "Downlink" + }, + "connected_interface": { + "name": "ens2", + "device": { + "name": "su01-storage01", + "role": { + "name": "Storage-Server" + } + } + } + }, + { + "name": "swp49", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.12/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine01:swp4", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp4", + "device": { + "name": "su00-tan-spine01", + "role": { + "name": "TAN-SPINE" + } + } + } + }, + { + "name": "swp50", + "type": "400gbase-x-qsfpdd", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "ip_addresses": [ + { + "address": "10.102.0.14/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "description": "su00-tan-spine02:swp4", + "mtu": 9216, + "role": { + "name": "Uplink" + }, + "connected_interface": { + "name": "swp4", + "device": { + "name": "su00-tan-spine02", + "role": { + "name": "TAN-SPINE" + } + } + } + } + ], + "platform": { + "name": "Cumulus Linux" + } + } + } +} diff --git a/development/mock_topology/context/air_superpod/locations.yaml b/development/mock_topology/context/air_superpod/locations.yaml new file mode 100644 index 0000000..d941699 --- /dev/null +++ b/development/mock_topology/context/air_superpod/locations.yaml @@ -0,0 +1,157 @@ +global_defaults: + tenant: Public Demo + namespace: SuperPOD Demo + status: Active + cable_status: Connected + prefix_status: Active + device_status: Active + 
ip_address_status: Active +tenants: +- name: Public Demo +namespaces: +- SuperPOD Demo +locations: +- name: Public Demo + location_type: Region +- name: SuperPOD Demo + location_type: Region + parent: Public Demo +- name: SPO01 + location_type: Site + parent: SuperPOD Demo + ref: location +config_contexts: +- name: superpod-demo-dhcp-option-def + weight: 1000 + is_active: true + data: + Dhcp4: + option-def: + - code: 239 + name: cumulus-provision-url + type: string + array: false + space: dhcp4 + encapsulate: '' + record-types: '' + locations: + - SPO01 + tags: + - no-propagate +- name: superpod-demo-oob-dhcp-options + weight: 1000 + is_active: true + data: + dhcp: + options: + interface_roles: + Downlink: + subnet_config: + reservations-global: false + reservations-in-subnet: false + subnet_options: + boot-file-name: http://{{ztp_server}}/v1/device/{{device_id}}/onie + broadcast-address: 255.255.255.255 + cumulus-provision-url: http://{{ztp_server}}/v1/device/{{device_id}}/boot-script + Uplink: + subnet_config: + reservations-global: false + reservations-in-subnet: false + subnet_options: + boot-file-name: http://{{ztp_server}}/v1/device/{{device_id}}/onie + broadcast-address: 255.255.255.255 + cumulus-provision-url: http://{{ztp_server}}/v1/device/{{device_id}}/boot-script + roles: + - OOB-HLEAF + - OOB-MLEAF + - OOB-SPINE +- name: superpod-demo-dhcp-options + weight: 1000 + is_active: true + data: + dhcp: + options: + interface_names: + eth0: + subnet_config: + reservations-global: true + reservations-in-subnet: false + reservation_options: + boot-file-name: http://{{ztp_server}}/v1/device/{{device_id}}/onie + cumulus-provision-url: http://{{ztp_server}}/v1/device/{{device_id}}/boot-script + roles: + - TAN-BLEAF + - TAN-HLEAF + - TAN-SLEAF + - TAN-SPINE + - CIN-LEAF + - CIN-SPINE +- name: superpod-demo-firmware-targets + weight: 1000 + is_active: true + data: + oob-hleaf: + Cumulus Linux: 5.16.1 + oob-mleaf: + Cumulus Linux: 5.16.1 + tan-bleaf: + Cumulus Linux: 
5.16.1 + tan-hleaf: + Cumulus Linux: 5.16.1 + tan-sleaf: + Cumulus Linux: 5.16.1 + oob-spine: + Cumulus Linux: 5.16.1 + tan-spine: + Cumulus Linux: 5.16.1 + cin-leaf: + Cumulus Linux: 5.16.1 + cin-spine: + Cumulus Linux: 5.16.1 + locations: + - SPO01 + tenants: + - Public Demo + tags: + - no-propagate +- name: superpod-demo-network-management-services + weight: 1000 + is_active: true + data: + ztp: + ipv4: + - 172.18.255.201 + dhcp: + nvcm: + ipv4: + - 172.18.255.202 + management_prefixes: + ipv4: + - 172.18.0.0/16 + provisioning_servers: + ipv4: + - 10.100.0.0 + description: Network management services for the public SuperPOD demo + locations: + - SPO01 + tenants: + - Public Demo + tags: + - no-propagate +- name: Intended Password Mappings + weight: 1000 + is_active: true + data: + password_mappings: + cumulus: + role: system-admin + password: root_password + rotation: r1 + nvConfigManager: + role: system-admin + password: api_user_key + rotation: r1 + locations: + - SPO01 + tenants: + - Public Demo diff --git a/development/mock_topology/context/air_superpod/prefixes.yaml b/development/mock_topology/context/air_superpod/prefixes.yaml new file mode 100644 index 0000000..dd42961 --- /dev/null +++ b/development/mock_topology/context/air_superpod/prefixes.yaml @@ -0,0 +1,65 @@ +aggregate_prefixes: +- prefix: 172.18.255.0/24 + role: Service-LB + tags: [] +- prefix: 172.18.0.0/16 + role: Service-LB-Source + tags: + - lb-allowed +- prefix: 10.0.0.0/8 + role: Service-LB-Source + tags: + - lb-allowed +- prefix: 10.100.0.0/16 + role: OOB-Aggregate + tags: + - relay-return + - role-aggregate +- prefix: 10.100.0.0/31 + role: OOB-Server-p2p + tags: + - dhcp-subnet +- prefix: 10.100.1.0/24 + role: OOB-Management-Aggregate + tags: + - role-aggregate +- prefix: 10.100.1.0/25 + role: OOB-Management + tags: + - dhcp-subnet +- prefix: 10.100.1.128/25 + role: OOB-Management + tags: + - dhcp-subnet +- prefix: 10.101.0.0/24 + role: OOB-p2p + tags: + - role-aggregate +- prefix: 
10.102.0.0/24 + role: TAN-p2p + tags: + - role-aggregate +- prefix: 10.103.0.0/24 + role: CIN-p2p + tags: + - role-aggregate +- prefix: 172.16.0.0/24 + role: CIN-RDMA + tags: + - role-aggregate +- prefix: 172.16.0.0/25 + role: CIN-RDMA + tags: [] +- prefix: 172.16.0.0/31 + role: CIN-RDMA + tags: [] +- prefix: 172.16.0.128/25 + role: CIN-RDMA + tags: [] +- prefix: 172.16.0.128/31 + role: CIN-RDMA + tags: [] +- prefix: 10.255.0.0/24 + role: Loopback + tags: + - role-aggregate diff --git a/development/mock_topology/context/air_trial/README.md b/development/mock_topology/context/air_trial/README.md new file mode 100644 index 0000000..b311771 --- /dev/null +++ b/development/mock_topology/context/air_trial/README.md @@ -0,0 +1,16 @@ +# AIR Trial Mock Topology Context + +This context is the source of truth for the AIR free trial demo topology and the +Nautobot mock data loaded by the Design Builder mock topology job. The AIR sim +generates its temporary AIR topology YAML from these device JSON files, so there +is no separate maintained topology export for this built-in demo. + +Coverage: + +- one `OOB-MLEAF` switch, +- five `TAN-HLEAF` switches for multi-deploy testing, +- one `oob-mgmt-server`, +- OOB-MLEAF DHCP/ZTP over FPP to the server, +- TAN leaf `eth0` ports on VLAN 100 behind the OOB-MLEAF, +- Config Manager service load balancers on `172.18.255.201` and `172.18.255.202`, +- load balancer source ranges for `172.18.0.0/16` and `10.0.0.0/8`. 
diff --git a/development/mock_topology/context/air_trial/devices/oob-mgmt-server.json b/development/mock_topology/context/air_trial/devices/oob-mgmt-server.json new file mode 100644 index 0000000..6f4e033 --- /dev/null +++ b/development/mock_topology/context/air_trial/devices/oob-mgmt-server.json @@ -0,0 +1,73 @@ +{ + "data": { + "device": { + "id": "air-trial-oob-mgmt-server", + "name": "oob-mgmt-server", + "tags": [], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-Server" + }, + "device_type": { + "manufacturer": { + "name": "Generic" + }, + "model": "Generic Server" + }, + "status": { + "name": "Active" + }, + "serial": "44:38:39:00:00:01", + "config_context": { + "bgp": { + "asn": 65000 + } + }, + "interfaces": [ + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "description": "exit", + "ip_addresses": [] + }, + { + "name": "eth1", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "description": "oob-mleaf-01:swp1", + "mac_address": "44:38:39:01:00:01", + "role": { + "name": "OOB-Uplink" + }, + "ip_addresses": [ + { + "address": "10.120.0.0/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "connected_interface": { + "name": "swp1", + "device": { + "name": "oob-mleaf-01", + "role": { + "name": "OOB-MLEAF" + } + } + } + } + ] + } + } +} diff --git a/development/mock_topology/context/air_trial/devices/oob-mleaf-01.json b/development/mock_topology/context/air_trial/devices/oob-mleaf-01.json new file mode 100644 index 0000000..5e75114 --- /dev/null +++ b/development/mock_topology/context/air_trial/devices/oob-mleaf-01.json @@ -0,0 +1,275 @@ +{ + "data": { + "device": { + "id": "air-trial-oob-mleaf-01", + "name": "oob-mleaf-01", + "tags": [ + { + "name": "air-trial" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "OOB-MLEAF" + }, + "device_type": { + 
"manufacturer": { + "name": "NVIDIA" + }, + "model": "SN2201" + }, + "platform": { + "name": "Cumulus Linux" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:02", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + }, + "bgp": { + "asn": 65101 + } + }, + "interfaces": [ + { + "name": "lo", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "role": { + "name": "Loopback" + }, + "ip_addresses": [ + { + "address": "10.255.20.1/32", + "mask_length": 32, + "ip_version": 4 + } + ] + }, + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "mac_address": "44:38:39:00:00:02", + "ip_addresses": [] + }, + { + "name": "Vlan100", + "type": "virtual", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "description": "Trial OOB management gateway", + "role": { + "name": "Management" + }, + "mode": "access", + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "ip_addresses": [ + { + "address": "10.120.1.1/24", + "mask_length": 24, + "ip_version": 4 + } + ] + }, + { + "name": "swp1", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "mtu": 9216, + "description": "oob-mgmt-server:eth1", + "role": { + "name": "Uplink" + }, + "ip_addresses": [ + { + "address": "10.120.0.1/31", + "mask_length": 31, + "ip_version": 4 + } + ], + "connected_interface": { + "name": "eth1", + "device": { + "name": "oob-mgmt-server", + "role": { + "name": "OOB-Server" + } + } + } + }, + { + "name": "swp2", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "mtu": 9216, + "description": "tan-leaf-01:eth0", + "role": { + "name": "Downlink" + }, + "mode": "access", + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "ip_addresses": [], + "connected_interface": { + "name": "eth0", + 
"device": { + "name": "tan-leaf-01", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp3", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "mtu": 9216, + "description": "tan-leaf-02:eth0", + "role": { + "name": "Downlink" + }, + "mode": "access", + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "ip_addresses": [], + "connected_interface": { + "name": "eth0", + "device": { + "name": "tan-leaf-02", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp4", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "mtu": 9216, + "description": "tan-leaf-03:eth0", + "role": { + "name": "Downlink" + }, + "mode": "access", + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "ip_addresses": [], + "connected_interface": { + "name": "eth0", + "device": { + "name": "tan-leaf-03", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp5", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "mtu": 9216, + "description": "tan-leaf-04:eth0", + "role": { + "name": "Downlink" + }, + "mode": "access", + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "ip_addresses": [], + "connected_interface": { + "name": "eth0", + "device": { + "name": "tan-leaf-04", + "role": { + "name": "TAN-HLEAF" + } + } + } + }, + { + "name": "swp6", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": false, + "mtu": 9216, + "description": "tan-leaf-05:eth0", + "role": { + "name": "Downlink" + }, + "mode": "access", + "untagged_vlan": { + "vid": 100, + "name": "OOB-MGMT" + }, + "ip_addresses": [], + "connected_interface": { + "name": "eth0", + "device": { + "name": "tan-leaf-05", + "role": { + "name": "TAN-HLEAF" + } + } + } + } + ] + } + } +} diff --git a/development/mock_topology/context/air_trial/devices/tan-leaf-01.json 
b/development/mock_topology/context/air_trial/devices/tan-leaf-01.json new file mode 100644 index 0000000..8442d50 --- /dev/null +++ b/development/mock_topology/context/air_trial/devices/tan-leaf-01.json @@ -0,0 +1,73 @@ +{ + "data": { + "device": { + "id": "air-trial-tan-leaf-01", + "name": "tan-leaf-01", + "tags": [ + { + "name": "air-trial" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "platform": { + "name": "Cumulus Linux" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:03", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + } + }, + "interfaces": [ + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "mac_address": "44:38:39:00:00:03", + "mtu": 9216, + "description": "oob-mleaf-01:swp2", + "role": { + "name": "Management" + }, + "ip_addresses": [ + { + "address": "10.120.1.11/24", + "mask_length": 24, + "ip_version": 4 + } + ], + "connected_interface": { + "name": "swp2", + "device": { + "name": "oob-mleaf-01", + "role": { + "name": "OOB-MLEAF" + } + } + } + } + ] + } + } +} diff --git a/development/mock_topology/context/air_trial/devices/tan-leaf-02.json b/development/mock_topology/context/air_trial/devices/tan-leaf-02.json new file mode 100644 index 0000000..55fcf38 --- /dev/null +++ b/development/mock_topology/context/air_trial/devices/tan-leaf-02.json @@ -0,0 +1,73 @@ +{ + "data": { + "device": { + "id": "air-trial-tan-leaf-02", + "name": "tan-leaf-02", + "tags": [ + { + "name": "air-trial" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "platform": { + "name": "Cumulus Linux" + }, + "status": { + "name": "Provisioning" + 
}, + "serial": "44:38:39:00:00:04", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + } + }, + "interfaces": [ + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "mac_address": "44:38:39:00:00:04", + "mtu": 9216, + "description": "oob-mleaf-01:swp3", + "role": { + "name": "Management" + }, + "ip_addresses": [ + { + "address": "10.120.1.12/24", + "mask_length": 24, + "ip_version": 4 + } + ], + "connected_interface": { + "name": "swp3", + "device": { + "name": "oob-mleaf-01", + "role": { + "name": "OOB-MLEAF" + } + } + } + } + ] + } + } +} diff --git a/development/mock_topology/context/air_trial/devices/tan-leaf-03.json b/development/mock_topology/context/air_trial/devices/tan-leaf-03.json new file mode 100644 index 0000000..f520f9d --- /dev/null +++ b/development/mock_topology/context/air_trial/devices/tan-leaf-03.json @@ -0,0 +1,73 @@ +{ + "data": { + "device": { + "id": "air-trial-tan-leaf-03", + "name": "tan-leaf-03", + "tags": [ + { + "name": "air-trial" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "platform": { + "name": "Cumulus Linux" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:05", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + } + }, + "interfaces": [ + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "mac_address": "44:38:39:00:00:05", + "mtu": 9216, + "description": "oob-mleaf-01:swp4", + "role": { + "name": "Management" + }, + "ip_addresses": [ + { + "address": "10.120.1.13/24", + "mask_length": 24, + "ip_version": 4 + } + ], + "connected_interface": { + "name": "swp4", + "device": { + "name": "oob-mleaf-01", + "role": { + "name": "OOB-MLEAF" + } + } + } + } + ] + } + } +} 
diff --git a/development/mock_topology/context/air_trial/devices/tan-leaf-04.json b/development/mock_topology/context/air_trial/devices/tan-leaf-04.json new file mode 100644 index 0000000..ce84424 --- /dev/null +++ b/development/mock_topology/context/air_trial/devices/tan-leaf-04.json @@ -0,0 +1,73 @@ +{ + "data": { + "device": { + "id": "air-trial-tan-leaf-04", + "name": "tan-leaf-04", + "tags": [ + { + "name": "air-trial" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + "platform": { + "name": "Cumulus Linux" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:06", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + } + }, + "interfaces": [ + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "mac_address": "44:38:39:00:00:06", + "mtu": 9216, + "description": "oob-mleaf-01:swp5", + "role": { + "name": "Management" + }, + "ip_addresses": [ + { + "address": "10.120.1.14/24", + "mask_length": 24, + "ip_version": 4 + } + ], + "connected_interface": { + "name": "swp5", + "device": { + "name": "oob-mleaf-01", + "role": { + "name": "OOB-MLEAF" + } + } + } + } + ] + } + } +} diff --git a/development/mock_topology/context/air_trial/devices/tan-leaf-05.json b/development/mock_topology/context/air_trial/devices/tan-leaf-05.json new file mode 100644 index 0000000..b670f4e --- /dev/null +++ b/development/mock_topology/context/air_trial/devices/tan-leaf-05.json @@ -0,0 +1,73 @@ +{ + "data": { + "device": { + "id": "air-trial-tan-leaf-05", + "name": "tan-leaf-05", + "tags": [ + { + "name": "air-trial" + }, + { + "name": "ztp" + } + ], + "tenant": { + "name": "Public Demo" + }, + "role": { + "name": "TAN-HLEAF" + }, + "device_type": { + "manufacturer": { + "name": "NVIDIA" + }, + "model": "SN5600" + }, + 
"platform": { + "name": "Cumulus Linux" + }, + "status": { + "name": "Provisioning" + }, + "serial": "44:38:39:00:00:07", + "config_context": { + "intended-firmware": { + "version": "5.16.1" + } + }, + "interfaces": [ + { + "name": "eth0", + "type": "1000base-t", + "status": { + "name": "Active" + }, + "enabled": true, + "mgmt_only": true, + "mac_address": "44:38:39:00:00:07", + "mtu": 9216, + "description": "oob-mleaf-01:swp6", + "role": { + "name": "Management" + }, + "ip_addresses": [ + { + "address": "10.120.1.15/24", + "mask_length": 24, + "ip_version": 4 + } + ], + "connected_interface": { + "name": "swp6", + "device": { + "name": "oob-mleaf-01", + "role": { + "name": "OOB-MLEAF" + } + } + } + } + ] + } + } +} diff --git a/development/mock_topology/context/air_trial/locations.yaml b/development/mock_topology/context/air_trial/locations.yaml new file mode 100644 index 0000000..4bbe102 --- /dev/null +++ b/development/mock_topology/context/air_trial/locations.yaml @@ -0,0 +1,134 @@ +--- + +global_defaults: + tenant: Public Demo + namespace: AIR Trial Demo + status: Active + cable_status: Connected + prefix_status: Active + device_status: Active + ip_address_status: Active + +tenants: + - name: Public Demo + +namespaces: + - AIR Trial Demo + +locations: + - name: Public Demo + location_type: Provider + + - name: AIR Trial Demo + location_type: Region + parent: Public Demo + + - name: "TRIAL01 - {{ deployment_name }}" + location_type: Site + parent: AIR Trial Demo + ref: location + +config_contexts: + - name: "air-trial-dhcp-option-def - {{ deployment_name }}" + description: DHCP option definitions for the AIR trial demo + weight: 1000 + is_active: true + locations: + - "TRIAL01 - {{ deployment_name }}" + data: + Dhcp4: + option-def: + - code: 239 + name: cumulus-provision-url + type: string + array: false + space: dhcp4 + encapsulate: "" + record-types: "" + + - name: "air-trial-oob-dhcp-options - {{ deployment_name }}" + description: DHCP/ZTP options for OOB 
switches DHCPing over FPP + weight: 1000 + is_active: true + roles: + - OOB-MLEAF + data: + dhcp: + options: + interface_roles: + Uplink: + subnet_config: + reservations-global: false + reservations-in-subnet: false + subnet_options: + boot-file-name: http://{{ztp_server}}/v1/device/{{device_id}}/onie + broadcast-address: 255.255.255.255 + cumulus-provision-url: http://{{ztp_server}}/v1/device/{{device_id}}/boot-script + + - name: "air-trial-tan-dhcp-options - {{ deployment_name }}" + description: DHCP/ZTP reservation options for TAN leaf eth0 interfaces + weight: 1000 + is_active: true + roles: + - TAN-HLEAF + data: + dhcp: + options: + interface_names: + eth0: + subnet_config: + reservations-global: true + reservations-in-subnet: false + reservation_options: + boot-file-name: http://{{ztp_server}}/v1/device/{{device_id}}/onie + cumulus-provision-url: http://{{ztp_server}}/v1/device/{{device_id}}/boot-script + + - name: "air-trial-firmware-targets - {{ deployment_name }}" + description: Intended Cumulus Linux versions for the AIR trial demo + weight: 1000 + is_active: true + locations: + - "TRIAL01 - {{ deployment_name }}" + data: + oob-mleaf: + Cumulus Linux: 5.16.1 + tan-hleaf: + Cumulus Linux: 5.16.1 + + - name: "air-trial-network-management-services - {{ deployment_name }}" + description: Network management services for the AIR trial demo + weight: 1000 + is_active: true + locations: + - "TRIAL01 - {{ deployment_name }}" + data: + ztp: + ipv4: + - 172.18.255.201 + dhcp: + nvcm: + ipv4: + - 172.18.255.202 + management_prefixes: + ipv4: + - 172.18.0.0/16 + provisioning_servers: + ipv4: + - 10.120.0.0 + + - name: "air-trial-password-mappings - {{ deployment_name }}" + description: Intended password mappings for the AIR trial demo + weight: 1000 + is_active: true + locations: + - "TRIAL01 - {{ deployment_name }}" + data: + password_mappings: + cumulus: + role: system-admin + password: root_password + rotation: r1 + nvConfigManager: + role: system-admin + password: 
api_user_key + rotation: r1 diff --git a/development/mock_topology/context/air_trial/prefixes.yaml b/development/mock_topology/context/air_trial/prefixes.yaml new file mode 100644 index 0000000..697ce32 --- /dev/null +++ b/development/mock_topology/context/air_trial/prefixes.yaml @@ -0,0 +1,31 @@ +--- +# Aggregate prefixes for the public AIR free trial mock topology. + +aggregate_prefixes: + - prefix: 172.18.255.0/24 + role: Service-LB + tags: [role-aggregate] + + - prefix: 172.18.0.0/16 + role: Service-LB-Source + tags: [lb-allowed] + + - prefix: 10.0.0.0/8 + role: Service-LB-Source + tags: [lb-allowed] + + - prefix: 10.120.0.0/16 + role: OOB-Aggregate + tags: [relay-return, role-aggregate] + + - prefix: 10.120.0.0/31 + role: OOB-Server-p2p + tags: [dhcp-subnet] + + - prefix: 10.120.1.0/24 + role: OOB-Management + tags: [dhcp-subnet] + + - prefix: 10.255.20.0/24 + role: Loopback + tags: [role-aggregate] diff --git a/development/mock_topology/context/dgx_cloud/devices/core1-cg1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/core1-cg1-cp1-tan1-dc01.json index 2ba02bc..7599d27 100644 --- a/development/mock_topology/context/dgx_cloud/devices/core1-cg1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/core1-cg1-cp1-tan1-dc01.json @@ -244,6 +244,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:f2", "description": "leaf1-cp1-smn1-dc01:swp13", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/core1-cg2-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/core1-cg2-cp1-tan1-dc01.json index 7e156cf..82e8eed 100644 --- a/development/mock_topology/context/dgx_cloud/devices/core1-cg2-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/core1-cg2-cp1-tan1-dc01.json @@ -244,6 +244,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:f3", "description": 
"leaf2-cp1-smn1-dc01:swp13", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/core1-cp1-smn1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/core1-cp1-smn1-dc01.json index 1700a6d..a86b522 100644 --- a/development/mock_topology/context/dgx_cloud/devices/core1-cp1-smn1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/core1-cp1-smn1-dc01.json @@ -4659,6 +4659,7 @@ } ], "name": "eth0", + "mac_address": "44:38:39:10:03:ec", "description": "ztpleaf1-cp1-smn1-dc01:ethernet1", "vrf": null, "mgmt_only": false, diff --git a/development/mock_topology/context/dgx_cloud/devices/core2-cg1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/core2-cg1-cp1-tan1-dc01.json index 5c39455..3cbeca7 100644 --- a/development/mock_topology/context/dgx_cloud/devices/core2-cg1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/core2-cg1-cp1-tan1-dc01.json @@ -73,6 +73,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:f1", "description": "leaf1-cp1-smn1-dc01:swp14", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/core2-cg2-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/core2-cg2-cp1-tan1-dc01.json index 56ca44c..01f5d65 100644 --- a/development/mock_topology/context/dgx_cloud/devices/core2-cg2-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/core2-cg2-cp1-tan1-dc01.json @@ -73,6 +73,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:f0", "description": "leaf2-cp1-smn1-dc01:swp14", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/core2-cp1-smn1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/core2-cp1-smn1-dc01.json index 22977fe..e2b5a07 100644 --- a/development/mock_topology/context/dgx_cloud/devices/core2-cp1-smn1-dc01.json +++ 
b/development/mock_topology/context/dgx_cloud/devices/core2-cp1-smn1-dc01.json @@ -4659,6 +4659,7 @@ } ], "name": "eth0", + "mac_address": "44:38:39:10:03:eb", "description": "ztpleaf2-cp1-smn1-dc01:ethernet1", "vrf": null, "mgmt_only": false, diff --git a/development/mock_topology/context/dgx_cloud/devices/leaf1-cno1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/leaf1-cno1-cp1-tan1-dc01.json index 5feed7a..c5a7fed 100644 --- a/development/mock_topology/context/dgx_cloud/devices/leaf1-cno1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/leaf1-cno1-cp1-tan1-dc01.json @@ -145,6 +145,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:ef", "description": "leaf8-cp1-smn1-dc01:swp1", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/leaf1-cp1-smn1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/leaf1-cp1-smn1-dc01.json index 84fd4ed..c787343 100644 --- a/development/mock_topology/context/dgx_cloud/devices/leaf1-cp1-smn1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/leaf1-cp1-smn1-dc01.json @@ -252,6 +252,7 @@ } ], "name": "eth0", + "mac_address": "44:38:39:10:03:fe", "description": "ztpleaf1-cp1-smn1-dc01:ethernet3", "vrf": null, "mgmt_only": false, diff --git a/development/mock_topology/context/dgx_cloud/devices/leaf1-hss1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/leaf1-hss1-cp1-tan1-dc01.json index 17754d4..8b99c03 100644 --- a/development/mock_topology/context/dgx_cloud/devices/leaf1-hss1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/leaf1-hss1-cp1-tan1-dc01.json @@ -73,6 +73,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:fd", "description": "leaf1-cp1-smn1-dc01:swp19", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/leaf2-cno1-cp1-tan1-dc01.json 
b/development/mock_topology/context/dgx_cloud/devices/leaf2-cno1-cp1-tan1-dc01.json index ecd1669..2b8aa27 100644 --- a/development/mock_topology/context/dgx_cloud/devices/leaf2-cno1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/leaf2-cno1-cp1-tan1-dc01.json @@ -143,6 +143,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:fc", "description": "leaf8-cp1-smn1-dc01:swp2", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/leaf2-cp1-smn1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/leaf2-cp1-smn1-dc01.json index 138c14c..c2b6ec6 100644 --- a/development/mock_topology/context/dgx_cloud/devices/leaf2-cp1-smn1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/leaf2-cp1-smn1-dc01.json @@ -264,6 +264,7 @@ } ], "name": "eth0", + "mac_address": "44:38:39:10:04:00", "description": "ztpleaf2-cp1-smn1-dc01:ethernet3", "vrf": null, "mgmt_only": false, diff --git a/development/mock_topology/context/dgx_cloud/devices/leaf2-hss1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/leaf2-hss1-cp1-tan1-dc01.json index 1e744e4..caa301e 100644 --- a/development/mock_topology/context/dgx_cloud/devices/leaf2-hss1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/leaf2-hss1-cp1-tan1-dc01.json @@ -73,6 +73,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:e9", "description": "leaf2-cp1-smn1-dc01:swp19", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/spine1-cp1-smn1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/spine1-cp1-smn1-dc01.json index 803f624..8b5b945 100644 --- a/development/mock_topology/context/dgx_cloud/devices/spine1-cp1-smn1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/spine1-cp1-smn1-dc01.json @@ -77,6 +77,7 @@ } ], "name": "eth0", + "mac_address": 
"44:38:39:10:03:ee", "description": "ztpleaf1-cp1-smn1-dc01:ethernet2", "vrf": null, "mgmt_only": false, diff --git a/development/mock_topology/context/dgx_cloud/devices/spine2-cp1-smn1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/spine2-cp1-smn1-dc01.json index da66ec0..59061db 100644 --- a/development/mock_topology/context/dgx_cloud/devices/spine2-cp1-smn1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/spine2-cp1-smn1-dc01.json @@ -77,6 +77,7 @@ } ], "name": "eth0", + "mac_address": "44:38:39:10:03:ea", "description": "ztpleaf2-cp1-smn1-dc01:ethernet2", "vrf": null, "mgmt_only": false, diff --git a/development/mock_topology/context/dgx_cloud/devices/spine3-cno1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/spine3-cno1-cp1-tan1-dc01.json index bdbb1ee..ef3454b 100644 --- a/development/mock_topology/context/dgx_cloud/devices/spine3-cno1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/spine3-cno1-cp1-tan1-dc01.json @@ -73,6 +73,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:f6", "description": "leaf1-cp1-smn1-dc01:swp16", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/spine4-cno1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/spine4-cno1-cp1-tan1-dc01.json index 6db89e2..86d7bef 100644 --- a/development/mock_topology/context/dgx_cloud/devices/spine4-cno1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/spine4-cno1-cp1-tan1-dc01.json @@ -73,6 +73,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:ff", "description": "leaf2-cp1-smn1-dc01:swp16", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/dgx_cloud/devices/spine4-hss1-cp1-tan1-dc01.json b/development/mock_topology/context/dgx_cloud/devices/spine4-hss1-cp1-tan1-dc01.json index 2e4b512..62f0435 100644 --- 
a/development/mock_topology/context/dgx_cloud/devices/spine4-hss1-cp1-tan1-dc01.json +++ b/development/mock_topology/context/dgx_cloud/devices/spine4-hss1-cp1-tan1-dc01.json @@ -73,6 +73,7 @@ "enabled": true, "tags": [], "name": "eth0", + "mac_address": "44:38:39:10:03:f4", "description": "leaf2-cp1-smn1-dc01:swp18", "vrf": null, "mgmt_only": true, diff --git a/development/mock_topology/context/superpod/devices/a04-u44-p01-tor-01.json b/development/mock_topology/context/superpod/devices/a04-u44-p01-tor-01.json index d76c8a4..19ef55a 100644 --- a/development/mock_topology/context/superpod/devices/a04-u44-p01-tor-01.json +++ b/development/mock_topology/context/superpod/devices/a04-u44-p01-tor-01.json @@ -28,7 +28,7 @@ "platform": { "name": "Cumulus Linux" }, - "serial": "", + "serial": "44:38:39:20:00:01", "config_context": { "intended-firmware": { "version": "5.13.1" @@ -267,7 +267,7 @@ }, "type": "A_1000BASE_T", "mtu": null, - "mac_address": null, + "mac_address": "44:38:39:20:00:01", "enabled": true, "tags": [], "name": "eth0", diff --git a/development/mock_topology/context/superpod/devices/a08-u28-p01-oobspine-01.json b/development/mock_topology/context/superpod/devices/a08-u28-p01-oobspine-01.json index 0f5da4a..20ddbb9 100644 --- a/development/mock_topology/context/superpod/devices/a08-u28-p01-oobspine-01.json +++ b/development/mock_topology/context/superpod/devices/a08-u28-p01-oobspine-01.json @@ -35,7 +35,7 @@ "platform": { "name": "Cumulus Linux" }, - "serial": "", + "serial": "54:9B:24:41:33:12", "config_context": { "intended-firmware": { "version": "5.13.1" diff --git a/development/mock_topology/context/superpod/devices/a08-u32-p01-cleaf-01.json b/development/mock_topology/context/superpod/devices/a08-u32-p01-cleaf-01.json index edd999a..f7b136f 100644 --- a/development/mock_topology/context/superpod/devices/a08-u32-p01-cleaf-01.json +++ b/development/mock_topology/context/superpod/devices/a08-u32-p01-cleaf-01.json @@ -28,7 +28,7 @@ "platform": { "name": 
"Cumulus Linux" }, - "serial": "", + "serial": "7C:8C:09:B9:F8:8E", "config_context": { "intended-firmware": { "version": "5.13.1" diff --git a/development/mock_topology/context/superpod/devices/a08-u44-p01-mleaf-01.json b/development/mock_topology/context/superpod/devices/a08-u44-p01-mleaf-01.json index 3f33c71..5ed78d3 100644 --- a/development/mock_topology/context/superpod/devices/a08-u44-p01-mleaf-01.json +++ b/development/mock_topology/context/superpod/devices/a08-u44-p01-mleaf-01.json @@ -28,7 +28,7 @@ "platform": { "name": "Cumulus Linux" }, - "serial": "", + "serial": "2C:5E:AB:12:5A:38", "config_context": { "intended-firmware": { "version": "5.13.1" diff --git a/development/mock_topology/context/superpod/devices/a09-u28-p01-bleaf-01.json b/development/mock_topology/context/superpod/devices/a09-u28-p01-bleaf-01.json index efe6265..07d738d 100644 --- a/development/mock_topology/context/superpod/devices/a09-u28-p01-bleaf-01.json +++ b/development/mock_topology/context/superpod/devices/a09-u28-p01-bleaf-01.json @@ -42,7 +42,7 @@ "platform": { "name": "Cumulus Linux" }, - "serial": "", + "serial": "E8:9E:49:CF:4E:90", "config_context": { "intended-firmware": { "version": "5.13.1" diff --git a/development/mock_topology/context/superpod/devices/a09-u32-p01-sleaf-01.json b/development/mock_topology/context/superpod/devices/a09-u32-p01-sleaf-01.json index 4e2b9ae..757f11f 100644 --- a/development/mock_topology/context/superpod/devices/a09-u32-p01-sleaf-01.json +++ b/development/mock_topology/context/superpod/devices/a09-u32-p01-sleaf-01.json @@ -28,7 +28,7 @@ "platform": { "name": "Cumulus Linux" }, - "serial": "", + "serial": "7C:8C:09:B9:F8:9E", "config_context": { "intended-firmware": { "version": "5.13.1" diff --git a/development/mock_topology/context/superpod/devices/a09-u36-p01-spine-01.json b/development/mock_topology/context/superpod/devices/a09-u36-p01-spine-01.json index f590de1..9390005 100644 --- 
a/development/mock_topology/context/superpod/devices/a09-u36-p01-spine-01.json +++ b/development/mock_topology/context/superpod/devices/a09-u36-p01-spine-01.json @@ -20,7 +20,7 @@ "platform": { "name": "Cumulus Linux" }, - "serial": "", + "serial": "7C:8C:09:B9:F8:A6", "config_context": { "intended-firmware": { "version": "5.13.1" diff --git a/development/mock_topology/context/superpod/devices/a09-u44-p01-pleaf-01.json b/development/mock_topology/context/superpod/devices/a09-u44-p01-pleaf-01.json index 585d48c..132aa74 100644 --- a/development/mock_topology/context/superpod/devices/a09-u44-p01-pleaf-01.json +++ b/development/mock_topology/context/superpod/devices/a09-u44-p01-pleaf-01.json @@ -28,7 +28,7 @@ "platform": { "name": "Cumulus Linux" }, - "serial": "", + "serial": "2C:5E:AB:12:5A:68", "config_context": { "intended-firmware": { "version": "5.13.1" diff --git a/development/mock_topology/jobs/designs/devices.yaml.j2 b/development/mock_topology/jobs/designs/devices.yaml.j2 index d7f2f15..22d1e72 100644 --- a/development/mock_topology/jobs/designs/devices.yaml.j2 +++ b/development/mock_topology/jobs/designs/devices.yaml.j2 @@ -17,7 +17,7 @@ devices: role__name: {{ device.role.name }} {% endif %} {% if device.serial is defined and device.serial %} - serial: {{ device.serial }} + serial: "{{ device.serial }}" {% endif %} {% if device.config_context is defined and device.config_context %} local_config_context_data: {{ device.config_context | tojson }} diff --git a/development/mock_topology/jobs/designs/interfaces.yaml.j2 b/development/mock_topology/jobs/designs/interfaces.yaml.j2 index 5ebc004..3572a9b 100644 --- a/development/mock_topology/jobs/designs/interfaces.yaml.j2 +++ b/development/mock_topology/jobs/designs/interfaces.yaml.j2 @@ -20,7 +20,7 @@ interfaces: description: {{ intf.description }} {% endif %} {% if intf.mac_address is defined and intf.mac_address %} - mac_address: {{ intf.mac_address }} + mac_address: "{{ intf.mac_address }}" {% endif %} {% if 
intf.mode is defined and intf.mode %} mode: {{ intf.mode }} diff --git a/development/mock_topology/jobs/designs/ip_addresses.yaml.j2 b/development/mock_topology/jobs/designs/ip_addresses.yaml.j2 index c4613c9..71fc36b 100644 --- a/development/mock_topology/jobs/designs/ip_addresses.yaml.j2 +++ b/development/mock_topology/jobs/designs/ip_addresses.yaml.j2 @@ -7,6 +7,10 @@ ip_addresses: {% for ip in intf.ip_addresses %} {% if ip.address %} - "!create_or_update:address": {{ ip.address }} + {% if ip.parent_prefix is defined and ip.parent_prefix %} + "!create_or_update:parent__prefix": {{ ip.parent_prefix }} + "!create_or_update:parent__namespace__name": {{ global_defaults.namespace }} + {% endif %} status__name: {{ global_defaults.ip_address_status }} {% set is_mgmt = intf.name == 'eth0' or intf.name.startswith('Management') or intf.mgmt_only %} {% set is_uplink = intf.role is defined and intf.role and intf.role.name == 'Uplink' %} diff --git a/development/mock_topology/jobs/designs/managed_devices.yaml.j2 b/development/mock_topology/jobs/designs/managed_devices.yaml.j2 index 108140c..6fffea3 100644 --- a/development/mock_topology/jobs/designs/managed_devices.yaml.j2 +++ b/development/mock_topology/jobs/designs/managed_devices.yaml.j2 @@ -3,7 +3,7 @@ {# Only add network devices (Cumulus Linux) to NVIDIA Config Manager, not servers #} config_manager_devices: {% for device in json.devices %} - {% if device.platform and device.platform.name == "Cumulus Linux" %} + {% if device.platform is defined and device.platform and device.platform.name == "Cumulus Linux" %} - "!create_or_update:device": "!get:name": "{{ device.name }}" "!get:location": "!ref:location" diff --git a/development/mock_topology/jobs/designs/roles.yaml.j2 b/development/mock_topology/jobs/designs/roles.yaml.j2 index 4cf57e1..75f8e58 100644 --- a/development/mock_topology/jobs/designs/roles.yaml.j2 +++ b/development/mock_topology/jobs/designs/roles.yaml.j2 @@ -4,13 +4,8 @@ roles: {% for role in json.roles %} 
- "!create_or_update:name": {{ role.name }} - content_types: - {% for content_type in role.content_types %} - - "!get:app_label": {{ content_type.split(".")[0] }} - "!get:model": {{ content_type.split(".")[1] }} - {% endfor %} color: {{ role.color | default(color("blue")) }} {% endfor %} {% else %} roles: [] -{% endif %} \ No newline at end of file +{% endif %} diff --git a/development/mock_topology/jobs/mock_topology_design.py b/development/mock_topology/jobs/mock_topology_design.py index 7643d5e..1b9c90a 100644 --- a/development/mock_topology/jobs/mock_topology_design.py +++ b/development/mock_topology/jobs/mock_topology_design.py @@ -18,9 +18,13 @@ the Design Builder pattern compatible with Nautobot git repository mounts. """ +import logging from typing import Any +from django.contrib.contenttypes.models import ContentType +from django.db import transaction from nautobot.apps.jobs import StringVar, register_jobs +from nautobot.extras.models import Role from nautobot_design_builder.choices import DesignModeChoices from nautobot_design_builder.contrib.ext import CableConnectionExtension, LookupExtension from nautobot_design_builder.design_job import DesignJob @@ -28,6 +32,7 @@ from ..context import BaseContext, get_mock_topology_context_class name = "Mock Topology" +logger = logging.getLogger(__name__) class MockTopologyDesign(DesignJob): @@ -50,7 +55,51 @@ def run(self, *args: Any, **kwargs: Any) -> Any: self.Meta.context_class = get_mock_topology_context_class( kwargs.get("blueprint", "superpod") ) - return super().run(*args, **kwargs) + with transaction.atomic(): + self._ensure_role_content_type_memberships(kwargs) + return super().run(*args, **kwargs) + + def _ensure_role_content_type_memberships(self, data: dict[str, Any]) -> None: + """Add required role content types without removing existing memberships.""" + try: + job_result = self.job_result + except AttributeError: + job_result = None + + context = self.Meta.context_class(data=data, 
job_result=job_result) + role_data = [ + *context.json.get("role_content_type_extensions", []), + *context.json.get("roles", []), + ] + + seen_roles = set() + for role in role_data: + name = role.get("name") + if not name or name in seen_roles: + continue + seen_roles.add(name) + + content_types = [ + self._get_content_type(content_type) + for content_type in role.get("content_types", []) + ] + content_types = [content_type for content_type in content_types if content_type] + if not content_types: + continue + + role_obj, _ = Role.objects.get_or_create(name=name, defaults={"color": "2196f3"}) + role_obj.content_types.add(*content_types) + role_obj.validated_save() + + @staticmethod + def _get_content_type(content_type: str) -> ContentType | None: + """Resolve an app.model content type string.""" + try: + app_label, model = content_type.split(".") + return ContentType.objects.get(app_label=app_label, model=model) + except (ValueError, ContentType.DoesNotExist) as exc: + logger.warning("Could not resolve content type %r: %s", content_type, exc) + return None class Meta: """Metadata.""" diff --git a/docs/README.md b/docs/README.md index bd59466..8025614 100644 --- a/docs/README.md +++ b/docs/README.md @@ -25,5 +25,6 @@ make docs-preview - Navigation is defined in `docs/fern/docs.yml`. - OpenAPI specs are generated into `docs/api-specs/` with `make openapi`. - Installer TUI screenshots are generated into `docs/assets/images/installer/` with `make docs-screenshots`. +- AIR sim TUI screenshots are generated into `docs/assets/images/air-sim/` with `make docs-air-sim-screenshots`. Fern publishing in GitHub Actions uses a repository secret named `FERN_TOKEN`. 
diff --git a/docs/api-specs/temporal.openapi.json b/docs/api-specs/temporal.openapi.json index 95f8b55..abbadd1 100644 --- a/docs/api-specs/temporal.openapi.json +++ b/docs/api-specs/temporal.openapi.json @@ -1,102 +1,6 @@ { "components": { "schemas": { - "AIRCreateBlueprintSimulationInput": { - "description": "AIR Workflow Input Definition.", - "properties": { - "blueprint_name": { - "title": "Blueprint Name", - "type": "string" - }, - "user": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "User" - }, - "user_domain": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "User Domain" - } - }, - "required": [ - "blueprint_name", - "user", - "user_domain" - ], - "title": "AIRCreateBlueprintSimulationInput", - "type": "object" - }, - "AIRCreateSimulationInput": { - "description": "AIR Workflow Input Definition.", - "properties": { - "name": { - "title": "Name", - "type": "string" - }, - "topology": { - "additionalProperties": true, - "title": "Topology", - "type": "object" - }, - "user": { - "title": "User", - "type": "string" - } - }, - "required": [ - "name", - "topology", - "user" - ], - "title": "AIRCreateSimulationInput", - "type": "object" - }, - "AIRDeleteInput": { - "description": "AIR Delete Workflow Input Definition.", - "properties": { - "simulation_id": { - "title": "Simulation Id", - "type": "string" - } - }, - "required": [ - "simulation_id" - ], - "title": "AIRDeleteInput", - "type": "object" - }, - "AIRValidateSiteInput": { - "description": "AIR Validate Site Workflow Input Definition.", - "properties": { - "site_name": { - "title": "Site Name", - "type": "string" - }, - "user": { - "title": "User", - "type": "string" - } - }, - "required": [ - "site_name", - "user" - ], - "title": "AIRValidateSiteInput", - "type": "object" - }, "BackupInput": { "description": "Backup Workflow Input Definiton.", "properties": { @@ -1304,30 +1208,6 @@ "title": "Secret", "type": "object" }, - 
"Simulation": { - "description": "Model representing an AIR simulation.", - "properties": { - "id": { - "title": "Id", - "type": "string" - }, - "name": { - "title": "Name", - "type": "string" - }, - "state": { - "title": "State", - "type": "string" - } - }, - "required": [ - "id", - "name", - "state" - ], - "title": "Simulation", - "type": "object" - }, "SiteCableValidationInput": { "description": "Input for Site Cable Validation Workflow.", "properties": { @@ -2815,32 +2695,6 @@ ] } }, - "/v1/parameter/simulations": { - "get": { - "description": "Return a list of NVIDIA Config Manager-managed AIR simulations.", - "operationId": "get_simulations_v1_parameter_simulations_get", - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "items": { - "$ref": "#/components/schemas/Simulation" - }, - "title": "Response Get Simulations V1 Parameter Simulations Get", - "type": "array" - } - } - }, - "description": "Successful Response" - } - }, - "summary": "Get AIR Simulations", - "tags": [ - "parameters" - ] - } - }, "/v1/parameter/site": { "get": { "description": "Return a list of NVIDIA Config Manager-managed sites.", @@ -3247,178 +3101,6 @@ ] } }, - "/v1/workflow/ngc/air_create_blueprint_simulation": { - "post": { - "description": "Create AIR simulation from blueprint template for standardized network testing", - "operationId": "aircreateblueprintsimulationworkflow_endpoint_v1_workflow_ngc_air_create_blueprint_simulation_post", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AIRCreateBlueprintSimulationInput" - } - } - }, - "required": true - }, - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/WorkflowResponse" - } - } - }, - "description": "Successful Response" - }, - "422": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - "description": "Validation 
Error" - } - }, - "summary": "Execute AIRCreateBlueprintSimulationWorkflow", - "tags": [ - "workflow", - "workflow" - ] - } - }, - "/v1/workflow/ngc/air_create_simulation": { - "post": { - "description": "Create AIR network simulation from topology for configuration testing", - "operationId": "aircreatesimulationworkflow_endpoint_v1_workflow_ngc_air_create_simulation_post", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AIRCreateSimulationInput" - } - } - }, - "required": true - }, - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/WorkflowResponse" - } - } - }, - "description": "Successful Response" - }, - "422": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - "description": "Validation Error" - } - }, - "summary": "Execute AIRCreateSimulationWorkflow", - "tags": [ - "workflow", - "workflow" - ] - } - }, - "/v1/workflow/ngc/air_delete": { - "post": { - "description": "Delete AIR simulation and clean up associated resources", - "operationId": "airdeletesimulationworkflow_endpoint_v1_workflow_ngc_air_delete_post", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AIRDeleteInput" - } - } - }, - "required": true - }, - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/WorkflowResponse" - } - } - }, - "description": "Successful Response" - }, - "422": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - "description": "Validation Error" - } - }, - "summary": "Execute AIRDeleteSimulationWorkflow", - "tags": [ - "workflow", - "workflow" - ] - } - }, - "/v1/workflow/ngc/air_validate_site": { - "post": { - "description": "Validate site network configuration using AIR simulation environment", - 
"operationId": "airvalidatesiteworkflow_endpoint_v1_workflow_ngc_air_validate_site_post", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/AIRValidateSiteInput" - } - } - }, - "required": true - }, - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/WorkflowResponse" - } - } - }, - "description": "Successful Response" - }, - "422": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - }, - "description": "Validation Error" - } - }, - "summary": "Execute AIRValidateSiteWorkflow", - "tags": [ - "workflow", - "workflow" - ] - } - }, "/v1/workflow/ngc/backup": { "post": { "description": "Backup network device configuration to the Config Store and NVIDIA Config Manager plugin", diff --git a/docs/assets/images/air-sim/01-topology.svg b/docs/assets/images/air-sim/01-topology.svg new file mode 100644 index 0000000..93e1f4a --- /dev/null +++ b/docs/assets/images/air-sim/01-topology.svg @@ -0,0 +1,351 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Topology + + + + + + + + + + +NVCM AIR Sim WizardTopology + +───────────────────────────────────────────────────────────── + +Pre-built Config +  Topology▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +Custom / manual 
+▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +  OptionsSelecting a preset replaces the wizard fields but keeps your save file unchanged. + +──────────────────────────────────────── +  Launch +Mock Topology +▔▔▔▔▔▔▔▔ +Build AIR topology from development/mock_topology context +▁▁▁▁▁▁▁▁ + + +Blueprint +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +air_trial +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +Deployment Name +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +demo +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +Mock Topology Path +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +development/mock_topology +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +──────────────────────────────────────── + +Template Plugins +Paths to template plugin directories or .tar.gz files paired with this topology + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + + Add Template Plugin  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +development/air_sim/template_plugins/superpod-template-plugin 
+▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + Remove  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + + +Simulation Name  (leave blank to auto-generate) +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +nv-config-manager-air-trial-demo +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +OOB Management Server Name +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +oob-mgmt-server +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +──────────────────────────────────────── + +Server Mode +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + Use existing server (e.g. 
oob-mgmt-server)  + Create new server node attached to a switch  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + + F2 Save  F9 Launch  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/air-sim/02-options.svg b/docs/assets/images/air-sim/02-options.svg new file mode 100644 index 0000000..aba9aa7 --- /dev/null +++ b/docs/assets/images/air-sim/02-options.svg @@ -0,0 +1,348 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Options + + + + + + + + + + +NVCM AIR Sim WizardOptions + +───────────────────────────────────────────────────────────── + +AIR / Auth +  Topology +NGC API Key  (or set NGC_API_KEY env var) +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Options•••••••••••••••••••••••••••••• +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +▔▔▔▔▔▔▔▔ +  LaunchUse Public Air +▁▁▁▁▁▁▁▁ + +──────────────────────────────────────── + +Auto-Configure +▔▔▔▔▔▔▔▔ +Auto-configure server on boot  (attach cloud-init) +▁▁▁▁▁▁▁▁ + + +Git Token  (optional; only needed for private forks) +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +token for a private fork 
+▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +nv-config-manager repo URL +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +https://github.com/NVIDIA/nv-config-manager +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +──────────────────────────────────────── + +Deployment + +nv-config-manager Git Ref +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +main +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +Images will be built locally from nv-config-manager ref 'main'; registry pulls are disabled for AIR demos. + +Cumulus Version Override  (leave blank to use topology values) +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +5.16.1 +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +Deployment Size +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + small  + medium  + large  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +▔▔▔▔▔▔▔▔ +Run nv-config-manager-installer deploy after setup +▁▁▁▁▁▁▁▁ + +──────────────────────────────────────── + +Advanced +▔▔▔▔▔▔▔▔▇▇ +Disable aggressive DHCP tuning on Cumulus switches +▁▁▁▁▁▁▁▁ + +▔▔▔▔▔▔▔▔ +Skip reset of Cumulus nodes before DHCP refresh +▁▁▁▁▁▁▁▁ + + + F2 Save  F9 Launch  
F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/air-sim/03-launch-ready.svg b/docs/assets/images/air-sim/03-launch-ready.svg new file mode 100644 index 0000000..6ca36c1 --- /dev/null +++ b/docs/assets/images/air-sim/03-launch-ready.svg @@ -0,0 +1,345 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Launch + + + + + + + + + + +NVCM AIR Sim WizardLaunch + +───────────────────────────────────────────────────────────── + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Topology Launch Simulation  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +  OptionsReady to create AIR simulation from mock topology air_trial. 
+ + +  Launch──────────────────────────────────────── + +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +StepsPod Status + +────────────────────────────────────────────────────── + +[ ]  Resolve topology  (~~5s)Provisioned: — +[ ]  Create AIR simulation  (~~10s) +[ ]  Attach cloud-init  (~~5s)────────────────────────────── +[ ]  Start simulation  (~4-6m) +[ ]  Create SSH service  (~~15s) NAME  READY  STATUS  RESTARTS  AGE  +[ ]  Wait for cloud-init  (~3-5m) +[ ]  Upload installer config  (~~5s) +[ ]  Run nvcm installer  (~~20m) +[ ]  Post-deploy setup  (~~1m) + + + + +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Output + +Deploy Log +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + + + + + + + + + + + + + + + + + + + + + + + +────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + + + + + + F2 Save  F9 Launch  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/air-sim/04-launch-running.svg b/docs/assets/images/air-sim/04-launch-running.svg new file mode 100644 index 0000000..4c5b881 --- /dev/null +++ b/docs/assets/images/air-sim/04-launch-running.svg @@ -0,0 +1,354 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Launch / Running + + + + + + + + + + +NVCM AIR Sim WizardLaunch + +───────────────────────────────────────────────────────────── + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Topology Launch Simulation  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +  OptionsRunning...  log -> /tmp/nvcm-deploy-20260530-000000.log + + +  Launch──────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +SSHsshpass -p NVCMDemo1! ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -p▔▔▔▔▔ +17117 nvcm@eb515e50.workers.ngc.air.nvidia.com ⧉  +▁▁▁▁▁ +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +StepsPod Status + +────────────────────────────────────────────────────── + +[*]  Resolve topology  0:04Provisioned: 0/6 +[*]  Create AIR simulation  0:13Waiting for first ZTP callback +[*]  Attach cloud-init  0:07────────────────────────────── +[*]  Start simulation  4:46 +[*]  Create SSH service  0:18 NAME                                        READY  STATUS   RESTARTS  AGE    +[*]  Wait for cloud-init  4:01 cluster-dhcp-1                              1/1    Running  0         53m    +[*]  Upload installer config  0:06 cluster-nautobot-1                          1/1    Running  0         53m    +[>]  Run nvcm installer  0:00  (~~20m) nv-config-manager-dhcp-c49966454-wvlts      4/4    Running  0         54m    +[ ]  Post-deploy setup  (~~1m) nv-config-manager-dhcp-refresh-5cc75b56...  
1/1    Running  0         40m    + nv-config-manager-nautobot-7c6c5b566-2kqq2  2/2    Running  0         54m   ▄▄ + nv-config-manager-nautobot-celery-559d9...  1/1    Running  0         54m    + nv-config-manager-nautobot-celery-beat-...  1/1    Running  0         54m    + nv-config-manager-render-api-5858dcb947...  1/1    Running  0         9m43s  +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Output + +Deploy LogDHCPZTP +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +23:52:44  Found 1 exit interface(s) for SSH access +23:52:44  Overriding existing server 'oob-mgmt-server' with nvcm-box image  +(generic/ubuntu2404, 16 CPU, 32768MB RAM, 100GB storage) +23:52:44  Built topology with 7 nodes and 8 links▃▃ +23:52:45  Created simulation: 9e1f8be2-43a0-4797-9e14-91e5b170b656 +23:53:31  Created SSH service for oob-mgmt-server:eth0 ->  +eb515e50.workers.ngc.air.nvidia.com:17117 +SSH ready: nvcm@eb515e50.workers.ngc.air.nvidia.com:17117 +23:56:03  Uploading /tmp/nv-config-manager-install-sujopgqf.yaml ->  +eb515e50.workers.ngc.air.nvidia.com:/home/nvcm/nv-config-manager-install.yaml  +... 
+23:56:03  Upload complete: /home/nvcm/nv-config-manager-install.yaml +Uploaded nv-config-manager-install.yaml +Running deploy command: +  sudo NO_COLOR=1 KUBECONFIG=/home/nvcm/.kube/config uv run --directory  +/home/nvcm/nv-config-manager --project /home/nvcm/nv-config-manager/installer  +nv-config-manager-installer deploy /home/nvcm/nv-config-manager-install.yaml  +--chart-dir /home/nvcm/nv-config-manager/deploy/helm --kind-cluster nvcm  +--install-envoy-gateway --install-cert-manager --install-cnpg-operator  +--image-source local --build-images --load-kind +23:56:03  Running installer (this may take 15-30 min)...▇▇ +23:56:07  [oob-mgmt-server] [>]  Check prerequisites + + F2 Save  F9 Launch  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/air-sim/05-launch-pod-status.svg b/docs/assets/images/air-sim/05-launch-pod-status.svg new file mode 100644 index 0000000..39405b7 --- /dev/null +++ b/docs/assets/images/air-sim/05-launch-pod-status.svg @@ -0,0 +1,354 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Launch / Pod Status + + + + + + + + + + +NVCM AIR Sim WizardLaunch + +───────────────────────────────────────────────────────────── + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Topology Launch Simulation  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +  OptionsDeployment running - monitoring Kubernetes pods over SSH. + + +  Launch──────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +SSHsshpass -p NVCMDemo1! 
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -p▔▔▔▔▔ +17117 nvcm@eb515e50.workers.ngc.air.nvidia.com ⧉  +▁▁▁▁▁ +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +StepsPod Status + +────────────────────────────────────────────────────── + +[*]  Resolve topology  0:04Provisioned: 4/6 +[*]  Create AIR simulation  0:13Pending: tan-leaf-04, tan-leaf-05 +[*]  Attach cloud-init  0:07────────────────────────────── +[*]  Start simulation  4:46 +[*]  Create SSH service  0:18 NAME                                        READY  STATUS   RESTARTS  AGE    +[*]  Wait for cloud-init  4:01 cluster-dhcp-1                              1/1    Running  0         53m    +[*]  Upload installer config  0:06 cluster-nautobot-1                          1/1    Running  0         53m    +[*]  Run nvcm installer  19:48 nv-config-manager-dhcp-c49966454-wvlts      4/4    Running  0         54m    +[>]  Post-deploy setup  0:00  (~~1m) nv-config-manager-dhcp-refresh-5cc75b56...  1/1    Running  0         40m    + nv-config-manager-nautobot-7c6c5b566-2kqq2  2/2    Running  0         54m   ▄▄ + nv-config-manager-nautobot-celery-559d9...  1/1    Running  0         54m    + nv-config-manager-nautobot-celery-beat-...  1/1    Running  0         54m    + nv-config-manager-render-api-5858dcb947...  
1/1    Running  0         9m43s  +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Output + +Deploy LogDHCPZTP +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +23:52:44  Found 1 exit interface(s) for SSH access +23:52:44  Overriding existing server 'oob-mgmt-server' with nvcm-box image  +(generic/ubuntu2404, 16 CPU, 32768MB RAM, 100GB storage) +23:52:44  Built topology with 7 nodes and 8 links▃▃ +23:52:45  Created simulation: 9e1f8be2-43a0-4797-9e14-91e5b170b656 +23:53:31  Created SSH service for oob-mgmt-server:eth0 ->  +eb515e50.workers.ngc.air.nvidia.com:17117 +SSH ready: nvcm@eb515e50.workers.ngc.air.nvidia.com:17117 +23:56:03  Uploading /tmp/nv-config-manager-install-sujopgqf.yaml ->  +eb515e50.workers.ngc.air.nvidia.com:/home/nvcm/nv-config-manager-install.yaml  +... 
+23:56:03  Upload complete: /home/nvcm/nv-config-manager-install.yaml +Uploaded nv-config-manager-install.yaml +Running deploy command: +  sudo NO_COLOR=1 KUBECONFIG=/home/nvcm/.kube/config uv run --directory  +/home/nvcm/nv-config-manager --project /home/nvcm/nv-config-manager/installer  +nv-config-manager-installer deploy /home/nvcm/nv-config-manager-install.yaml  +--chart-dir /home/nvcm/nv-config-manager/deploy/helm --kind-cluster nvcm  +--install-envoy-gateway --install-cert-manager --install-cnpg-operator  +--image-source local --build-images --load-kind +23:56:03  Running installer (this may take 15-30 min)...▇▇ +23:56:07  [oob-mgmt-server] [>]  Check prerequisites + + F2 Save  F9 Launch  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/air-sim/06-launch-dhcp-log.svg b/docs/assets/images/air-sim/06-launch-dhcp-log.svg new file mode 100644 index 0000000..f7261cc --- /dev/null +++ b/docs/assets/images/air-sim/06-launch-dhcp-log.svg @@ -0,0 +1,353 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Launch / DHCP Log + + + + + + + + + + +NVCM AIR Sim WizardLaunch + +───────────────────────────────────────────────────────────── + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Topology Launch Simulation  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +  OptionsDeployment running - monitoring Kubernetes pods over SSH. + + +  Launch──────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +SSHsshpass -p NVCMDemo1! 
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -p▔▔▔▔▔ +17117 nvcm@eb515e50.workers.ngc.air.nvidia.com ⧉  +▁▁▁▁▁ +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +StepsPod Status + +────────────────────────────────────────────────────── + +[*]  Resolve topology  0:04Provisioned: 4/6 +[*]  Create AIR simulation  0:13Pending: tan-leaf-04, tan-leaf-05 +[*]  Attach cloud-init  0:07────────────────────────────── +[*]  Start simulation  4:46 +[*]  Create SSH service  0:18 NAME                                        READY  STATUS   RESTARTS  AGE    +[*]  Wait for cloud-init  4:01 cluster-dhcp-1                              1/1    Running  0         53m    +[*]  Upload installer config  0:06 cluster-nautobot-1                          1/1    Running  0         53m    +[*]  Run nvcm installer  19:48 nv-config-manager-dhcp-c49966454-wvlts      4/4    Running  0         54m    +[>]  Post-deploy setup  0:00  (~~1m) nv-config-manager-dhcp-refresh-5cc75b56...  1/1    Running  0         40m    + nv-config-manager-nautobot-7c6c5b566-2kqq2  2/2    Running  0         54m   ▄▄ + nv-config-manager-nautobot-celery-559d9...  1/1    Running  0         54m    + nv-config-manager-nautobot-celery-beat-...  1/1    Running  0         54m    + nv-config-manager-render-api-5858dcb947...  
1/1    Running  0         9m43s  +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Output + +Deploy LogDHCPZTP +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +DHCP4_PACKET_RECEIVED [hwtype=1 44:38:39:00:00:08], cid=[no info],  +tid=0xbec5f077: DHCPREQUEST (type 3) received from 172.18.0.1 to 10.244.0.20 on  +interface eth0 +DHCP4_INIT_REBOOT [hwtype=1 44:38:39:00:00:08], cid=[no info], tid=0xbec5f077:  +client is in INIT-REBOOT state and requests address 10.120.0.1 +DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:08], cid=[no info], tid=0xbec5f077:  +lease 10.120.0.1 has been allocated for 7200 seconds +DHCP4_LEASE_OFFER [hwtype=1 44:38:39:00:00:04], cid=[no info], tid=0xaf0466f:  +lease 10.120.1.12 will be offered +DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:04], cid=[no info], tid=0xaf0466f:  +lease 10.120.1.12 has been allocated for 7200 seconds +DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:07], cid=[no info], tid=0x4a0ae633:  +lease 10.120.1.15 has been allocated for 7200 seconds +DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:03], cid=[no info], tid=0x4bb0432d:  +lease 10.120.1.11 has been allocated for 7200 seconds +DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:06], cid=[no info], tid=0x1204853d:  +lease 10.120.1.14 has been allocated for 7200 seconds +DHCP4_PACKET_SEND [hwtype=1 44:38:39:00:00:07], cid=[no info], tid=0x4a0ae633:  +trying to send packet DHCPACK (type 5) from 10.244.0.20:67 to 10.120.1.1:67 on  +interface eth0 +▇▇ + + + F2 Save  F9 Launch  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/air-sim/07-launch-ztp-log.svg b/docs/assets/images/air-sim/07-launch-ztp-log.svg new file mode 100644 
index 0000000..d7b0ee5 --- /dev/null +++ b/docs/assets/images/air-sim/07-launch-ztp-log.svg @@ -0,0 +1,353 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Launch / ZTP Log + + + + + + + + + + +NVCM AIR Sim WizardLaunch + +───────────────────────────────────────────────────────────── + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Topology Launch Simulation  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +  OptionsDeployment running - watching ZTP callbacks over SSH. + + +  Launch──────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +SSHsshpass -p NVCMDemo1! 
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -p▔▔▔▔▔ +17117 nvcm@eb515e50.workers.ngc.air.nvidia.com ⧉  +▁▁▁▁▁ +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +StepsPod Status + +────────────────────────────────────────────────────── + +[*]  Resolve topology  0:04Provisioned: 6/6 +[*]  Create AIR simulation  0:13All devices reported provisioned +[*]  Attach cloud-init  0:07────────────────────────────── +[*]  Start simulation  4:46 +[*]  Create SSH service  0:18 NAME                                        READY  STATUS   RESTARTS  AGE    +[*]  Wait for cloud-init  4:01 cluster-dhcp-1                              1/1    Running  0         53m    +[*]  Upload installer config  0:06 cluster-nautobot-1                          1/1    Running  0         53m    +[*]  Run nvcm installer  19:48 nv-config-manager-dhcp-c49966454-wvlts      4/4    Running  0         54m    +[>]  Post-deploy setup  0:00  (~~1m) nv-config-manager-dhcp-refresh-5cc75b56...  1/1    Running  0         40m    + nv-config-manager-nautobot-7c6c5b566-2kqq2  2/2    Running  0         54m   ▄▄ + nv-config-manager-nautobot-celery-559d9...  1/1    Running  0         54m    + nv-config-manager-nautobot-celery-beat-...  1/1    Running  0         54m    + nv-config-manager-render-api-5858dcb947...  
1/1    Running  0         9m43s  +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Output + +Deploy LogDHCPZTP +━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +10.120.0.1:40294 - "GET  +/v1/device/8f5a1532-e155-4119-937e-86e8aa8f4007/boot-script HTTP/1.1" 200 +10.120.0.1:40304 - "POST  +/v1/device/8f5a1532-e155-4119-937e-86e8aa8f4007/validate_serial HTTP/1.1" 200 +10.120.0.1:40320 - "GET  +/v1/device/8f5a1532-e155-4119-937e-86e8aa8f4007/config/startup.yaml HTTP/1.1"  +200 +10.120.0.1:53424 - "POST  +/v1/device/8f5a1532-e155-4119-937e-86e8aa8f4007/provisioned HTTP/1.1" 200 +10.120.1.13:56094 - "GET  +/v1/device/38065dde-1abe-41ef-865b-60fbb6405d06/boot-script HTTP/1.1" 200 +10.120.1.13:46012 - "POST  +/v1/device/38065dde-1abe-41ef-865b-60fbb6405d06/validate_serial HTTP/1.1" 200 +10.120.1.13:46020 - "GET  +/v1/device/38065dde-1abe-41ef-865b-60fbb6405d06/config/startup.yaml HTTP/1.1"  +200 +10.120.1.13:46026 - "POST  +/v1/device/38065dde-1abe-41ef-865b-60fbb6405d06/provisioned HTTP/1.1" 200 +10.120.1.12:59136 - "GET  +/v1/device/48331931-a577-4ad7-ac03-dc22461a9d0c/boot-script HTTP/1.1" 200 +10.120.1.15:35374 - "GET ▇▇ +/v1/device/9cae2d62-00a3-457e-b8c1-6bc9a18d8e0a/boot-script HTTP/1.1" 200 + + F2 Save  F9 Launch  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/air-sim/08-launch-access.svg b/docs/assets/images/air-sim/08-launch-access.svg new file mode 100644 index 0000000..b575bae --- /dev/null +++ b/docs/assets/images/air-sim/08-launch-access.svg @@ -0,0 +1,353 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + NVCM AIR Sim Wizard - Launch / Access + + + + + + + + + + +NVCM AIR Sim WizardLaunch + +───────────────────────────────────────────────────────────── + +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  Topology Launch Simulation  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +  Options[*] Bringup complete! + + +  Launch──────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +SSHsshpass -p NVCMDemo1! ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -p▔▔▔▔▔ +17117 nvcm@eb515e50.workers.ngc.air.nvidia.com ⧉  +▁▁▁▁▁ +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +StepsPod Status + +────────────────────────────────────────────────────── + +[*]  Resolve topology  0:04Provisioned: 6/6 +[*]  Create AIR simulation  0:13 +[*]  Attach cloud-init  0:07────────────────────────────── +[*]  Start simulation  4:46 +[*]  Create SSH service  0:18 NAME                                        READY  STATUS   RESTARTS  AGE    +[*]  Wait for cloud-init  4:01 cluster-dhcp-1                              1/1    Running  0         53m    +[*]  Upload installer config  0:06 cluster-nautobot-1                          1/1    Running  0         53m    +[*]  Run nvcm installer  19:48 nv-config-manager-dhcp-c49966454-wvlts      4/4    Running  0         54m    +[*]  Post-deploy setup  0:51 
nv-config-manager-dhcp-refresh-5cc75b56...  1/1    Running  0         40m    + nv-config-manager-nautobot-7c6c5b566-2kqq2  2/2    Running  0         54m   ▄▄ + nv-config-manager-nautobot-celery-559d9...  1/1    Running  0         54m    + nv-config-manager-nautobot-celery-beat-...  1/1    Running  0         54m    + nv-config-manager-render-api-5858dcb947...  1/1    Running  0         9m43s  +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Output + +Deploy LogDHCPZTPAccess +━━━━━╺━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + + +Proxy Access +Start the SOCKS tunnel, then open the browser with the proxy. + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Linux / macOS - SOCKS tunnel▔▔▔▔▔ + ⧉  +▁▁▁▁▁ +sshpass -p NVCMDemo1! 
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -D +8080 -N -p 17117 nvcm@eb515e50.workers.ngc.air.nvidia.com + + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── + +──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +Windows OpenSSH - SOCKS tunnel▔▔▔▔▔ + ⧉  +▁▁▁▁▁ +ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o PreferredAuthentications=password -D 8080 -N -p 17117 +nvcm@eb515e50.workers.ngc.air.nvidia.com▇▇ + + + F2 Save  F9 Launch  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + diff --git a/docs/assets/images/installer/19-values-preview-generated.svg b/docs/assets/images/installer/19-values-preview-generated.svg index fb4f148..57f2012 100644 --- a/docs/assets/images/installer/19-values-preview-generated.svg +++ b/docs/assets/images/installer/19-values-preview-generated.svg @@ -19,332 +19,332 @@ font-weight: 700; } - .terminal-930726579-matrix { + .terminal-3325412508-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-930726579-title { + .terminal-3325412508-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-930726579-r1 { fill: #c5c8c6 } -.terminal-930726579-r2 { fill: #616161 } -.terminal-930726579-r3 { fill: #e0e0e0;font-weight: bold } -.terminal-930726579-r4 { fill: #a0a0a0 } -.terminal-930726579-r5 { fill: #e0e0e0 } -.terminal-930726579-r6 { fill: #a9a9a9 } -.terminal-930726579-r7 { fill: #2d2d2d } -.terminal-930726579-r8 { fill: #6b6b6b;font-weight: bold } -.terminal-930726579-r9 { fill: #3a3a3a } -.terminal-930726579-r10 { fill: #0d0d0d } -.terminal-930726579-r11 { fill: #121212 } -.terminal-930726579-r12 { fill: #191919 } -.terminal-930726579-r13 { fill: #e1e1e1 } -.terminal-930726579-r14 { fill: 
#e0e0e0;font-style: italic; } -.terminal-930726579-r15 { fill: #e4e4e4;font-weight: bold } -.terminal-930726579-r16 { fill: #282828 } -.terminal-930726579-r17 { fill: #1e1e1e } -.terminal-930726579-r18 { fill: #b0b0b0;font-weight: bold } + .terminal-3325412508-r1 { fill: #c5c8c6 } +.terminal-3325412508-r2 { fill: #616161 } +.terminal-3325412508-r3 { fill: #e0e0e0;font-weight: bold } +.terminal-3325412508-r4 { fill: #a0a0a0 } +.terminal-3325412508-r5 { fill: #e0e0e0 } +.terminal-3325412508-r6 { fill: #a9a9a9 } +.terminal-3325412508-r7 { fill: #2d2d2d } +.terminal-3325412508-r8 { fill: #6b6b6b;font-weight: bold } +.terminal-3325412508-r9 { fill: #3a3a3a } +.terminal-3325412508-r10 { fill: #0d0d0d } +.terminal-3325412508-r11 { fill: #121212 } +.terminal-3325412508-r12 { fill: #191919 } +.terminal-3325412508-r13 { fill: #e1e1e1 } +.terminal-3325412508-r14 { fill: #e0e0e0;font-style: italic; } +.terminal-3325412508-r15 { fill: #e4e4e4;font-weight: bold } +.terminal-3325412508-r16 { fill: #282828 } +.terminal-3325412508-r17 { fill: #1e1e1e } +.terminal-3325412508-r18 { fill: #b0b0b0;font-weight: bold } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - NVIDIA Config Manager Install Wizard — Values Preview / Generated + NVIDIA Config Manager Install Wizard — Values Preview / Generated - - - - -NVCM Install WizardValues Preview - -───────────────────────────────────────────────────────────── - -Generate and preview the Helm values that would be used for deployment. Optionally write them to a file for inspection or manual use. 
-  Cluster▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ - Generate  Write to File  -▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ -  Services -Output Path -▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ -  External Servicesvalues-generated.yaml -▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - -  App SecretsValues generated successfully. - -  - name: AIRValidateSiteWorkflow -  Network Secrets    read_roles: -    - all -    execute_roles: -  Ingest Data    - all -  - name: ValidateHardwareWorkflow -    read_roles: -  Template Plugins    - all -    execute_roles: -    - all -  OS Images  - name: NVLinkSwitchFirmwareUpgradeWorkflow -    read_roles: -    - all -  Workflows    execute_roles: -    - all -  - name: SitePasswordRotationWorkflow -  Container Images    read_roles: -    - all -    execute_roles: -* SSO    - all -  - name: IBPKeyCreationWorkflow -    read_roles: -  SPIFFE    - all -    execute_roles: -    - all -  Infrastructure  - name: IBPKeyMemberAddWorkflow -    read_roles: -    - all -  Values Preview    execute_roles: -    - all -  - name: IBPKeyMemberUpdateWorkflow -  Deploy    read_roles: -    - all -    execute_roles: -    - all -  - name: IBPKeyMemberDeleteWorkflow -    read_roles: -    - all -    execute_roles: -    - all -  - name: IBPortGuidDiscoveryWorkflow -    read_roles: -    - all -    execute_roles: -    - all -  - name: DiagnosticsWorkflow -    read_roles:▅▅ -    - all -    execute_roles: -    - all - - - F2 Save  F5 Generate Values  F9 Deploy  F10 Save & Exit  ^N Next Section  ^P Prev Section  + + + + +NVCM Install WizardValues Preview + +───────────────────────────────────────────────────────────── + +Generate and preview the Helm values that would be used for deployment. Optionally write them to a file for inspection or manual use. 
+  Cluster▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ + Generate  Write to File  +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +  Services +Output Path +▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +  External Servicesvalues-generated.yaml +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + +  App SecretsValues generated successfully. + +  - name: RedfishProvisioningWorkflow +  Network Secrets    read_roles: +    - all +    execute_roles: +  Ingest Data    - all +  - name: ValidateHardwareWorkflow +    read_roles: +  Template Plugins    - all +    execute_roles: +    - all +  OS Images  - name: NVLinkSwitchFirmwareUpgradeWorkflow +    read_roles: +    - all +  Workflows    execute_roles: +    - all +  - name: SitePasswordRotationWorkflow +  Container Images    read_roles: +    - all +    execute_roles: +* SSO    - all +  - name: IBPKeyCreationWorkflow +    read_roles: +  SPIFFE    - all +    execute_roles: +    - all +  Infrastructure  - name: IBPKeyMemberAddWorkflow +    read_roles: +    - all +  Values Preview    execute_roles: +    - all +  - name: IBPKeyMemberUpdateWorkflow +  Deploy    read_roles: +    - all +    execute_roles: +    - all +  - name: IBPKeyMemberDeleteWorkflow +    read_roles: +    - all +    execute_roles: +    - all +  - name: IBPortGuidDiscoveryWorkflow +    read_roles: +    - all +    execute_roles: +    - all +  - name: DiagnosticsWorkflow +    read_roles:▅▅ +    - all +    execute_roles: +    - all + + + F2 Save  F5 Generate Values  F9 Deploy  F10 Save & Exit  ^N Next Section  ^P Prev Section  diff --git a/docs/fern/docs.yml b/docs/fern/docs.yml index 8be447f..437a714 100644 --- a/docs/fern/docs.yml +++ b/docs/fern/docs.yml @@ -60,6 +60,8 @@ navigation: path: ../getting-started/interfaces.mdx - page: Local Development Quick Start path: 
../getting-started/local-development-quick-start.mdx + - page: AIR Simulation User Guide + path: ../user-guides/air-simulation/index.mdx - page: First Run Tour path: ../getting-started/first-run-tour.mdx diff --git a/docs/getting-started/index.mdx b/docs/getting-started/index.mdx index fbfc78a..f927817 100644 --- a/docs/getting-started/index.mdx +++ b/docs/getting-started/index.mdx @@ -35,6 +35,7 @@ Use Config Manager when you need to: | [DHCP Service](../dhcp/index.mdx) | Enable Kea-backed DHCP and model reservations/options in Nautobot. | Inspect reservations, leases, and generated DHCP config when provisioning stalls. | | [Config Manager UI](interfaces.mdx) | Set `global.baseHostname`, SSO callbacks, and gateway/TLS access. | Use workflow and config views for day-0 and day-2 operations. | | Observability and troubleshooting | Enable monitoring resources and the local observability stack when appropriate. | Use [Troubleshooting](../install/troubleshooting.mdx), pod logs, and workflow status to investigate failed operations. | +| [AIR Simulation User Guide](../user-guides/air-simulation/index.mdx) | Bring up a public NVIDIA DSX Air demo with Config Manager installed inside the simulation. | Exercise Ethernet/Cumulus workflows against simulated switches before using a physical lab. | ## Recommended first path @@ -42,8 +43,9 @@ Use Config Manager when you need to: 1. Before pointing Config Manager at real devices, confirm network secrets, DHCP/ZTP exposure, device authentication prerequisites, and maintenance controls. 1. Operators should start with [Which Interface Should I Use?](interfaces.mdx), then review [Temporal Workflows](../temporal/index.mdx), [Workflow Lifecycle](../user-guides/workflow-lifecycle/index.mdx), and [Config Store](../config-store/index.mdx). 1. To preview Config Manager locally without a real lab, use [Local Development Quick Start](local-development-quick-start.mdx). +1. 
To validate device-facing workflows without a physical lab, use [AIR Simulation User Guide](../user-guides/air-simulation/index.mdx). 1. After installation, follow [First Run Tour](first-run-tour.mdx) to verify the deployment and choose the first workflows to run. -The local development quick start is not a deployment path and is not a workflow validation environment. It exists only to help developers preview rendered configuration, inspect UI flows, and understand Nautobot modeling shape without touching real devices. +The local development quick start is not a deployment path and is not a workflow validation environment. Use the AIR simulation guide when you need simulated Cumulus switches for workflow validation without touching real devices. diff --git a/docs/getting-started/local-development-quick-start.mdx b/docs/getting-started/local-development-quick-start.mdx index ac1078e..0cba75a 100644 --- a/docs/getting-started/local-development-quick-start.mdx +++ b/docs/getting-started/local-development-quick-start.mdx @@ -12,7 +12,7 @@ The bundled mock topology data is not production data and must never be used for Real device interaction is not meaningfully mocked today. Workflows such as ZTP, cable validation, deploy, multi-deploy, backup, reprovision, password rotation, hardware validation, and LLDP-based discovery require reachable real devices to be useful. -Future work will include a more robust mocking infrastructure for local development. Until then, treat the local quick start as a render and interface preview only. +Future work will include a more robust mocking infrastructure for local development. Until then, treat the local quick start as a render and interface preview only. To exercise workflows against simulated Cumulus switches, use the [AIR Simulation User Guide](../user-guides/air-simulation/index.mdx). 
## What this gives you @@ -50,7 +50,7 @@ Equivalent headless installer command: ```bash cd installer -uv run nv-config-manager-installer deploy ../deploy/configs/local-superpod.yaml \ +uv run nvcm-installer deploy ../deploy/configs/local-superpod.yaml \ --image-source local \ --build-images \ --load-kind \ diff --git a/docs/i-want-to.mdx b/docs/i-want-to.mdx index 4427762..47db089 100644 --- a/docs/i-want-to.mdx +++ b/docs/i-want-to.mdx @@ -13,6 +13,7 @@ A scenario-first index into the docs. Pick the row that describes what you are t | Understand what Config Manager is and which path to start with. | [Start Here](getting-started/index.mdx) | | Know whether to use the Config Manager UI, Nautobot UI, Temporal Web, or an API. | [Which Interface Should I Use?](getting-started/interfaces.mdx) | | Preview Config Manager locally without touching real devices. | [Local Development Quick Start](getting-started/local-development-quick-start.mdx) | +| Validate Config Manager workflows against simulated Cumulus switches in NVIDIA DSX Air. | [AIR Simulation User Guide](user-guides/air-simulation/index.mdx) | | Decide what to inspect after the first install. | [First Run Tour](getting-started/first-run-tour.mdx) | ## Bootstrap and provisioning diff --git a/docs/index.mdx b/docs/index.mdx index 8a149c0..2ab7994 100644 --- a/docs/index.mdx +++ b/docs/index.mdx @@ -14,8 +14,9 @@ Read the [Start Here guide](getting-started/index.mdx) or learn fundamental conc - [Getting Started with Config Manager](install/index.mdx) covers the standard connected install path. - [Which Interface Should I Use?](getting-started/interfaces.mdx) explains the Config Manager UI, Nautobot UI, Temporal Web, and APIs. - [Airgapped Deployment](install/install-airgapped.mdx) covers disconnected environments. +- [AIR Simulation User Guide](user-guides/air-simulation/index.mdx) covers workflow validation in NVIDIA DSX Air. - [I want to...](i-want-to.mdx) routes common tasks to the right guide. 
-The local development quick start is only for render and UI exploration. It is not production data and is not a supported way to validate device-facing workflows. +The local development quick start is only for render and UI exploration. Use the AIR simulation guide when you need simulated Cumulus switches for device-facing workflow validation. diff --git a/docs/install/index.mdx b/docs/install/index.mdx index 9632bbb..406a874 100644 --- a/docs/install/index.mdx +++ b/docs/install/index.mdx @@ -22,7 +22,7 @@ Before you start, install the following tools and confirm you have access to the | :-------- | :------ | | `kubectl` | Connects to the Kubernetes cluster where Config Manager will be installed | | Helm 3.x | Installs and upgrades the Config Manager chart and optional operators | -| Python 3.11+ and `uv` | Runs or installs the `nv-config-manager-installer` CLI | +| Python 3.11+ and `uv` | Runs or installs the `nvcm-installer` CLI | | Docker | Builds local images when using the local image workflow | | Kind | Loads locally built images into a Kind cluster when using a local or remote-VM Kubernetes test environment | | Vault or OpenBao access | Required only when using External Secrets Operator instead of Kubernetes secrets | @@ -78,43 +78,45 @@ cd installer uv sync # Launch the interactive wizard. -uv run nv-config-manager-installer init +uv run nvcm-installer init # Re-open an existing configuration. -uv run nv-config-manager-installer init nv-config-manager-install.yaml +uv run nvcm-installer init nv-config-manager-install.yaml # Validate the configuration without deploying. -uv run nv-config-manager-installer validate nv-config-manager-install.yaml +uv run nvcm-installer validate nv-config-manager-install.yaml # Generate Helm values without deploying. 
-uv run nv-config-manager-installer generate-values nv-config-manager-install.yaml -o ./generated +uv run nvcm-installer generate-values nv-config-manager-install.yaml -o ./generated ``` The wizard writes `nv-config-manager-install.yaml` with owner-only permissions (`0600`). Treat the file as sensitive because it can contain secrets. ## Choose a CLI Workflow +`nvcm-installer` is the short command name for the installer. The longer `nv-config-manager-installer` command remains available for compatibility and accepts the same subcommands and flags. + For one-time use from the platform repository, run the CLI with `uv`. ```bash cd installer uv sync -uv run nv-config-manager-installer --help +uv run nvcm-installer --help ``` -To make `nv-config-manager-installer` available as a standalone shell command from the installer package, run: +To make `nvcm-installer` available as a standalone shell command from the installer package, run: ```bash cd installer uv tool install . -nv-config-manager-installer --help +nvcm-installer --help ``` ## CLI Command Reference ### init -Run `nv-config-manager-installer init` to launch the interactive TUI wizard. +Run `nvcm-installer init` to launch the interactive TUI wizard. | Flag | Default | Description | | :--- | :------ | :---------- | @@ -124,10 +126,10 @@ The wizard walks through every configuration section, saves the configuration to ### validate -Run `nv-config-manager-installer validate` to validate a configuration file without deploying. +Run `nvcm-installer validate` to validate a configuration file without deploying. ```bash -nv-config-manager-installer validate nv-config-manager-install.yaml +nvcm-installer validate nv-config-manager-install.yaml ``` Validation checks include: @@ -140,10 +142,10 @@ Validation checks include: ### generate-values -Using ArgoCD? Generate a values file to use instead of managing the full deployment lifecycle with the TUI. 
Run `nv-config-manager-installer generate-values` to generate deployment artifacts without running a deployment. +Using ArgoCD? Generate a values file to use instead of managing the full deployment lifecycle with the TUI. Run `nvcm-installer generate-values` to generate deployment artifacts without running a deployment. ```bash -nv-config-manager-installer generate-values nv-config-manager-install.yaml --output-dir ./generated +nvcm-installer generate-values nv-config-manager-install.yaml --output-dir ./generated ``` | Flag | Default | Description | @@ -156,12 +158,12 @@ The command writes a `values-generated.yaml` file to the output directory, combi ### deploy -Run `nv-config-manager-installer deploy` to run a headless, non-interactive deployment from an existing configuration file. +Run `nvcm-installer deploy` to run a headless, non-interactive deployment from an existing configuration file. Use this command for repeatable deploys after you have created and reviewed `nv-config-manager-install.yaml`. ```bash -nv-config-manager-installer deploy nv-config-manager-install.yaml \ +nvcm-installer deploy nv-config-manager-install.yaml \ --image-source local \ --build-images \ --load-kind \ @@ -200,7 +202,7 @@ Common `nv-config-manager-install.yaml` examples are documented in [Configuratio ## Deployment Steps -When you deploy from the TUI or with `nv-config-manager-installer deploy`, the installer runs the following steps. Steps are skipped when they do not apply to the selected configuration. +When you deploy from the TUI or with `nvcm-installer deploy`, the installer runs the following steps. Steps are skipped when they do not apply to the selected configuration. | # | Step | Description | | :- | :--- | :---------- | @@ -321,7 +323,7 @@ kubectl logs -n -l app.kubernetes.io/component=network-dhcp -c confi ## Iterative Deployment and Post-Deploy Changes -You do not need to redeploy from scratch when you change data, add images, update templates, or upgrade versions. 
Treat `nv-config-manager-install.yaml` as the source of truth: re-open it with `nv-config-manager-installer init nv-config-manager-install.yaml`, adjust the configuration, and deploy again. The deployer applies updates through Helm and only restarts services when relevant inputs have changed. +You do not need to redeploy from scratch when you change data, add images, update templates, or upgrade versions. Treat `nv-config-manager-install.yaml` as the source of truth: re-open it with `nvcm-installer init nv-config-manager-install.yaml`, adjust the configuration, and deploy again. The deployer applies updates through Helm and only restarts services when relevant inputs have changed. ### Update Nautobot or Site Data diff --git a/docs/install/install-airgapped.mdx b/docs/install/install-airgapped.mdx index ef52f01..ae9207f 100644 --- a/docs/install/install-airgapped.mdx +++ b/docs/install/install-airgapped.mdx @@ -157,17 +157,18 @@ Install the bundled installer package on the target host and verify the TUI is a ```bash ./installer/install.sh -./installer/nv-config-manager-installer --help +./installer/nvcm-installer --help ``` The bootstrap script creates a local virtual environment from bundled wheels and does not require internet access on the target host. +It also exposes the longer `./installer/nv-config-manager-installer` command for compatibility. ## Create a Configuration Launch the TUI and save a repeatable installer configuration. ```bash -./installer/nv-config-manager-installer init install.yaml +./installer/nvcm-installer init install.yaml ``` At minimum, confirm the following sections before deploying: @@ -205,7 +206,7 @@ The requirements for the `large` profile are cumulative across all nodes. For ex Deploy from the extracted bundle after `install.yaml` is complete and images are either reachable from the configured registry or preloaded on the cluster nodes. 
```bash -./installer/nv-config-manager-installer deploy install.yaml \ +./installer/nvcm-installer deploy install.yaml \ --chart-dir helm \ --image-source registry \ --install-envoy-gateway \ @@ -292,10 +293,10 @@ kubectl logs -n nv-config-manager -l app=nv-config-manager-platform-dhcp-refresh ## Iterative Deployment and Post-Deploy Changes -You do not need to redeploy from scratch when you change data, add images, update templates, or upgrade versions. Treat `install.yaml` as the source of truth: re-open it with `./installer/nv-config-manager-installer init install.yaml`, adjust the configuration, and deploy again. +You do not need to redeploy from scratch when you change data, add images, update templates, or upgrade versions. Treat `install.yaml` as the source of truth: re-open it with `./installer/nvcm-installer init install.yaml`, adjust the configuration, and deploy again. ```bash -./installer/nv-config-manager-installer deploy install.yaml \ +./installer/nvcm-installer deploy install.yaml \ --chart-dir helm \ --image-source registry \ --helm-timeout 30m @@ -411,14 +412,14 @@ Verify the offline installer can bootstrap from the bundled wheels: ```bash ./installer/install.sh -./installer/nv-config-manager-installer --help +./installer/nvcm-installer --help ``` After you create an `install.yaml`, validate the installer config and generated Helm values against the bundled chart: ```bash -./installer/nv-config-manager-installer validate install.yaml -./installer/nv-config-manager-installer generate-values install.yaml \ +./installer/nvcm-installer validate install.yaml +./installer/nvcm-installer generate-values install.yaml \ --chart-dir helm \ --output-dir /tmp/nvcm-airgap-values ``` diff --git a/docs/network-ztp/upload-images.mdx b/docs/network-ztp/upload-images.mdx index 9763294..6c17c59 100644 --- a/docs/network-ztp/upload-images.mdx +++ b/docs/network-ztp/upload-images.mdx @@ -125,8 +125,8 @@ infrastructure: Re-run: ```bash -uv run 
nv-config-manager-installer init nv-config-manager-install.yaml -uv run nv-config-manager-installer deploy nv-config-manager-install.yaml +uv run nvcm-installer init nv-config-manager-install.yaml +uv run nvcm-installer deploy nv-config-manager-install.yaml ``` The installer computes the SHA256, writes the image into `{platform}/{version}/{filename}` on the PVC, and updates `manifest.json` — tagging the entry as the firmware image. See [Iterative Deployment → Add or Update OS Images](../install/index.mdx#add-or-update-os-images). diff --git a/docs/user-guides/air-simulation/index.mdx b/docs/user-guides/air-simulation/index.mdx new file mode 100644 index 0000000..bffa11f --- /dev/null +++ b/docs/user-guides/air-simulation/index.mdx @@ -0,0 +1,283 @@ +--- +title: AIR Simulation User Guide +sidebar-title: AIR Simulation +position: 5 +--- + +The Config Manager AIR simulation brings up a live NVIDIA DSX Air simulation, installs Config Manager inside the simulation, loads a demo Nautobot topology, and provisions virtual Cumulus Linux switches through DHCP and ZTP. Use it when you want to validate real device-facing workflows without touching a physical lab. + +This is different from the [Local Development Quick Start](../../getting-started/local-development-quick-start.mdx). The local quick start is render-only. The AIR simulation runs Cumulus Linux switch nodes and exercises Config Manager against those nodes over the simulated management network. + + +Most Config Manager workflows that operate on the demo Ethernet Cumulus switches can be exercised in AIR. InfiniBand and NVLink workflows are not included because AIR does not currently simulate those fabrics for this demo. Switch OS Upgrade is also excluded because the ONIE install path does not work in AIR. 
+ + +## Prerequisites + +Before running the Config Manager AIR simulation, review the public NVIDIA DSX Air documentation: + +- [NVIDIA DSX Air Account Setup](https://docs.nvidia.com/networking-ethernet-software/nvidia-air-v2/Account-Setup/) - NGC account, organization selection, free trial, and Air roles. +- [NVIDIA DSX Air Quick Start](https://docs.nvidia.com/networking-ethernet-software/guides/nvidia-air/Quick-Start/) - simulation navigation, services, node consoles, SSH keys, and API authentication. + +You also need: + +- Python 3.11+ and `uv`. +- An NGC API key for AIR API authentication. You can paste it into the TUI or set `NGC_API_KEY` in your shell. +- Enough DSX Air capacity for the selected demo. The AIR free trial preset is resource-capped for public trial accounts. +- A workstation browser that can run through a SOCKS proxy. Chromium-family browsers are easiest because the TUI prints ready-to-run commands. + +## Start the TUI + +From the Config Manager repository: + +```bash +cd installer +uv sync +uv run nvcm-installer air-sim init +``` + +The TUI writes `~/.nvcm-air-sim.yaml` by default so API keys are not saved inside the Git checkout. Treat the file as sensitive if it contains your NGC API key. To use a different location outside the repository, pass `--config /path/to/config.yaml` or set `NVCM_AIR_CONFIG`. + +You can also run a saved simulation config headlessly: + +```bash +cd installer +uv run nvcm-installer air-sim deploy --config ~/.nvcm-air-sim.yaml +``` + +## TUI Walkthrough + +The AIR simulation TUI has three sections: Topology, Options, and Launch. + +### Topology + +Use the **Pre-built Config** selector for the built-in demos. For most users, start with **AIR free trial demo**. It creates one OOB management leaf and five `TAN-HLEAF` Cumulus switches, which is enough to exercise ZTP, backup, deploy, multi-deploy, reprovision, and validation workflows. 
+ +The preset enables **Mock Topology**, points the loader at `development/mock_topology`, and configures the template plugin paired with the demo topology. Keep the paired template plugin in place; the prebuilt demos require those templates to render valid device configuration. + +![AIR sim topology screen](../../assets/images/air-sim/01-topology.svg) + +Important fields: + +| Field | Description | +| :---- | :---------- | +| **Pre-built Config** | Loads the AIR trial or SuperPOD demo values into the TUI. | +| **Mock Topology** | Builds AIR topology and Nautobot data from `development/mock_topology` instead of a hand-authored AIR topology YAML. | +| **Blueprint** | Mock topology blueprint, such as `air_trial`. | +| **Deployment Name** | Suffix used by the mock topology job. For the AIR trial preset, this is `demo`. | +| **Template Plugins** | Template plugin directories or archives paired with the selected topology. | +| **Simulation Name** | Name shown in DSX Air. Leave blank to auto-generate. | +| **OOB Management Server Name** | Existing AIR server node that receives Config Manager, usually `oob-mgmt-server`. | + +### Options + +Use **Options** to set AIR authentication, deployment behavior, and advanced timing values. + +![AIR sim options screen](../../assets/images/air-sim/02-options.svg) + +Recommended public AIR values: + +| Field | Recommended value | +| :---- | :---------------- | +| **NGC API Key** | Your AIR-capable NGC API key, or leave blank if `NGC_API_KEY` is set. | +| **Use Public Air** | On. | +| **Auto-configure server on boot** | On. The TUI attaches cloud-init so the server prepares itself after boot. | +| **Git Token** | Blank for the public `NVIDIA/nv-config-manager` repository. | +| **nv-config-manager Git Ref** | `main`, unless you are testing a branch that contains unreleased AIR sim code. | +| **Deployment Size** | `small` for the public AIR trial demo. | +| **Run nv-config-manager-installer deploy after setup** | On. 
| + +### Launch + +The **Launch** screen creates the simulation, starts it, waits for SSH, deploys Config Manager, runs post-deploy Nautobot jobs, monitors pods, and follows DHCP/ZTP logs. + +![AIR sim launch screen](../../assets/images/air-sim/03-launch-ready.svg) + +Click **Launch Simulation** and leave the TUI open. The launch can take a while because it boots the AIR nodes, builds and loads local images, installs Config Manager, and lets the switches provision. + +During launch: + +- **Steps** shows high-level orchestration progress. +- **Pod Status** polls Kubernetes pods inside the AIR server over SSH. +- **Deploy Log** streams installer output. +- **DHCP** and **ZTP** tabs appear when service monitoring starts. +- **SSH** appears once the AIR worker exposes the management server. + +![AIR sim pod status](../../assets/images/air-sim/05-launch-pod-status.svg) + +When bringup completes, the **Access** tab shows commands for reaching the simulation. + +![AIR sim access screen](../../assets/images/air-sim/08-launch-access.svg) + +## Access the Simulation + +Config Manager runs inside the `oob-mgmt-server` node in AIR. The TUI creates an AIR SSH service to that server and prints copyable commands. + +For Linux and macOS: + +1. Copy the **Linux / macOS - SOCKS tunnel** command from the Access tab and run it in a terminal. +1. Copy the **Linux / macOS - browser** command, or manually launch a browser with `socks5://localhost:8080`. +1. Open the Config Manager and Nautobot URLs through that browser session. + +Common service URLs inside the SOCKS-proxied browser are: + +| Service | URL | +| :------ | :-- | +| Config Manager UI | `https://nvcm.air` | +| Nautobot | `https://nautobot.nvcm.air` | +| Workflow API | `https://workflow.nvcm.air` | +| Config Store API | `https://config-store.nvcm.air` | +| Render API | `https://render.nvcm.air` | +| DHCP API | `https://dhcp.nvcm.air` | +| ZTP API | `https://ztp.nvcm.air` | + +The demo disables SSO. 
Nautobot credentials are `demo` / `demo`. Config Manager workflow RBAC is opened for the demo so the workflow forms can be exercised without identity-provider setup. + +<Note> +DNS resolution for `*.nvcm.air` must go through the SOCKS tunnel. If a browser resolves names locally before using the proxy, the URLs will fail even though the services are healthy. +</Note> + +You can also SSH directly to the AIR server with the SSH command shown at the top of the Launch screen. This is useful for `kubectl` checks, pod logs, and low-level troubleshooting. + +## What to Check After Bringup + +After launch completes: + +1. Open Nautobot and confirm the demo site, devices, interfaces, IP addresses, cables, roles, platforms, and config contexts exist. +1. Open Config Manager and confirm workflow forms are available. +1. Open Config Store and confirm intended configs exist for `tan-leaf-01` through `tan-leaf-05`. +1. In Nautobot, confirm the TAN leaf devices have status **Provisioned** after ZTP completes. +1. In the TUI, use the **DHCP** and **ZTP** tabs to inspect service activity if a device is still provisioning. + +## Workflow Support + +The AIR demo is intended for Ethernet and Cumulus Linux workflows. Useful workflows to try include: + +- Configuration Backup +- Configuration Deploy +- Batch Deploy +- Multi-Device Deploy +- Tenant Deploy, if you scope it to demo devices with tenant data +- Cable Validation +- Hardware Validation +- Device Password Rotation +- Site Password Rotation +- Device Reprovision +- Workflow lifecycle actions such as approve, reject, retry, and terminate + +Do not use the AIR demo for: + +| Workflow family | Why it is excluded | +| :-------------- | :----------------- | +| InfiniBand workflows | AIR does not simulate the InfiniBand fabric for this demo. | +| NVLink switch workflows | AIR does not simulate NVLink switches for this demo. | +| Switch OS Upgrade | AIR cannot complete the ONIE install path used by the OS upgrade workflow. 
| + +## Try Multi-Deploy + +Multi-Deploy is a good first Day-2 workflow because the AIR trial demo has five `TAN-HLEAF` switches that should receive the same diff. + +The example below adds a DNS server to the site config context. The render service picks up the Nautobot change, produces new intended configs for the TAN leaf devices, and Multi-Deploy groups the identical diffs for approval. + +### Edit the DNS config context + +1. Open `https://nautobot.nvcm.air`. +1. Log in as `demo` / `demo`. +1. Navigate to **Extensibility > Config Contexts**. +1. Open `air-trial-network-management-services - demo`. +1. Edit the **Data** field. +1. Add a DNS block. If the context already has a `dns` block, append one server to its `ipv4` list. + +Example data with a demo DNS server: + +```json +{ + "ztp": { + "ipv4": [ + "172.18.255.201" + ] + }, + "dhcp": { + "nvcm": { + "ipv4": [ + "172.18.255.202" + ] + } + }, + "management_prefixes": { + "ipv4": [ + "172.18.0.0/16" + ] + }, + "provisioning_servers": { + "ipv4": [ + "10.120.0.0" + ] + }, + "dns": { + "ipv4": [ + "192.0.2.53" + ] + } +} +``` + +1. Save the config context. +1. Wait for render to regenerate intended configs. If no intended config changes appear after a short wait, make a no-op edit to the context or call `POST /v1/render/all` from the Render API at `https://render.nvcm.air/docs`. + +### Run Multi-Deploy + +1. Open `https://nvcm.air`. +1. Click the **+** button and select **MultiDeployWorkflow**. +1. Set **Role** to `TAN-HLEAF`. +1. Set **Max Batch Size** to `5` so all demo TAN leaf devices can be approved together when they share one diff. +1. Submit the workflow. +1. When the parent reaches the batch execution stage, open the child Batch Deploy link. +1. Review the shared diff. It should show the DNS server addition on each `tan-leaf-*` device. +1. Approve the batch. +1. Confirm the parent workflow completes and the child batch reports successful applies and backups. 
+ +For the full workflow behavior, see [Multi-Device Deploy](../multi-deploy/index.mdx) and [Batch Deploy](../batch-deploy/index.mdx). + +## Try Reprovision + +Reprovision shows the re-ZTP path for an already-running switch. It factory-resets the target Cumulus switch, waits for DHCP/ZTP to run again, and then triggers a fresh backup. + +<Warning> +Reprovision intentionally takes the selected switch offline and wipes its running configuration. In the AIR demo this is safe, but the same workflow is disruptive in a real environment. +</Warning> + +1. Open `https://nvcm.air`. +1. Click the **+** button and select **ReprovisionWorkflow**. +1. Select the AIR trial site, such as `TRIAL01 - demo`. +1. Select a TAN leaf device, such as `tan-leaf-01`. +1. Submit the workflow. +1. In the DSX Air UI, open the simulation topology. +1. Double-click the same switch node to open its console. +1. On the switch console, tail the Cumulus autoprovision log: + + ```bash + sudo tail -f /var/log/autoprovision + ``` + + This lets you watch the switch-side ZTP process while Config Manager waits for the device to report provisioned. + +1. In the Config Manager UI, watch the Reprovision workflow stages. The workflow completes after ZTP succeeds and the backup child workflow finishes. +1. In Nautobot, confirm the device status returns to **Provisioned**. + +You can also watch the TUI **ZTP** tab or the ZTP service logs if the switch stalls. For the full production workflow details, see [Device Reprovision](../reprovision/index.mdx). + +## Other Useful Exercises + +After Multi-Deploy and Reprovision, try these workflows: + +- **Configuration Backup** against one TAN leaf. Confirm the backup appears in Config Store. +- **Configuration Deploy** against one TAN leaf after a small Nautobot config context change. Use this before Multi-Deploy if you want to inspect the single-device approval path. +- **Cable Validation** for the AIR trial site. 
This checks LLDP-reported neighbors against the mock topology's Nautobot cabling. + + +## Cleanup + +AIR simulations consume account resources while running. When you are done, stop or delete the simulation in the DSX Air UI. If you expect to resume later, use AIR checkpoints according to your account limits. If you only needed a short demo, delete the simulation to free capacity. + +The TUI-created Config Manager deployment lives inside the AIR simulation. Deleting the simulation removes the demo Kubernetes cluster, Nautobot data, Config Store data, and generated switch state. diff --git a/installer/README.md b/installer/README.md index 7146dd0..66ec754 100644 --- a/installer/README.md +++ b/installer/README.md @@ -35,16 +35,16 @@ cd installer uv sync # Launch the interactive wizard -uv run nv-config-manager-installer init +uv run nvcm-installer init # Re-open an existing config -uv run nv-config-manager-installer init --config nv-config-manager-install.yaml +uv run nvcm-installer init --config nv-config-manager-install.yaml # Validate a config file -uv run nv-config-manager-installer validate nv-config-manager-install.yaml +uv run nvcm-installer validate nv-config-manager-install.yaml # Generate Helm values without deploying -uv run nv-config-manager-installer generate-values nv-config-manager-install.yaml -o ./generated +uv run nvcm-installer generate-values nv-config-manager-install.yaml -o ./generated ``` --- @@ -56,7 +56,7 @@ uv run nv-config-manager-installer generate-values nv-config-manager-install.yam ```bash cd installer uv sync -uv run nv-config-manager-installer --help +uv run nvcm-installer --help ``` ### As a standalone tool @@ -64,14 +64,17 @@ uv run nv-config-manager-installer --help ```bash cd installer uv tool install . -nv-config-manager-installer --help +nvcm-installer --help ``` +`nvcm-installer` is the short command name. 
The longer `nv-config-manager-installer` +command remains available for compatibility and accepts the same subcommands and flags. + --- ## CLI Commands -### `nv-config-manager-installer init` +### `nvcm-installer init` Launch the interactive TUI wizard. @@ -82,12 +85,12 @@ Launch the interactive TUI wizard. The wizard walks through all configuration sections, saves to `nv-config-manager-install.yaml` (with `0600` permissions), and can deploy directly from within the TUI. -### `nv-config-manager-installer validate` +### `nvcm-installer validate` Validate a configuration file without deploying. ```bash -nv-config-manager-installer validate nv-config-manager-install.yaml +nvcm-installer validate nv-config-manager-install.yaml ``` Checks include: @@ -97,12 +100,12 @@ Checks include: - `sso.issuer_url` required when SSO is enabled - Custom jobs require local Nautobot (`services.nautobot: true`) -### `nv-config-manager-installer generate-values` +### `nvcm-installer generate-values` Generate deployment artifacts without running a deployment. ```bash -nv-config-manager-installer generate-values nv-config-manager-install.yaml --output-dir ./generated +nvcm-installer generate-values nv-config-manager-install.yaml --output-dir ./generated ``` | Flag | Default | Description | @@ -115,12 +118,12 @@ Produces: - `values-generated.yaml` — Combined override file with TUI-generated values and the selected `cluster.size` profile - `config-secrets.ini` — Site-specific credential INI files -### `nv-config-manager-installer deploy` +### `nvcm-installer deploy` Run a headless (non-interactive) deployment. 
```bash -nv-config-manager-installer deploy nv-config-manager-install.yaml \ +nvcm-installer deploy nv-config-manager-install.yaml \ --build-images \ --load-kind \ --install-envoy-gateway \ @@ -729,7 +732,7 @@ sites: ## Deployment Steps -When deploying (either via TUI or `nv-config-manager-installer deploy`), the following steps +When deploying (either via TUI or `nvcm-installer deploy`), the following steps execute in order. Steps are automatically skipped when not applicable. | # | Step | Description | diff --git a/installer/pyproject.toml b/installer/pyproject.toml index 539f28a..26989d2 100644 --- a/installer/pyproject.toml +++ b/installer/pyproject.toml @@ -7,6 +7,7 @@ readme = "README.md" license = { text = "Apache-2.0" } requires-python = ">=3.11" dependencies = [ + "nv-air-sdk==1.4.0", "click>=8.1.7", "kubernetes>=32.0.0", "pydantic>=2.10.2", @@ -18,6 +19,7 @@ dependencies = [ [project.scripts] nv-config-manager-installer = "nv_config_manager_installer.cli:main" +nvcm-installer = "nv_config_manager_installer.cli:main" [build-system] requires = ["hatchling"] diff --git a/installer/scripts/screenshot_air_sim_tui.py b/installer/scripts/screenshot_air_sim_tui.py new file mode 100644 index 0000000..a5d9283 --- /dev/null +++ b/installer/scripts/screenshot_air_sim_tui.py @@ -0,0 +1,671 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Generate SVG screenshots of the AIR simulation TUI for documentation. + +Usage: + uv run python scripts/screenshot_air_sim_tui.py + uv run python scripts/screenshot_air_sim_tui.py --output-dir ../docs/assets/images/air-sim + +The launch screenshots use deterministic mock data. They do not contact AIR, +Kubernetes, or the SSH target. +""" + +from __future__ import annotations + +import argparse +import asyncio +import os +import re +import sys +import time +from collections.abc import Callable +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from textual.widgets import Button, Static + +from nv_config_manager_installer.air_sim.constants import ( + NVCM_BOX_PASSWORD, + NVCM_BOX_USER, +) +from nv_config_manager_installer.air_sim.orchestrator import STEPS, StepStatus +from nv_config_manager_installer.air_sim.prebuilt_configs import load_prebuilt_config +from nv_config_manager_installer.air_sim.proxy import ProxyInfo +from nv_config_manager_installer.air_sim.sim_config import SimConfig +from nv_config_manager_installer.tui.air_sim.app import SECTION_LABELS, NVCMAirSimApp +from nv_config_manager_installer.tui.air_sim.screens.launch import ( + LaunchScreen, + _clean_dhcp_line, + _clean_ztp_line, + _FollowLog, + _LogViewerWidget, + _PodStatusWidget, + _ProxyAccessWidget, + _StepListWidget, +) + +COLS = 180 +ROWS = 70 +REPO_ROOT = Path(__file__).resolve().parents[2] +DEFAULT_OUT_DIR = REPO_ROOT / "docs" / "assets" / "images" / "air-sim" + +MOCK_HOST = "eb515e50.workers.ngc.air.nvidia.com" +MOCK_PORT = 17117 +TRUFFLEHOG_IGNORE_COMMENT = "" + + +def _slug(label: str) -> str: + return re.sub(r"[^a-z0-9]+", "-", label.lower()).strip("-") + + +def _save(output_dir: Path, name: str, svg: str) -> None: + (output_dir / name).write_text(_allowlist_demo_password(svg)) + print(f" {name}") + + +def _allowlist_demo_password(svg: str) 
-> str: + """Mark generated SVG lines containing the public demo password for scanners.""" + lines: list[str] = [] + for line in svg.splitlines(keepends=True): + if NVCM_BOX_PASSWORD not in line or "trufflehog:ignore" in line: + lines.append(line) + continue + + ending = "" + body = line + if line.endswith("\r\n"): + ending = "\r\n" + body = line[:-2] + elif line.endswith("\n"): + ending = "\n" + body = line[:-1] + lines.append(f"{body}{TRUFFLEHOG_IGNORE_COMMENT}{ending}") + return "".join(lines) + + +def _shot(app: NVCMAirSimApp, title: str) -> str: + no_color = os.environ.pop("NO_COLOR", None) + try: + return app.export_screenshot(title=f"NVCM AIR Sim Wizard - {title}") + finally: + if no_color is not None: + os.environ["NO_COLOR"] = no_color + + +async def _stabilize(pilot: object, pauses: int = 2, delay: float = 0.1) -> None: + """Pause multiple times so async mounts and screen updates can settle.""" + for _ in range(pauses): + await pilot.pause(delay) # type: ignore[attr-defined] + + +def _example_config() -> SimConfig: + """Return the public AIR trial demo config with screenshot-only auth filled in.""" + cfg = load_prebuilt_config("air-trial") + cfg.ngc_api_key = "nvapi-demo-key-for-screenshots" + return cfg + + +def _ssh_cmd(host: str = MOCK_HOST, port: int = MOCK_PORT) -> str: + return ( + f"sshpass -p {NVCM_BOX_PASSWORD} ssh" + " -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" + " -o PreferredAuthentications=password" + f" -p {port} {NVCM_BOX_USER}@{host}" + ) + + +def _launch_screen(app: NVCMAirSimApp) -> LaunchScreen: + return app.query_one("#screen-launch", LaunchScreen) + + +def _set_step_states( + launch: LaunchScreen, + *, + running_step: str | None = None, + failed_step: str | None = None, +) -> None: + step_list = launch.query_one("#step-list", _StepListWidget) + step_list._start_times.clear() + step_list._durations.clear() + step_list._running_step = None + now = time.monotonic() + for idx, (step_id, _label) in enumerate(STEPS): + if 
failed_step and step_id == failed_step: + step_list.update_step(step_id, StepStatus.FAILED) + step_list._durations[step_id] = 42.0 + continue + if running_step and step_id == running_step: + step_list.update_step(step_id, StepStatus.RUNNING) + step_list._start_times[step_id] = now - 392 + continue + if failed_step: + failed_idx = next(i for i, (sid, _lbl) in enumerate(STEPS) if sid == failed_step) + status = StepStatus.SUCCESS if idx < failed_idx else StepStatus.PENDING + elif running_step: + running_idx = next(i for i, (sid, _lbl) in enumerate(STEPS) if sid == running_step) + status = StepStatus.SUCCESS if idx < running_idx else StepStatus.PENDING + else: + status = StepStatus.SUCCESS + step_list.update_step(step_id, status) + if status == StepStatus.SUCCESS: + step_list._durations[step_id] = _MOCK_STEP_DURATIONS.get(step_id, 8.0) + step_list._refresh(step_id) + + +_MOCK_STEP_DURATIONS: dict[str, float] = { + "parse-topology": 4.0, + "create-sim": 13.0, + "attach-cloud-init": 7.0, + "start-sim": 286.0, + "create-ssh": 18.0, + "wait-setup": 241.0, + "upload-files": 6.0, + "run-deploy": 1188.0, + "post-deploy": 51.0, +} + + +_MOCK_PODS: list[dict[str, str]] = [ + { + "name": "cluster-dhcp-1", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "53m", + }, + { + "name": "cluster-nautobot-1", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "53m", + }, + { + "name": "nv-config-manager-dhcp-c49966454-wvlts", + "ready": "4/4", + "status": "Running", + "restarts": "0", + "age": "54m", + }, + { + "name": "nv-config-manager-dhcp-refresh-5cc75b56fd-cbl2v", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "40m", + }, + { + "name": "nv-config-manager-nautobot-7c6c5b566-2kqq2", + "ready": "2/2", + "status": "Running", + "restarts": "0", + "age": "54m", + }, + { + "name": "nv-config-manager-nautobot-celery-559d9986b8-fvkzn", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "54m", + }, + { + "name": 
"nv-config-manager-nautobot-celery-beat-5d68f4c455-bznbk", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "54m", + }, + { + "name": "nv-config-manager-render-api-5858dcb947-n257z", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "9m43s", + }, + { + "name": "nv-config-manager-render-consumer-device-58599695-2q862", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "9m43s", + }, + { + "name": "nv-config-manager-render-consumer-device-58599695-4dn2k", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "9m43s", + }, + { + "name": "nv-config-manager-render-consumer-device-58599695-f5l4d", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "9m34s", + }, + { + "name": "nv-config-manager-render-consumer-device-58599695-j26nw", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "9m34s", + }, + { + "name": "nv-config-manager-render-consumer-nautobot-59c6d5bf8b-gcdlv", + "ready": "1/1", + "status": "Running", + "restarts": "0", + "age": "9m43s", + }, + { + "name": "nv-config-manager-ztp-69cbf8cd46-tm52t", + "ready": "3/3", + "status": "Running", + "restarts": "0", + "age": "54m", + }, +] + + +_DEPLOY_LOG_LINES = [ + "23:52:44 Prefixes tagged 'lb-allowed': 172.18.0.0/16, 10.0.0.0/8", + "23:52:44 Prefixes tagged 'relay-return': 10.120.0.0/16", + "23:52:44 Found 6 Cumulus Linux devices", + "23:52:44 Found 1 server nodes", + "23:52:44 Found 1 exit interface(s) for SSH access", + ( + "23:52:44 Overriding existing server 'oob-mgmt-server' with nvcm-box image " + "(generic/ubuntu2404, 16 CPU, 32768MB RAM, 100GB storage)" + ), + "23:52:44 Built topology with 7 nodes and 8 links", + "23:52:45 Created simulation: 9e1f8be2-43a0-4797-9e14-91e5b170b656", + (f"23:53:31 Created SSH service for oob-mgmt-server:eth0 -> {MOCK_HOST}:17117"), + f"SSH ready: nvcm@{MOCK_HOST}:17117", + ( + "23:56:03 Uploading /tmp/nv-config-manager-install-sujopgqf.yaml -> " + 
f"{MOCK_HOST}:/home/nvcm/nv-config-manager-install.yaml ..." + ), + "23:56:03 Upload complete: /home/nvcm/nv-config-manager-install.yaml", + "Uploaded nv-config-manager-install.yaml", + "Running deploy command:", + ( + " sudo NO_COLOR=1 KUBECONFIG=/home/nvcm/.kube/config uv run --directory " + "/home/nvcm/nv-config-manager --project /home/nvcm/nv-config-manager/installer " + "nv-config-manager-installer deploy /home/nvcm/nv-config-manager-install.yaml " + "--chart-dir /home/nvcm/nv-config-manager/deploy/helm --kind-cluster nvcm " + "--install-envoy-gateway --install-cert-manager --install-cnpg-operator " + "--image-source local --build-images --load-kind" + ), + "23:56:03 Running installer (this may take 15-30 min)...", + "23:56:07 [oob-mgmt-server] [>] Check prerequisites", + "23:56:07 [oob-mgmt-server] [*] Check prerequisites", + "23:56:08 [oob-mgmt-server] [>] Build local images", + "23:56:08 [oob-mgmt-server] Building nv-config-manager-nautobot:local...", + "00:01:45 [oob-mgmt-server] [*] Build local images", + "00:01:45 [oob-mgmt-server] [>] Load images to Kind", + "00:02:18 [oob-mgmt-server] [*] Load images to Kind", + "00:02:18 [oob-mgmt-server] [>] Install CRDs / operators", + "00:03:13 [oob-mgmt-server] [*] Install CRDs / operators", + "00:03:13 [oob-mgmt-server] [>] Create namespace", + "00:03:13 [oob-mgmt-server] Created: nv-config-manager (context=kind-nvcm)", + "00:03:13 [oob-mgmt-server] [*] Create namespace", + "00:03:24 [oob-mgmt-server] [>] Helm install / upgrade", + "00:13:27 [oob-mgmt-server] [*] Helm install / upgrade", + "00:13:27 [oob-mgmt-server] Network ZTP: https://ztp.nvcm.air", + "00:13:27 [oob-mgmt-server] Network DHCP: https://dhcp.nvcm.air", + "00:13:27 [oob-mgmt-server] Deployment completed successfully!", + ( + "00:16:56 Resetting 6 Cumulus node(s) to force fresh ZTP/DHCP cycle: " + "tan-leaf-05, tan-leaf-02, oob-mleaf-01, tan-leaf-01, tan-leaf-04, tan-leaf-03" + ), +] + + +_RAW_DHCP_LOG_LINES = [ + ( + "2026-05-30 00:57:54.424 INFO 
[kea-dhcp4.packets/14.139514333066944] " + "DHCP4_PACKET_RECEIVED [hwtype=1 44:38:39:00:00:08], cid=[no info], " + "tid=0xbec5f077: DHCPREQUEST (type 3) received from 172.18.0.1 to " + "10.244.0.20 on interface eth0" + ), + ( + "2026-05-30 00:57:54.424 INFO [kea-dhcp4.leases/14.139514333066944] " + "DHCP4_INIT_REBOOT [hwtype=1 44:38:39:00:00:08], cid=[no info], " + "tid=0xbec5f077: client is in INIT-REBOOT state and requests address 10.120.0.1" + ), + ( + "2026-05-30 00:57:54.425 INFO [kea-dhcp4.leases/14.139514333066944] " + "DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:08], cid=[no info], " + "tid=0xbec5f077: lease 10.120.0.1 has been allocated for 7200 seconds" + ), + ( + "2026-05-30 01:01:02.800 INFO [kea-dhcp4.leases/14.139514408601280] " + "DHCP4_LEASE_OFFER [hwtype=1 44:38:39:00:00:04], cid=[no info], " + "tid=0xaf0466f: lease 10.120.1.12 will be offered" + ), + ( + "2026-05-30 01:01:02.802 INFO [kea-dhcp4.leases/14.139514400208576] " + "DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:04], cid=[no info], " + "tid=0xaf0466f: lease 10.120.1.12 has been allocated for 7200 seconds" + ), + ( + "2026-05-30 01:01:04.907 INFO [kea-dhcp4.leases/14.139514307888832] " + "DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:07], cid=[no info], " + "tid=0x4a0ae633: lease 10.120.1.15 has been allocated for 7200 seconds" + ), + ( + "2026-05-30 01:01:21.715 INFO [kea-dhcp4.leases/14.139514333066944] " + "DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:03], cid=[no info], " + "tid=0x4bb0432d: lease 10.120.1.11 has been allocated for 7200 seconds" + ), + ( + "2026-05-30 01:01:23.362 INFO [kea-dhcp4.leases/14.139514383423168] " + "DHCP4_LEASE_ALLOC [hwtype=1 44:38:39:00:00:06], cid=[no info], " + "tid=0x1204853d: lease 10.120.1.14 has been allocated for 7200 seconds" + ), + ( + "2026-05-30 01:01:04.907 INFO [kea-dhcp4.packets/14.139514307888832] " + "DHCP4_PACKET_SEND [hwtype=1 44:38:39:00:00:07], cid=[no info], " + "tid=0x4a0ae633: trying to send packet DHCPACK (type 5) from " + "10.244.0.20:67 to 
10.120.1.1:67 on interface eth0" + ), +] + + +_RAW_ZTP_LOG_LINES = [ + ( + r'{"message": "10.120.0.1:40294 - \"GET /v1/device/' + r'8f5a1532-e155-4119-937e-86e8aa8f4007/boot-script HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:00:35,976", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.0.1:40304 - \"POST /v1/device/' + r'8f5a1532-e155-4119-937e-86e8aa8f4007/validate_serial HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:00:39,764", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.0.1:40320 - \"GET /v1/device/' + r"8f5a1532-e155-4119-937e-86e8aa8f4007/config/startup.yaml HTTP/1.1\" " + r'200", "levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:00:40,162", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.0.1:53424 - \"POST /v1/device/' + r'8f5a1532-e155-4119-937e-86e8aa8f4007/provisioned HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:00:52,692", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.1.13:56094 - \"GET /v1/device/' + r'38065dde-1abe-41ef-865b-60fbb6405d06/boot-script HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:01:02,408", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.1.13:46012 - \"POST /v1/device/' + r'38065dde-1abe-41ef-865b-60fbb6405d06/validate_serial HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:01:06,051", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + 
r'{"message": "10.120.1.13:46020 - \"GET /v1/device/' + r"38065dde-1abe-41ef-865b-60fbb6405d06/config/startup.yaml HTTP/1.1\" " + r'200", "levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:01:06,164", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.1.13:46026 - \"POST /v1/device/' + r'38065dde-1abe-41ef-865b-60fbb6405d06/provisioned HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:01:14,605", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.1.12:59136 - \"GET /v1/device/' + r'48331931-a577-4ad7-ac03-dc22461a9d0c/boot-script HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:02:01,311", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.1.15:35374 - \"GET /v1/device/' + r'9cae2d62-00a3-457e-b8c1-6bc9a18d8e0a/boot-script HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:02:01,608", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.1.11:46504 - \"POST /v1/device/' + r'857985c8-d3c8-41f2-90d8-f966e3306113/provisioned HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:02:14,424", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), + ( + r'{"message": "10.120.1.14:41842 - \"POST /v1/device/' + r'e4484bba-527d-4b60-8f2c-74bfdfd7516e/provisioned HTTP/1.1\" 200", ' + r'"levelname": "INFO", "name": "uvicorn.access", "asctime": ' + r'"2026-05-30 01:02:14,745", "module": "httptools_impl", "lineno": ' + r'484, "level": "info", "service": "ztp"}' + ), +] + +_DHCP_LOG_LINES = [_clean_dhcp_line(line) for line in _RAW_DHCP_LOG_LINES] 
+_ZTP_LOG_LINES = [_clean_ztp_line(line) for line in _RAW_ZTP_LOG_LINES] + + +def _populate_logs(launch: LaunchScreen, *, active_tab: str = "deploy") -> None: + viewer = launch.query_one("#log-viewer", _LogViewerWidget) + viewer._buffers.clear() + viewer.query_one("#log-output", _FollowLog).clear() + viewer.add_tab("dhcp", "DHCP") + viewer.add_tab("ztp", "ZTP") + for line in _DEPLOY_LOG_LINES: + viewer.append_line(line, "deploy") + for line in _DHCP_LOG_LINES: + viewer.append_line(line, "dhcp") + for line in _ZTP_LOG_LINES: + viewer.append_line(line, "ztp") + viewer._activate_tab(active_tab) + + +def _populate_ssh_and_pods( + launch: LaunchScreen, + *, + provisioned: str = "4/6", + pending: str = "Pending: tan-leaf-04, tan-leaf-05", +) -> None: + launch._host = MOCK_HOST + launch._port = MOCK_PORT + launch._show_ssh_command(_ssh_cmd()) + pod_panel = launch.query_one("#pod-status-panel", _PodStatusWidget) + pod_panel._update_table(_MOCK_PODS) + pod_panel.query_one("#prov-count", Static).update(f"Provisioned: {provisioned}") + pod_panel.query_one("#prov-detail", Static).update(pending) + + +def _populate_ready_launch(launch: LaunchScreen) -> None: + launch.query_one("#btn-launch", Button).disabled = False + launch.query_one("#launch-status", Static).update( + "[green]Ready to create AIR simulation from mock topology air_trial.[/green]" + ) + + +def _populate_running_launch(launch: LaunchScreen) -> None: + launch._bringup_running = True + launch.query_one("#btn-launch", Button).disabled = True + launch.query_one("#launch-status", Static).update( + "[yellow]Running... 
log -> /tmp/nvcm-deploy-20260530-000000.log[/yellow]" + ) + _set_step_states(launch, running_step="run-deploy") + _populate_ssh_and_pods(launch, provisioned="0/6", pending="Waiting for first ZTP callback") + _populate_logs(launch) + + +def _populate_pods_launch(launch: LaunchScreen) -> None: + launch.query_one("#launch-status", Static).update( + "[yellow]Deployment running - monitoring Kubernetes pods over SSH.[/yellow]" + ) + _set_step_states(launch, running_step="post-deploy") + _populate_ssh_and_pods(launch, provisioned="4/6", pending="Pending: tan-leaf-04, tan-leaf-05") + _populate_logs(launch) + + +def _populate_dhcp_log_launch(launch: LaunchScreen) -> None: + _populate_pods_launch(launch) + _populate_logs(launch, active_tab="dhcp") + + +def _populate_ztp_log_launch(launch: LaunchScreen) -> None: + launch.query_one("#launch-status", Static).update( + "[yellow]Deployment running - watching ZTP callbacks over SSH.[/yellow]" + ) + _set_step_states(launch, running_step="post-deploy") + _populate_ssh_and_pods(launch, provisioned="6/6", pending="All devices reported provisioned") + _populate_logs(launch, active_tab="ztp") + + +def _populate_complete_launch(launch: LaunchScreen) -> None: + launch._bringup_running = False + launch.query_one("#btn-launch", Button).disabled = False + launch.query_one("#launch-status", Static).update( + "[bold green][*] Bringup complete![/bold green]" + ) + _set_step_states(launch) + _populate_ssh_and_pods(launch, provisioned="6/6", pending="") + launch.query_one("#prov-detail", Static).update("") + _populate_logs(launch) + viewer = launch.query_one("#log-viewer", _LogViewerWidget) + viewer.set_access_widget( + _ProxyAccessWidget(ProxyInfo(host=MOCK_HOST, port=MOCK_PORT), id="proxy-access") + ) + + +def _populate_failure_launch(launch: LaunchScreen) -> None: + launch._bringup_running = False + launch.query_one("#btn-launch", Button).disabled = False + launch.query_one("#launch-status", Static).update( + "[bold red][!] 
Bringup failed - check the deploy log above[/bold red]" + ) + _set_step_states(launch, failed_step="post-deploy") + _populate_ssh_and_pods( + launch, + provisioned="0/6", + pending="Post-deploy topology job failed", + ) + viewer = launch.query_one("#log-viewer", _LogViewerWidget) + viewer._buffers.clear() + viewer.query_one("#log-output", _FollowLog).clear() + for line in [ + "00:13:27 [oob-mgmt-server] [>] Run post-deploy jobs", + "00:13:27 [oob-mgmt-server] Port-forward to Nautobot established", + "00:13:27 [oob-mgmt-server] Waiting for Nautobot API...", + ( + "00:13:27 [oob-mgmt-server] Job 1/1: " + "mock_topology.jobs.mock_topology_design.MockTopologyDesign" + ), + "00:13:27 [oob-mgmt-server] Found job ID: 9ceddbd3-d1a2-4e52-a977-24209d29fed6", + "00:13:27 [oob-mgmt-server] Enabling job...", + "00:13:27 [oob-mgmt-server] Starting job execution...", + "00:13:27 [oob-mgmt-server] Job started, result ID: c32499ba-5292-4ca1-ad39-5112b1b5ca9b", + "00:13:27 [oob-mgmt-server] [INFO] [initialization] Running job", + "00:13:27 [oob-mgmt-server] Job failed (status: failure)", + "00:13:27 [oob-mgmt-server] [!] 
async def _capture_launch(
    output_dir: Path,
    name: str,
    title: str,
    populate: Callable[[LaunchScreen], None],
) -> None:
    """Capture one screenshot of the launch section in a fresh app instance.

    A new ``NVCMAirSimApp`` is started in test mode at ``COLS`` x ``ROWS``,
    switched to the ``launch`` section, handed to *populate* to set up the
    desired state, and the resulting shot is saved as *name* in *output_dir*
    under the given *title*.
    """
    sim_app = NVCMAirSimApp(config=_example_config())
    async with sim_app.run_test(size=(COLS, ROWS)) as pilot:
        sim_app.switch_section("launch")
        await _stabilize(pilot)

        launch_screen = _launch_screen(sim_app)
        populate(launch_screen)
        # Let the UI settle again so the populated state is what gets captured.
        await _stabilize(pilot)

        shot = _shot(sim_app, title)
        _save(output_dir, name, shot)
{DEFAULT_OUT_DIR})", + ) + args = parser.parse_args() + + total = len(SECTION_LABELS) + 5 + print(f"Capturing {total} AIR sim screenshots at {COLS}x{ROWS}...") + no_color = os.environ.pop("NO_COLOR", None) + try: + asyncio.run(_capture_all(args.output_dir)) + finally: + if no_color is not None: + os.environ["NO_COLOR"] = no_color + print(f"\n{total} screenshots saved to {args.output_dir}/") + + +if __name__ == "__main__": + main() diff --git a/installer/scripts/screenshot_tui.py b/installer/scripts/screenshot_tui.py index 6c88100..ad4e3e0 100644 --- a/installer/scripts/screenshot_tui.py +++ b/installer/scripts/screenshot_tui.py @@ -1,6 +1,18 @@ #!/usr/bin/env python3 # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Generate SVG screenshots of every TUI section for documentation. Usage: diff --git a/installer/src/nv_config_manager_installer/air_sim/__init__.py b/installer/src/nv_config_manager_installer/air_sim/__init__.py new file mode 100644 index 0000000..bd904e8 --- /dev/null +++ b/installer/src/nv_config_manager_installer/air_sim/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/installer/src/nv_config_manager_installer/air_sim/cli.py b/installer/src/nv_config_manager_installer/air_sim/cli.py new file mode 100644 index 0000000..d842bd1 --- /dev/null +++ b/installer/src/nv_config_manager_installer/air_sim/cli.py @@ -0,0 +1,110 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Click commands for AIR simulation demos.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import click + +from nv_config_manager_installer.air_sim.constants import DEFAULT_AIR_SIM_CONFIG_PATH +from nv_config_manager_installer.air_sim.orchestrator import ( + SimOrchestrator, + StepStatus, +) +from nv_config_manager_installer.air_sim.sim_config import SimConfig +from nv_config_manager_installer.tui.air_sim.app import NVCMAirSimApp + + +@click.group("air-sim") +def air_sim() -> None: + """Create and deploy AIR simulation demos.""" + + +@air_sim.command("init") +@click.option( + "--config", + "-c", + "config_path", + type=click.Path(dir_okay=False, path_type=Path), + default=DEFAULT_AIR_SIM_CONFIG_PATH, + help="Path to AIR simulation YAML config.", +) +def init_air_sim(config_path: Path) -> None: + """Launch the interactive AIR simulation TUI wizard.""" + config = SimConfig.load_or_default(config_path) + app = NVCMAirSimApp(config=config, config_path=config_path) + app.run() + + +@air_sim.command("deploy") +@click.option( + "--config", + "-c", + "config_path", + type=click.Path(exists=True, dir_okay=False, path_type=Path), + required=True, + help="Path to AIR simulation YAML config.", +) +def deploy_air_sim(config_path: Path) -> None: + """Run AIR simulation bringup from a config file.""" + config = SimConfig.from_yaml(config_path) + callback = _CliCallback() + orchestrator = SimOrchestrator(config, callback) + orchestrator.run() + if not callback.success: + sys.exit(1) + + +class _CliCallback: + """Simple stdout callback for headless AIR simulation deploys.""" + + def __init__(self) -> None: + self.success = False + self.host = "" + self.port = 0 + + def on_step(self, step_id: str, status: StepStatus, message: str = "") -> None: + icon = { + StepStatus.PENDING: "[ ]", + StepStatus.RUNNING: "[>]", + StepStatus.SUCCESS: "[*]", + StepStatus.FAILED: "[!]", + StepStatus.SKIPPED: "[-]", + }[status] + suffix = f" {message}" if message 
else "" + click.echo(f"{icon} {step_id}{suffix}") + + def on_log(self, line: str) -> None: + click.echo(line) + + def on_ssh_ready(self, host: str, port: int) -> None: + click.echo(f"SSH ready: {host}:{port}") + + def on_deploy_started(self, host: str, port: int) -> None: + click.echo(f"Deployment started over SSH: {host}:{port}") + + def on_complete(self, success: bool, host: str = "", port: int = 0) -> None: + self.success = success + self.host = host + self.port = port + if success: + click.echo("AIR simulation bringup completed.") + if host: + click.echo(f"SSH: {host}:{port}") + else: + click.echo("AIR simulation bringup failed.", err=True) diff --git a/installer/src/nv_config_manager_installer/air_sim/cloud_init.py b/installer/src/nv_config_manager_installer/air_sim/cloud_init.py new file mode 100644 index 0000000..a3ce9c4 --- /dev/null +++ b/installer/src/nv_config_manager_installer/air_sim/cloud_init.py @@ -0,0 +1,690 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Cloud-init generation for nvcm-air-simulation.""" + +from __future__ import annotations + +import logging +import shlex +import textwrap +from typing import Any +from urllib.parse import quote, urlsplit, urlunsplit + +import yaml + +from nv_config_manager_installer.air_sim.constants import ( + CONFIG_MANAGER_NAMESPACE, + CONFIG_MANAGER_REMOTE_DIR, + NODE_EXPORTER_BASE_URL, + NODE_EXPORTER_SHA256, + NODE_EXPORTER_VERSION, + NVCM_NETWORK_SECRETS, + _BlockStyleDumper, +) + +LOG = logging.getLogger(__name__) + + +def _repo_url_with_optional_token(repo_url: str, git_token: str | None) -> str: + """Return *repo_url* with HTTPS credentials only when a token is supplied.""" + token = (git_token or "").strip() + if not token: + return repo_url + + parts = urlsplit(repo_url) + if parts.scheme not in {"http", "https"}: + return repo_url + + host = parts.netloc.rsplit("@", 1)[-1] + username = "x-access-token" if "github.com" in host.lower() else "oauth2" + netloc = f"{username}:{quote(token, safe='')}@{host}" + return urlunsplit((parts.scheme, netloc, parts.path, parts.query, parts.fragment)) + + +# ============================================================================== +# Cloud-init setup script template +# ============================================================================== +# Placeholders (__VARNAME__) are substituted at generation time. +# This runs as root during cloud-init, so no sudo needed. 
+ +_SETUP_SCRIPT_TEMPLATE = textwrap.dedent("""\ + #!/bin/bash + set -euo pipefail + exec > >(tee -a /var/log/nvcm-setup.log) 2>&1 + export HOME=/root + + DEPLOY_SIZE=__DEPLOY_SIZE__ + INTERNAL_IP=__INTERNAL_IP__ + INTERNAL_MAC=__INTERNAL_MAC__ + OOB_SWITCH_GW=__OOB_SWITCH_GW__ + BGP_ASN=__BGP_ASN__ + BGP_PASSWORD=__BGP_PASSWORD__ + RELAY_RETURN_NETWORKS=(__RELAY_RETURN_NETWORKS__) + echo "========================================" + echo " NVCM AIR Setup" + echo "========================================" + echo "Started: $(date)" + echo "Deploy size: $DEPLOY_SIZE" + echo "" + + # ========================================================================== + # PREREQUISITES + # ========================================================================== + export DEBIAN_FRONTEND=noninteractive + APT_OPTS='-o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold' + + echo ">>> Waiting for cloud-init apt lock..." + for i in $(seq 1 120); do + fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1 || break + sleep 5 + done + echo ">>> Lock released." + + echo ">>> Updating system packages..." + apt-get update && apt-get $APT_OPTS upgrade -y + + echo ">>> Installing Docker..." + if ! command -v docker &>/dev/null; then + apt-get $APT_OPTS install -y ca-certificates curl gnupg + install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg \\ + | gpg --dearmor -o /etc/apt/keyrings/docker.gpg + chmod a+r /etc/apt/keyrings/docker.gpg + . /etc/os-release + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \\ + https://download.docker.com/linux/ubuntu $VERSION_CODENAME stable" \\ + > /etc/apt/sources.list.d/docker.list + apt-get update + apt-get $APT_OPTS install -y docker-ce docker-ce-cli containerd.io \\ + docker-buildx-plugin docker-compose-plugin + usermod -aG docker nvcm + else + echo " Docker already installed" + fi + + echo ">>> Installing kubectl..." + if ! 
command -v kubectl &>/dev/null; then + curl -LO "https://dl.k8s.io/release/$(curl -Ls \\ + https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl + rm -f kubectl + fi + + echo ">>> Installing Helm..." + if ! command -v helm &>/dev/null; then + curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \\ + | bash + fi + + echo ">>> Installing Kind..." + if ! command -v kind &>/dev/null; then + curl -Lo /usr/local/bin/kind \\ + https://kind.sigs.k8s.io/dl/v0.25.0/kind-linux-amd64 + chmod +x /usr/local/bin/kind + fi + + echo ">>> Installing uv..." + if ! command -v uv &>/dev/null; then + curl -LsSf https://astral.sh/uv/install.sh \\ + | env UV_INSTALL_DIR=/usr/local/bin sh + fi + + echo ">>> Installing dev tools..." + apt-get $APT_OPTS install -y git jq htop yq \\ + isc-dhcp-relay sshpass + systemctl disable isc-dhcp-relay + systemctl stop isc-dhcp-relay + + # ========================================================================== + # SYSTEM LIMITS + # ========================================================================== + echo ">>> Configuring system limits..." + if ! grep -q "fs.inotify.max_user_watches" /etc/sysctl.conf 2>/dev/null; then + cat >> /etc/sysctl.conf <<'SYSCTL' + fs.inotify.max_user_watches=1048576 + fs.inotify.max_user_instances=8192 + fs.file-max=2097152 + SYSCTL + sysctl -p + fi + + # ========================================================================== + # INTERNAL NETWORK (resolve by MAC) + # ========================================================================== + echo ">>> Configuring internal network (MAC: $INTERNAL_MAC)..." 
+ INT_IFACE=$(ip -o link | grep -i "$INTERNAL_MAC" \\ + | awk -F': ' '{print $2}' | head -1) + if [[ -n "$INT_IFACE" ]]; then + echo " Resolved internal interface: $INT_IFACE" + ip addr flush dev "$INT_IFACE" 2>/dev/null || true + ip addr add "$INTERNAL_IP" dev "$INT_IFACE" 2>/dev/null || true + ip link set "$INT_IFACE" up + INTERNAL_NETWORK=$(echo "$INTERNAL_IP" | sed 's|\\.[0-9]*/|.0/|') + ip route add "$INTERNAL_NETWORK" dev "$INT_IFACE" 2>/dev/null || true + for _rr_net in "${RELAY_RETURN_NETWORKS[@]}"; do + ip route replace "$_rr_net" via "$OOB_SWITCH_GW" \\ + dev "$INT_IFACE" 2>/dev/null || true + done + else + echo " WARNING: Could not find interface with MAC $INTERNAL_MAC" + fi + + # ========================================================================== + # KIND CLUSTER + # ========================================================================== + echo ">>> Creating Kind cluster..." + kind delete cluster --name nvcm 2>/dev/null || true + kind create cluster --name nvcm --config /opt/kind-config.yaml --wait 5m + echo " Kind cluster created" + + echo ">>> Setting up kubeconfig for nvcm..." + mkdir -p /home/nvcm/.kube + kind get kubeconfig --name nvcm > /home/nvcm/.kube/config + chown -R nvcm:nvcm /home/nvcm/.kube + + # ========================================================================== + # METALLB + # ========================================================================== + echo ">>> Installing MetalLB..." 
+ helm repo add metallb https://metallb.github.io/metallb 2>/dev/null || true + helm repo update + helm upgrade --install metallb metallb/metallb \\ + --namespace metallb-system \\ + --create-namespace \\ + --set frr-k8s.prometheus.serviceMonitor.enabled=false \\ + --wait + + kubectl wait --for=condition=ready pod -n metallb-system \\ + -l app.kubernetes.io/component=controller --timeout=120s + + KIND_SUBNET=$(docker network inspect kind \\ + -f '{{range .IPAM.Config}}{{.Subnet}} {{end}}' 2>/dev/null \\ + | grep -oE '([0-9]+\\.){3}[0-9]+/[0-9]+' | head -1) + if [[ -n "$KIND_SUBNET" ]]; then + KIND_PREFIX=$(echo "$KIND_SUBNET" | cut -d'.' -f1-2) + METALLB_RANGE="${KIND_PREFIX}.255.200-${KIND_PREFIX}.255.220" + else + METALLB_RANGE="172.18.255.200-172.18.255.220" + fi + + kubectl apply -f - < /home/nvcm/.nvcm-metallb-range + chown nvcm:nvcm /home/nvcm/.nvcm-metallb-range + + CONTROL_PLANE=$(kubectl get nodes \\ + -l node-role.kubernetes.io/control-plane \\ + -o jsonpath='{.items[0].metadata.name}') + kubectl taint nodes "$CONTROL_PLANE" \\ + node-role.kubernetes.io/control-plane:NoSchedule- 2>/dev/null || true + + # IP forwarding + echo ">>> Configuring IP forwarding..." + grep -q "net.ipv4.ip_forward=1" /etc/sysctl.conf || \\ + echo "net.ipv4.ip_forward=1" >> /etc/sysctl.conf + sysctl -w net.ipv4.ip_forward=1 + iptables -t nat -C POSTROUTING -d 172.18.0.0/16 -j MASQUERADE \\ + 2>/dev/null || \\ + iptables -t nat -A POSTROUTING -d 172.18.0.0/16 -j MASQUERADE + + # ========================================================================== + # FRR (BGP peering with OOB switch) + # ========================================================================== + echo ">>> Installing FRR for BGP peering with OOB switch..." 
+ curl -s https://deb.frrouting.org/frr/keys.gpg \\ + | tee /usr/share/keyrings/frrouting.gpg >/dev/null + FRRVER="frr-stable" + echo "deb [signed-by=/usr/share/keyrings/frrouting.gpg] \\ + https://deb.frrouting.org/frr $(lsb_release -s -c) $FRRVER" \\ + | tee /etc/apt/sources.list.d/frr.list + apt-get update -qq && apt-get install -y -qq frr frr-pythontools \\ + 2>/dev/null + + sed -i 's/^bgpd=no/bgpd=yes/' /etc/frr/daemons + + if [[ -n "${KIND_PREFIX:-}" ]]; then + FRR_METALLB_PREFIX="${KIND_PREFIX}.255.0/24" + else + FRR_METALLB_PREFIX="172.18.255.0/24" + fi + + BRIDGE_ID=$(docker network inspect kind \\ + -f '{{.Id}}' 2>/dev/null | cut -c1-12) + ip route add "$FRR_METALLB_PREFIX" dev "br-${BRIDGE_ID}" \\ + 2>/dev/null || true + + cat > /etc/frr/frr.conf << FRREOF + frr version 10 + frr defaults traditional + hostname oob-mgmt-server + log syslog informational + service integrated-vtysh-config + ! + ip prefix-list PL-METALLB seq 10 permit ${FRR_METALLB_PREFIX} + ip prefix-list PL-METALLB seq 9999 deny any + ! + route-map RM-EXPORT permit 10 + match ip address prefix-list PL-METALLB + route-map RM-EXPORT deny 9999 + ! + router bgp ${BGP_ASN} + bgp router-id __ZTP_URL_HOST__ + no bgp ebgp-requires-policy + neighbor ${OOB_SWITCH_GW} remote-as external + neighbor ${OOB_SWITCH_GW} password ${BGP_PASSWORD} + ! + address-family ipv4 unicast + redistribute kernel route-map RM-EXPORT + neighbor ${OOB_SWITCH_GW} route-map RM-EXPORT out + exit-address-family + ! + FRREOF + + systemctl enable frr + systemctl restart frr + echo " FRR BGP configured: ASN ${BGP_ASN}, neighbor ${OOB_SWITCH_GW}" + echo " Advertising ${FRR_METALLB_PREFIX}" + + # ========================================================================== + # REFRESH KUBECONFIG FOR NVCM USER + # ========================================================================== + echo ">>> Setting up kubeconfig for nvcm..." 
+ mkdir -p /home/nvcm/.kube + kind get kubeconfig --name nvcm > /home/nvcm/.kube/config + chown -R nvcm:nvcm /home/nvcm/.kube + + # ========================================================================== + # CLONE REPOSITORIES + # ========================================================================== + echo ">>> Ensuring /home/nvcm ownership..." + chown nvcm:nvcm /home/nvcm + + echo ">>> Cloning repositories..." + __CLONE_COMMANDS__ + + # ========================================================================== + # STAGE NODE-EXPORTER BINARIES FOR ZTP + # ========================================================================== + NE_VERSION="__NODE_EXPORTER_VERSION__" + NE_DIR="/home/nvcm/ztp-files/node-exporter/${NE_VERSION}" + NE_BASE="__NODE_EXPORTER_BASE_URL__" + declare -A NE_SHA256=( + [amd64]="__NODE_EXPORTER_AMD64_SHA256__" + [armv5]="__NODE_EXPORTER_ARMV5_SHA256__" + ) + echo ">>> Downloading node-exporter ${NE_VERSION} binaries..." + mkdir -p "$NE_DIR" + + for arch_pair in "amd64:amd64" "armv5:armv5"; do + gh_arch="${arch_pair%%:*}" + out_name="node_exporter_${arch_pair##*:}" + tarball="node_exporter-${NE_VERSION}.linux-${gh_arch}.tar.gz" + url="${NE_BASE}/v${NE_VERSION}/${tarball}" + echo " Fetching ${tarball}..." 
+ curl -fsSL "$url" -o "/tmp/${tarball}" + expected_sha="${NE_SHA256[$gh_arch]}" + actual_sha="$(sha256sum "/tmp/${tarball}" | awk '{print $1}')" + if [[ "$actual_sha" != "$expected_sha" ]]; then + echo "ERROR: checksum mismatch for ${tarball}: expected ${expected_sha}, got ${actual_sha}" >&2 + rm -f "/tmp/${tarball}" + exit 1 + fi + tar -xzf "/tmp/${tarball}" -C /tmp \\ + "node_exporter-${NE_VERSION}.linux-${gh_arch}/node_exporter" + mv "/tmp/node_exporter-${NE_VERSION}.linux-${gh_arch}/node_exporter" \\ + "${NE_DIR}/${out_name}" + rm -rf "/tmp/${tarball}" \\ + "/tmp/node_exporter-${NE_VERSION}.linux-${gh_arch}" + done + chmod +x "${NE_DIR}"/* + chown -R nvcm:nvcm /home/nvcm/ztp-files + echo " Staged at ${NE_DIR}" + + jq -n ' + {images: [ + {platform: "node-exporter", version: "__NODE_EXPORTER_VERSION__", + filename: "node_exporter_amd64", + path: "node-exporter/__NODE_EXPORTER_VERSION__/node_exporter_amd64", + sha256: "__NODE_EXPORTER_AMD64_SHA256__", + tags: {}}, + {platform: "node-exporter", version: "__NODE_EXPORTER_VERSION__", + filename: "node_exporter_armv5", + path: "node-exporter/__NODE_EXPORTER_VERSION__/node_exporter_armv5", + sha256: "__NODE_EXPORTER_ARMV5_SHA256__", + tags: {}} + ]} + ' > /home/nvcm/ztp-files/manifest.json + chown nvcm:nvcm /home/nvcm/ztp-files/manifest.json + + cat > /home/nvcm/populate-ztp-files.sh << 'ZTPEOF' + #!/bin/bash + set -euo pipefail + export KUBECONFIG=/home/nvcm/.kube/config + + NAMESPACE="__CONFIG_MANAGER_NAMESPACE__" + PVC_NAME="ztp-os-images" + SRC_DIR="/home/nvcm/ztp-files" + POD_NAME="populate-ztp-files-$(date +%s)" + + echo "Creating temporary pod to populate ZTP PVC..." 
+ kubectl run "$POD_NAME" \\ + --namespace="$NAMESPACE" \\ + --image=busybox:1.36 \\ + --restart=Never \\ + --overrides="{ + \\"spec\\": { + \\"containers\\": [{ + \\"name\\": \\"populate\\", + \\"image\\": \\"busybox:1.36\\", + \\"command\\": [\\"sleep\\", \\"300\\"], + \\"volumeMounts\\": [{ + \\"name\\": \\"ztp-files\\", + \\"mountPath\\": \\"/images\\" + }] + }], + \\"volumes\\": [{ + \\"name\\": \\"ztp-files\\", + \\"persistentVolumeClaim\\": { + \\"claimName\\": \\"$PVC_NAME\\" + } + }] + } + }" + + echo "Waiting for pod..." + kubectl wait --for=condition=Ready "pod/$POD_NAME" \\ + -n "$NAMESPACE" --timeout=120s + + echo "Copying files to PVC..." + cd "$SRC_DIR" + tar czf /tmp/ztp-files.tar.gz . + kubectl cp /tmp/ztp-files.tar.gz \\ + "$NAMESPACE/$POD_NAME:/tmp/ztp-files.tar.gz" + kubectl exec -n "$NAMESPACE" "$POD_NAME" -- \\ + sh -c "cd /images && tar -xzf /tmp/ztp-files.tar.gz" + kubectl exec -n "$NAMESPACE" "$POD_NAME" -- \\ + sh -c "chmod -R a+rX /images" + + echo "Files in PVC:" + kubectl exec -n "$NAMESPACE" "$POD_NAME" -- \\ + find /images -type f 2>/dev/null || true + + echo "Cleaning up..." + kubectl delete pod "$POD_NAME" -n "$NAMESPACE" --wait=false + rm -f /tmp/ztp-files.tar.gz + + echo "ZTP PVC population complete" + ZTPEOF + chmod +x /home/nvcm/populate-ztp-files.sh + chown nvcm:nvcm /home/nvcm/populate-ztp-files.sh + + # ========================================================================== + # DONE + # ========================================================================== + echo "" + echo "========================================" + echo " NVCM AIR Setup Complete!" 
def generate_kind_config(deploy_size: str) -> str:
    """Render the Kind cluster config for a single control-plane node.

    A single control-plane node avoids MetalLB L2 / pod affinity issues where
    traffic lands on a different node than the pod.

    ``deploy_size == "medium"`` reserves 2Gi for system and kube with a 1Gi
    hard-eviction threshold; any other value reserves 1Gi with a 500Mi
    threshold. Container ports 30080/30443 are mapped to host ports 80/443.
    """
    if deploy_size == "medium":
        reserved_mem, eviction = "2Gi", "memory.available<1Gi"
    else:
        reserved_mem, eviction = "1Gi", "memory.available<500Mi"

    # kubeadm patch carrying the kubelet reservations; passed to Kind as one string.
    kubelet_patch = "".join(
        [
            "kind: InitConfiguration\n",
            "nodeRegistration:\n",
            "  kubeletExtraArgs:\n",
            f"    system-reserved: cpu=50m,memory={reserved_mem}\n",
            f"    kube-reserved: cpu=50m,memory={reserved_mem}\n",
            f"    eviction-hard: {eviction}\n",
        ]
    )

    control_plane: dict[str, Any] = {
        "role": "control-plane",
        "extraPortMappings": [
            {"containerPort": 30080, "hostPort": 80, "protocol": "TCP"},
            {"containerPort": 30443, "hostPort": 443, "protocol": "TCP"},
        ],
        "kubeadmConfigPatches": [kubelet_patch],
    }
    cluster = {
        "kind": "Cluster",
        "apiVersion": "kind.x-k8s.io/v1alpha4",
        "nodes": [control_plane],
    }
    return yaml.dump(
        cluster,
        Dumper=_BlockStyleDumper,
        default_flow_style=False,
        sort_keys=False,
    )
git_token: str | None, + config_manager_repo: str, + config_manager_ref: str, + internal_ip: str, + internal_mac: str, + site_name: str, + oob_gateway: str | None, + lb_allowed_prefixes: str, + relay_return_networks: str, + bgp_asn: str, +) -> str: + """Build the full OOB-server setup bash script from the template. + + Substitutes placeholder tokens in ``_SETUP_SCRIPT_TEMPLATE`` with + concrete values so the script can run unattended via cloud-init. + """ + config_manager_auth = _repo_url_with_optional_token(config_manager_repo, git_token) + + def _quote_shell_words(words: str) -> str: + return " ".join(shlex.quote(word) for word in words.split()) + + clone_lines = ( + f'su - nvcm -c "git clone -b {shlex.quote(config_manager_ref)}' + f' {shlex.quote(config_manager_auth)} {shlex.quote(CONFIG_MANAGER_REMOTE_DIR)}"' + ) + + script = _SETUP_SCRIPT_TEMPLATE + script = script.replace("__DEPLOY_SIZE__", shlex.quote(deploy_size)) + script = script.replace("__INTERNAL_IP__", shlex.quote(internal_ip)) + script = script.replace("__INTERNAL_MAC__", shlex.quote(internal_mac)) + ztp_url_host = internal_ip.split("/")[0] + script = script.replace("__ZTP_URL_HOST__", ztp_url_host) + script = script.replace("__SITE_NAME__", site_name) + script = script.replace("__CONFIG_MANAGER_NAMESPACE__", CONFIG_MANAGER_NAMESPACE) + script = script.replace("__OOB_SWITCH_GW__", shlex.quote(oob_gateway or "UNSET")) + script = script.replace("__LB_ALLOWED_PREFIXES__", _quote_shell_words(lb_allowed_prefixes)) + script = script.replace("__RELAY_RETURN_NETWORKS__", _quote_shell_words(relay_return_networks)) + script = script.replace("__BGP_ASN__", shlex.quote(bgp_asn)) + script = script.replace("__BGP_PASSWORD__", shlex.quote(NVCM_NETWORK_SECRETS["bgp_password"])) + script = script.replace("__NODE_EXPORTER_VERSION__", NODE_EXPORTER_VERSION) + script = script.replace("__NODE_EXPORTER_BASE_URL__", NODE_EXPORTER_BASE_URL) + script = script.replace("__NODE_EXPORTER_AMD64_SHA256__", 
def generate_server_cloud_init(
    *,
    internal_mac: str,
    git_token: str | None = None,
    config_manager_repo: str = "",
    config_manager_ref: str = "main",
    deploy_size: str = "medium",
    internal_ip: str,
    site_name: str,
    oob_gateway: str | None,
    lb_allowed_prefixes: str = "0.0.0.0/0",
    relay_return_networks: str = "",
    bgp_asn: str = "4266000000",
) -> str:
    """Generate cloud-init user-data for the oob-mgmt-server.

    eth0 is the AIR exit interface (auto-DHCP by AIR, no config needed).
    eth1 is the internal OOB interface configured here with a static IP
    matched by MAC address.

    The user-data always writes the netplan file for the internal interface
    and a passwordless-sudo rule for the ``nvcm`` user, then runs
    ``netplan apply``.

    When *config_manager_repo* is supplied, it produces a full-setup
    cloud-init that installs all prerequisites, creates a Kind cluster with
    MetalLB, and clones the nv-config-manager repository: the setup script
    and the Kind config are written under ``/opt`` and the script is run after
    ``netplan apply``. ``git_token`` is optional and is only embedded in the
    clone URL when set, which keeps public GitHub clones tokenless by default
    while still allowing private forks.

    Args:
        internal_mac: MAC address used to match the internal interface.
        git_token: Optional token forwarded to the setup-script generator.
        config_manager_repo: Repository URL; an empty value skips the full setup.
        config_manager_ref: Git ref to clone.
        deploy_size: Forwarded to the setup script and the Kind config.
        internal_ip: Static address assigned to the internal interface.
        site_name: Forwarded to the setup-script generator.
        oob_gateway: Forwarded to the setup-script generator.
        lb_allowed_prefixes: Forwarded to the setup-script generator.
        relay_return_networks: Forwarded to the setup-script generator.
        bgp_asn: Forwarded to the setup-script generator.

    Returns:
        The ``#cloud-config`` header line followed by the YAML document.
    """
    netplan_yaml = yaml.dump(
        {
            "network": {
                "version": 2,
                "ethernets": {
                    "eth1-internal": {
                        "match": {"macaddress": internal_mac},
                        "dhcp4": False,
                        "addresses": [internal_ip],
                        "optional": True,
                    },
                },
            }
        },
        Dumper=_BlockStyleDumper,
        default_flow_style=False,
        sort_keys=False,
    )

    write_files: list[dict[str, Any]] = [
        {
            "path": "/etc/netplan/99-air-config.yaml",
            "permissions": "0600",
            "content": netplan_yaml,
        },
        {
            "path": "/etc/sudoers.d/99-nvcm-nopasswd",
            "content": "nvcm ALL=(ALL) NOPASSWD:ALL\n",
        },
    ]

    runcmd: list[list[str]] = [
        ["netplan", "apply"],
    ]

    if config_manager_repo:
        setup_script = generate_setup_script(
            deploy_size=deploy_size,
            git_token=git_token,
            config_manager_repo=config_manager_repo,
            config_manager_ref=config_manager_ref,
            internal_ip=internal_ip,
            internal_mac=internal_mac,
            site_name=site_name,
            oob_gateway=oob_gateway,
            lb_allowed_prefixes=lb_allowed_prefixes,
            relay_return_networks=relay_return_networks,
            bgp_asn=bgp_asn,
        )
        kind_config = generate_kind_config(deploy_size)

        write_files.extend(
            [
                {
                    "path": "/opt/nvcm-setup.sh",
                    "permissions": "0755",
                    "content": setup_script,
                },
                {
                    "path": "/opt/kind-config.yaml",
                    "content": kind_config,
                },
            ]
        )
        # Appended after "netplan apply", so the script runs second.
        runcmd.append(["bash", "/opt/nvcm-setup.sh"])

    cloud_config: dict[str, Any] = {
        "users": [
            {
                "name": "nvcm",
                "gecos": "NVCM Demo User",
                "groups": "sudo,adm",
                "shell": "/bin/bash",
                "lock_passwd": False,
                "passwd": (
                    "$6$nvcmsalt$lHuZ5gth0uLkQEy.uz47oeG85XNZwA8AIHmFEKf98ZBs0S4b5M69JX2DyqQKTD05Hlek39poAyNJgN1J.0A.y/"
                ),
            },
        ],
        # ssh_pwauth is a top-level cloud-config key (Set Passwords module);
        # cloud-init does not read it from inside a users[] entry.
        "ssh_pwauth": True,
        "chpasswd": {"expire": False},
        "write_files": write_files,
        "runcmd": runcmd,
    }

    return "#cloud-config\n" + yaml.dump(
        cloud_config,
        Dumper=_BlockStyleDumper,
        default_flow_style=False,
        sort_keys=False,
    )
b/installer/src/nv_config_manager_installer/air_sim/constants.py new file mode 100644 index 0000000..e4d397e --- /dev/null +++ b/installer/src/nv_config_manager_installer/air_sim/constants.py @@ -0,0 +1,198 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Constants and configuration for the NVCM AIR simulation helper.""" + +from __future__ import annotations + +import re +from pathlib import Path + +import yaml + + +class _BlockStyleDumper(yaml.SafeDumper): + """YAML dumper that uses block scalar style for multi-line strings.""" + + pass + + +def _block_str_representer(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode: + if "\n" in data: + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + +_BlockStyleDumper.add_representer(str, _block_str_representer) + +_RELEASE_TAG_RE = re.compile(r"^v?\d+\.\d+\.\d+") + + +def _is_release_tag(ref: str) -> bool: + """Return True if *ref* looks like a release tag or is main.""" + return ref == "main" or bool(_RELEASE_TAG_RE.match(ref)) + + +DEFAULT_AIR_API_URL = "https://api.air-ngc.nvidia.com/api/" +DEFAULT_AIR_INTERNAL_URL = "https://api.air-inside.nvidia.com/api/" +DEFAULT_AIR_ORG = "" +DEFAULT_AIR_SIM_CONFIG_PATH = Path.home() / ".nvcm-air-sim.yaml" + +CUMULUS_VX_IMAGES = { + "5.16.1": 
"cumulus-linux-vx-amd64-5.16.1.0008.qcow2", + "5.16.0": "cumulus-vx-5.16.0", + "5.15.0": "cumulus-vx-5.15.0", + "5.14.0": "cumulus-vx-5.14.0", + "5.11.0": "cumulus-vx-5.11.0", + "5.10.0": "cumulus-vx-5.10.0", + "5.9.0": "cumulus-vx-5.9.0", + "5.8.0": "cumulus-vx-5.8.0", + "5.7.0": "cumulus-vx-5.7.0", + "5.6.0": "cumulus-vx-5.6.0", + "5.5.0": "cumulus-vx-5.5.0", + "5.4.0": "cumulus-vx-5.4.0", +} +DEFAULT_CUMULUS_VERSION = "5.14.0" + +DEFAULT_NODE_CPU = 2 +DEFAULT_NODE_MEMORY = 4096 +DEFAULT_NODE_STORAGE = 10 +DEFAULT_SERVER_OS = "generic/ubuntu2404" + +NVCM_SERVER_CPU = 16 +NVCM_SERVER_MEMORY = 32768 +NVCM_SERVER_STORAGE = 100 +NVCM_SERVER_OS = "generic/ubuntu2404" +DEFAULT_NVCM_SERVER_NAME = "nvcm-server" + +NVCM_BOX_USER = "nvcm" +NVCM_BOX_PASSWORD = "NVCMDemo1!" # trufflehog:ignore - public AIR demo VM password +NVCM_BOX_DIR = "/opt/nvcm-box" +DEFAULT_NAUTOBOT_DEMO_USERNAME = "demo" +DEFAULT_NAUTOBOT_DEMO_PASSWORD = "demo" # trufflehog:ignore - public AIR demo user password + +PROJECT_ROOT = Path(__file__).resolve().parents[4] +DEFAULT_MOCK_TOPOLOGY_PATH = PROJECT_ROOT / "development" / "mock_topology" +DEFAULT_MOCK_CONTEXT_ROOT = DEFAULT_MOCK_TOPOLOGY_PATH / "context" +DEFAULT_AIR_TRIAL_CONFIG = PROJECT_ROOT / "development" / "air_sim" / "configs" / "air_trial.yaml" +DEFAULT_AIR_DEMO_TEMPLATE_PLUGIN_PATH = ( + Path("development") / "air_sim" / "template_plugins" / "superpod-template-plugin" +) + +CONFIG_MANAGER_REPO_DIR = "nv-config-manager" +CONFIG_MANAGER_REMOTE_DIR = f"/home/{NVCM_BOX_USER}/{CONFIG_MANAGER_REPO_DIR}" +CONFIG_MANAGER_INSTALL_CONFIG = "nv-config-manager-install.yaml" +CONFIG_MANAGER_NAMESPACE = "nv-config-manager" +CONFIG_MANAGER_RELEASE = "nv-config-manager" +CONFIG_MANAGER_HOSTNAME = "nvcm.air" +CONFIG_MANAGER_KIND_CLUSTER = "nvcm" +CONFIG_MANAGER_COMPONENT_PREFIX = "nv-config-manager" +CONFIG_MANAGER_NAUTOBOT_DEPLOYMENT = f"{CONFIG_MANAGER_COMPONENT_PREFIX}-nautobot" +CONFIG_MANAGER_DHCP_DEPLOYMENT = f"{CONFIG_MANAGER_COMPONENT_PREFIX}-dhcp" 
+CONFIG_MANAGER_DHCP_REFRESH_DEPLOYMENT = f"{CONFIG_MANAGER_DHCP_DEPLOYMENT}-refresh" +CONFIG_MANAGER_RENDER_API_DEPLOYMENT = f"{CONFIG_MANAGER_COMPONENT_PREFIX}-render-api" +CONFIG_MANAGER_ZTP_DEPLOYMENT = f"{CONFIG_MANAGER_COMPONENT_PREFIX}-ztp" +CONFIG_MANAGER_TEMPORAL_DEPLOYMENT = f"{CONFIG_MANAGER_COMPONENT_PREFIX}-temporal" +CONFIG_MANAGER_TEMPORAL_FRONTEND_DEPLOYMENT = f"{CONFIG_MANAGER_TEMPORAL_DEPLOYMENT}-frontend" +CONFIG_MANAGER_TEMPORAL_WORKER_DEPLOYMENT = ( + f"{CONFIG_MANAGER_TEMPORAL_DEPLOYMENT}-{CONFIG_MANAGER_COMPONENT_PREFIX}-worker" +) +DEFAULT_CONFIG_MANAGER_REPO = "https://github.com/NVIDIA/nv-config-manager" + +AGGRESSIVE_DHCLIENT_CONF = """\ +option rfc3442-classless-static-routes code 121 = array of unsigned integer 8; +option cumulus-provision-url code 239 = text; + +send host-name = gethostname(); +request subnet-mask, broadcast-address, time-offset, routers, + domain-name, domain-name-servers, domain-search, host-name, + dhcp6.name-servers, dhcp6.domain-search, dhcp6.fqdn, dhcp6.sntp-servers, + netbios-name-servers, netbios-scope, interface-mtu, + rfc3442-classless-static-routes, ntp-servers, cumulus-provision-url; + +send dhcp-lease-time 7200; +timeout 30; +retry 30; +reboot 5; +backoff-cutoff 2; +initial-interval 1; +send vendor-class-identifier "cumulus-linux x86_64"; +""" + +NVCM_SECRETS = { + "cumulus_user": "cumulus", + "cumulus_password": NVCM_BOX_PASSWORD, + "nvcm_user": "nvConfigManager", + "nvcm_password": NVCM_BOX_PASSWORD, + "nautobot_superuser": "admin", + "nautobot_password": "admin", + "nautobot_db_password": "nautobot-db-password", + "temporal_db_password": "temporal-db-password", + "temporal_visibility_db_password": "temporal-vis-db-password", + "config_store_db_password": "config-store-db-password", + "dhcp_db_password": "dhcp-db-password", + "nautobot_secret_key": "air-sim-secret-key-not-for-production-use-1234567890", + "redis_password": "redis-password", +} + +NVCM_NETWORK_SECRETS = { + "root_password": 
def find_ssh_pubkey(path: str | None = None) -> str:
    """Return the stripped text of the first SSH public key found on disk.

    Candidates are checked in order: the caller-supplied *path* (with ``~``
    expanded) when one is given, then ``~/.ssh/id_ed25519.pub``, then
    ``~/.ssh/id_rsa.pub``.

    Raises:
        FileNotFoundError: none of the candidates is an existing file; the
            message lists every path that was tried.
    """
    ssh_dir = Path.home() / ".ssh"
    search_order: list[Path] = [Path(path).expanduser()] if path else []
    search_order += [ssh_dir / "id_ed25519.pub", ssh_dir / "id_rsa.pub"]

    key_file = next((entry for entry in search_order if entry.is_file()), None)
    if key_file is None:
        tried = ", ".join(map(str, search_order))
        raise FileNotFoundError(f"No SSH public key found (tried: {tried})")
    return key_file.read_text().strip()
def _render_deployment_name(value: Any, deployment_name: str) -> Any:
    """Substitute the deployment-name placeholder throughout a nested value.

    Strings get both spellings of the placeholder (``{{ deployment_name }}``
    and ``{{deployment_name}}``) replaced. Lists and dict values are processed
    recursively; dict keys are left as they are. Any other type is returned
    unchanged.
    """
    if isinstance(value, dict):
        return {key: _render_deployment_name(item, deployment_name) for key, item in value.items()}
    if isinstance(value, list):
        return [_render_deployment_name(item, deployment_name) for item in value]
    if isinstance(value, str):
        rendered = value
        for placeholder in ("{{ deployment_name }}", "{{deployment_name}}"):
            rendered = rendered.replace(placeholder, deployment_name)
        return rendered
    return value
devices_dir = context_dir / "devices" + if not devices_dir.is_dir(): + raise ValueError(f"Mock topology context has no devices directory: {devices_dir}") + + locations_doc = _render_deployment_name( + _load_yaml(context_dir / "locations.yaml"), deployment_name + ) + prefixes_doc = _render_deployment_name( + _load_yaml(context_dir / "prefixes.yaml"), deployment_name + ) + + location_hierarchy = [] + for loc in locations_doc.get("locations", []): + location_hierarchy.append( + { + "name": loc.get("name"), + "type": loc.get("location_type", loc.get("type", "Region")), + "tenant": locations_doc.get("global_defaults", {}).get("tenant"), + } + ) + + prefixes = [] + for prefix in prefixes_doc.get("aggregate_prefixes", []): + prefixes.append( + { + "prefix": prefix.get("prefix"), + "role": prefix.get("role"), + "tags": prefix.get("tags", []), + } + ) + + devices = [] + interfaces = [] + ip_addresses = [] + connections = [] + + for json_file in sorted(devices_dir.glob("*.json")): + with open(json_file) as f: + payload = _render_deployment_name(json.load(f), deployment_name) + device = (payload.get("data") or {}).get("device") or {} + if not device.get("name"): + continue + + device_name = device["name"] + normalized_device = { + "name": device_name, + "device_type": { + "manufacturer": _name((device.get("device_type") or {}).get("manufacturer")), + "model": (device.get("device_type") or {}).get("model", "Unknown"), + }, + "status": _name(device.get("status"), "Active"), + "role": _name(device.get("role"), "Unknown"), + "platform": _name(device.get("platform")), + "tenant": _name(device.get("tenant")), + "serial": device.get("serial", ""), + "local_config_context_data": device.get("config_context", {}), + "tags": [_name(tag) for tag in device.get("tags", [])], + } + if device.get("_air"): + normalized_device["_air"] = device["_air"] + devices.append(normalized_device) + + for intf in device.get("interfaces", []): + intf_name = intf.get("name") + if not intf_name: + continue 
def write_site_design_from_mock_context(blueprint: str, deployment_name: str) -> str:
    """Write a temporary site-design YAML generated from mock context and return its path.

    The design comes from ``build_site_design_from_mock_context`` and is dumped
    to a ``air-<blueprint>-*.yaml`` temp file. The file outlives this call
    (``delete=False``) and is removed at interpreter exit.

    Returns:
        The filesystem path of the written YAML file.
    """
    site_design = build_site_design_from_mock_context(blueprint, deployment_name)
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".yaml", prefix=f"air-{blueprint}-", delete=False
    ) as tmp:
        # Register cleanup before writing so a failed dump cannot strand the
        # file; the ``with`` block closes the handle on every path.
        atexit.register(Path(tmp.name).unlink, missing_ok=True)
        yaml.safe_dump(site_design, tmp, default_flow_style=False, sort_keys=False)
    return tmp.name
def _normalize_post_deploy_job(job_spec: dict[str, Any]) -> dict[str, str]:
    """Coerce a user-supplied post-deploy job spec into ``{"job": str, "input": str}``.

    ``job`` is stringified and stripped. ``input`` is passed through when it
    is already a string and JSON-encoded otherwise. A key that is missing or
    explicitly null yields an empty string, so a ``job: null`` entry is
    dropped by callers that filter on a non-empty job name instead of being
    scheduled as a job literally called ``"None"``.
    """
    raw_job = job_spec.get("job")
    job = "" if raw_job is None else str(raw_job).strip()

    raw_input = job_spec.get("input")
    if raw_input is None:
        # Treat an explicit null like an omitted key rather than emitting "null".
        job_input = ""
    elif isinstance(raw_input, str):
        job_input = raw_input
    else:
        job_input = json.dumps(raw_input)
    return {"job": job, "input": job_input}
def build_template_plugins(cfg: SimConfig) -> list[dict[str, str]]:
    """Build the installer's ``content.template_plugins`` list.

    Each non-empty entry of ``cfg.template_plugin_paths`` becomes a
    ``{"path": ...}`` mapping whose value has been translated by
    ``_remote_repo_path``; empty entries are skipped.
    """
    plugins: list[dict[str, str]] = []
    for plugin_path in cfg.template_plugin_paths:
        if not plugin_path:
            continue
        plugins.append({"path": _remote_repo_path(plugin_path)})
    return plugins
def build_deploy_command(_cfg: SimConfig) -> str:
    """Assemble the shell command that runs ``nv-config-manager-installer deploy`` remotely.

    The command is built entirely from module constants; *_cfg* is accepted
    but not consulted.
    """
    home = f"/home/{NVCM_BOX_USER}"
    repo = CONFIG_MANAGER_REMOTE_DIR
    segments = [
        f"sudo NO_COLOR=1 KUBECONFIG={home}/.kube/config uv run",
        f"--directory {repo}",
        f"--project {repo}/installer",
        f"nv-config-manager-installer deploy {home}/{CONFIG_MANAGER_INSTALL_CONFIG}",
        f"--chart-dir {repo}/deploy/helm",
        f"--kind-cluster {CONFIG_MANAGER_KIND_CLUSTER}",
        "--install-envoy-gateway --install-cert-manager --install-cnpg-operator",
        "--image-source local --build-images --load-kind",
    ]
    # Segments are joined with single spaces, matching the original layout.
    return " ".join(segments)
@dataclass
class DeviceInfo:
    """Information about a device from the site export.

    A plain record with no behaviour of its own. The two collection fields
    use ``default_factory`` so instances never share a mutable default.
    """

    name: str
    # Platform label; the orchestrator picks switches with `"Cumulus" in platform`.
    platform: str
    role: str
    model: str
    firmware_version: str
    # Interface names present on the device.
    interfaces: list[str] = field(default_factory=list)
    # Interface name -> MAC address (the orchestrator looks up "eth1" here).
    interface_macs: dict[str, str] = field(default_factory=dict)
    serial: str = ""
    # NOTE(review): the code that sets or reads this flag is not visible here.
    nvcm_enabled: bool = False
@property
def creates_new_node(self) -> bool:
    """Whether this config creates a new node vs using existing.

    Uses the same truthiness test as ``server_name``, so an empty
    ``use_existing_server`` string counts as "no existing server" in both
    properties rather than naming the default new server while reporting
    that no node is created.
    """
    return not self.use_existing_server
class StepStatus(Enum):
    """State of one bring-up step, as passed to ``OrchestratorCallback.on_step``."""

    # Not emitted by the orchestrator code visible in this module; presumably
    # the initial state a UI shows before a step starts -- TODO confirm.
    PENDING = "pending"
    RUNNING = "running"
    SUCCESS = "success"
    FAILED = "failed"
    SKIPPED = "skipped"
class _CallbackLogHandler(logging.Handler):
    """Logging handler that hands each formatted record to an ``on_log`` callable."""

    def __init__(self, on_log: Callable[[str], None]) -> None:
        logging.Handler.__init__(self)
        self._on_log = on_log

    def emit(self, record: logging.LogRecord) -> None:
        """Format *record* and pass the resulting line to the callback.

        Any exception raised while formatting or inside the callback is
        swallowed, so a logging call never raises because of this handler.
        """
        try:
            line = self.format(record)
            self._on_log(line)
        except Exception:
            return
def _run_impl(self) -> tuple[str, int]:
    """Execute every bring-up step in order, reporting progress via callbacks.

    Steps: resolve topology, create the AIR simulation, attach cloud-init,
    start the simulation, create the SSH service, wait for cloud-init,
    upload the installer config, run the installer, post-deploy setup.
    Steps that do not apply to the current configuration are reported as
    ``SKIPPED``.

    Returns:
        ``(host, port)`` of the SSH service, or ``("", 0)`` when
        ``auto_configure`` is disabled and no SSH service is created.

    Raises:
        RuntimeError: the OOB server or its ``eth1`` MAC is missing from the
            topology, ``sshpass`` is not installed, the SSH service cannot be
            created, the config upload fails, or the deploy command fails.
    """
    cfg = self._cfg

    self._step("parse-topology", StepStatus.RUNNING)
    topology_path = self._resolve_topology_path(cfg)
    if cfg.cumulus_version:
        topology_path = _create_version_override_yaml(topology_path, cfg.cumulus_version)

    nvcm_server: NVCMServerConfig | None = None
    if cfg.server_mode == "use-existing":
        nvcm_server = NVCMServerConfig(
            use_existing_server=cfg.oob_server_name,
            nvcm_size=cfg.size,
        )
    elif cfg.server_mode == "create-new" and cfg.attach_switch and cfg.attach_interface:
        nvcm_server = NVCMServerConfig(
            attach_switch=cfg.attach_switch,
            attach_interface=cfg.attach_interface,
            nvcm_size=cfg.size,
        )

    builder = AirTopologyBuilder(
        yaml_path=topology_path,
        simulation_name=cfg.simulation_name or None,
        minimal_mode=False,
        nvcm_server=nvcm_server,
    )
    topology = builder.build_topology()
    self._log(
        f"Site: {builder.site_name} "
        f"Devices: {len(builder.devices)} "
        f"Nodes: {len(topology['nodes'])} "
        f"Links: {len(topology['links'])}"
    )
    self._step("parse-topology", StepStatus.SUCCESS)

    derived_ip, derived_gw = _resolve_oob_server_ips_from_topology(
        builder.site_design, cfg.oob_server_name
    )
    internal_ip = derived_ip
    oob_gateway = derived_gw
    lb_allowed = builder.lb_allowed_prefixes
    bgp_asn = builder.resolve_device_bgp_asn(cfg.oob_server_name) or "4266000000"

    self._step("create-sim", StepStatus.RUNNING)
    manager = AirSimulationManager(
        ngc_api_key=cfg.ngc_api_key,
        use_internal=cfg.use_internal,
        org_id=cfg.org_id,
    )
    simulation_id = manager.create_simulation(builder.simulation_name, topology)
    self._log(f"Simulation: {simulation_id}")
    if nvcm_server:
        manager.prepare_nvcm_server(simulation_id, nvcm_server.server_name)
    self._step("create-sim", StepStatus.SUCCESS)

    internal_mac = ""
    # A configured repo selects the full setup; without one only the minimal
    # cloud-init is attached and the later deploy steps are skipped.
    full_setup = bool(cfg.config_manager_repo)

    if cfg.auto_configure:
        self._step("attach-cloud-init", StepStatus.RUNNING)
        server_dev = builder.devices.get(cfg.oob_server_name)
        if not server_dev:
            raise RuntimeError(f"Server {cfg.oob_server_name!r} not in topology")
        internal_mac = server_dev.interface_macs.get("eth1", "")
        if not internal_mac:
            raise RuntimeError(
                f"No MAC for {cfg.oob_server_name}:eth1; add mac_address to the topology"
            )

        cloud_init = generate_server_cloud_init(
            internal_mac=internal_mac,
            git_token=cfg.git_token,
            config_manager_repo=cfg.config_manager_repo,
            config_manager_ref=cfg.config_manager_ref,
            deploy_size=cfg.size,
            internal_ip=internal_ip,
            site_name=builder.site_name,
            oob_gateway=oob_gateway,
            lb_allowed_prefixes=",".join(lb_allowed),
            relay_return_networks=" ".join(builder.relay_return_prefixes),
            bgp_asn=bgp_asn,
        )
        manager.attach_cloud_init(simulation_id, cfg.oob_server_name, cloud_init)
        self._log(
            "Cloud-init attached " + ("(full setup)" if full_setup else "(minimal setup)")
        )
        self._step("attach-cloud-init", StepStatus.SUCCESS)
    else:
        self._step("attach-cloud-init", StepStatus.SKIPPED)

    if not cfg.no_aggressive_dhcp:
        cumulus_names = [d.name for d in builder.devices.values() if "Cumulus" in d.platform]
        if cumulus_names:
            manager.attach_dhclient_tuning(simulation_id, cumulus_names)
            self._log(f"Aggressive DHCP attached to {len(cumulus_names)} switch(es)")

    self._step("start-sim", StepStatus.RUNNING)
    self._log("Waiting for simulation to boot (this may take several minutes)...")
    manager.start_simulation(simulation_id, wait=True)
    self._log(f"Simulation running: {simulation_id}")
    self._step("start-sim", StepStatus.SUCCESS)

    if not cfg.auto_configure:
        for step_id in (
            "create-ssh",
            "wait-setup",
            "upload-files",
            "run-deploy",
            "post-deploy",
        ):
            self._step(step_id, StepStatus.SKIPPED)
        return "", 0

    if not shutil.which("sshpass"):
        raise RuntimeError(
            "sshpass not found; required for SSH automation. Install sshpass and retry."
        )

    self._step("create-ssh", StepStatus.RUNNING)
    ssh_info = manager.create_ssh_service(simulation_id, cfg.oob_server_name, "eth0")
    if not ssh_info:
        raise RuntimeError(f"Could not create SSH service for {cfg.oob_server_name}:eth0")
    host, port = ssh_info
    self._log(f"SSH ready: {NVCM_BOX_USER}@{host}:{port}")
    self._step("create-ssh", StepStatus.SUCCESS)
    self._cb.on_ssh_ready(host, port)

    if not full_setup or cfg.wait_timeout == 0:
        for step_id in ("wait-setup", "upload-files", "run-deploy", "post-deploy"):
            self._step(step_id, StepStatus.SKIPPED)
        self._log(
            f"\nMonitor setup: sshpass -p {NVCM_BOX_PASSWORD} ssh -p {port} "
            f"{NVCM_BOX_USER}@{host} 'sudo tail -f /var/log/nvcm-setup.log'"
        )
        return host, port

    self._step("wait-setup", StepStatus.RUNNING)
    setup_ok = manager.wait_for_cloud_init(host, port, timeout=cfg.wait_timeout)
    if not setup_ok:
        # NOTE(review): this returns normally, and run() treats a normal
        # return as overall success even though the step is FAILED -- confirm
        # that on_complete(True, ...) is intended after a timeout.
        self._step("wait-setup", StepStatus.FAILED)
        for step_id in ("upload-files", "run-deploy", "post-deploy"):
            self._step(step_id, StepStatus.SKIPPED)
        self._log(
            f"\nSetup timed out. Check: sshpass -p {NVCM_BOX_PASSWORD} ssh -p {port} "
            f"{NVCM_BOX_USER}@{host} 'sudo tail -f /var/log/nvcm-setup.log'"
        )
        return host, port
    self._step("wait-setup", StepStatus.SUCCESS)

    self._step("upload-files", StepStatus.RUNNING)
    install_yaml = generate_air_sim_install_yaml(
        cfg,
        site_name=builder.site_name,
        lb_allowed_prefixes=lb_allowed,
    )
    tmp_path = ""
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".yaml", prefix="nv-config-manager-install-", delete=False
        ) as tmp:
            tmp_path = tmp.name
            tmp.write(install_yaml)

        ok = manager.upload_to_server(
            host,
            port,
            tmp_path,
            f"/home/{NVCM_BOX_USER}/{CONFIG_MANAGER_INSTALL_CONFIG}",
        )
    finally:
        # Remove the local copy even when the write or the upload raises, so
        # the generated config never lingers in the temp directory.
        if tmp_path:
            Path(tmp_path).unlink(missing_ok=True)
    if not ok:
        self._step("upload-files", StepStatus.FAILED)
        raise RuntimeError(f"Failed to upload {CONFIG_MANAGER_INSTALL_CONFIG}")
    self._log(f"Uploaded {CONFIG_MANAGER_INSTALL_CONFIG}")
    self._step("upload-files", StepStatus.SUCCESS)

    if not cfg.deploy:
        for step_id in ("run-deploy", "post-deploy"):
            self._step(step_id, StepStatus.SKIPPED)
        self._log(f"\nSetup done. SSH in and run:\n {build_deploy_command(cfg)}")
        return host, port

    self._step("run-deploy", StepStatus.RUNNING)
    self._cb.on_deploy_started(host, port)
    deploy_cmd = build_deploy_command(cfg)
    self._log(f"Running deploy command:\n {deploy_cmd}")
    deploy_ok = manager.run_deploy(host, port, deploy_cmd, timeout=cfg.deploy_timeout)
    if not deploy_ok:
        self._step("run-deploy", StepStatus.FAILED)
        raise RuntimeError("nv-config-manager-installer deploy failed")
    self._step("run-deploy", StepStatus.SUCCESS)

    self._step("post-deploy", StepStatus.RUNNING)
    manager.configure_etc_hosts(host, port)
    resolved_iface = manager.resolve_iface_by_mac(host, port, internal_mac)
    manager.configure_nat_rules(
        host,
        port,
        oob_gateway=oob_gateway,
        relay_return_networks=builder.relay_return_prefixes,
        # Fall back to the conventional name when the MAC lookup finds nothing.
        internal_iface=resolved_iface or "eth1",
    )
    manager.run_populate_ztp(host, port)

    cumulus_reset = [d.name for d in builder.devices.values() if "Cumulus" in d.platform]
    manager.queue_render_all(host, port)
    manager.wait_for_intended_configs(host, port, expected_total=len(cumulus_reset))
    manager.restart_dhcp_refresh(host, port)

    if not cfg.no_reset_before_dhcp and cumulus_reset:
        manager.reset_cumulus_nodes(simulation_id, cumulus_reset)

    manager.create_nautobot_demo_user(host, port)
    manager.ensure_temporal_search_attributes(host, port)
    self._step("post-deploy", StepStatus.SUCCESS)
    self._log(f"\nDone! {NVCM_BOX_USER}@{host}:{port}")
    return host, port
@dataclass(frozen=True)
class PrebuiltConfig:
    """Immutable description of one preset selectable in the TUI.

    ``path`` points at a YAML file to load; when it is ``None`` the preset
    is assembled in code by :func:`load_prebuilt_config`.
    """

    id: str
    label: str
    description: str
    path: Path | None = None


PREBUILT_CONFIGS: tuple[PrebuiltConfig, ...] = (
    PrebuiltConfig(
        id="air-trial",
        label="AIR free trial demo",
        description="Resource-capped ZTP and multi-deploy demo for public AIR trial accounts.",
        path=DEFAULT_AIR_TRIAL_CONFIG,
    ),
    PrebuiltConfig(
        id="superpod",
        label="SuperPOD demo",
        description=(
            "Two-rack public SuperPOD mockup built from mock_topology context "
            "with dedicated demo templates."
        ),
    ),
)


def get_prebuilt_config(config_id: str) -> PrebuiltConfig | None:
    """Look up preset metadata by id; return ``None`` when no preset matches."""
    for candidate in PREBUILT_CONFIGS:
        if candidate.id == config_id:
            return candidate
    return None


def load_prebuilt_config(config_id: str) -> SimConfig:
    """Build a new ``SimConfig`` for the preset named ``config_id``.

    File-backed presets are read with ``SimConfig.from_yaml``; the
    ``superpod`` preset is constructed from explicit keyword values.

    Raises:
        ValueError: If ``config_id`` is unknown, or the preset has neither a
            YAML path nor an in-code loader.
    """
    preset = get_prebuilt_config(config_id)
    if preset is None:
        raise ValueError(f"Unknown pre-built config: {config_id}")

    # File-backed presets take precedence over in-code loaders.
    if preset.path:
        return SimConfig.from_yaml(preset.path)

    if preset.id != "superpod":
        raise ValueError(f"Pre-built config has no loader: {preset.id}")

    superpod_settings = {
        "topology_path": "",
        "mock_blueprint": "air_superpod",
        "deployment_name": "demo",
        "simulation_name": "nv-config-manager-superpod-demo",
        "oob_server_name": "oob-mgmt-server",
        "server_mode": "use-existing",
        "auto_configure": True,
        "git_token": "",
        "config_manager_repo": DEFAULT_CONFIG_MANAGER_REPO,
        "config_manager_ref": "main",
        "cumulus_version": "",
        "size": "small",
        "deploy": True,
        "run_mock_topology_job": True,
        "mock_topology_path": str(DEFAULT_MOCK_TOPOLOGY_PATH),
        "template_plugin_paths": [str(DEFAULT_AIR_DEMO_TEMPLATE_PLUGIN_PATH)],
        "use_internal": False,
        "ngc_api_key": "",
    }
    return SimConfig(**superpod_settings)
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SOCKS proxy helpers for accessing NVCM services through AIR VMs.""" + +from __future__ import annotations + +import os +import platform +import subprocess +import time +from dataclasses import dataclass + +from nv_config_manager_installer.air_sim.constants import NVCM_BOX_PASSWORD, NVCM_BOX_USER + +SOCKS_PORT = 8080 +_NVCM_URL = "https://nautobot.nvcm.air" + +_SSH_OPTS = [ + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", + "-o", + "PreferredAuthentications=password", +] + +# Chromium-family executables to try, in preference order, per platform. 
_CHROME_PATHS_DARWIN = (
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
)
_CHROME_PATHS_LINUX = (
    "google-chrome",
    "google-chrome-stable",
    "chromium-browser",
    "chromium",
    "brave-browser",
    "microsoft-edge",
)
_CHROME_PATHS_WINDOWS = (
    r"C:\Program Files\Google\Chrome\Application\chrome.exe",
    r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
    r"C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe",
    r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
)


@dataclass
class ProxyInfo:
    """SSH endpoint of an AIR VM plus the local SOCKS port used to reach it.

    Provides copy-pasteable command strings for each platform and helpers
    that start the tunnel / browser locally.
    """

    host: str
    port: int
    socks_port: int = SOCKS_PORT

    def _ssh_argv(self) -> list[str]:
        """Return the ``ssh`` argv (without any ``sshpass`` prefix) for the tunnel."""
        return [
            "ssh",
            *_SSH_OPTS,
            "-D",
            str(self.socks_port),
            "-N",
            "-p",
            str(self.port),
            f"{NVCM_BOX_USER}@{self.host}",
        ]

    # ── Per-platform command strings ──────────────────────────────────────────

    def ssh_cmd_unix(self) -> str:
        """SOCKS tunnel command for Linux / macOS (uses sshpass)."""
        return " ".join(["sshpass", "-p", NVCM_BOX_PASSWORD, *self._ssh_argv()])

    def ssh_cmd_windows(self) -> str:
        """SOCKS tunnel command for Windows (built-in OpenSSH, prompts for password)."""
        return " ".join(self._ssh_argv())

    def browser_cmd_unix(self) -> str:
        """Chromium launch command that routes traffic through the SOCKS proxy."""
        return (
            f'chromium-browser --proxy-server="socks5://localhost:{self.socks_port}"'
            f' --user-data-dir="/tmp/chrome-nvcm-proxy"'
            f" --ignore-certificate-errors {_NVCM_URL}"
        )

    def browser_cmd_windows(self) -> str:
        """PowerShell-compatible Chrome launch command using the & call operator."""
        p = self.socks_port
        # PowerShell does not expand cmd.exe-style %TEMP%; the profile
        # directory must use $env:TEMP inside a double-quoted (expanding) string.
        return (
            f"& 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'"
            f" '--proxy-server=socks5://localhost:{p}'"
            f' "--user-data-dir=$env:TEMP\\chrome-nvcm-proxy"'
            f" '--ignore-certificate-errors'"
            f" '{_NVCM_URL}'"
        )

    def teleport_forward_cmd(self, remote_host: str) -> str:
        """Teleport local-forward command to expose the SOCKS port on Windows.

        Run this on the Windows machine to forward local:{socks_port} through
        the Teleport session to the Linux dev machine, where the SOCKS tunnel
        to the AIR VM is already running. Then point your browser at
        socks5://localhost:{socks_port}.
        """
        return f"tsh ssh -L {self.socks_port}:localhost:{self.socks_port} {remote_host}"

    # ── Local launch ──────────────────────────────────────────────────────────

    def start_tunnel(self) -> subprocess.Popen[bytes] | None:
        """Start the SOCKS tunnel in the background.

        Returns:
            The running ``Popen`` handle, or ``None`` when the command could
            not be spawned or exited within the first second.
        """
        cmd = self._ssh_argv()
        if platform.system() != "Windows":
            # Non-Windows platforms feed the demo password through sshpass.
            cmd = ["sshpass", "-p", NVCM_BOX_PASSWORD, *cmd]
        # NOTE(review): on Windows stdin is DEVNULL, so ssh cannot prompt for
        # the password here -- confirm the tunnel can authenticate on that path.
        try:
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,
            )
        except OSError:
            # Missing or non-executable ssh/sshpass binary: report failure.
            return None
        # Give ssh a moment to fail fast (bad port, auth error, ...).
        time.sleep(1)
        if proc.poll() is not None:
            return None
        return proc

    def launch_browser(self) -> bool:
        """Launch a Chromium-family browser with the SOCKS proxy. Returns True on success."""
        system = platform.system()
        exe = _find_browser(system)
        if not exe:
            return False

        if system == "Windows":
            # Popen runs without a shell, so %TEMP% must be expanded here;
            # otherwise Chrome receives the literal text "%TEMP%".
            user_data = os.path.expandvars(r"%TEMP%\chrome-nvcm-proxy")
        else:
            user_data = "/tmp/chrome-nvcm-proxy"

        argv = [
            exe,
            f"--proxy-server=socks5://localhost:{self.socks_port}",
            f"--user-data-dir={user_data}",
            "--ignore-certificate-errors",
            _NVCM_URL,
        ]
        try:
            subprocess.Popen(
                argv,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            return False
        return True


def _find_browser(system: str) -> str | None:
    """Return the first available Chromium-family executable for ``system``.

    ``Darwin`` and ``Windows`` probe fixed install paths; every other value
    resolves the Linux executable names on ``PATH``. Returns ``None`` when
    nothing is found.
    """
    import shutil  # local import: the module header does not import shutil

    if system == "Darwin":
        fixed_paths = _CHROME_PATHS_DARWIN
    elif system == "Windows":
        fixed_paths = _CHROME_PATHS_WINDOWS
    else:
        # shutil.which avoids spawning the external `which` binary, which
        # raised FileNotFoundError on hosts that do not ship it.
        for name in _CHROME_PATHS_LINUX:
            resolved = shutil.which(name)
            if resolved:
                return resolved
        return None

    for candidate in fixed_paths:
        if os.path.isfile(candidate):
            return candidate
    return None
def _default_git_token() -> str:
    """Return an optional generic Git token for private forks.

    ``GIT_TOKEN`` wins when it is set to a non-empty value; otherwise
    ``GITHUB_TOKEN`` is used. Returns ``""`` when neither is available.
    """
    # `or` (rather than a nested default) lets an empty GIT_TOKEN fall
    # through to GITHUB_TOKEN instead of masking it.
    return os.environ.get("GIT_TOKEN") or os.environ.get("GITHUB_TOKEN", "")


def _default_path(path: Path) -> str:
    """Return ``path`` as a string if it exists on disk, else ``""``."""
    return str(path) if path.exists() else ""


@dataclass
class SimConfig:
    """Configuration for bringing up an NVCM AIR simulation."""

    topology_path: str = ""
    mock_blueprint: str = "air_superpod"
    deployment_name: str = "demo"
    simulation_name: str = ""
    oob_server_name: str = "oob-mgmt-server"

    server_mode: str = "use-existing"
    attach_switch: str = ""
    attach_interface: str = ""

    auto_configure: bool = True
    git_token: str = field(default_factory=_default_git_token)
    config_manager_repo: str = DEFAULT_CONFIG_MANAGER_REPO

    size: str = "small"
    config_manager_ref: str = "main"
    cumulus_version: str = ""
    deploy: bool = True

    run_mock_topology_job: bool = True
    mock_topology_path: str = field(
        default_factory=lambda: _default_path(DEFAULT_MOCK_TOPOLOGY_PATH)
    )
    template_plugin_paths: list[str] = field(default_factory=list)
    extra_job_paths: list[str] = field(default_factory=list)
    extra_run_after_deploy: list[dict[str, Any]] = field(default_factory=list)

    use_internal: bool = False
    org_id: str = DEFAULT_AIR_ORG
    ngc_api_key: str = field(default_factory=lambda: os.environ.get("NGC_API_KEY", ""))

    wait_timeout: int = 1800
    deploy_timeout: int = 3600

    no_aggressive_dhcp: bool = False
    no_reset_before_dhcp: bool = False

    def to_yaml(self, path: Path) -> None:
        """Persist config to a YAML file with 0600 permissions.

        The file holds ``git_token`` and ``ngc_api_key``, so it is created
        with mode 0600 from the start instead of being tightened after the
        contents were written.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            # The mode passed to os.open only applies to newly created files;
            # tighten a pre-existing file before any secret is written.
            path.chmod(0o600)
            yaml.safe_dump(dataclasses.asdict(self), f, default_flow_style=False, sort_keys=False)

    @classmethod
    def from_yaml(cls, path: Path) -> SimConfig:
        """Load config from a YAML file, ignoring unknown keys.

        Raises:
            ValueError: If the document's top level is not a mapping.
        """
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a YAML mapping in {path}, got {type(data).__name__}"
            )
        known = {spec.name for spec in dataclasses.fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})

    @classmethod
    def load_or_default(cls, path: Path) -> SimConfig:
        """Load from path if possible, otherwise return defaults."""
        if path.exists():
            try:
                return cls.from_yaml(path)
            except Exception:
                # Deliberate best-effort: an unreadable or malformed saved
                # config falls back to defaults rather than aborting.
                pass
        return cls()
+"""AIR simulation manager for nvcm-air-simulation.""" + +from __future__ import annotations + +import ipaddress +import logging +import os +import platform +import re +import select +import shlex +import subprocess +import threading +import time +from collections.abc import Callable +from pathlib import Path +from typing import Any + +from air_sdk import AirApi +from air_sdk.endpoints.user_configs import UserConfig as UserConfigModel + +from nv_config_manager_installer.air_sim.constants import ( + AGGRESSIVE_DHCLIENT_CONF, + CONFIG_MANAGER_DHCP_DEPLOYMENT, + CONFIG_MANAGER_DHCP_REFRESH_DEPLOYMENT, + CONFIG_MANAGER_HOSTNAME, + CONFIG_MANAGER_NAMESPACE, + CONFIG_MANAGER_NAUTOBOT_DEPLOYMENT, + CONFIG_MANAGER_RENDER_API_DEPLOYMENT, + CONFIG_MANAGER_TEMPORAL_FRONTEND_DEPLOYMENT, + CONFIG_MANAGER_TEMPORAL_WORKER_DEPLOYMENT, + CONFIG_MANAGER_ZTP_DEPLOYMENT, + DEFAULT_AIR_API_URL, + DEFAULT_AIR_INTERNAL_URL, + DEFAULT_AIR_ORG, + DEFAULT_CONFIG_MANAGER_REPO, + DEFAULT_NAUTOBOT_DEMO_PASSWORD, + DEFAULT_NAUTOBOT_DEMO_USERNAME, + NODE_EXPORTER_BASE_URL, + NODE_EXPORTER_SHA256, + NODE_EXPORTER_VERSION, + NVCM_BOX_PASSWORD, + NVCM_BOX_USER, + NVCM_KIND_CONFIG, + NVCM_NETWORK_SECRETS, + NVCM_SECRETS, + NVCM_SERVER_SETUP_SCRIPT, +) +from nv_config_manager_installer.air_sim.models import NVCMServerConfig + +LOG = logging.getLogger(__name__) +_ANSI_ESCAPE = re.compile(r"\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + +_NAUTOBOT_PROVISIONING_NBSHELL = ( + "from nautobot.dcim.models import Device;" + "qs=Device.objects.filter(platform__name='Cumulus Linux');" + "total=qs.count();" + "status_field=Device._meta.get_field('status');" + "status_lookup='status__name' if getattr(status_field,'remote_field',None) else 'status';" + "status_filter={status_lookup:'Provisioned'};" + "prov=qs.filter(**status_filter).count();" + "pending=qs.exclude(**status_filter);" + "names=[d.name for d in pending.order_by('name')[:5]];" + "print(f'{prov}/{total}|{chr(44).join(names)}')" +) + 
_NAUTOBOT_INTENDED_CONFIG_NBSHELL = (
    "from nv_config_manager.models import ConfigManagerDeviceStatus, IntendedConfig;"
    "total=ConfigManagerDeviceStatus.objects.filter(render_enabled=True).count();"
    "ready=IntendedConfig.objects.filter(device_id__render_enabled=True).count();"
    "print(f'{ready}/{total}')"
)


class AirSimulationManager:
    """Manage AIR simulations for e2e testing."""

    def __init__(
        self,
        api_url: str | None = None,
        ngc_api_key: str | None = None,
        org_id: str | None = None,
        use_internal: bool = False,
    ) -> None:
        """Initialize the AIR simulation manager.

        Args:
            api_url: AIR API URL (auto-detected if not provided)
            ngc_api_key: NGC API key (Starfleet API Key / SAK) for auth;
                falls back to the ``NGC_API_KEY`` environment variable
            org_id: AIR organization ID for the simulation; falls back to
                ``AIR_ORG_ID`` and then ``DEFAULT_AIR_ORG``
            use_internal: Use internal AIR instance (api.air-inside.nvidia.com)

        Raises:
            ValueError: If no NGC API key is available.
        """
        self.api_url = api_url or (
            DEFAULT_AIR_INTERNAL_URL if use_internal else DEFAULT_AIR_API_URL
        )

        self.ngc_api_key = ngc_api_key or os.environ.get("NGC_API_KEY")
        self.org_id = org_id or os.environ.get("AIR_ORG_ID", DEFAULT_AIR_ORG)

        if not self.ngc_api_key:
            LOG.error("No NGC API key found. Set NGC_API_KEY env var or pass --ngc-api-key.")
            raise ValueError("Missing NGC API key for AIR authentication")

        LOG.info("Authenticating with NGC API key (Bearer token)...")
        self.client = AirApi.with_api_key(
            api_key=self.ngc_api_key,
            api_url=self.api_url,
        )

    def _lookup_node_with_retry(
        self,
        simulation_id: str,
        node_name: str,
        *,
        attempts: int = 6,
        delay: float = 5.0,
    ) -> Any | None:
        """Return the node called ``node_name`` in the simulation, or ``None``.

        Lists the simulation's nodes up to ``attempts`` times, sleeping
        ``delay`` seconds between tries. No sleep (and no "retrying" log)
        happens after the final attempt.
        """
        for attempt in range(1, attempts + 1):
            for node in self.client.nodes.list(simulation=simulation_id):
                if node.name == node_name:
                    return node
            if attempt < attempts:
                LOG.info(
                    "Node '%s' not yet visible (attempt %d/%d), retrying...",
                    node_name,
                    attempt,
                    attempts,
                )
                time.sleep(delay)
        return None

    def create_simulation(
        self,
        name: str,
        topology: dict[str, Any],
    ) -> str:
        """Create a new AIR simulation.

        Args:
            name: Simulation name
            topology: AIR topology JSON

        Returns:
            Simulation ID
        """
        LOG.info(f"Creating simulation: {name}")

        simulation = self.client.simulations.import_from_data(
            format="JSON",
            content=topology,
            name=name,
        )

        LOG.info(f"Created simulation: {simulation.id}")
        return simulation.id

    # ------------------------------------------------------------------
    # Cloud-init UserConfig attach / cleanup
    # ------------------------------------------------------------------

    def attach_cloud_init(
        self,
        simulation_id: str,
        node_name: str,
        cloud_init_content: str,
    ) -> None:
        """Create a cloud-init UserConfig and attach it to a node.

        Uses a fixed, deterministic name (``<node_name>-cloud-init``) so that
        repeated runs reuse the same UserConfig rather than creating orphans.
        Must be called *before* the simulation is started so that cloud-init
        runs on first boot.

        Raises:
            ValueError: If the node never becomes visible in the simulation.
        """
        target_node = self._lookup_node_with_retry(simulation_id, node_name)
        if not target_node:
            raise ValueError(f"Node '{node_name}' not found in simulation {simulation_id}")

        config_name = f"{node_name}-cloud-init"

        # Reuse an existing UserConfig with the same name when there is one.
        user_config = None
        for cfg in self.client.user_configs.list():
            if getattr(cfg, "name", None) == config_name:
                cfg.update(content=cloud_init_content)
                user_config = cfg
                LOG.info("Updated existing UserConfig '%s': %s", config_name, cfg.id)
                break

        if user_config is None:
            user_config = self.client.user_configs.create(
                name=config_name,
                kind=UserConfigModel.KIND_CLOUD_INIT_USER_DATA,
                organization=self.org_id or None,
                content=cloud_init_content,
            )
            LOG.info("Created UserConfig '%s': %s", config_name, user_config.id)

        target_node.set_cloud_init_assignment({"user_data": user_config.id})
        LOG.info("Attached cloud-init to node '%s'", node_name)

    def prepare_nvcm_server(self, simulation_id: str, server_name: str) -> None:
        """Ensure the nvcm server node has an eth0 outbound interface for SSH.

        The switch nodes are left unconfigured - they will get their configuration
        via ZTP from the NVCM server running inside the simulation.

        Args:
            simulation_id: ID of the simulation
            server_name: Name of the server node (existing or newly created)
        """
        LOG.info(f"Preparing {server_name} for external access...")

        target_node = self._lookup_node_with_retry(simulation_id, server_name)
        if not target_node:
            # A missing node is logged and skipped here, not raised.
            LOG.warning("Node '%s' not found; skipping OOB prep", server_name)
            return

        has_eth0 = any(
            iface.name == "eth0" for iface in self.client.interfaces.list(node=target_node)
        )
        if not has_eth0:
            LOG.info(f"Creating eth0 outbound interface for {server_name}")
            self.client.interfaces.create(
                name="eth0",
                node=target_node,
                interface_type="OOB_INTF",
                link_up=True,
                outbound=True,
            )

    def create_ssh_service(
        self,
        simulation_id: str,
        node_name: str,
        interface_name: str = "eth0",
    ) -> tuple[str, int] | None:
        """Create an SSH service for a node and return (host, port).

        Checks for an existing SSH service first to avoid duplicates.

        Args:
            simulation_id: Simulation ID
            node_name: Name of the node
            interface_name: Interface to attach the service to

        Returns:
            (host, port) tuple, or None if the node/interface wasn't found
        """
        # Only the first node with a matching name is considered.
        node = next(
            (
                candidate
                for candidate in self.client.nodes.list(simulation=simulation_id)
                if candidate.name == node_name
            ),
            None,
        )
        if node is None:
            LOG.warning(f"Node '{node_name}' not found in simulation")
            return None

        target_iface = next(
            (
                iface
                for iface in self.client.interfaces.list(node=node)
                if iface.name == interface_name
            ),
            None,
        )
        if not target_iface:
            LOG.warning(f"Interface {interface_name} not found on {node_name}")
            return None

        # Check for existing SSH service on this interface
        for svc in self.client.services.list(simulation=simulation_id):
            if svc.interface.id == target_iface.id and svc.node_port == 22:
                LOG.info(
                    f"SSH service already exists for "
                    f"{node_name}:{interface_name} "
                    f"-> {svc.worker_fqdn}:{svc.worker_port}"
                )
                return (svc.worker_fqdn, svc.worker_port)

        # Create new SSH service
        svc = self.client.services.create(
            name=f"{node_name} SSH",
            interface=target_iface,
            node_port=22,
            service_type="SSH",
        )
        LOG.info(
            f"Created SSH service for {node_name}:{interface_name} "
            f"-> {svc.worker_fqdn}:{svc.worker_port}"
        )
        return (svc.worker_fqdn, svc.worker_port)
+ server_name: Name of the oob-mgmt-server node. + ssh_mac: MAC address of the outbound (SSH) interface. + """ + netplan_content = ( + "network:\n" + " version: 2\n" + " ethernets:\n" + " oob-ssh:\n" + " match:\n" + f" macaddress: {ssh_mac.lower()}\n" + " dhcp4: true\n" + ) + + data_payload = { + "files": [ + { + "path": "/etc/netplan/99-oob-ssh.yaml", + "content": netplan_content, + } + ], + "post_commands": [ + "#!/bin/bash\nnetplan apply", + ], + } + + target_node = None + for attempt in range(6): + for node in self.client.nodes.list( + simulation=simulation_id, + ): + if node.name == server_name: + target_node = node + break + if target_node: + break + LOG.info( + "Node '%s' not yet visible (attempt %d/6)...", + server_name, + attempt + 1, + ) + time.sleep(5) + + if not target_node: + LOG.error( + "Node '%s' not found -- cannot attach netplan", + server_name, + ) + return + + target_node.instructions.create( + executor="file", + data=data_payload, + wait_for_network=False, + ) + LOG.info( + "Attached netplan instruction to %s (SSH MAC %s)", + server_name, + ssh_mac, + ) + + def resolve_iface_by_mac( + self, + host: str, + port: int, + mac: str, + ) -> str | None: + """Resolve an interface name by its MAC address via SSH.""" + ssh_base = self._ssh_cmd(host, port) + cmd = ssh_base + [f"ip -o link | grep -i '{mac}' | awk -F': ' '{{print $2}}' | head -1"] + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=30, + ) + iface = result.stdout.strip() + if iface: + LOG.info("Resolved MAC %s -> %s", mac, iface) + return iface + except Exception: + pass + LOG.warning("Could not resolve interface for MAC %s", mac) + return None + + def prepare_server( + self, + host: str, + port: int, + *, + internal_mac: str, + internal_ip: str, + site_name: str, + oob_gateway: str | None, + relay_return_networks: list[str] | None = None, + bgp_asn: str = "4266000000", + ) -> str | None: + """Configure the nvcm-box server for AIR after --setup completes. 
+ + Runs via SSH after ``nvcm-box-setup.sh --setup`` finishes on boot. + Sets up everything the old cloud-init setup script used to do: + + 1. Internal interface IP + routes (resolved by MAC address) + 2. FRR/BGP config for OOB switch peering + 3. IP forwarding + MASQUERADE + 4. Node-exporter binary staging + manifest + populate-ztp helper + + Args: + host: SSH hostname (from AIR service). + port: SSH port. + internal_mac: MAC of the internal interface (oob-mgmt-switch). + internal_ip: IP/CIDR for the internal interface. + site_name: Site name for secrets.ini. + oob_gateway: Peer IP for BGP / next-hop for relay-return routes. + relay_return_networks: Prefixes that need return routes via OOB gw. + bgp_asn: BGP AS number for FRR. + + Returns: + Resolved internal interface name on success, ``None`` on failure. + """ + LOG.info("Preparing nvcm-box server via SSH...") + ssh_base = self._ssh_cmd(host, port) + gw = oob_gateway or "UNSET" + rr_nets = relay_return_networks or [] + ztp_url_host = internal_ip.split("/")[0] + user = NVCM_BOX_USER + + def _ssh(cmd: str, timeout: int = 30) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [*ssh_base, cmd], + capture_output=True, + text=True, + timeout=timeout, + ) + + try: + # -- 0. Resolve internal interface name by MAC --------------------- + iface_name = _ssh( + f"ip -o link | grep -i '{internal_mac.lower()}'" + " | awk -F': ' '{print $2}' | head -1" + ).stdout.strip() + if not iface_name: + LOG.error( + "Could not find interface with MAC %s", + internal_mac, + ) + return None + LOG.info( + "Internal interface: %s (MAC %s)", + iface_name, + internal_mac, + ) + + # -- 1. 
Internal interface IP + routes ----------------------------- + LOG.info( + "Configuring %s internal network (%s)...", + iface_name, + internal_ip, + ) + _ssh(f"sudo ip addr add {internal_ip} dev {iface_name} 2>/dev/null || true") + _ssh(f"sudo ip link set {iface_name} up") + internal_network = str(ipaddress.ip_network(internal_ip, strict=False)) + _ssh(f"sudo ip route add {internal_network} dev {iface_name} 2>/dev/null || true") + for rr_net in rr_nets: + _ssh( + f"sudo ip route replace {rr_net} via {gw} dev {iface_name} 2>/dev/null || true" + ) + LOG.info(" %s configured", iface_name) + + # -- 2. FRR/BGP with password -------------------------------------- + LOG.info("Configuring FRR/BGP (ASN %s, neighbor %s)...", bgp_asn, gw) + bgp_password = NVCM_NETWORK_SECRETS["bgp_password"] + kind_subnet = _ssh( + "sudo docker network inspect kind" + " -f '{{range .IPAM.Config}}{{.Subnet}} {{end}}' 2>/dev/null" + " | grep -oE '([0-9]+\\.){3}[0-9]+/[0-9]+' | head -1" + ).stdout.strip() + kind_prefix = ".".join(kind_subnet.split(".")[:2]) if kind_subnet else "172.18" + frr_metallb_prefix = f"{kind_prefix}.255.0/24" + + bridge_id = _ssh( + "sudo docker network inspect kind -f '{{.Id}}' 2>/dev/null | cut -c1-12" + ).stdout.strip() + if bridge_id: + _ssh( + f"sudo ip route add {frr_metallb_prefix} dev br-{bridge_id} 2>/dev/null || true" + ) + + _ssh("sudo sed -i 's/^bgpd=no/bgpd=yes/' /etc/frr/daemons") + + frr_conf = ( + "frr version 10\n" + "frr defaults traditional\n" + "hostname nvcm-box\n" + "log syslog informational\n" + "service integrated-vtysh-config\n" + "!\n" + f"ip prefix-list PL-METALLB seq 10 permit {frr_metallb_prefix}\n" + "ip prefix-list PL-METALLB seq 9999 deny any\n" + "!\n" + "route-map RM-EXPORT permit 10\n" + " match ip address prefix-list PL-METALLB\n" + "route-map RM-EXPORT deny 9999\n" + "!\n" + f"router bgp {bgp_asn}\n" + f" bgp router-id {ztp_url_host}\n" + " no bgp ebgp-requires-policy\n" + f" neighbor {gw} remote-as external\n" + f" neighbor {gw} 
password {bgp_password}\n" + " !\n" + " address-family ipv4 unicast\n" + " redistribute kernel route-map RM-EXPORT\n" + f" neighbor {gw} route-map RM-EXPORT out\n" + " exit-address-family\n" + "!\n" + ) + _ssh(f"sudo tee /etc/frr/frr.conf > /dev/null << 'FRREOF'\n{frr_conf}FRREOF") + _ssh("sudo systemctl enable frr") + _ssh("sudo systemctl restart frr") + LOG.info(" FRR BGP configured, advertising %s", frr_metallb_prefix) + + # -- 3. IP forwarding + MASQUERADE --------------------------------- + LOG.info("Enabling IP forwarding and MASQUERADE...") + _ssh("sudo sysctl -w net.ipv4.ip_forward=1 > /dev/null") + _ssh( + "grep -q 'net.ipv4.ip_forward=1' /etc/sysctl.conf" + " || echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.conf > /dev/null" + ) + _ssh( + "sudo iptables -t nat -C POSTROUTING -d 172.18.0.0/16 -j MASQUERADE 2>/dev/null" + " || sudo iptables -t nat -A POSTROUTING -d 172.18.0.0/16 -j MASQUERADE" + ) + LOG.info(" Forwarding enabled") + + # -- 4. Node-exporter staging + ZTP helper ------------------------- + LOG.info("Staging node-exporter binaries for ZTP...") + ne_version = NODE_EXPORTER_VERSION + ne_dir = f"/home/{user}/ztp-files/node-exporter/{ne_version}" + ne_base = NODE_EXPORTER_BASE_URL + _ssh(f"mkdir -p {ne_dir}") + + for gh_arch, out_name in [ + ("amd64", "node_exporter_amd64"), + ("armv5", "node_exporter_armv5"), + ]: + tarball = f"node_exporter-{ne_version}.linux-{gh_arch}.tar.gz" + url = f"{ne_base}/v{ne_version}/{tarball}" + expected_sha = NODE_EXPORTER_SHA256[gh_arch] + _ssh( + f"curl -fsSL '{url}' -o /tmp/{tarball}" + f" && echo '{expected_sha} /tmp/{tarball}' | sha256sum -c -" + f" && tar -xzf /tmp/{tarball} -C /tmp" + f" 'node_exporter-{ne_version}.linux-{gh_arch}/node_exporter'" + f" && mv '/tmp/node_exporter-{ne_version}.linux-{gh_arch}/node_exporter'" + f" '{ne_dir}/{out_name}'" + f" && rm -rf /tmp/{tarball}" + f" '/tmp/node_exporter-{ne_version}.linux-{gh_arch}'", + timeout=120, + ) + _ssh(f"chmod +x {ne_dir}/*") + _ssh(f"sudo 
chown -R {user}:{user} /home/{user}/ztp-files") + + manifest_json = ( + '{"images": [' + f'{{"platform": "node-exporter", "version": "{NODE_EXPORTER_VERSION}",' + ' "filename": "node_exporter_amd64",' + f' "path": "node-exporter/{NODE_EXPORTER_VERSION}/node_exporter_amd64",' + f' "sha256": "{NODE_EXPORTER_SHA256["amd64"]}",' + ' "tags": {}},' + f'{{"platform": "node-exporter", "version": "{NODE_EXPORTER_VERSION}",' + ' "filename": "node_exporter_armv5",' + f' "path": "node-exporter/{NODE_EXPORTER_VERSION}/node_exporter_armv5",' + f' "sha256": "{NODE_EXPORTER_SHA256["armv5"]}",' + ' "tags": {}}' + "]}" + ) + _ssh(f"echo '{manifest_json}' | jq . > /home/{user}/ztp-files/manifest.json") + _ssh(f"chown {user}:{user} /home/{user}/ztp-files/manifest.json") + + populate_script = ( + "#!/bin/bash\n" + "set -euo pipefail\n" + "export KUBECONFIG=/home/nvcm/.kube/config\n" + "\n" + f'NAMESPACE="{CONFIG_MANAGER_NAMESPACE}"\n' + 'PVC_NAME="ztp-os-images"\n' + 'SRC_DIR="/home/nvcm/ztp-files"\n' + 'POD_NAME="populate-ztp-files-$(date +%s)"\n' + "\n" + 'echo "Creating temporary pod to populate ZTP PVC..."\n' + 'kubectl run "$POD_NAME" \\\n' + ' --namespace="$NAMESPACE" \\\n' + " --image=busybox:1.36 \\\n" + " --restart=Never \\\n" + " --overrides='{\n" + ' "spec": {\n' + ' "containers": [{\n' + ' "name": "populate",\n' + ' "image": "busybox:1.36",\n' + ' "command": ["sleep", "300"],\n' + ' "volumeMounts": [{\n' + ' "name": "ztp-files",\n' + ' "mountPath": "/images"\n' + " }]\n" + " }],\n" + ' "volumes": [{\n' + ' "name": "ztp-files",\n' + ' "persistentVolumeClaim": {\n' + ' "claimName": "' + "$PVC_NAME" + '"\n' + " }\n" + " }]\n" + " }\n" + " }'\n" + "\n" + 'echo "Waiting for pod..."\n' + 'kubectl wait --for=condition=Ready "pod/$POD_NAME" \\\n' + ' -n "$NAMESPACE" --timeout=120s\n' + "\n" + 'echo "Copying files to PVC..."\n' + 'cd "$SRC_DIR"\n' + "tar czf /tmp/ztp-files.tar.gz .\n" + "kubectl cp /tmp/ztp-files.tar.gz \\\n" + ' "$NAMESPACE/$POD_NAME:/tmp/ztp-files.tar.gz"\n' + 
def attach_dhclient_tuning(
    self,
    simulation_id: str,
    cumulus_device_names: list[str],
) -> list[Any]:
    """Queue a dhclient.conf overwrite on the named Cumulus nodes.

    For every node of the simulation whose name appears in
    *cumulus_device_names*, a ``file``-executor node instruction is
    created.  The instruction writes ``AGGRESSIVE_DHCLIENT_CONF`` to
    ``/etc/dhcp/dhclient.conf`` and then sends SIGHUP to dhclient
    (best effort) so the new settings are picked up.

    Intended to be called before the simulation is started.

    Args:
        simulation_id: Simulation whose nodes are inspected.
        cumulus_device_names: Names of the nodes to configure.

    Returns:
        The objects returned by ``node.instructions.create``, one per
        matching node, in listing order.
    """
    wanted = set(cumulus_device_names)

    # One shared payload: every matching switch gets the same file + hook.
    conf_file = {
        "path": "/etc/dhcp/dhclient.conf",
        "content": AGGRESSIVE_DHCLIENT_CONF,
    }
    reload_hook = "#!/bin/bash\npkill -HUP dhclient 2>/dev/null || true"
    payload = {"files": [conf_file], "post_commands": [reload_hook]}

    matching = (
        candidate
        for candidate in self.client.nodes.list(simulation=simulation_id)
        if candidate.name in wanted
    )

    created: list[Any] = []
    for switch in matching:
        created.append(
            switch.instructions.create(
                executor="file",
                data=payload,
                wait_for_network=False,
            )
        )
        LOG.debug(f"Created dhclient tuning instruction for {switch.name}")

    LOG.info(f"Attached dhclient tuning to {len(created)}/{len(wanted)} Cumulus switches")
    return created
def upload_to_server(
    self,
    host: str,
    port: int,
    local_path: str,
    remote_path: str,
    *,
    excludes: list[str] | None = None,
    timeout: int = 300,
) -> bool:
    """Upload a local file or directory to the remote server via rsync.

    rsync runs with ``-az --delete`` over ``sshpass``/``ssh`` using the
    class-level ``_SSH_OPTS``.  The password is handed to ``sshpass``
    through the ``SSHPASS`` environment variable (``sshpass -e``) rather
    than being interpolated into the ``-e`` remote-shell string, which
    rsync re-splits on spaces.

    Args:
        host: SSH hostname.
        port: SSH port.
        local_path: Local file or directory to upload.
        remote_path: Destination path on the remote server.
        excludes: Extra rsync exclude patterns (appended to
            ``_RSYNC_EXCLUDES``).
        timeout: Max seconds for the transfer.

    Returns:
        True if rsync exited 0; False on a non-zero exit, on timeout, or
        when rsync could not be started at all.
    """
    import os  # function-scope: only needed to pass SSHPASS to the child

    source = Path(local_path).resolve()
    # rsync: a trailing slash means "copy the directory's contents";
    # a plain file must not carry one.
    local = f"{source}/" if source.is_dir() else str(source)

    # Single-argument remote shell for rsync's -e.  No secret in here, so
    # nothing needs quoting and nothing leaks into the process list.
    remote_shell = " ".join(["sshpass", "-e", "ssh", "-p", str(port), *self._SSH_OPTS])

    cmd = ["rsync", "-az", "--delete", "-e", remote_shell]
    for pattern in [*self._RSYNC_EXCLUDES, *(excludes or [])]:
        cmd.extend(["--exclude", pattern])
    cmd.extend([local, f"{NVCM_BOX_USER}@{host}:{remote_path}"])

    LOG.info("Uploading %s -> %s:%s ...", local_path, host, remote_path)
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
            env={**os.environ, "SSHPASS": NVCM_BOX_PASSWORD},
        )
    except subprocess.TimeoutExpired:
        LOG.error("rsync timed out after %ds", timeout)
        return False
    except OSError as exc:
        # e.g. rsync is not installed locally; keep the bool contract.
        LOG.error("Could not run rsync: %s", exc)
        return False

    if result.returncode != 0:
        LOG.error("rsync failed (exit %d): %s", result.returncode, result.stderr)
        return False
    LOG.info("Upload complete: %s", remote_path)
    return True
def configure_etc_hosts(
    self,
    host: str,
    port: int,
) -> bool:
    """Add /etc/hosts entries mapping the NVCM hostnames to the gateway IP.

    Over SSH, the method reads the LoadBalancer ingress IP of the first
    service in ``envoy-gateway-system`` labelled as owned by the Config
    Manager namespace.  If no IP is returned it falls back to
    ``127.0.0.1``.  It then appends one line with that IP and every name
    in ``_NVCM_HOSTS`` to the server's ``/etc/hosts``, unless the file
    already mentions ``CONFIG_MANAGER_HOSTNAME``.

    Args:
        host: SSH hostname.
        port: SSH port.

    Returns:
        True if the remote command that ensures the entry exited 0;
        False if it failed or any step raised.
    """
    LOG.info("Configuring /etc/hosts for %s...", CONFIG_MANAGER_HOSTNAME)
    ssh_base = self._ssh_cmd(host, port)
    kube = "KUBECONFIG=/home/nvcm/.kube/config"
    discover_cmd = (
        f"{kube} kubectl get svc -n envoy-gateway-system"
        " -l "
        f"'gateway.envoyproxy.io/owning-gateway-namespace={CONFIG_MANAGER_NAMESPACE}'"
        " -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}'"
        " 2>/dev/null"
    )
    try:
        discovered = subprocess.run(
            [*ssh_base, discover_cmd],
            capture_output=True,
            text=True,
            timeout=15,
        )
        gateway_ip = discovered.stdout.strip().strip("'")
        if not gateway_ip:
            LOG.warning("Could not discover gateway MetalLB IP; falling back to 127.0.0.1")
            gateway_ip = "127.0.0.1"

        hosts_line = f"{gateway_ip} {self._NVCM_HOSTS}"
        # -F: match the hostname literally (its dots are not wildcards).
        add_cmd = (
            f"grep -qF '{CONFIG_MANAGER_HOSTNAME}' /etc/hosts"
            f" || echo '{hosts_line}'"
            f" | sudo tee -a /etc/hosts > /dev/null"
        )
        added = subprocess.run(
            [*ssh_base, add_cmd],
            capture_output=True,
            text=True,
            timeout=15,
        )
        if added.returncode != 0:
            # Previously ignored: SSH/tee failures were reported as success.
            LOG.warning(
                "Failed to configure /etc/hosts (exit %d): %s",
                added.returncode,
                (added.stderr or added.stdout or "").strip(),
            )
            return False

        LOG.info(
            "Added /etc/hosts: %s -> %s (+ subdomains)",
            gateway_ip,
            CONFIG_MANAGER_HOSTNAME,
        )
        return True
    except Exception as exc:
        LOG.warning("Failed to configure /etc/hosts: %s", exc)
        return False
Kind node routes -- relay-return prefixes via host + 6. Host route -- relay-return prefixes via OOB switch + 7. isc-dhcp-relay -- broadcast DHCP on internal -> Kea MetalLB IP + + Args: + internal_iface: Resolved name of the internal interface + (facing the oob-mgmt-switch). + """ + if oob_gateway is None: + LOG.warning("No OOB gateway provided; skipping NAT/routing setup") + return False + gw = oob_gateway + rr_nets = relay_return_networks or [] + LOG.info("Configuring forwarding rules and routing for DHCP/ZTP...") + ssh_base = self._ssh_cmd(host, port) + + kube = "KUBECONFIG=/home/nvcm/.kube/config" + + def _ssh(cmd: str, timeout: int = 15) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [*ssh_base, cmd], + capture_output=True, + text=True, + timeout=timeout, + ) + + # Discover ZTP and DHCP MetalLB IPs + dhcp_ip = ( + _ssh( + f"{kube} kubectl get svc -n {CONFIG_MANAGER_NAMESPACE}" + f" {CONFIG_MANAGER_DHCP_DEPLOYMENT}-service" + " -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null" + ).stdout.strip() + or self._DHCP_LB_IP + ) + ztp_ip = ( + _ssh( + f"{kube} kubectl get svc -n {CONFIG_MANAGER_NAMESPACE}" + f" {CONFIG_MANAGER_ZTP_DEPLOYMENT}-service" + " -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null" + ).stdout.strip() + or self._ZTP_LB_IP + ) + LOG.info("ZTP service IP: %s", ztp_ip) + LOG.info("DHCP service IP: %s", dhcp_ip) + + try: + # -- 1. DOCKER-USER forwarding (internal <-> Kind bridge) ---------- + bridge_name = _ssh( + "sudo docker network inspect kind -f '{{.Id}}' 2>/dev/null | cut -c1-12" + ).stdout.strip() + br_iface = f"br-{bridge_name}" if bridge_name else "" + if br_iface: + for direction in [ + f"-i {internal_iface} -o {br_iface}", + f"-i {br_iface} -o {internal_iface}", + ]: + _ssh( + f"sudo iptables -C DOCKER-USER {direction} -j ACCEPT" + f" 2>/dev/null ||" + f" sudo iptables -I DOCKER-USER 1 {direction} -j ACCEPT" + ) + + # -- 2. 
ZTP DNAT (TCP 80/443 from internal iface) ----------------- + # Before switches have BGP routes to the MetalLB prefix, ZTP + # traffic arrives on the internal iface destined for the server IP. + # DNAT redirects it to the ZTP service. No DNAT for UDP 67 -- + # the relay handles DHCP and preserves giaddr. + _ssh("sudo iptables -t nat -N ZTP-FWD 2>/dev/null || true") + _ssh("sudo iptables -t nat -F ZTP-FWD") + _ssh( + f"sudo iptables -t nat -A ZTP-FWD -p tcp --dport 443" + f" -j DNAT --to-destination {ztp_ip}:443" + ) + _ssh( + f"sudo iptables -t nat -A ZTP-FWD -p tcp --dport 80" + f" -j DNAT --to-destination {ztp_ip}:80" + ) + _ssh( + f"sudo iptables -t nat -D PREROUTING -i {internal_iface}" + " -j ZTP-FWD 2>/dev/null || true" + ) + _ssh(f"sudo iptables -t nat -I PREROUTING 1 -i {internal_iface} -j ZTP-FWD") + + # -- 3. MASQUERADE exemptions (preserve client IP) ---------------- + for rr_net in rr_nets: + _ssh( + f"sudo iptables -t nat -C POSTROUTING -s {rr_net}" + f" -d {ztp_ip} -j RETURN 2>/dev/null ||" + f" sudo iptables -t nat -I POSTROUTING 1 -s {rr_net}" + f" -d {ztp_ip} -j RETURN" + ) + # DHCP: preserve relay source IP so Kea sees giaddr source + _ssh( + "sudo iptables -t nat -C POSTROUTING -d 172.18.0.0/16" + " -p udp --dport 67 -j RETURN 2>/dev/null ||" + " sudo iptables -t nat -I POSTROUTING -d 172.18.0.0/16" + " -p udp --dport 67 -j RETURN" + ) + # General MASQUERADE for all traffic to the Kind network + _ssh( + "sudo iptables -t nat -C POSTROUTING -d 172.18.0.0/16" + " -j MASQUERADE 2>/dev/null ||" + " sudo iptables -t nat -A POSTROUTING -d 172.18.0.0/16" + " -j MASQUERADE" + ) + + # -- 4. MASQUERADE for DHCP replies (pod -> relay-return nets) ----- + for rr_net in rr_nets: + _ssh( + f"sudo iptables -t nat -C POSTROUTING -d {rr_net}" + " -p udp --dport 67 -j MASQUERADE 2>/dev/null ||" + f" sudo iptables -t nat -I POSTROUTING -d {rr_net}" + " -p udp --dport 67 -j MASQUERADE" + ) + + # -- 5. 
Kind node routes (return path for DHCP/ZTP replies) ------- + kind_nodes = ( + _ssh( + "sudo docker ps --filter 'label=io.x-k8s.kind.cluster=nvcm'" + " --format '{{.Names}}'" + ) + .stdout.strip() + .splitlines() + ) + for node_name in kind_nodes: + if not node_name: + continue + for rr_net in rr_nets: + _ssh(f"sudo docker exec {node_name} ip route replace {rr_net} via 172.18.0.1") + + # -- 6. Host route (reply path out internal to OOB switch) --------- + for rr_net in rr_nets: + _ssh(f"sudo ip route replace {rr_net} via {gw} dev {internal_iface}") + + # -- 7. isc-dhcp-relay (broadcast DHCP -> Kea) -------------------- + # Pre-installed on the nvcm-box image. The relay converts + # broadcast DHCP discovers into unicast toward Kea, setting + # giaddr so Kea matches the correct subnet. + if br_iface: + _ssh( + f'printf \'SERVERS="{dhcp_ip}"\\n' + f'INTERFACES="{internal_iface} {br_iface}"\\n' + f'OPTIONS=""\\n\'' + " | sudo tee /etc/default/isc-dhcp-relay" + ) + _ssh("sudo systemctl enable isc-dhcp-relay") + _ssh("sudo systemctl restart isc-dhcp-relay") + + # Flush stale DHCP conntrack + _ssh("sudo conntrack -D -p udp --dport 67 2>/dev/null || true") + + rr_str = ", ".join(rr_nets) if rr_nets else "(none)" + ifc = internal_iface + LOG.info( + "Forwarding + routing configured:" + "\n DOCKER-USER: %s <-> Kind bridge (ACCEPT)" + "\n ZTP DNAT: %s TCP 80/443 -> %s" + "\n MASQUERADE skip: -s %s -d %s (ZTP client IP preserved)" + "\n MASQUERADE skip: -d 172.18.0.0/16 UDP 67 (relay source)" + "\n MASQUERADE: -d 172.18.0.0/16 (general Kind traffic)" + "\n MASQUERADE: -d %s UDP 67 (DHCP reply)" + "\n Kind routes: %s via 172.18.0.1" + "\n Host route: %s via %s dev %s" + "\n isc-dhcp-relay: %s + %s -> %s", + ifc, + ifc, + ztp_ip, + rr_str, + ztp_ip, + rr_str, + rr_str, + rr_str, + gw, + ifc, + ifc, + br_iface or "(no bridge)", + dhcp_ip, + ) + return True + + except Exception as exc: + LOG.warning("Failed to configure forwarding/routing: %s", exc) + return False + + def 
def queue_render_all(
    self,
    host: str,
    port: int,
    namespace: str = CONFIG_MANAGER_NAMESPACE,
    deployment: str = CONFIG_MANAGER_RENDER_API_DEPLOYMENT,
    timeout: int = 90,
) -> bool:
    """Ask the render API to queue a render for all devices.

    Over SSH, runs ``kubectl exec`` into *deployment* and executes a
    Python one-liner there that POSTs a JSON commit message to
    ``http://127.0.0.1:9000/v1/render/all`` and prints the response body.

    Args:
        host: SSH hostname.
        port: SSH port.
        namespace: Kubernetes namespace of the deployment.
        deployment: Deployment to exec into.
        timeout: Max seconds for the whole SSH command.

    Returns:
        True if the remote command exited 0; False on a non-zero exit,
        on timeout, or on any other error.
    """
    body = b'{"commit_message":"AIR demo render"}'
    # Kept as a single-line script so it can be passed to `python -c`.
    snippet = "".join(
        [
            "import urllib.request;",
            "req=urllib.request.Request(",
            "'http://127.0.0.1:9000/v1/render/all',",
            f"data={body!r},",
            "headers={'Content-Type':'application/json','X-Forwarded-User':'admin'},",
            "method='POST');",
            "print(urllib.request.urlopen(req, timeout=60).read().decode())",
        ]
    )
    remote_cmd = " ".join(
        [
            "KUBECONFIG=/home/nvcm/.kube/config",
            "kubectl exec",
            f"-n {namespace}",
            f"deployment/{deployment}",
            "--",
            f"python -c {shlex.quote(snippet)}",
        ]
    )
    argv = [*self._ssh_cmd(host, port), remote_cmd]

    try:
        LOG.info("Queueing render-all for Config Manager devices...")
        completed = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        text = (completed.stdout or completed.stderr or "").strip()
        if completed.returncode == 0:
            if text:
                # Only the last line of the response is surfaced.
                LOG.info("[render-all] %s", text.splitlines()[-1])
            return True
        LOG.warning("Failed to queue render-all: %s", text)
        return False
    except subprocess.TimeoutExpired:
        LOG.warning("Timed out queueing render-all")
        return False
    except Exception as exc:
        LOG.warning("Failed to queue render-all: %s", exc)
        return False
def create_nautobot_demo_user(
    self,
    host: str,
    port: int,
    username: str = DEFAULT_NAUTOBOT_DEMO_USERNAME,
    password: str = DEFAULT_NAUTOBOT_DEMO_PASSWORD,
    namespace: str = CONFIG_MANAGER_NAMESPACE,
    deployment: str = CONFIG_MANAGER_NAUTOBOT_DEPLOYMENT,
    timeout: int = 60,
) -> bool:
    """Create or update a superuser in Nautobot via kubectl exec and nbshell.

    A small Django script is written to ``/tmp/demo_user.py`` on the
    server and piped into ``nautobot-server nbshell`` inside *deployment*.
    The script get-or-creates *username*, sets *password*, and marks the
    user as staff and superuser.  The temp file is removed after the run
    because it contains the password.

    Args:
        host: SSH hostname.
        port: SSH port.
        username: Nautobot login to create or update.
        password: Password to set for that login.
        namespace: Kubernetes namespace of the Nautobot deployment.
        deployment: Deployment to exec into.
        timeout: Max seconds for the nbshell run.

    Returns:
        True if both remote steps exited 0; False otherwise (including
        timeouts and any other error).
    """
    ssh_base = self._ssh_cmd(host, port)
    remote_script = "/tmp/demo_user.py"

    # Every caller-supplied value is embedded via repr() so quotes or
    # backslashes in it cannot break out of the generated string literals.
    email = f"{username}@localhost"
    created_msg = f"Created user {username}"
    updated_msg = f"Updated password for {username}"
    script = (
        "from django.contrib.auth import get_user_model\n"
        "User = get_user_model()\n"
        "u, created = User.objects.get_or_create(\n"
        f"    username={username!r},\n"
        f"    defaults={{'email': {email!r}}},\n"
        ")\n"
        f"u.set_password({password!r})\n"
        "u.is_superuser = True\n"
        "u.is_staff = True\n"
        "u.save()\n"
        f"print({created_msg!r} if created else {updated_msg!r})\n"
    )
    heredoc_end = "NAUTOBOT_DEMO_END"
    # Quoted heredoc delimiter: the remote shell performs no expansion.
    cmd_create = f"cat > {remote_script} << '{heredoc_end}'\n{script}\n{heredoc_end}"
    kube = "KUBECONFIG=/home/nvcm/.kube/config"
    # Preserve nbshell's exit status while always deleting the script.
    cmd_run = (
        f"{kube} kubectl exec -i -n {namespace} deployment/{deployment} -- "
        f"nautobot-server nbshell < {remote_script}"
        f"; rc=$?; rm -f {remote_script}; exit $rc"
    )
    try:
        LOG.info("Creating Nautobot user %s via nbshell...", username)
        r1 = subprocess.run(
            [*ssh_base, cmd_create],
            capture_output=True,
            text=True,
            timeout=15,
        )
        if r1.returncode != 0:
            LOG.warning("Failed to write demo user script: %s", r1.stderr or r1.stdout)
            return False
        r2 = subprocess.run(
            [*ssh_base, cmd_run],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        if r2.returncode != 0:
            LOG.warning("Failed to create demo user: %s", r2.stderr or r2.stdout)
            return False
        for line in (r2.stdout or "").strip().splitlines():
            LOG.info("[nautobot-demo] %s", line)
        return True
    except subprocess.TimeoutExpired:
        LOG.warning("Nautobot nbshell timed out")
        return False
    except Exception as exc:
        LOG.warning("Failed to create Nautobot demo user: %s", exc)
        return False
+ """ + target_names = set(cumulus_device_names) + if not target_names: + return 0 + + nodes_to_reset = [ + node + for node in self.client.nodes.list(simulation=simulation_id) + if node.name in target_names + ] + + if not nodes_to_reset: + LOG.warning("No matching Cumulus nodes found to reset") + return 0 + + LOG.info( + "Resetting %d Cumulus node(s) to force fresh ZTP/DHCP cycle: %s", + len(nodes_to_reset), + ", ".join(n.name for n in nodes_to_reset), + ) + reset_count = 0 + for node in nodes_to_reset: + try: + node.reset() + reset_count += 1 + LOG.debug("Reset requested for %s", node.name) + except Exception as exc: + LOG.warning("Failed to reset node %s: %s", node.name, exc) + + LOG.info( + "Reset requested for %d/%d node(s); monitor device state in Nautobot", + reset_count, + len(nodes_to_reset), + ) + return reset_count + + def restart_dhcp_refresh( + self, + host: str, + port: int, + namespace: str = CONFIG_MANAGER_NAMESPACE, + deployment: str = CONFIG_MANAGER_DHCP_REFRESH_DEPLOYMENT, + ) -> bool: + """Restart the DHCP refresh deployment so it syncs config immediately. + + After deployment finishes, the DHCP refresh CronJob/deployment may + not run for up to 5 minutes. Restarting forces an immediate sync + of DHCP configuration from Nautobot data. 
def ensure_temporal_search_attributes(
    self,
    host: str,
    port: int,
    *,
    namespace: str = CONFIG_MANAGER_NAMESPACE,
    max_attempts: int = 3,
    wait_between: int = 30,
) -> bool:
    """Verify Temporal search attributes exist; restart the worker if not.

    Each attempt:
      1. Lists search attributes by exec-ing the ``temporal`` CLI in the
         Temporal frontend deployment.
      2. If every name in ``_TEMPORAL_SEARCH_ATTRIBUTES`` appears in the
         output, returns True.
      3. Otherwise, unless this was the last attempt, restarts the
         Temporal worker deployment, waits for the rollout, and checks
         again.

    A failure of the listing command or of the restart is logged and
    counts as a used attempt; it is never raised to the caller.

    Args:
        host: SSH hostname.
        port: SSH port.
        namespace: Kubernetes namespace of the Temporal deployments.
        max_attempts: Maximum number of checks.
        wait_between: Seconds to wait after a failed check, and the
            ``kubectl rollout status`` timeout.

    Returns:
        True if all attributes are present (possibly after a restart),
        False otherwise.
    """
    ssh_base = self._ssh_cmd(host, port)
    kube = "KUBECONFIG=/home/nvcm/.kube/config"
    worker = CONFIG_MANAGER_TEMPORAL_WORKER_DEPLOYMENT

    def _ssh(cmd: str, timeout: int = 30) -> subprocess.CompletedProcess[str]:
        return subprocess.run(
            [*ssh_base, cmd],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    def _check_attributes() -> list[str]:
        """Return the expected attribute names absent from the CLI output."""
        result = _ssh(
            f"{kube} kubectl exec -n {namespace}"
            f" deploy/{CONFIG_MANAGER_TEMPORAL_FRONTEND_DEPLOYMENT} --"
            " temporal operator search-attribute list"
            " --address localhost:7233 2>/dev/null"
        )
        output = result.stdout or ""
        # Plain substring test against the CLI's listing.
        return [name for name in self._TEMPORAL_SEARCH_ATTRIBUTES if name not in output]

    def _restart_worker() -> None:
        """Restart the worker deployment and wait for its rollout."""
        LOG.info("Restarting %s so temporal-setup init container re-runs...", worker)
        _ssh(f"{kube} kubectl rollout restart deployment/{worker} -n {namespace}")
        LOG.info("Waiting %ds for worker rollout to complete...", wait_between)
        _ssh(
            f"{kube} kubectl rollout status"
            f" deployment/{worker} -n {namespace}"
            f" --timeout={wait_between}s 2>/dev/null || true",
            timeout=wait_between + 15,
        )

    LOG.info("Checking Temporal search attributes...")

    for attempt in range(1, max_attempts + 1):
        try:
            missing = _check_attributes()
        except Exception as exc:  # includes subprocess.TimeoutExpired
            LOG.warning(
                "Could not check Temporal search attributes (attempt %d/%d): %s",
                attempt,
                max_attempts,
                exc,
            )
            if attempt < max_attempts:
                time.sleep(wait_between)
            continue

        if not missing:
            LOG.info("All Temporal search attributes are registered")
            return True

        LOG.warning(
            "Missing Temporal search attributes (attempt %d/%d): %s",
            attempt,
            max_attempts,
            ", ".join(missing),
        )

        if attempt < max_attempts:
            try:
                _restart_worker()
            except Exception as exc:
                # An SSH timeout here used to escape and break the bool contract.
                LOG.warning(
                    "Could not restart %s (attempt %d/%d): %s",
                    worker,
                    attempt,
                    max_attempts,
                    exc,
                )
            time.sleep(10)

    LOG.warning(
        "Temporal search attributes still missing after %d attempts. "
        "You may need to register them manually — see docs.",
        max_attempts,
    )
    return False
Stop with: kill %s", + tunnel_proc.pid, + tunnel_proc.pid, + ) + except Exception as e: + LOG.warning("Could not start SOCKS tunnel: %s", e) + LOG.info("Start it manually in another terminal: %s", ssh_cmd) + + # Find Chrome/Chromium; path has spaces so pass as list to Popen + _chrome_paths = ( + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", + "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary", + "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", + "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", + ) + chrome_exe = None + for p in _chrome_paths: + if os.path.isfile(p): + chrome_exe = p + break + if chrome_exe: + LOG.info("Launching browser with SOCKS proxy...") + try: + subprocess.Popen( + [ + chrome_exe, + f"--proxy-server=socks5://localhost:{socks_port}", + "--user-data-dir=/tmp/chrome-nvcm-proxy", + "--ignore-certificate-errors", + f"https://nautobot.{CONFIG_MANAGER_HOSTNAME}", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + except Exception as e: + LOG.warning("Could not launch browser: %s", e) + else: + LOG.warning( + "No Chrome/Chromium found in Applications. " + "Use the commands above to connect." + ) + else: + LOG.info( + "Auto-launch is only supported on macOS. Use the commands above to connect." + ) + + def _poll_provisioning_status( + self, + host: str, + port: int, + expected_total: int, + status_ref: dict[str, int | str], + stop_event: threading.Event, + done_event: threading.Event, + interval: int = 30, + namespace: str = CONFIG_MANAGER_NAMESPACE, + deployment: str = CONFIG_MANAGER_NAUTOBOT_DEPLOYMENT, + ) -> None: + """Poll Nautobot for the number of Cumulus Linux devices with status Provisioned. + + Updates *status_ref* in-place with ``provisioned``, ``total``, and + ``detail`` keys. Sets *done_event* when provisioned == expected_total. + Runs in a background thread; exits when *stop_event* is set. 
def get_provisioning_status(
    self,
    host: str,
    port: int,
    namespace: str = CONFIG_MANAGER_NAMESPACE,
    deployment: str = CONFIG_MANAGER_NAUTOBOT_DEPLOYMENT,
) -> tuple[int, int, list[str]]:
    """Query Nautobot once for device provisioning progress.

    Runs the provisioning nbshell snippet inside the Nautobot deployment
    (``kubectl exec`` over SSH) and parses the first output line shaped like
    ``<provisioned>/<total>|<name>,<name>,...``.

    Args:
        host: SSH hostname of the box running kubectl.
        port: SSH port on ``host``.
        namespace: Kubernetes namespace of the Nautobot deployment.
        deployment: Name of the Nautobot deployment to exec into.

    Returns:
        ``(provisioned, total, remaining_names)``.  ``(0, 0, [])`` when the
        command fails, times out, or prints no parsable status line.
    """
    ssh_base = self._ssh_cmd(host, port)
    kube = "KUBECONFIG=/home/nvcm/.kube/config"
    nbshell_script = _NAUTOBOT_PROVISIONING_NBSHELL
    cmd = (
        f"{kube} kubectl exec -i -n {namespace} deployment/{deployment}"
        f' -- nautobot-server nbshell --command "{nbshell_script}"'
    )
    try:
        result = subprocess.run(
            [*ssh_base, cmd],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except (subprocess.SubprocessError, OSError, ValueError) as exc:
        # Best-effort status probe: report "unknown" instead of raising, but
        # leave a trace so a persistent failure can be diagnosed.
        LOG.debug("Provisioning status query failed: %s", exc)
        return 0, 0, []

    if result.returncode != 0:
        return 0, 0, []

    for line in result.stdout.strip().splitlines():
        if "/" not in line:
            continue
        counts, _, remaining = line.partition("|")
        prov_str, _, total_str = counts.partition("/")
        try:
            prov = int(prov_str.strip())
            total = int(total_str.strip())
        except ValueError:
            # Unrelated output that merely contains "/" (a path, a warning);
            # keep scanning rather than discarding the real status line.
            continue
        names = [n.strip() for n in remaining.split(",") if n.strip()]
        return prov, total, names
    return 0, 0, []
+ """ + ssh_base = self._ssh_cmd(host, port) + kube = "KUBECONFIG=/home/nvcm/.kube/config" + cmd = f"sudo {kube} kubectl get pods -n {namespace} --no-headers 2>/dev/null" + try: + result = subprocess.run( + [*ssh_base, cmd], + capture_output=True, + text=True, + timeout=15, + ) + pods: list[dict[str, str]] = [] + for line in result.stdout.splitlines(): + parts = line.split() + if len(parts) >= 5: + pods.append( + { + "name": parts[0], + "ready": parts[1], + "status": parts[2], + "restarts": parts[3], + "age": parts[4], + } + ) + return pods + except Exception: + return [] + + def monitor_services( + self, + host: str, + port: int, + expected_devices: int = 0, + stop_event: threading.Event | None = None, + ) -> None: + """Tail DHCP and ZTP logs (interleaved plain text via LOG). + + - DHCP deployment logs filtered to DHCP4 + - ZTP deployment logs filtered to non-health + + When *expected_devices* > 0, polls Nautobot for provisioning status and + automatically exits once all devices are Provisioned. + When *stop_event* is provided, the loop exits when it is set. 
+ """ + self._monitor_services_plain(host, port, expected_devices, stop_event) + + def _monitor_services_plain( + self, + host: str, + port: int, + expected_devices: int = 0, + stop_event: threading.Event | None = None, + ) -> None: + """Plain-text fallback for monitoring DHCP + ZTP logs.""" + ssh_base = self._ssh_cmd(host, port) + + dhcp_cmd = ( + "sudo KUBECONFIG=/home/nvcm/.kube/config" + f" kubectl logs -f deployment/{CONFIG_MANAGER_DHCP_DEPLOYMENT}" + f" -c kea -n {CONFIG_MANAGER_NAMESPACE} 2>&1" + " | grep --line-buffered DHCP4" + ) + ztp_cmd = ( + "sudo KUBECONFIG=/home/nvcm/.kube/config" + f" kubectl logs -f deployment/{CONFIG_MANAGER_ZTP_DEPLOYMENT}" + f" -c http-lb -n {CONFIG_MANAGER_NAMESPACE} 2>&1" + " | grep --line-buffered -v health" + ) + + outer_stop = stop_event + stop_event = threading.Event() + done_event = threading.Event() + status_ref: dict[str, int | str] = { + "provisioned": 0, + "total": expected_devices, + "detail": "", + } + + if expected_devices > 0: + poll_thread = threading.Thread( + target=self._poll_provisioning_status, + args=(host, port, expected_devices, status_ref, stop_event, done_event), + daemon=True, + ) + poll_thread.start() + + LOG.info( + "Monitoring DHCP + ZTP logs%s (Ctrl+C to stop)...\n", + f" — waiting for {expected_devices}/{expected_devices} Provisioned" + if expected_devices + else "", + ) + procs: list[subprocess.Popen[str]] = [] + try: + dhcp_proc = subprocess.Popen( + [*ssh_base, dhcp_cmd], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + procs.append(dhcp_proc) + ztp_proc = subprocess.Popen( + [*ssh_base, ztp_cmd], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + procs.append(ztp_proc) + + fds = { + dhcp_proc.stdout: "DHCP", + ztp_proc.stdout: "ZTP", + } + last_prov = -1 + while fds: + if done_event.is_set(): + break + if outer_stop is not None and outer_stop.is_set(): + break + readable, _, _ = select.select(list(fds.keys()), [], [], 1.0) + for fd in readable: + 
def wait_for_cloud_init(
    self,
    host: str,
    port: int,
    timeout: int = 1800,
) -> bool:
    """Wait for SSH, then follow the cloud-init log until setup completes.

    Phase 1: run ``true`` over SSH every 10 seconds until it succeeds.
    Phase 2: tail ``/var/log/cloud-init-output.log`` over SSH, echoing each
    line, until a line contains ``self._SETUP_COMPLETE_MARKER``.

    Both phases share a single deadline.  In phase 2 the deadline is only
    checked when a new log line arrives, so a silent log can outlast it.

    Args:
        host: SSH hostname (from AIR service).
        port: SSH port (from AIR service).
        timeout: Max seconds to wait (default 30 min).  A value <= 0 returns
            False immediately without contacting the host.

    Returns:
        True if the completion marker was seen; False on timeout, when the
        tail ends early, when interrupted with Ctrl+C, or when ``timeout``
        is <= 0.
    """
    if timeout <= 0:
        return False

    deadline = time.monotonic() + timeout
    ssh_base = self._ssh_cmd(host, port)

    # -- Phase 1: wait for SSH ----------------------------------------
    LOG.info(f"Waiting for SSH to become reachable on {host}:{port}...")
    start = time.monotonic()
    while time.monotonic() < deadline:
        try:
            result = subprocess.run(
                [*ssh_base, "true"],
                capture_output=True,
                timeout=10,
            )
            if result.returncode == 0:
                elapsed = int(time.monotonic() - start)
                LOG.info(f"SSH is reachable (after {elapsed}s)")
                break
        except subprocess.TimeoutExpired:
            pass

        elapsed = int(time.monotonic() - start)
        # Throttle the progress message to roughly one per 30 seconds.
        if elapsed % 30 < 10:
            LOG.info(f" [{elapsed}s] Still waiting for SSH...")
        time.sleep(10)
    else:
        # Loop ran out of time without hitting the `break` above.
        LOG.warning("Timed out waiting for SSH. Log in manually to check status.")
        return False

    # -- Phase 2: tail cloud-init output --------------------------------
    LOG.info("Tailing cloud-init output ...")
    LOG.info("(Ctrl+C to stop tailing and continue)\n")

    # Bound before the try so the cleanup below is safe even if Popen itself
    # is interrupted or fails.
    proc = None
    try:
        proc = subprocess.Popen(
            [*ssh_base, "sudo", "tail", "-n", "+1", "-f", "/var/log/cloud-init-output.log"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
        )

        assert proc.stdout is not None
        for line in proc.stdout:
            line = line.rstrip("\n")
            LOG.info("[oob-mgmt-server] %s", line)

            if self._SETUP_COMPLETE_MARKER in line:
                LOG.info("\nCloud-init setup finished successfully.")
                return True

            if time.monotonic() >= deadline:
                LOG.warning(
                    "\nTimed out waiting for setup to complete. Check the server manually."
                )
                return False

        LOG.warning(
            "Log tail ended unexpectedly. SSH session "
            "may have dropped -- check the server manually."
        )
        return False

    except KeyboardInterrupt:
        LOG.info("\nTailing interrupted. Setup may still be running on the server.")
        return False
    finally:
        # Single cleanup point for every exit path: stop the tail if it is
        # still running and always reap it so no zombie process is left.
        if proc is not None:
            if proc.poll() is None:
                proc.terminate()
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
Outputs instructions to create Kind cluster and deploy NVCM + + Args: + simulation_id: ID of the simulation + nvcm_config: NVCM server configuration + config_manager_repo: URL to the nv-config-manager git repository + config_manager_ref: Git branch to use + + Returns: + Dict with deployment info (hostname, metallb_ips, etc.) + """ + server_name = nvcm_config.server_name + LOG.info(f"Setting up NVCM on '{server_name}' inside the simulation...") + + # Find the nvcm server node + nvcm_node = None + for node in self.client.nodes.list(simulation=simulation_id): + if node.name == server_name: + nvcm_node = node + break + + if not nvcm_node: + raise ValueError(f"Node '{server_name}' not found in simulation") + + # Wait for node to be ready + LOG.info(f"Waiting for {server_name} to be ready...") + while nvcm_node.state != "RUNNING": + LOG.info(f" State: {nvcm_node.state}") + time.sleep(10) + nvcm_node.refresh() + + # Create SSH service for the server + LOG.info(f"Creating SSH service for {server_name}...") + ssh_service = None + for iface in self.client.interfaces.list(node=nvcm_node): + if iface.name == "eth0": + existing = self.client.services.list(simulation=simulation_id) + for svc in existing: + if svc.interface.id == iface.id and svc.node_port == 22: + ssh_service = svc + break + + if not ssh_service: + ssh_service = self.client.services.create( + name=f"{server_name} SSH", + interface=iface, + node_port=22, + service_type="SSH", + ) + break + + if not ssh_service: + raise ValueError(f"Could not create SSH service for {server_name}") + + ssh_host = ssh_service.worker_fqdn + ssh_port = ssh_service.worker_port + LOG.info(f"SSH available at: ssh -p {ssh_port} nvcm@{ssh_host}") + + # Wait for SSH to be ready + LOG.info("Waiting for SSH to be ready...") + time.sleep(60) # Give the node time to boot fully + + # Generate the deployment script + deploy_script = self._generate_nvcm_deploy_script( + nvcm_config=nvcm_config, + config_manager_repo=config_manager_repo, + 
config_manager_ref=config_manager_ref, + ) + + # The actual deployment would be done via SSH + # For now, we'll output the commands needed + LOG.info("\n" + "=" * 70) + LOG.info("NVCM SERVER SETUP INSTRUCTIONS") + LOG.info("=" * 70) + LOG.info("\n1. Connect to the nvcm-server:") + LOG.info(f" ssh -p {ssh_port} nvcm@{ssh_host}") + LOG.info("\n2. Run the setup script:") + LOG.info(f" {NVCM_SERVER_SETUP_SCRIPT}") + LOG.info("\n3. Run the deployment script:") + LOG.info(f" {deploy_script}") + LOG.info("=" * 70 + "\n") + + return { + "ssh_host": ssh_host, + "ssh_port": str(ssh_port), + "ssh_command": f"ssh -p {ssh_port} nvcm@{ssh_host}", + "metallb_range": nvcm_config.metallb_ip_range, + "nvcm_size": nvcm_config.nvcm_size, + # Predictable credentials for e2e testing + "switch_user": NVCM_SECRETS["nvcm_user"], + "switch_password": NVCM_SECRETS["nvcm_password"], + "nautobot_user": NVCM_SECRETS["nautobot_superuser"], + "nautobot_password": NVCM_SECRETS["nautobot_password"], + } + + def _generate_nvcm_deploy_script( + self, + nvcm_config: NVCMServerConfig, + config_manager_repo: str, + config_manager_ref: str, + ) -> str: + """Generate the NVCM deployment script to run on the server. + + Uses predictable secrets so that Temporal workers know the credentials + to use when connecting to switches after ZTP. + + Args: + nvcm_config: NVCM server configuration + config_manager_repo: Git repo URL + config_manager_ref: Git branch + + Returns: + Shell script as a string + """ + metallb_start, metallb_end = nvcm_config.metallb_ip_range.split("-") + secrets = NVCM_SECRETS + + script = f"""#!/bin/bash +set -euo pipefail + +echo "=== Deploying NVCM inside AIR simulation ===" + +# Clone the NVIDIA Config Manager repository +if [ ! 
-d "nv-config-manager" ]; then + git clone -b {config_manager_ref} {config_manager_repo} nv-config-manager +fi +cd nv-config-manager + +# Create Kind cluster with proper config +cat > /tmp/kind-config.yaml << 'EOF' +{NVCM_KIND_CONFIG} +EOF + +# Delete existing cluster if present +kind delete cluster --name nvcm || true + +# Create new cluster +kind create cluster --name nvcm --config /tmp/kind-config.yaml + +# Wait for cluster to be ready +kubectl wait --for=condition=Ready nodes --all --timeout=300s + +# Install MetalLB +kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.14.5/config/manifests/metallb-native.yaml + +# Wait for MetalLB to be ready +kubectl wait --namespace metallb-system \\ + --for=condition=ready pod \\ + --selector=app=metallb \\ + --timeout=120s + +# Configure MetalLB IP pool +cat << 'METALLB_EOF' | kubectl apply -f - +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: nvcm-pool + namespace: metallb-system +spec: + addresses: + - {metallb_start}-{metallb_end} +--- +apiVersion: metallb.io/v1beta1 +kind: L2Advertisement +metadata: + name: nvcm-l2 + namespace: metallb-system +spec: + ipAddressPools: + - nvcm-pool +METALLB_EOF + +# Get IPs from the pool for NVCM services +NVCM_IP=$(echo {metallb_start} | awk -F. '{{print $1"."$2"."$3"."$4+1}}') +ZTP_IP=$(echo {metallb_start} | awk -F. '{{print $1"."$2"."$3"."$4+2}}') +DHCP_IP=$(echo {metallb_start} | awk -F. 
'{{print $1"."$2"."$3"."$4+3}}') + +echo "NVCM will be deployed with:" +echo " Hostname: nvcm.air.local" +echo " ZTP IP: $ZTP_IP" +echo " DHCP IP: $DHCP_IP" + +# Create predictable secrets file for e2e testing +# These credentials are used by Temporal workers to connect to switches +cat > /tmp/nvcm-secrets.ini << 'SECRETS_EOF' +[nautobot] +superuser_name = {secrets["nautobot_superuser"]} +superuser_password = {secrets["nautobot_password"]} +superuser_email = admin@nvcm.air.local +secret_key = {secrets["nautobot_secret_key"]} + +[database.nautobot] +password = {secrets["nautobot_db_password"]} + +[database.temporal] +password = {secrets["temporal_db_password"]} + +[database.temporal_visibility] +password = {secrets["temporal_visibility_db_password"]} + +[database.config_store] +password = {secrets["config_store_db_password"]} + +[database.dhcp] +password = {secrets["dhcp_db_password"]} + +[redis] +password = {secrets["redis_password"]} + +[temporal] +# Credentials for Temporal workers to use when connecting to switches +# These are the credentials that ZTP will configure on the switches +device_user = {secrets["nvcm_user"]} +device_password = {secrets["nvcm_password"]} +SECRETS_EOF + +echo "=== Predictable secrets for e2e testing ===" +echo "Switch credentials after ZTP:" +echo " User: {secrets["nvcm_user"]}" +echo " Password: {secrets["nvcm_password"]}" +echo "Nautobot admin:" +echo " User: {secrets["nautobot_superuser"]}" +echo " Password: {secrets["nautobot_password"]}" + +# Run the NVCM deployment with predictable secrets +./deploy/deploy.sh \\ + --hostname nvcm.air.local \\ + --size {nvcm_config.nvcm_size} \\ + --lb-provider metallb \\ + --ztp-lb-ip $ZTP_IP \\ + --dhcp-lb-ip $DHCP_IP \\ + --secrets-file /tmp/nvcm-secrets.ini \\ + --yes + +echo "=== NVCM deployment complete ===" +echo "" +echo "NVCM is now running inside the AIR simulation." 
def delete_simulation(self, simulation_id: str) -> None:
    """Remove a simulation through the AIR client.

    Logs before and after the delete call; any error raised by the client
    propagates to the caller.

    Args:
        simulation_id: ID of the simulation to delete
    """
    simulations = self.client.simulations
    LOG.info(f"Deleting simulation {simulation_id}...")
    simulations.delete(simulation_id)
    LOG.info("Simulation deleted")
+"""AIR topology builder for nvcm-air-simulation.""" + +from __future__ import annotations + +import atexit +import ipaddress +import logging +import os +import re +import sys +import tempfile +from pathlib import Path +from typing import Any + +import yaml + +from nv_config_manager_installer.air_sim.constants import ( + CUMULUS_VX_IMAGES, + DEFAULT_CUMULUS_VERSION, + DEFAULT_NODE_CPU, + DEFAULT_NODE_MEMORY, + DEFAULT_SERVER_OS, + _BlockStyleDumper, +) +from nv_config_manager_installer.air_sim.models import CableConnection, DeviceInfo, NVCMServerConfig + +LOG = logging.getLogger(__name__) + + +def _create_version_override_yaml(yaml_path: str, target_version: str) -> str: + """Create a modified copy of the topology YAML with intended-firmware overridden. + + Walks devices[*].local_config_context_data.intended-firmware.version and + replaces the value with *target_version* for every Cumulus device. The + modified YAML is written to a temp file whose path is returned. An atexit + handler is registered to clean it up. 
+ """ + with open(yaml_path) as f: + site_design = yaml.safe_load(f) + + updated = 0 + for device in site_design.get("devices", []): + platform = device.get("platform", "") + if "Cumulus" not in platform: + continue + ctx = device.get("local_config_context_data") + if ctx is None: + ctx = {} + device["local_config_context_data"] = ctx + fw = ctx.get("intended-firmware") + if fw is None: + fw = {} + ctx["intended-firmware"] = fw + old = fw.get("version") + fw["version"] = target_version + LOG.debug( + "Version override %s: %s -> %s", + device.get("name", "?"), + old, + target_version, + ) + updated += 1 + + tmp = tempfile.NamedTemporaryFile( + mode="w", + suffix=".yaml", + prefix="cumulus_override_", + delete=False, + ) + yaml.dump(site_design, tmp, Dumper=_BlockStyleDumper, default_flow_style=False) + tmp.close() + atexit.register(os.unlink, tmp.name) + + LOG.info( + "Created version-override YAML: %s (%d devices updated to %s)", + tmp.name, + updated, + target_version, + ) + return tmp.name + + +def _resolve_oob_server_ips_from_topology( + site_design: dict[str, Any], + oob_server_name: str, +) -> tuple[str, str | None]: + """Resolve oob-mgmt-server eth1 IP and OOB gateway from topology. + + Scans site_design["ip_addresses"] for device == oob_server_name and + interface == "eth1". Returns (address, gateway) where gateway is the + other address in the same subnet (e.g. for 7.240.192.0/31, gateway is + 7.240.192.1). For non-/31 masks, gateway is None. + + Raises SystemExit if no matching ip_addresses entry is found -- the YAML + is the single source of truth and must define this. 
+ """ + ip_addresses = site_design.get("ip_addresses", []) + for ip in ip_addresses: + if ip.get("device") != oob_server_name or ip.get("interface") != "eth1": + continue + addr = ip.get("address") + if not addr: + continue + try: + iface = ipaddress.IPv4Interface(addr) + except ValueError: + continue + if iface.network.prefixlen == 31: + other = next( + (h for h in iface.network.hosts() if h != iface.ip), + None, + ) + gateway = str(other) if other is not None else None + else: + gateway = None + return (addr, gateway) + LOG.error( + "No ip_addresses entry for %s eth1 in the topology YAML. " + "The YAML must define the oob-mgmt-server eth1 address.", + oob_server_name, + ) + sys.exit(1) + + +class AirTopologyBuilder: + """Build NVIDIA AIR topology from site export YAML.""" + + def __init__( + self, + yaml_path: str, + simulation_name: str | None = None, + minimal_mode: bool = False, + nvcm_server: NVCMServerConfig | None = None, + ) -> None: + """Initialize the topology builder. + + Args: + yaml_path: Path to the site export YAML file + simulation_name: Name for the AIR simulation (auto-generated if not provided) + minimal_mode: If True, group similar devices into single nodes for smaller sims + nvcm_server: Configuration for adding a NVCM server node to the topology + """ + self.yaml_path = Path(yaml_path) + self.minimal_mode = minimal_mode + self.nvcm_server = nvcm_server + + # Parse YAML + with open(self.yaml_path) as f: + self.site_design = yaml.safe_load(f) + + # Extract site name from hierarchy + self.site_name = self._extract_site_name() + self.simulation_name = simulation_name or f"NVCM-E2E-{self.site_name}" + + # Device and connection storage + self.devices: dict[str, DeviceInfo] = {} + self.connections: list[CableConnection] = [] + + # Resolve tagged prefixes from the YAML + self.lb_allowed_prefixes = self._resolve_tagged_prefixes("lb-allowed") or ["0.0.0.0/0"] + self.relay_return_prefixes = self._resolve_tagged_prefixes("relay-return") + + # Parse the 
topology + self._parse_devices() + self._parse_interfaces() + self._parse_cables() + + def _extract_site_name(self) -> str: + """Extract the site name from location hierarchy.""" + hierarchy = self.site_design.get("location_hierarchy", []) + for loc in hierarchy: + if loc.get("type") == "Site": + return loc.get("name", "Unknown") + return "Unknown" + + def resolve_device_bgp_asn(self, device_name: str) -> str | None: + """Return the BGP ASN for *device_name* from local_config_context_data.""" + for dev in self.site_design.get("devices", []): + if dev.get("name") == device_name: + ctx = dev.get("local_config_context_data", {}) + asn = (ctx.get("bgp") or {}).get("asn") + if asn is not None: + return str(asn) + return None + + def _resolve_tagged_prefixes(self, tag: str) -> list[str]: + """Return prefix CIDRs that carry the given tag.""" + prefixes_data = self.site_design.get("prefixes", []) + result = [] + for entry in prefixes_data: + tags = entry.get("tags", []) + if tag in tags: + pfx = entry.get("prefix") + if pfx: + result.append(pfx) + if result: + LOG.info("Prefixes tagged '%s': %s", tag, ", ".join(result)) + return result + + @staticmethod + def _index_to_mac(index: int) -> str: + """Convert a 1-based index to a deterministic MAC in 44:38:39:xx:xx:xx.""" + b3 = (index >> 16) & 0xFF + b4 = (index >> 8) & 0xFF + b5 = index & 0xFF + return f"44:38:39:{b3:02x}:{b4:02x}:{b5:02x}" + + def _parse_devices(self) -> None: + """Parse devices from the site design.""" + devices_data = self.site_design.get("devices", []) + cumulus_count = 0 + server_count = 0 + + auto_serial_names: list[str] = [] + + for device in devices_data: + name = device.get("name") + platform = device.get("platform", "") + role = device.get("role", "Unknown") + + device_type = device.get("device_type", {}) + model = device_type.get("model", "Unknown") + air_config = device.get("_air", {}) + raw_serial = device.get("serial", "") + serial = "" if raw_serial == "auto" else raw_serial + needs_auto = 
def _parse_interfaces(self) -> None:
    """Parse interfaces and attach them to devices.

    Walks ``site_design["interfaces"]`` and, for every entry whose device is
    already in ``self.devices``, records the interface name on the device
    and its MAC (explicit, or generated when the YAML says ``"auto"``).
    Interfaces described as "exit" are additionally collected in
    ``self.exit_interfaces``.

    Raises:
        ValueError: A ``mac_address`` is neither a string nor null; a Cumulus
            ``eth0`` reaches the port-name filter without an explicit MAC;
            or, after parsing, a Cumulus device has no ``eth0`` with a MAC.
    """
    interfaces_data = self.site_design.get("interfaces", [])

    # (device, interface) pairs whose description is "exit".
    self.exit_interfaces: list[tuple[str, str]] = []
    # (device, interface) pairs that asked for mac_address: auto.
    auto_mac_entries: list[tuple[str, str]] = []

    for intf in interfaces_data:
        device_name = intf.get("device")
        intf_name = intf.get("name")
        intf_type = intf.get("type", "")
        description = intf.get("description") or ""
        raw_mac = intf.get("mac_address")
        # This check runs before the device filter below, so a non-string
        # MAC is rejected even for devices that are not in self.devices.
        if raw_mac is not None and not isinstance(raw_mac, str):
            raise ValueError(
                f"{device_name} {intf_name} mac_address must be a string; "
                "quote MAC addresses in YAML"
            )
        # "auto" is a sentinel: no explicit MAC, generate one after the loop.
        mac_address = None if raw_mac == "auto" else raw_mac
        needs_auto = raw_mac == "auto"

        if device_name not in self.devices:
            continue

        # Exit interfaces are recorded regardless of platform and skip the
        # name/type filters applied further down.
        if description.lower() == "exit":
            self.exit_interfaces.append((device_name, intf_name))
            self.devices[device_name].interfaces.append(intf_name)
            if mac_address:
                self.devices[device_name].interface_macs[intf_name] = mac_address
            elif needs_auto:
                auto_mac_entries.append((device_name, intf_name))
            continue

        device = self.devices[device_name]
        is_cumulus = "Cumulus" in device.platform

        if not is_cumulus:
            # Non-Cumulus nodes: keep everything except names starting with "lo".
            if intf_name.startswith("lo"):
                continue
            device.interfaces.append(intf_name)
            if mac_address:
                device.interface_macs[intf_name] = mac_address
            elif needs_auto:
                auto_mac_entries.append((device_name, intf_name))
        else:
            # Cumulus nodes: drop virtual interfaces and keep only names of
            # the form swpN, ethN, EthernetN or EthernetN/M.
            if intf_type == "virtual":
                continue
            if re.match(r"^(swp\d+|eth\d+|Ethernet\d+(/\d+)?)$", intf_name):
                # "auto" leaves mac_address as None, so it is rejected here too.
                if intf_name == "eth0" and not mac_address:
                    raise ValueError(
                        f"Cumulus device {device_name} interface eth0 must define "
                        "an explicit mac_address for DHCP/ZTP reservations"
                    )
                device.interfaces.append(intf_name)
                if mac_address:
                    device.interface_macs[intf_name] = mac_address
                elif needs_auto:
                    auto_mac_entries.append((device_name, intf_name))

    if auto_mac_entries:
        # Sorting first makes the assignment independent of YAML order.
        auto_mac_entries.sort()
        # Generated interface MACs start at index 0x010001.
        # NOTE(review): presumably this keeps them apart from the auto
        # serials that _parse_devices numbers from 1 — confirm.
        offset = 0x010000
        for idx, (dev_name, iface) in enumerate(auto_mac_entries, start=1):
            mac = self._index_to_mac(offset + idx)
            self.devices[dev_name].interface_macs[iface] = mac
            LOG.debug("Auto MAC %s:%s -> %s", dev_name, iface, mac)
        LOG.info("Auto-generated MACs for %d interface(s)", len(auto_mac_entries))

    if self.exit_interfaces:
        LOG.info(f"Found {len(self.exit_interfaces)} exit interface(s) for SSH access")

    # Cumulus VX derives both serial-number and system-mac from eth0. The
    # topology YAML must carry the same value that Nautobot will use for
    # DHCP reservations; silently inventing it here hides broken input.
    for device in self.devices.values():
        if "Cumulus" not in device.platform:
            continue
        if "eth0" not in device.interfaces or not device.interface_macs.get("eth0"):
            raise ValueError(
                f"Cumulus device {device.name} must define eth0 with an explicit "
                "mac_address for DHCP/ZTP reservations"
            )

    # Log interface counts
    for device in self.devices.values():
        LOG.debug(f"{device.name}: {len(device.interfaces)} interfaces")
+ + Returns: + Dictionary in AIR JSON topology format + """ + if self.minimal_mode: + return self._build_minimal_topology() + return self._build_full_topology() + + def _make_link_endpoint(self, device_name: str, intf_name: str) -> dict[str, str]: + """Build an AIR link endpoint dict, including MAC when set.""" + endpoint: dict[str, str] = {"node": device_name, "interface": intf_name} + device = self.devices.get(device_name) + if device: + mac = device.interface_macs.get(intf_name) + if mac: + endpoint["mac"] = mac.lower() + return endpoint + + def _build_full_topology(self) -> dict[str, Any]: + """Build full topology with one AIR node per device.""" + topology: dict[str, Any] = { + "oob": False, # We'll add eth0 as outbound interfaces manually + "nodes": {}, + "links": [], + } + + # Create a node for each device + for device in self.devices.values(): + is_cumulus = "Cumulus" in device.platform + + if is_cumulus: + air_image = CUMULUS_VX_IMAGES.get( + device.firmware_version, + f"cumulus-vx-{device.firmware_version}", + ) + node: dict[str, Any] = { + "memory": DEFAULT_NODE_MEMORY, + "cpu": DEFAULT_NODE_CPU, + "os": air_image, + } + else: + # Non-Cumulus node (servers, GPUs, DPUs) - use AIR config if available + air_config = getattr(device, "air_config", {}) + node = { + "memory": air_config.get("memory", DEFAULT_NODE_MEMORY), + "cpu": air_config.get("cpu", DEFAULT_NODE_CPU), + "os": air_config.get("os", DEFAULT_SERVER_OS), + } + if air_config.get("storage"): + node["storage"] = air_config["storage"] + + topology["nodes"][device.name] = node + + # Add all interfaces as unconnected first + for intf in device.interfaces: + topology["links"].append( + [self._make_link_endpoint(device.name, intf), "unconnected"] + ) + + # Override with actual connections + connected_intfs: set[tuple[str, str]] = set() + + for conn in self.connections: + topology["links"].append( + [ + self._make_link_endpoint(conn.source_device, conn.source_interface), + 
self._make_link_endpoint(conn.dest_device, conn.dest_interface), + ] + ) + connected_intfs.add((conn.source_device, conn.source_interface)) + connected_intfs.add((conn.dest_device, conn.dest_interface)) + + # Remove unconnected links for interfaces that are actually connected + # or that should be exit interfaces + exit_intf_set = set(self.exit_interfaces) if hasattr(self, "exit_interfaces") else set() + topology["links"] = [ + link + for link in topology["links"] + if not ( + isinstance(link[1], str) + and link[1] == "unconnected" + and ( + (link[0]["node"], link[0]["interface"]) in connected_intfs + or (link[0]["node"], link[0]["interface"]) in exit_intf_set + ) + ) + ] + + # Add "exit" links for SSH access (public-facing interfaces) + if hasattr(self, "exit_interfaces"): + for device_name, intf_name in self.exit_interfaces: + topology["links"].append([self._make_link_endpoint(device_name, intf_name), "exit"]) + + # Add NVCM server node if configured + if self.nvcm_server: + self._add_nvcm_server_to_topology(topology) + + LOG.info( + f"Built topology with {len(topology['nodes'])} nodes and {len(topology['links'])} links" + ) + return topology + + def _add_nvcm_server_to_topology(self, topology: dict[str, Any]) -> None: + """Add or configure the NVCM server in the topology. + + If using an existing server (use_existing_server), just logs the config. + If creating a new node (attach_switch/attach_interface), adds the node + and creates a link to the specified switch. + + Args: + topology: The topology dict to modify in place + """ + if not self.nvcm_server: + return + + server_name = self.nvcm_server.server_name + + # Option 1: Use existing server from simulation + if self.nvcm_server.use_existing_server: + if server_name not in topology["nodes"]: + raise ValueError( + f"Server '{server_name}' not found in topology. " + f"Available nodes: {list(topology['nodes'].keys())[:10]}..." 
+ ) + topology["nodes"][server_name].update( + { + "os": self.nvcm_server.os, + "cpu": self.nvcm_server.cpu, + "memory": self.nvcm_server.memory, + "storage": self.nvcm_server.storage, + } + ) + LOG.info( + f"Overriding existing server '{server_name}' with nvcm-box image " + f"({self.nvcm_server.os}, {self.nvcm_server.cpu} CPU, " + f"{self.nvcm_server.memory}MB RAM, {self.nvcm_server.storage}GB storage)" + ) + return + + # Option 2: Create new node attached to a switch + if not self.nvcm_server.attach_switch or not self.nvcm_server.attach_interface: + raise ValueError( + "Must specify either --use-existing-server OR both " + "--attach-switch and --attach-interface" + ) + + # Validate the attach switch exists + if self.nvcm_server.attach_switch not in self.devices: + LOG.warning( + f"Switch '{self.nvcm_server.attach_switch}' not found in topology. " + f"Available switches: {list(self.devices.keys())[:10]}..." + ) + raise ValueError(f"Switch '{self.nvcm_server.attach_switch}' not found in topology") + + # Add the NVCM server node (nvcm-box with extra resources) + topology["nodes"][server_name] = { + "memory": self.nvcm_server.memory, + "cpu": self.nvcm_server.cpu, + "storage": self.nvcm_server.storage, + "os": self.nvcm_server.os, + } + + # If the server is already connected from the topology, do not add a duplicate link. 
+ server_interface = self.nvcm_server.server_interface + for link in topology["links"]: + if not isinstance(link, list) or len(link) < 2: + continue + for ep in (link[0], link[1]): + if not (isinstance(ep, dict) and ep.get("node") == server_name): + continue + if ep.get("interface") == server_interface: + LOG.info( + f"NVCM server '{server_name}' already has link on {server_interface} " + "from topology; skipping duplicate link" + ) + return + + # Add the link between nvcm-server and the specified switch/interface + # First, remove any existing unconnected link for that interface + attach_switch = self.nvcm_server.attach_switch + attach_interface = self.nvcm_server.attach_interface + server_interface = self.nvcm_server.server_interface + + topology["links"] = [ + link + for link in topology["links"] + if not ( + isinstance(link[1], str) + and link[1] == "unconnected" + and link[0]["node"] == attach_switch + and link[0]["interface"] == attach_interface + ) + ] + + # Add the connection between nvcm-server and the switch + topology["links"].append( + [ + {"node": server_name, "interface": server_interface}, + {"node": attach_switch, "interface": attach_interface}, + ] + ) + + LOG.info( + f"Added NVCM server: {server_name} " + f"({self.nvcm_server.cpu} CPU, {self.nvcm_server.memory}MB RAM) " + f"connected to {attach_switch}:{attach_interface}" + ) + + def _build_minimal_topology(self) -> dict[str, Any]: + """Build minimal topology by grouping similar devices. + + Groups devices by (model, role, firmware_version) to reduce simulation size. + Useful for testing configuration rendering without full topology. 
+ """ + topology: dict[str, Any] = { + "oob": False, + "nodes": {}, + "links": [], + } + + # Group devices by model-role-version + device_groups: dict[str, dict[str, Any]] = {} + + for device in self.devices.values(): + # Create group key + key = f"{device.model}-{device.role}-{device.firmware_version}".replace( + ".", "-" + ).replace(" ", "-") + + if key not in device_groups: + device_groups[key] = { + "firmware_version": device.firmware_version, + "interfaces": set(), + "devices": [], + } + + # Merge interfaces + device_groups[key]["interfaces"].update(device.interfaces) + device_groups[key]["devices"].append(device.name) + + # Create nodes from groups + for group_key, group_data in device_groups.items(): + air_image = CUMULUS_VX_IMAGES.get( + group_data["firmware_version"], + f"cumulus-vx-{group_data['firmware_version']}", + ) + + topology["nodes"][group_key] = { + "memory": DEFAULT_NODE_MEMORY, + "cpu": DEFAULT_NODE_CPU, + "os": air_image, + } + + # Add interfaces as unconnected + for intf in group_data["interfaces"]: + topology["links"].append([{"node": group_key, "interface": intf}, "unconnected"]) + + # Add NVCM server if configured (attach to first group that matches attach_switch) + if self.nvcm_server: + # Find which group contains the attach_switch + attach_group = None + for group_key, group_data in device_groups.items(): + if self.nvcm_server.attach_switch in group_data["devices"]: + attach_group = group_key + break + + if attach_group: + # Temporarily modify nvcm_server to use group key + original_switch = self.nvcm_server.attach_switch + self.nvcm_server.attach_switch = attach_group + # Add nvcm server (need to add to devices temporarily) + self.devices[attach_group] = DeviceInfo( + name=attach_group, + platform="Cumulus Linux", + role="grouped", + model="virtual", + firmware_version=device_groups[attach_group]["firmware_version"], + ) + self._add_nvcm_server_to_topology(topology) + self.nvcm_server.attach_switch = original_switch + else: + 
LOG.warning( + f"Could not find switch '{self.nvcm_server.attach_switch}' " + f"in minimal topology groups" + ) + + LOG.info( + f"Built minimal topology: {len(topology['nodes'])} nodes from " + f"{len(self.devices)} devices" + ) + return topology diff --git a/installer/src/nv_config_manager_installer/cli.py b/installer/src/nv_config_manager_installer/cli.py index a2c5433..398c8e9 100644 --- a/installer/src/nv_config_manager_installer/cli.py +++ b/installer/src/nv_config_manager_installer/cli.py @@ -29,17 +29,21 @@ import click from nv_config_manager_installer import __version__ +from nv_config_manager_installer.air_sim.cli import air_sim if TYPE_CHECKING: from nv_config_manager_installer.schema import NVConfigManagerInstallConfig @click.group() -@click.version_option(version=__version__, prog_name="nv-config-manager-installer") +@click.version_option(version=__version__) def main() -> None: """NVIDIA Config Manager Install Wizard.""" +main.add_command(air_sim) + + @main.command() @click.option( "--config", diff --git a/installer/src/nv_config_manager_installer/deployer.py b/installer/src/nv_config_manager_installer/deployer.py index d2065c3..8a2b35b 100644 --- a/installer/src/nv_config_manager_installer/deployer.py +++ b/installer/src/nv_config_manager_installer/deployer.py @@ -99,6 +99,29 @@ class DeployStep: error: str = "" +@dataclass(frozen=True) +class _ParallelCommand: + """A subprocess command that can run alongside other commands.""" + + label: str + cmd: list[str] + timeout: int | None = 600 + env: dict[str, str] | None = None + + +@dataclass +class _RunningCommand: + """Runtime state for a command managed by _run_logged_parallel.""" + + command: _ParallelCommand + proc: subprocess.Popen[str] + started_at: float + stdout_lines: list[str] = field(default_factory=list) + stderr_lines: list[str] = field(default_factory=list) + latest_line: str = "waiting for output" + open_streams: int = 0 + + class DeployCallback(Protocol): """Protocol for deployment 
progress callbacks.""" @@ -397,6 +420,173 @@ def _run_logged( return subprocess.CompletedProcess(cmd, proc.returncode, stdout_text, stderr_text) +def _format_elapsed(seconds: float) -> str: + """Return a compact elapsed-time string for long-running command status.""" + seconds_int = max(0, int(seconds)) + minutes, sec = divmod(seconds_int, 60) + hours, minutes = divmod(minutes, 60) + if hours: + return f"{hours}h{minutes:02d}m{sec:02d}s" + if minutes: + return f"{minutes}m{sec:02d}s" + return f"{sec}s" + + +def _command_summary(cmd: list[str]) -> str: + """Return a concise shell-style command summary for logs.""" + return " ".join(cmd[:4]) + ("..." if len(cmd) > 4 else "") + + +def _parallel_build_limit(command_count: int) -> int: + """Return how many Docker builds to run at once.""" + configured = os.environ.get("NVCM_DOCKER_BUILD_PARALLELISM", "").strip() + if configured: + try: + return max(1, min(command_count, int(configured))) + except ValueError: + pass + cpu_count = os.cpu_count() or 2 + return max(1, min(command_count, 4, cpu_count)) + + +def _append_command_line( + state: _RunningCommand, + stream: str, + line: str, + step: DeployStep, + callback: DeployCallback, +) -> None: + """Record and emit one line from a parallel subprocess.""" + for part in line.replace("\r", "\n").splitlines(): + text = part.strip() + if not text: + continue + if stream == "stdout": + state.stdout_lines.append(text) + else: + state.stderr_lines.append(text) + state.latest_line = text + msg = f"[{state.command.label}] {text}" + step.output.append(msg) + callback.on_log(msg) + + +def _kill_running_commands(states: list[_RunningCommand]) -> None: + """Terminate all still-running commands.""" + for state in states: + if state.proc.poll() is None: + state.proc.kill() + for state in states: + if state.proc.poll() is None: + state.proc.wait() + + +def _emit_parallel_status( + states: list[_RunningCommand], + step: DeployStep, + callback: DeployCallback, +) -> None: + """Emit 
elapsed-time status for each currently running command.""" + now = time.monotonic() + for state in states: + elapsed = _format_elapsed(now - state.started_at) + msg = f"[{state.command.label}] running {elapsed} | latest: {state.latest_line}" + step.output.append(msg) + callback.on_log(msg) + + +def _run_logged_parallel( + commands: list[_ParallelCommand], + step: DeployStep, + callback: DeployCallback, + *, + max_parallel: int, + progress_interval: float = 15.0, +) -> None: + """Run subprocess commands concurrently, streaming prefixed output to callback.""" + if not commands: + return + + pending = list(commands) + running: dict[subprocess.Popen[str], _RunningCommand] = {} + selector = selectors.DefaultSelector() + next_progress = time.monotonic() + progress_interval + + def start_available() -> None: + while pending and len(running) < max_parallel: + command = pending.pop(0) + callback.on_log(f"[{command.label}] $ {_command_summary(command.cmd)}") + proc = subprocess.Popen( + command.cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + env=command.env, + ) + state = _RunningCommand(command=command, proc=proc, started_at=time.monotonic()) + if proc.stdout: + selector.register(proc.stdout, selectors.EVENT_READ, (state, "stdout")) + state.open_streams += 1 + if proc.stderr: + selector.register(proc.stderr, selectors.EVENT_READ, (state, "stderr")) + state.open_streams += 1 + running[proc] = state + + def finish_completed() -> None: + completed: list[subprocess.Popen[str]] = [] + for proc, state in running.items(): + if proc.poll() is not None and state.open_streams == 0: + completed.append(proc) + + for proc in completed: + state = running.pop(proc) + elapsed = _format_elapsed(time.monotonic() - state.started_at) + if proc.returncode != 0: + _kill_running_commands(list(running.values())) + raise subprocess.CalledProcessError( + proc.returncode, + state.command.cmd, + "\n".join(state.stdout_lines), + "\n".join(state.stderr_lines), + ) + msg = 
f"[{state.command.label}] completed in {elapsed}" + step.output.append(msg) + callback.on_log(msg) + + try: + start_available() + while running or pending: + now = time.monotonic() + for state in list(running.values()): + if state.command.timeout is not None and ( + now - state.started_at > state.command.timeout + ): + _kill_running_commands(list(running.values())) + raise subprocess.TimeoutExpired(state.command.cmd, state.command.timeout) + + if now >= next_progress and running: + _emit_parallel_status(list(running.values()), step, callback) + next_progress = now + progress_interval + + timeout = max(0.1, min(1.0, next_progress - now)) + for key, _ in selector.select(timeout=timeout): + state, stream = key.data + line = key.fileobj.readline() # type: ignore[union-attr] + if line: + _append_command_line(state, stream, line.rstrip("\n"), step, callback) + continue + selector.unregister(key.fileobj) + state.open_streams -= 1 + + finish_completed() + start_available() + except BaseException: + _kill_running_commands(list(running.values())) + raise + finally: + selector.close() + + class Deployer: """Orchestrates the full NVIDIA Config Manager deployment pipeline.""" @@ -699,27 +889,49 @@ def _build_images(self) -> None: if val: apt_mirror_args += ["--build-arg", f"{env_var}={val}"] + build_env = {**os.environ, "DOCKER_BUILDKIT": "1"} + use_buildx = bool(build_env.get("BUILDX_BUILDER")) + build_cmd = ["docker", "buildx", "build"] if use_buildx else ["docker", "build"] + build_output_args = ["--load"] if use_buildx else [] + build_commands: list[_ParallelCommand] = [] for name, dockerfile, context in images: build_tag = f"{name}:local" - self.callback.on_log(f"Building {build_tag}...") - _run_logged( - [ - "docker", - "build", - "--provenance=false", - "--build-context", - "scripts=build/", - *apt_mirror_args, - "-t", - build_tag, - "-f", - dockerfile, - context, - ], - step, - self.callback, - timeout=900, + build_commands.append( + _ParallelCommand( + label=name, + 
cmd=[ + *build_cmd, + "--provenance=false", + "--progress=plain", + *build_output_args, + "--build-context", + "scripts=build/", + *apt_mirror_args, + "-t", + build_tag, + "-f", + dockerfile, + context, + ], + timeout=900, + env=build_env, + ) ) + + max_parallel = _parallel_build_limit(len(build_commands)) + self.callback.on_log( + f"Building {len(build_commands)} local image(s) with up to " + f"{max_parallel} parallel Docker build(s)..." + ) + _run_logged_parallel( + build_commands, + step, + self.callback, + max_parallel=max_parallel, + ) + + for name, _, _ in images: + build_tag = f"{name}:local" digest_tag = _get_image_digest_tag(build_tag) if digest_tag: content_tag = f"{name}:{digest_tag}" @@ -1285,7 +1497,7 @@ def _create_network_secrets(self, step: DeployStep, s: dict[str, str]) -> None: self.callback.on_log(msg) def _create_optional_integration_secrets(self, step: DeployStep, s: dict[str, str]) -> None: - """Create Kubernetes secrets for optional integrations (Slack, AIR, Jira, CNPG backup).""" + """Create Kubernetes secrets for optional integrations (Slack, Jira, CNPG backup).""" k8s = self.config.secrets.k8s if k8s.slack.enabled: @@ -1294,17 +1506,6 @@ def _create_optional_integration_secrets(self, step: DeployStep, s: dict[str, st raise ValueError("Slack is enabled but slack_token is empty") self._apply_secret(step, "slack-token", {"token": token}) - if k8s.air.enabled: - client_id = s.get("air_ssa_client_id", "") - client_secret = s.get("air_ssa_client_secret", "") - if not client_secret: - raise ValueError("AIR is enabled but air_ssa_client_secret is empty") - self._apply_secret( - step, - "air-creds", - {"ssa-client-id": client_id, "ssa-client-secret": client_secret}, - ) - if k8s.jira.enabled: api_token = s.get("jira_api_token", "") if not api_token: diff --git a/installer/src/nv_config_manager_installer/schema.py b/installer/src/nv_config_manager_installer/schema.py index 96d4114..b739031 100644 --- 
a/installer/src/nv_config_manager_installer/schema.py +++ b/installer/src/nv_config_manager_installer/schema.py @@ -176,11 +176,6 @@ class VaultPathsConfig(BaseModel): default_factory=lambda: _path(clientSecret="client_secret", cookieSecret="cookie_secret") ) slack: VaultPathConfig = Field(default_factory=lambda: _path(enabled=False, token="token")) - air: VaultPathConfig = Field( - default_factory=lambda: _path( - enabled=False, ssaClientId="ssa_client_id", ssaClientSecret="ssa_client_secret" - ) - ) jira: VaultPathConfig = Field( default_factory=lambda: _path(enabled=False, baseUrl="base_url", apiToken="api_token") ) @@ -234,7 +229,6 @@ class KubernetesSecretsConfig(BaseModel): network: K8sSecretGroup = Field(default_factory=K8sSecretGroup) nautobot_app: K8sSecretGroup = Field(default_factory=K8sSecretGroup) slack: K8sSecretGroup = Field(default_factory=lambda: K8sSecretGroup(enabled=False)) - air: K8sSecretGroup = Field(default_factory=lambda: K8sSecretGroup(enabled=False)) jira: K8sSecretGroup = Field(default_factory=lambda: K8sSecretGroup(enabled=False)) cnpg_backup: K8sSecretGroup = Field(default_factory=lambda: K8sSecretGroup(enabled=False)) diff --git a/installer/src/nv_config_manager_installer/secrets.py b/installer/src/nv_config_manager_installer/secrets.py index 83048d1..eac57be 100644 --- a/installer/src/nv_config_manager_installer/secrets.py +++ b/installer/src/nv_config_manager_installer/secrets.py @@ -101,15 +101,10 @@ def _generate_core_k8s_secrets(state: dict[str, str], _v: Any) -> None: def _generate_optional_k8s_secrets( config: NVConfigManagerInstallConfig, state: dict[str, str], _v: Any ) -> None: - """Populate optional integration secrets (Slack, AIR, Jira, CNPG backup).""" + """Populate optional integration secrets (Slack, Jira, CNPG backup).""" k8s = config.secrets.k8s if k8s.slack.enabled: state["slack_token"] = _v("slack", "token") or _generate_url_safe_password() - if k8s.air.enabled: - state["air_ssa_client_id"] = _v("air", 
"ssaClientId") or "" - state["air_ssa_client_secret"] = ( - _v("air", "ssaClientSecret") or _generate_url_safe_password() - ) if k8s.jira.enabled: state["jira_base_url"] = _v("jira", "baseUrl") or "" state["jira_api_token"] = _v("jira", "apiToken") or "" @@ -181,7 +176,6 @@ def _v(group: str, vault_key: str) -> str: ("nautobot_app", "nautobotApp", "nautobot-app"), ("oidc", "oidc", "oidc"), ("slack", "slack", "slack"), - ("air", "air", "air"), ("jira", "jira", "jira"), ("cnpg_backup", "cnpgBackup", "cnpg-backup"), ] diff --git a/installer/src/nv_config_manager_installer/tui/air_sim/__init__.py b/installer/src/nv_config_manager_installer/tui/air_sim/__init__.py new file mode 100644 index 0000000..bd904e8 --- /dev/null +++ b/installer/src/nv_config_manager_installer/tui/air_sim/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/installer/src/nv_config_manager_installer/tui/air_sim/app.py b/installer/src/nv_config_manager_installer/tui/air_sim/app.py new file mode 100644 index 0000000..900d063 --- /dev/null +++ b/installer/src/nv_config_manager_installer/tui/air_sim/app.py @@ -0,0 +1,273 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""NVCM AIR Simulation TUI - wizard for bringing up AIR simulations.""" + +from __future__ import annotations + +import argparse +import os +from pathlib import Path +from typing import ClassVar + +from textual.app import App, ComposeResult +from textual.binding import Binding +from textual.containers import Container, Horizontal, VerticalScroll +from textual.screen import ModalScreen +from textual.widgets import Button, Footer, Label, Static + +from nv_config_manager_installer.air_sim.constants import DEFAULT_AIR_SIM_CONFIG_PATH +from nv_config_manager_installer.air_sim.sim_config import SimConfig +from nv_config_manager_installer.tui.air_sim.screens.launch import LaunchScreen +from nv_config_manager_installer.tui.air_sim.screens.options import OptionsScreen +from nv_config_manager_installer.tui.air_sim.screens.topology import TopologyScreen + +SECTION_LABELS: list[tuple[str, str]] = [ + ("topology", "Topology"), + ("options", "Options"), + ("launch", "Launch"), +] + +CSS_PATH = Path(__file__).parent / "app.tcss" + +_STATUS_CLASS_MAP = { + "[*]": "--complete", + "[!]": "--incomplete", + "[>]": "--running", + "[ ]": "--empty", +} + +_STATUS_TOOLTIP = { + "[*]": "Ready", + "[!]": "Needs attention — check required fields", + "[ ]": "Not configured", + "[>]": "In progress", +} + + +class NavItem(Static): + """Clickable sidebar navigation item.""" + + def __init__(self, section_id: str, 
label: str) -> None: + super().__init__() + self.section_id = section_id + self.label_text = label + self.status = " " + + def render(self) -> str: # type: ignore[override] + prefix = "* " if self.status == "[!]" else " " + return f"{prefix}{self.label_text}" + + def on_click(self) -> None: + app = self.app + if isinstance(app, NVCMAirSimApp): + app.switch_section(self.section_id) + + def set_status(self, status: str) -> None: + self.status = status + for cls in _STATUS_CLASS_MAP.values(): + self.remove_class(cls) + self.add_class(_STATUS_CLASS_MAP.get(status, "--empty")) + self.tooltip = _STATUS_TOOLTIP.get(status, "") + self.refresh() + + +class QuitConfirmScreen(ModalScreen[bool]): + DEFAULT_CSS = """ + QuitConfirmScreen { align: center middle; } + #quit-dialog { + width: 44; height: auto; padding: 1 2; + border: thick $accent; background: $surface; + } + #quit-dialog Label { width: 100%; content-align: center middle; } + #quit-buttons { height: auto; width: 100%; align: center middle; margin-top: 1; } + #quit-buttons Button { min-width: 12; margin: 0 2; } + """ + + def compose(self) -> ComposeResult: + with Container(id="quit-dialog"): + yield Label("Quit NVCM AIR Sim Wizard?") + with Horizontal(id="quit-buttons"): + yield Button("Quit", variant="error", id="quit-yes") + yield Button("Cancel", variant="primary", id="quit-no") + + def on_button_pressed(self, event: Button.Pressed) -> None: + self.dismiss(event.button.id == "quit-yes") + + +class NVCMAirSimApp(App[None]): + """NVCM AIR Simulation Wizard TUI.""" + + TITLE = "NVCM AIR Sim Wizard" + CSS_PATH = CSS_PATH + ENABLE_COMMAND_PALETTE = False + + BINDINGS: ClassVar[list[Binding]] = [ + Binding("f2", "save", "Save", key_display="F2"), + Binding("f9", "launch", "Launch", key_display="F9"), + Binding("f10", "save_and_exit", "Save & Exit", key_display="F10"), + Binding("ctrl+c", "request_quit", "Quit"), + Binding("ctrl+n", "next_section", "Next Section", key_display="^N"), + Binding("ctrl+p", "prev_section", 
"Prev Section", key_display="^P"), + ] + + def __init__( + self, + config: SimConfig | None = None, + config_path: Path | None = None, + ) -> None: + super().__init__() + self.config = config or SimConfig() + self.config_path = config_path or DEFAULT_AIR_SIM_CONFIG_PATH + self.active_section = "topology" + self._nav_items: dict[str, NavItem] = {} + self._screens: dict[str, Container] = {} + + def compose(self) -> ComposeResult: + with Horizontal(): + with VerticalScroll(id="sidebar"): + yield Label("NVCM AIR Sim Wizard", id="sidebar-title") + for section_id, label in SECTION_LABELS: + item = NavItem(section_id, label) + item.add_class("nav-item") + self._nav_items[section_id] = item + yield item + with VerticalScroll(id="content-area"): + yield from self._build_screens() + yield Footer() + + def _build_screens(self) -> list[Container]: + screen_classes: dict[str, type[Container]] = { + "topology": TopologyScreen, + "options": OptionsScreen, + "launch": LaunchScreen, + } + screens = [] + for section_id, cls in screen_classes.items(): + screen = cls(self.config, id=f"screen-{section_id}") + screen.display = section_id == self.active_section + self._screens[section_id] = screen + screens.append(screen) + return screens + + def on_mount(self) -> None: + self._highlight_nav(self.active_section) + self._update_all_statuses() + + def apply_prebuilt_config(self, config: SimConfig) -> None: + """Replace current wizard values from a pre-built config without changing save path.""" + self.config = config + for screen in self._screens.values(): + if isinstance(screen, TopologyScreen | OptionsScreen | LaunchScreen): + screen._config = config + if hasattr(screen, "sync_from_config"): + screen.sync_from_config(config) + self._update_all_statuses() + + def switch_section(self, section_id: str) -> None: + if section_id == self.active_section: + return + outgoing = self._screens.get(self.active_section) + if outgoing and self.active_section != "launch" and hasattr(outgoing, 
"write_to_config"): + outgoing.write_to_config(self.config) + if outgoing: + outgoing.display = False + self.active_section = section_id + incoming = self._screens.get(section_id) + if incoming: + incoming.display = True + if hasattr(incoming, "sync_from_config"): + incoming.sync_from_config(self.config) + self._highlight_nav(section_id) + self._update_all_statuses() + + def _highlight_nav(self, section_id: str) -> None: + for sid, item in self._nav_items.items(): + if sid == section_id: + item.add_class("--highlight") + else: + item.remove_class("--highlight") + + def _update_all_statuses(self) -> None: + for section_id, item in self._nav_items.items(): + screen = self._screens.get(section_id) + if screen and hasattr(screen, "get_status"): + item.set_status(screen.get_status(self.config)) + else: + item.set_status("[ ]") + + def collect_config(self) -> None: + for section_id, screen in self._screens.items(): + if section_id != "launch" and hasattr(screen, "write_to_config"): + screen.write_to_config(self.config) + + def action_next_section(self) -> None: + sections = [section_id for section_id, _ in SECTION_LABELS] + idx = sections.index(self.active_section) if self.active_section in sections else -1 + if idx < len(sections) - 1: + self.switch_section(sections[idx + 1]) + + def action_prev_section(self) -> None: + sections = [section_id for section_id, _ in SECTION_LABELS] + idx = sections.index(self.active_section) if self.active_section in sections else 0 + if idx > 0: + self.switch_section(sections[idx - 1]) + + def action_save(self) -> None: + self.collect_config() + self.config.to_yaml(self.config_path) + self._update_all_statuses() + self.notify(f"Saved to {self.config_path}") + + def action_launch(self) -> None: + self.collect_config() + self.switch_section("launch") + + def action_save_and_exit(self) -> None: + self.collect_config() + self.config.to_yaml(self.config_path) + self.exit(message=f"Config saved to {self.config_path}") + + def 
action_request_quit(self) -> None: + def _on_dismiss(result: bool) -> None: + if result: + self.exit() + + self.push_screen(QuitConfirmScreen(), callback=_on_dismiss) + + +def _resolve_config_path(argv: list[str] | None = None) -> Path: + """Resolve the TUI config path from CLI args, env, or the default path.""" + parser = argparse.ArgumentParser(description="Launch the NVCM AIR simulation TUI") + parser.add_argument( + "config_path", + nargs="?", + help="Optional YAML config path to load and save", + ) + parser.add_argument( + "--config", + dest="config_path_flag", + help="YAML config path to load and save", + ) + args = parser.parse_args(argv) + path = args.config_path_flag or args.config_path or os.environ.get("NVCM_AIR_CONFIG") + return Path(path).expanduser() if path else DEFAULT_AIR_SIM_CONFIG_PATH + + +def run(argv: list[str] | None = None) -> None: + """Entry point for nvcm-air-tui command.""" + config_path = _resolve_config_path(argv) + config = SimConfig.load_or_default(config_path) + app = NVCMAirSimApp(config=config, config_path=config_path) + app.run() diff --git a/installer/src/nv_config_manager_installer/tui/air_sim/app.tcss b/installer/src/nv_config_manager_installer/tui/air_sim/app.tcss new file mode 100644 index 0000000..fb79350 --- /dev/null +++ b/installer/src/nv_config_manager_installer/tui/air_sim/app.tcss @@ -0,0 +1,315 @@ +/* NVCM AIR Sim Wizard - Textual CSS */ + +NVCMAirSimApp { + layout: grid; + grid-size: 2; + grid-columns: 28 1fr; + grid-rows: 1fr 3; +} + +#sidebar { + width: 28; + height: 100%; + dock: left; + border-right: solid $primary-background-lighten-2; + padding: 0; + scrollbar-size: 0 0; +} + +#sidebar-title { + text-align: center; + text-style: bold; + color: $text; + padding: 1 1; + border-bottom: solid $primary-background-lighten-2; + margin-bottom: 1; +} + +/* Nav items */ +.nav-item { + padding: 1 1; + height: auto; + margin: 0 1; + background: $surface; +} + +.nav-item.--empty { background: $surface; } 
+.nav-item.--complete { background: #0d1f0d; } +.nav-item.--incomplete { background: #1f1f0d; text-style: italic; } +.nav-item.--running { background: #0d1520; } +.nav-item:hover { background: $surface-lighten-1; } + +.nav-item.--highlight.--complete { background: #1a3a1a; text-style: bold; } +.nav-item.--highlight.--incomplete { background: #3a3a1a; text-style: bold italic; } +.nav-item.--highlight.--empty { background: $success 15%; text-style: bold; } +.nav-item.--highlight.--running { background: #1a2a3a; text-style: bold; } + +#content-area { + height: 100%; + padding: 1 2; + overflow-y: auto; +} + +/* All section screens size to content for scroll */ +#content-area > Container { + height: auto; +} + +/* Shared text styles */ +.section-title { text-style: bold; margin-bottom: 1; } +.section-divider { margin-bottom: 1; color: $text-muted; } +.subsection-label { text-style: bold; margin-top: 1; color: $accent; } +.field-label { margin-top: 1; margin-bottom: 0; } +.field-hint { color: $text-muted; margin-bottom: 1; } + +/* Switches */ +Switch > .switch--slider { color: $error; background: $error 30%; } +Switch.-on > .switch--slider { color: $success; background: $success 30%; } + +LabeledSwitch { + height: auto; + width: auto; + margin-bottom: 1; + padding: 0 1 0 0; + align: left middle; +} + +LabeledSwitch .labeled-switch--label { + height: 3; + width: auto; + content-align: center middle; + padding: 0 1 0 0; +} + +/* Field rows */ +.field-row { + height: auto; + margin-bottom: 1; +} + +.field-row Input { width: 1fr; } +.field-row Button { width: auto; margin-left: 1; } + +/* Topology screen */ +#attach-fields, +#mock-topology-fields, +#direct-topology-fields { + height: auto; + margin-top: 1; +} + +/* Options screen */ +#autocfg-fields { height: auto; border-left: solid $primary-background-lighten-2; padding-left: 2; margin-top: 1; } + +/* Topology screen template plugins */ +#template-plugin-list { + height: auto; + margin-bottom: 1; +} + +.template-plugin-row 
{ + height: auto; + margin-bottom: 1; + padding: 0 1; + border: solid $primary-background-lighten-2; +} + +.template-plugin-row Input { + width: 1fr; +} + +.template-plugin-row Button { + width: auto; + margin-top: 1; +} + +/* Launch screen */ +#launch-controls { + height: auto; + margin-bottom: 1; + align: left middle; +} + +#btn-launch { + min-width: 24; +} + +#btn-launch:disabled { + opacity: 0.4; +} + +/* SSH info bar */ +.ssh-info-bar { + display: none; + height: 5; + margin-bottom: 1; + padding: 0 1; + border: solid $success 50%; + background: $success 10%; + align: left middle; +} + +.ssh-badge { + width: auto; + background: $success 60%; + color: $text; + text-style: bold; + padding: 0 1; + margin-right: 1; +} + +.ssh-cmd { + width: 1fr; + height: 3; + min-height: 3; + border: none; + background: $surface; + padding: 0 1; + content-align: left middle; +} + +.ssh-copy-btn { + margin-left: 1; + width: 7; + min-width: 7; +} + +#log-viewer { + width: 1fr; + height: 100%; + padding: 0 1; +} + +#log-toolbar { + height: auto; + margin-top: 1; + margin-bottom: 1; + align: left middle; +} + +#log-tabs { + width: auto; + height: 3; + margin-right: 1; +} + +#log-toolbar Button { + height: 3; +} + +#log-toolbar .copy-icon-btn { + min-width: 5; +} + +.log-follow-btn { + display: none; + min-width: 12; +} + +#log-pane { + height: 1fr; +} + +#access-pane { + display: none; + height: 1fr; + border: solid $primary-background-lighten-2; + padding: 0 1; + overflow-y: auto; +} + +/* Proxy access panel */ +#proxy-access { + height: auto; + padding: 1 1; +} + +.copy-panel { + height: auto; + border: solid $primary-background-lighten-2; + padding: 0 1 1 1; + margin-bottom: 1; +} + +.copy-panel:hover { + background: $surface; +} + +.copy-panel-header { + height: 3; + align: left middle; +} + +.copy-panel-title { + width: 1fr; + content-align: left middle; + color: $text-muted; +} + +.copy-icon-btn { + width: 5; + min-width: 5; + height: 3; + margin-left: 1; + padding: 0; +} + 
+.proxy-cmd { + width: 1fr; + height: auto; + min-height: 3; + background: $surface; + padding: 0 1; +} + +#proxy-controls { + height: auto; + margin-top: 1; + align: left middle; +} + +#launch-controls Button { margin-right: 1; } + +#launch-status { + height: auto; + margin-bottom: 1; + min-height: 2; +} + +#launch-dashboard { + height: 50; + margin-top: 1; + border: solid $primary-background-lighten-2; +} + +#dashboard-top { + height: 18; + border-bottom: solid $primary-background-lighten-2; +} + +#step-panel { + width: 44; + height: 100%; + border-right: solid $primary-background-lighten-2; + padding: 0 1; + overflow-y: auto; +} + +#step-panel Static { height: 1; } + +#pod-status-panel { + width: 1fr; + height: 100%; + padding: 0 1; + overflow: hidden; +} + +#pod-table { + height: 1fr; +} + +#log-output, +.log-output { + height: 1fr; + border: solid $primary-background-lighten-2; +} diff --git a/installer/src/nv_config_manager_installer/tui/air_sim/screens/__init__.py b/installer/src/nv_config_manager_installer/tui/air_sim/screens/__init__.py new file mode 100644 index 0000000..bd904e8 --- /dev/null +++ b/installer/src/nv_config_manager_installer/tui/air_sim/screens/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/installer/src/nv_config_manager_installer/tui/air_sim/screens/launch.py b/installer/src/nv_config_manager_installer/tui/air_sim/screens/launch.py new file mode 100644 index 0000000..0006d60 --- /dev/null +++ b/installer/src/nv_config_manager_installer/tui/air_sim/screens/launch.py @@ -0,0 +1,1079 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _clean_ztp_line(line: str) -> str:
    """Extract the msg/message field from a JSON-structured ZTP log line.

    The line may carry a prefix (tag, timestamp) before the JSON object and
    arbitrary text after it. Each ``{`` is tried in turn as the start of a JSON
    object; the first one that decodes wins. Decoding with ``raw_decode`` stops
    at the end of the object, so trailing text no longer defeats the parse.

    Args:
        line: Raw log line.

    Returns:
        ``msg`` if present and truthy, else ``message`` if present and truthy,
        else the original line. Lines without a decodable JSON object are
        returned unchanged.
    """
    decoder = json.JSONDecoder()
    idx = line.find("{")
    while idx >= 0:
        try:
            data, _end = decoder.raw_decode(line, idx)
        except json.JSONDecodeError:
            # Not JSON at this brace (e.g. "ctx{a}"); try the next candidate.
            idx = line.find("{", idx + 1)
            continue
        if isinstance(data, dict):
            return str(data.get("msg") or data.get("message") or line)
        idx = line.find("{", idx + 1)
    return line
class _StepListWidget(Vertical):
    """Left panel: deployment steps with live status icons and elapsed timing.

    One ``Static`` row is created per entry in ``STEPS``. A one-second interval
    re-renders the currently running row so its elapsed time keeps counting.
    """

    def __init__(self, **kwargs: object) -> None:
        super().__init__(**kwargs)
        self._labels: dict[str, Static] = {}
        self._statuses: dict[str, StepStatus] = {}
        # time.monotonic() stamp taken when a step last entered RUNNING.
        self._start_times: dict[str, float] = {}
        # Final elapsed seconds, recorded when a step leaves RUNNING.
        self._durations: dict[str, float] = {}
        self._running_step: str | None = None
        # Built once; _render_text runs every second for the running step.
        self._step_names: dict[str, str] = dict(STEPS)

    def compose(self) -> ComposeResult:
        """Yield the panel title, a divider, and one row per known step."""
        yield Label("Steps", classes="section-title")
        yield Label("─" * 24, classes="section-divider")
        for step_id, _label in STEPS:
            row = Static(self._render_text(step_id, StepStatus.PENDING), id=f"step-{step_id}")
            self._labels[step_id] = row
            self._statuses[step_id] = StepStatus.PENDING
            yield row

    def on_mount(self) -> None:
        """Start the one-second refresh that animates the running step's timer."""
        self.set_interval(1.0, self._tick)

    def _tick(self) -> None:
        """Re-render the running step, if any, so its elapsed time advances."""
        if self._running_step:
            self._refresh(self._running_step)

    def update_step(self, step_id: str, status: StepStatus) -> None:
        """Record a status change for ``step_id`` and redraw its row.

        Entering RUNNING (re)starts the step's clock. Any other status freezes
        the elapsed time, provided the step was started.
        """
        self._statuses[step_id] = status
        if status == StepStatus.RUNNING:
            self._start_times[step_id] = time.monotonic()
            # The step may run again after a failed launch is retried; drop the
            # previous run's duration so the new run's time gets recorded.
            self._durations.pop(step_id, None)
            self._running_step = step_id
        else:
            if step_id in self._start_times and step_id not in self._durations:
                self._durations[step_id] = time.monotonic() - self._start_times[step_id]
            if self._running_step == step_id:
                self._running_step = None
        self._refresh(step_id)

    def _refresh(self, step_id: str) -> None:
        """Redraw the row for ``step_id``; unknown ids are ignored."""
        widget = self._labels.get(step_id)
        if widget:
            status = self._statuses.get(step_id, StepStatus.PENDING)
            widget.update(self._render_text(step_id, status))

    def _render_text(self, step_id: str, status: StepStatus) -> str:
        """Build the row markup: icon, label, elapsed time, typical-duration hint."""
        label = self._step_names.get(step_id, step_id)
        icon = _STATUS_ICON[status]

        timing = ""
        if status == StepStatus.RUNNING and step_id in self._start_times:
            elapsed = time.monotonic() - self._start_times[step_id]
            timing = f"  {_fmt_duration(elapsed)}"
        elif step_id in self._durations:
            timing = f"  {_fmt_duration(self._durations[step_id])}"

        hint = ""
        if status in (StepStatus.PENDING, StepStatus.RUNNING) and step_id in _TYPICAL:
            # Some _TYPICAL values already start with "~"; normalise so the hint
            # shows exactly one tilde instead of "(~~5s)".
            typical = _TYPICAL[step_id].removeprefix("~")
            hint = f"  [dim](~{typical})[/dim]"

        return f"{icon} {label}{timing}{hint}"
class _LogViewerWidget(Vertical):
    """Log panel with phase-aware log tabs plus an access-details tab.

    Every stream keeps its full history in ``self._buffers``. The on-screen
    ``_FollowLog`` for a stream only shows the most recent lines and is
    (re)built lazily when its tab becomes active.
    """

    DEFAULT_CSS = """
    _LogViewerWidget { height: 1fr; }
    _LogViewerWidget #log-pane { height: 1fr; }
    _LogViewerWidget #access-pane {
        display: none;
        height: 1fr;
        overflow-y: auto;
    }
    _LogViewerWidget .log-output { height: 1fr; }
    """

    def __init__(self, **kwargs: object) -> None:
        super().__init__(**kwargs)
        # Full, untruncated line history per stream (source for the clipboard copy).
        self._buffers: dict[str, list[str]] = {}
        self._active_tab = "deploy"
        # Stream name -> its log widget; "deploy" is registered in on_mount.
        self._logs: dict[str, _FollowLog] = {}
        self._log_ids: dict[str, str] = {"deploy": "log-output"}
        # Buffer length at the time each stream's widget was last brought up to date.
        self._rendered_lengths: dict[str, int] = {}

    def compose(self) -> ComposeResult:
        """Yield the title, the toolbar (tabs, copy, follow), and the two panes."""
        yield Label("Output", classes="section-title")
        with Horizontal(id="log-toolbar"):
            yield Tabs(
                Tab("Deploy Log", id="log-tab-deploy"), id="log-tabs", active="log-tab-deploy"
            )
            yield _copy_button("log-copy", "Copy active log")
            yield Button("↓ Follow", id="log-follow", variant="warning", classes="log-follow-btn")
        with Container(id="log-pane"):
            yield _FollowLog(
                id="log-output",
                classes="log-output",
                highlight=False,
                # One extra row leaves room for the "earlier lines not shown" note.
                max_lines=_VISIBLE_LOG_LINES + 1,
                wrap=True,
                auto_scroll=True,
            )
        with VerticalScroll(id="access-pane"):
            yield Static("Access details will appear after deployment completes.")

    def on_mount(self) -> None:
        """Register the deploy log, watch its follow state, and show the log pane."""
        log = self.query_one("#log-output", _FollowLog)
        self._logs["deploy"] = log
        self.watch(log, "following", self._on_following_changed)
        self.query_one("#log-pane").display = True
        self.query_one("#access-pane").display = False

    def _on_following_changed(self, following: bool) -> None:
        """Show the Follow button only while the active log is not following.

        The ``following`` argument is ignored: the watcher fires for every log
        widget, so the state of the active log is read instead.
        """
        self.query_one("#log-follow", Button).display = (
            self._active_tab != "access" and not self._active_log().following
        )

    def add_tab(self, tab_id: str, tab_label: str) -> None:
        """Add a new log tab if it doesn't already exist."""
        tab_bar = self.query_one("#log-tabs", Tabs)
        btn_id = f"log-tab-{tab_id}"
        if tab_bar.query(f"#{btn_id}"):
            return
        tab_bar.add_tab(Tab(tab_label, id=btn_id))

    def _active_log(self) -> _FollowLog:
        """Return the log widget for the active tab, creating it if needed."""
        return self._ensure_log(self._active_tab)

    def _ensure_log(self, tab_id: str) -> _FollowLog:
        """Return the log widget for ``tab_id``, mounting a hidden one on first use."""
        if tab_id in self._logs:
            return self._logs[tab_id]

        log_id = f"log-output-{tab_id}"
        log = _FollowLog(
            id=log_id,
            classes="log-output",
            highlight=False,
            max_lines=_VISIBLE_LOG_LINES + 1,
            wrap=True,
            auto_scroll=True,
        )
        # Starts hidden; _activate_tab decides which log is visible.
        log.display = False
        self._log_ids[tab_id] = log_id
        self._logs[tab_id] = log
        self.query_one("#log-pane", Container).mount(log)
        self.watch(log, "following", self._on_following_changed)
        return log

    def _shown_lines(self, buf: list[str]) -> list[str]:
        """Return the tail of ``buf`` to display, prefixed by a note if truncated."""
        lines = buf[-_VISIBLE_LOG_LINES:]
        if len(buf) > _VISIBLE_LOG_LINES:
            hidden = len(buf) - _VISIBLE_LOG_LINES
            note = f"... {hidden} earlier lines not shown - use log clipboard for full content ..."
            lines = [note, *lines]
        return lines

    def _sync_log(self, tab_id: str) -> _FollowLog:
        """Rebuild the widget for ``tab_id`` if its buffer grew since the last render."""
        log = self._ensure_log(tab_id)
        buf = self._buffers.get(tab_id, [])
        if self._rendered_lengths.get(tab_id) != len(buf):
            log.replace_lines(self._shown_lines(buf))
            self._rendered_lengths[tab_id] = len(buf)
        return log

    def _activate_tab(self, tab_id: str) -> None:
        """Make ``tab_id`` the visible tab and update pane and button visibility."""
        self._active_tab = tab_id
        tabs = self.query_one("#log-tabs", Tabs)
        tab_widget_id = f"log-tab-{tab_id}"
        # Only move the tab bar when it is out of sync and the tab actually exists.
        if tabs.active != tab_widget_id and tabs.query(f"#{tab_widget_id}"):
            tabs.active = tab_widget_id
        self.query_one("#log-pane").display = tab_id != "access"
        self.query_one("#access-pane").display = tab_id == "access"
        self.query_one("#log-copy", Button).display = tab_id != "access"
        if tab_id == "access":
            # The access tab has no log to follow.
            self.query_one("#log-follow", Button).display = False
            return
        active_log = self._sync_log(tab_id)
        for stream, log in self._logs.items():
            log.display = stream == tab_id
        self.query_one("#log-follow", Button).display = not active_log.following

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Handle the toolbar's copy and follow buttons; other buttons bubble on."""
        bid = event.button.id or ""
        if bid == "log-copy":
            self._copy_log()
            event.stop()
            return
        if bid == "log-follow":
            self._active_log().follow_end()
            event.stop()
            return

    def on_tabs_tab_activated(self, event: Tabs.TabActivated) -> None:
        """Switch panes when a tab of this widget's own tab bar is activated."""
        if event.tabs.id != "log-tabs":
            return
        tab_id = (event.tab.id or "").removeprefix("log-tab-")
        if not tab_id:
            return
        self._activate_tab(tab_id)
        event.stop()

    def append_line(self, line: str, stream: str = "deploy") -> None:
        """Buffer a line and write it to the Log widget if its tab is active."""
        self.append_lines([(line, stream)])

    def append_lines(self, entries: list[tuple[str, str]]) -> None:
        """Buffer log lines and render the active stream in one batch."""
        active_lines: list[str] = []
        for line, stream in entries:
            if stream not in self._buffers:
                self._buffers[stream] = []
            self._buffers[stream].append(line)
            if stream == self._active_tab:
                active_lines.append(line)
        if active_lines:
            # Cap what is drawn per batch; the buffer above still holds every line.
            if len(active_lines) > _MAX_ACTIVE_RENDER_LINES_PER_FLUSH:
                skipped = len(active_lines) - _MAX_ACTIVE_RENDER_LINES_PER_FLUSH
                active_lines = [
                    f"... {skipped} log lines skipped in live view - use log clipboard for full content ...",
                    *active_lines[-_MAX_ACTIVE_RENDER_LINES_PER_FLUSH:],
                ]
            self._ensure_log(self._active_tab).write_lines(active_lines)
            self._rendered_lengths[self._active_tab] = len(self._buffers[self._active_tab])

    def _copy_log(self) -> None:
        """Copy the active stream's full buffer to the clipboard and flash the button."""
        button = self.query_one("#log-copy", Button)
        text = "\n".join(self._buffers.get(self._active_tab, []))
        self.app.copy_to_clipboard(text)
        button.label = _COPIED_ICON
        self.app.notify("Copied to clipboard")
        self.set_timer(1.0, self._restore_copy_button)

    def _restore_copy_button(self) -> None:
        """Put the copy icon back if the button still shows the copied mark."""
        button = self.query_one("#log-copy", Button)
        if str(button.label) == _COPIED_ICON:
            button.label = _COPY_ICON

    def set_access_widget(self, widget: _ProxyAccessWidget) -> None:
        """Install access details and expose them as a first-class tab."""
        pane = self.query_one("#access-pane", VerticalScroll)
        pane.remove_children()
        pane.mount(widget)
        self.add_tab("access", "Access")
        self._activate_tab("access")
class _SshCommandBar(Horizontal):
    """One-row strip holding the SSH command for the AIR worker.

    The strip stays hidden until ``set_command`` supplies a command. After
    that, clicking anywhere on the strip or on its copy button copies the
    command to the clipboard.
    """

    def __init__(self, **kwargs: object) -> None:
        super().__init__(**kwargs)
        self._command = ""
        self.tooltip = "Copy SSH command"

    def compose(self) -> ComposeResult:
        """Yield the badge, the command text, and the copy button."""
        badge = Label("SSH", classes="ssh-badge")
        command_text = Static("", id="ssh-cmd", classes="ssh-cmd")
        copy_btn = _copy_button(
            "copy-ssh",
            "Copy SSH command",
            classes="copy-icon-btn ssh-copy-btn",
        )
        yield badge
        yield command_text
        yield copy_btn

    def set_command(self, command: str) -> None:
        """Store and display ``command``, then reveal the strip."""
        self._command = command
        text_widget = self.query_one("#ssh-cmd", Static)
        text_widget.update(command)
        self.display = True

    def on_click(self, event: events.Click) -> None:
        """Treat a click anywhere on the strip as a copy request."""
        self._copy_command()
        event.stop()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Copy when the strip's own copy button is pressed."""
        if event.button.id != "copy-ssh":
            return
        self._copy_command()
        event.stop()

    def _copy_command(self) -> None:
        """Copy the stored command and briefly show the copied mark on the button."""
        if self._command:
            self.app.copy_to_clipboard(self._command)
            copy_btn = self.query_one("#copy-ssh", Button)
            copy_btn.label = _COPIED_ICON
            self.app.notify("Copied to clipboard")

            def _revert() -> None:
                self._restore_copy_button(copy_btn)

            self.set_timer(1.0, _revert)

    def _restore_copy_button(self, button: Button) -> None:
        """Restore the copy icon unless the label has already changed."""
        showing_copied = str(button.label) == _COPIED_ICON
        if showing_copied:
            button.label = _COPY_ICON
class _PodStatusWidget(Vertical):
    """Shows pod status and provisioning progress fetched through the manager.

    Pod status is refreshed every 5 s and provisioning progress every 30 s.
    Each refresh runs in a worker thread and hands its result back to the UI
    thread, where the widgets are only touched if the data actually changed.
    """

    def __init__(self, **kwargs: object) -> None:
        super().__init__(**kwargs)
        self._host = ""
        self._port = 0
        self._manager: AirSimulationManager | None = None  # set by start_polling
        self._stop = threading.Event()
        # Guards against stacking a new worker on top of one still in flight.
        self._polling = False
        self._prov_polling = False
        # Interval timers created by start_polling, kept so a restart can cancel them.
        self._timers: list = []
        self._last_pod_rows: tuple[tuple[str, str, str, str, str], ...] = ()
        self._last_prov: tuple[int, int, tuple[str, ...]] = (0, 0, ())

    def compose(self) -> ComposeResult:
        """Yield the title, the provisioning summary, and the pod table."""
        yield Label("Pod Status", classes="section-title")
        yield Label("─" * 30, classes="section-divider")
        yield Static("Provisioned: —", id="prov-count")
        yield Static("", id="prov-detail")
        yield Label("─" * 30, classes="section-divider")
        yield DataTable(id="pod-table", show_cursor=False)

    def on_mount(self) -> None:
        """Create the pod table's columns."""
        table = self.query_one("#pod-table", DataTable)
        table.add_columns("NAME", "READY", "STATUS", "RESTARTS", "AGE")

    def start_polling(self, host: str, port: int, manager: object) -> None:
        """Begin (or restart) periodic polling against ``host``:``port``.

        Timers from an earlier call are cancelled first. Without that, every
        call would add another pair of timers and multiply the polling rate.

        Args:
            host: SSH host passed to the manager's status calls.
            port: SSH port passed to the manager's status calls.
            manager: Expected to be an ``AirSimulationManager``; the workers
                raise ``TypeError`` otherwise.
        """
        for timer in self._timers:
            timer.stop()
        self._timers = []

        self._host = host
        self._port = port
        self._manager = manager  # type: ignore[assignment]  # validated in the workers
        self._stop.clear()
        self._polling = False
        self._prov_polling = False
        self._timers.append(self.set_interval(5.0, self._tick))
        # Fetch provisioning once right away instead of waiting 30 s.
        self._prov_tick()
        self._timers.append(self.set_interval(30.0, self._prov_tick))

    def stop_polling(self) -> None:
        """Stop issuing refreshes; only sets an event, so any thread may call it."""
        self._stop.set()

    def _tick(self) -> None:
        """Start a pod refresh unless stopped, unconfigured, or already running."""
        if self._stop.is_set() or self._manager is None or self._polling:
            return
        self._do_refresh()

    def _prov_tick(self) -> None:
        """Start a provisioning refresh unless stopped, unconfigured, or already running."""
        if self._stop.is_set() or self._manager is None or self._prov_polling:
            return
        self._do_prov_refresh()

    @work(thread=True, exclusive=False)
    def _do_refresh(self) -> None:
        """Worker: fetch pod status and pass it to the UI thread."""
        self._polling = True
        try:
            if not isinstance(self._manager, AirSimulationManager):
                raise TypeError("expected AirSimulationManager for self._manager")
            pods = self._manager.get_pod_status(self._host, self._port)
            self.app.call_from_thread(self._update_table, pods)
        finally:
            self._polling = False

    @work(thread=True, exclusive=False)
    def _do_prov_refresh(self) -> None:
        """Worker: fetch provisioning progress and pass it to the UI thread."""
        self._prov_polling = True
        try:
            if not isinstance(self._manager, AirSimulationManager):
                raise TypeError("expected AirSimulationManager for self._manager")
            prov, total, remaining = self._manager.get_provisioning_status(self._host, self._port)
            self.app.call_from_thread(self._update_prov, prov, total, remaining)
        finally:
            self._prov_polling = False

    def _update_prov(self, prov: int, total: int, remaining: list[str]) -> None:
        """Update the provisioning summary if it differs from the last one shown."""
        next_state = (prov, total, tuple(remaining))
        if next_state == self._last_prov:
            return
        self._last_prov = next_state
        try:
            self.query_one("#prov-count", Static).update(
                f"Provisioned: {prov}/{total}" if total else "Provisioned: —"
            )
            detail = ""
            if remaining and total and prov < total:
                detail = "Pending: " + ", ".join(remaining)
            self.query_one("#prov-detail", Static).update(detail)
        except Exception:
            # Deliberately best-effort: a late worker result may arrive after
            # the widgets are gone, and that must not take down the UI.
            return

    def _update_table(self, pods: list[dict[str, str]]) -> None:
        """Rebuild the pod table if the rows differ from the last ones shown."""
        rows = tuple(
            (
                # Names longer than 42 characters are cut to 39 plus "...".
                p["name"][:39] + "..." if len(p["name"]) > 42 else p["name"],
                p["ready"],
                p["status"],
                p["restarts"],
                p["age"],
            )
            for p in pods
        )
        if rows == self._last_pod_rows:
            return
        self._last_pod_rows = rows
        try:
            table = self.query_one("#pod-table", DataTable)
        except Exception:
            # Best-effort for the same reason as in _update_prov.
            return
        table.clear()
        for row in rows:
            table.add_row(*row)
_SshCommandBar(id="ssh-info-bar", classes="ssh-info-bar") + + with Vertical(id="launch-dashboard"): + with Horizontal(id="dashboard-top"): + with VerticalScroll(id="step-panel"): + yield _StepListWidget(id="step-list") + yield _PodStatusWidget(id="pod-status-panel") + yield _LogViewerWidget(id="log-viewer") + + def on_button_pressed(self, event: Button.Pressed) -> None: + if event.button.id == "btn-launch" and not self._bringup_running: + self._start_bringup() + event.stop() + + def _set_status(self, markup: str) -> None: + self.query_one("#launch-status", Static).update(markup) + + def _show_ssh_command(self, ssh_cmd: str) -> None: + self._ssh_cmd_text = ssh_cmd + self.query_one("#ssh-info-bar", _SshCommandBar).set_command(ssh_cmd) + + def _start_bringup(self) -> None: + if self._config.run_mock_topology_job: + if not self._config.mock_blueprint: + self._set_status( + "[bold red][!] Mock blueprint required — set it on the Topology screen.[/bold red]" + ) + return + if not self._config.deployment_name: + self._set_status( + "[bold red][!] Deployment name required — set it on the Topology screen.[/bold red]" + ) + return + if not self._config.mock_topology_path: + self._set_status( + "[bold red][!] Mock topology path required — set it on the Topology screen.[/bold red]" + ) + return + elif not self._config.topology_path: + self._set_status( + "[bold red][!] No topology file — set one on the Topology screen.[/bold red]" + ) + return + if not self._config.ngc_api_key: + self._set_status( + "[bold red][!] NGC API key required — set it on the Options screen.[/bold red]" + ) + return + + stamp = datetime.now().strftime("%Y%m%d-%H%M%S") + self._deploy_log_path = Path(tempfile.gettempdir()) / f"nvcm-deploy-{stamp}.log" + self._bringup_running = True + self._monitor_stop.clear() + self.query_one("#btn-launch", Button).disabled = True + self._set_status(f"[yellow]Running... 
log → {self._deploy_log_path}[/yellow]") + self._run_orchestrator() + + @work(thread=True, exclusive=False) + def _run_orchestrator(self) -> None: + log_path = self._deploy_log_path + with open(log_path if log_path else os.devnull, "w") as lf: + cb = _TuiCallback(self, log_file=lf) + orchestrator = SimOrchestrator(self._config, cb) + orchestrator.run() + + @work(thread=True, exclusive=False) + def _run_monitoring(self, host: str, port: int) -> None: + manager = AirSimulationManager( + ngc_api_key=self._config.ngc_api_key, + use_internal=self._config.use_internal, + org_id=self._config.org_id, + ) + + class _Fwd(logging.Handler): + def __init__(self, screen: LaunchScreen) -> None: + super().__init__() + self._s = screen + + def emit(self, record: logging.LogRecord) -> None: + line = self.format(record) + stream = "deploy" + if "[DHCP]" in line: + stream = "dhcp" + line = _clean_dhcp_line(line) + elif "[ZTP]" in line: + stream = "ztp" + line = _clean_ztp_line(line) + self._s.enqueue_log_line(line, stream) + + pkg_logger = logging.getLogger("nv_config_manager_installer.air_sim") + prev_level = pkg_logger.level + fwd = _Fwd(self) + fwd.setFormatter(logging.Formatter("%(asctime)s %(message)s", datefmt="%H:%M:%S")) + pkg_logger.addHandler(fwd) + pkg_logger.setLevel(logging.DEBUG) + try: + manager.monitor_services(host, port, stop_event=self._monitor_stop) + finally: + pkg_logger.removeHandler(fwd) + pkg_logger.setLevel(prev_level) + + def on_worker_state_changed(self, event: Worker.StateChanged) -> None: + if event.worker.name == "_run_orchestrator" and event.state == WorkerState.ERROR: + self._bringup_running = False + self.query_one("#btn-launch", Button).disabled = False + self._set_status( + "[bold red][!] 
Worker crashed — check the Textual log for details.[/bold red]" + ) + + def on__step_updated(self, event: _StepUpdated) -> None: + self.query_one("#step-list", _StepListWidget).update_step(event.step_id, event.status) + + def on__log_line(self, event: _LogLine) -> None: + self.enqueue_log_line(event.line, event.stream) + + def enqueue_log_line(self, line: str, stream: str = "deploy") -> None: + """Queue a log line from any thread and batch UI refresh work.""" + self._pending_log_lines.put((line, stream)) + self._schedule_log_flush() + + def _schedule_log_flush(self) -> None: + with self._log_flush_lock: + if self._log_flush_scheduled: + return + self._log_flush_scheduled = True + + def schedule() -> None: + self.set_timer(_LOG_FLUSH_INTERVAL, self._flush_log_lines) + + if threading.current_thread() is threading.main_thread(): + schedule() + else: + self.app.call_from_thread(schedule) + + def _flush_log_lines(self) -> None: + with self._log_flush_lock: + self._log_flush_scheduled = False + + try: + viewer = self.query_one("#log-viewer", _LogViewerWidget) + except Exception: + return + + batch: list[tuple[str, str]] = [] + processed = 0 + while processed < _MAX_LOG_DRAIN_PER_FLUSH: + try: + line, stream = self._pending_log_lines.get_nowait() + except Empty: + break + batch.append((line, stream)) + processed += 1 + + if batch: + viewer.append_lines(batch) + + if not self._pending_log_lines.empty(): + self._schedule_log_flush() + + def on__ssh_ready(self, event: _SshReady) -> None: + self._host = event.host + self._port = event.port + ssh_cmd = ( + f"sshpass -p {NVCM_BOX_PASSWORD} ssh" + f" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" + f" -o PreferredAuthentications=password" + f" -p {event.port} {NVCM_BOX_USER}@{event.host}" + ) + self._show_ssh_command(ssh_cmd) + manager = AirSimulationManager( + ngc_api_key=self._config.ngc_api_key, + use_internal=self._config.use_internal, + org_id=self._config.org_id, + ) + self.query_one("#pod-status-panel", 
_PodStatusWidget).start_polling( + event.host, event.port, manager + ) + + def on__deploy_started(self, event: _DeployStarted) -> None: + pass + + def on__bringup_complete(self, event: _BringupComplete) -> None: + self._bringup_running = False + self.query_one("#btn-launch", Button).disabled = False + if event.success: + self._host = event.host + self._port = event.port + self._set_status("[bold green][*] Bringup complete![/bold green]") + self.app.notify("Simulation bringup complete!", severity="information") + viewer = self.query_one("#log-viewer", _LogViewerWidget) + viewer.add_tab("dhcp", "DHCP") + viewer.add_tab("ztp", "ZTP") + if event.host: + self._show_proxy_panel(event.host, event.port) + self._run_monitoring(event.host, event.port) + else: + self._set_status("[bold red][!] Bringup failed — check the log above[/bold red]") + self.app.notify("Bringup failed. See log for details.", severity="error") + + def _show_proxy_panel(self, host: str, port: int) -> None: + proxy = ProxyInfo(host=host, port=port) + widget = _ProxyAccessWidget(proxy, id="proxy-access") + self.query_one("#log-viewer", _LogViewerWidget).set_access_widget(widget) + + def on_unmount(self) -> None: + self._monitor_stop.set() + try: + self.query_one("#pod-status-panel", _PodStatusWidget).stop_polling() + except Exception: + pass + + def get_status(self, config: SimConfig) -> str: + if self._bringup_running: + return "[>]" + if self._host: + return "[*]" + return "[ ]" diff --git a/installer/src/nv_config_manager_installer/tui/air_sim/screens/options.py b/installer/src/nv_config_manager_installer/tui/air_sim/screens/options.py new file mode 100644 index 0000000..1e5d0ff --- /dev/null +++ b/installer/src/nv_config_manager_installer/tui/air_sim/screens/options.py @@ -0,0 +1,188 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
class OptionsScreen(Container):
    """Options panel covering auth, auto-configure, deployment, and advanced settings."""

    def __init__(self, config: SimConfig, **kwargs: object) -> None:
        super().__init__(**kwargs)
        self._config = config

    def compose(self) -> ComposeResult:
        """Yield every option widget, pre-filled from the stored config."""
        cfg = self._config

        def rule() -> Label:
            return Label("─" * 40, classes="section-divider")

        yield Label("Options", classes="section-title")
        yield rule()

        # AIR / auth section
        yield Label("AIR / Auth", classes="subsection-label")
        yield Label("NGC API Key (or set NGC_API_KEY env var)", classes="field-label")
        yield Input(
            value=cfg.ngc_api_key,
            password=True,
            placeholder="nvapi-...",
            id="ngc-api-key",
        )
        yield LabeledSwitch("Use Public Air", value=not cfg.use_internal, id="use-public-air")

        # Auto-configure section; the nested fields are shown only when enabled.
        yield rule()
        yield Label("Auto-Configure", classes="subsection-label")
        yield LabeledSwitch(
            "Auto-configure server on boot (attach cloud-init)",
            value=cfg.auto_configure,
            id="auto-configure",
        )
        with Vertical(id="autocfg-fields"):
            yield Label(
                "Git Token (optional; only needed for private forks)",
                classes="field-label",
            )
            yield Input(
                value=cfg.git_token,
                password=True,
                placeholder="token for a private fork",
                id="git-token",
            )
            yield Label("nv-config-manager repo URL", classes="field-label")
            yield Input(value=cfg.config_manager_repo, id="config-manager-repo")

        # Deployment section
        yield rule()
        yield Label("Deployment", classes="subsection-label")
        yield Label("nv-config-manager Git Ref", classes="field-label")
        yield Input(value=cfg.config_manager_ref, id="config-manager-ref")
        yield Static("", id="build-mode-hint", classes="field-hint")
        yield Label(
            "Cumulus Version Override (leave blank to use topology values)",
            classes="field-label",
        )
        yield Input(value=cfg.cumulus_version, placeholder="5.16.1", id="cumulus-version")

        yield Label("Deployment Size", classes="field-label")
        with RadioSet(id="size-radio"):
            for size in _SIZES:
                yield RadioButton(size, id=f"size-{size}", value=cfg.size == size)

        yield LabeledSwitch(
            "Run nv-config-manager-installer deploy after setup",
            value=cfg.deploy,
            id="deploy",
        )

        # Advanced section
        yield rule()
        yield Label("Advanced", classes="subsection-label")
        yield LabeledSwitch(
            "Disable aggressive DHCP tuning on Cumulus switches",
            value=cfg.no_aggressive_dhcp,
            id="no-aggressive-dhcp",
        )
        yield LabeledSwitch(
            "Skip reset of Cumulus nodes before DHCP refresh",
            value=cfg.no_reset_before_dhcp,
            id="no-reset-dhcp",
        )
        yield Label("Cloud-init Wait Timeout (seconds)", classes="field-label")
        yield Input(value=str(cfg.wait_timeout), id="wait-timeout")
        yield Label("Deploy Timeout (seconds)", classes="field-label")
        yield Input(value=str(cfg.deploy_timeout), id="deploy-timeout")

    def on_mount(self) -> None:
        """Bring the dependent widgets in line with the initial values."""
        self._update_autocfg_fields()
        self._update_build_mode_hint()

    def on_input_changed(self, event: Input.Changed) -> None:
        """Refresh the build hint whenever the git ref input changes."""
        if event.input.id != "config-manager-ref":
            return
        self._update_build_mode_hint()

    def on_labeled_switch_changed(self, event: LabeledSwitch.Changed) -> None:
        """Show or hide the auto-configure fields when its switch flips."""
        if event.labeled_switch.id != "auto-configure":
            return
        self._update_autocfg_fields()

    def _update_build_mode_hint(self) -> None:
        """Render the hint naming the ref in the input (``main`` when blank)."""
        typed = self.query_one("#config-manager-ref", Input).value.strip()
        hint_widget = self.query_one("#build-mode-hint", Static)
        ref = typed if typed else "main"
        message = (
            f"[dim]Images will be built locally from nv-config-manager ref {ref!r}; "
            "registry pulls are disabled for AIR demos.[/dim]"
        )
        hint_widget.update(message)

    def _update_autocfg_fields(self) -> None:
        """Display the auto-configure field group only while the switch is on."""
        self.query_one("#autocfg-fields").display = self.query_one(
            "#auto-configure", LabeledSwitch
        ).value

    def write_to_config(self, config: SimConfig) -> None:
        """Copy the widget values into *config*.

        A timeout that does not parse as an integer is left unchanged on
        *config* and reported through a warning notification.
        """

        def text(selector: str) -> str:
            return self.query_one(selector, Input).value.strip()

        def flag(selector: str) -> bool:
            return self.query_one(selector, LabeledSwitch).value

        config.ngc_api_key = text("#ngc-api-key")
        config.use_internal = not flag("#use-public-air")
        config.auto_configure = flag("#auto-configure")
        config.git_token = text("#git-token")
        config.config_manager_repo = text("#config-manager-repo")
        config.config_manager_ref = text("#config-manager-ref")
        config.cumulus_version = text("#cumulus-version")

        # First pressed radio button wins; with none pressed the size is kept.
        chosen = next(
            (size for size in _SIZES if self.query_one(f"#size-{size}", RadioButton).value),
            None,
        )
        if chosen is not None:
            config.size = chosen

        config.deploy = flag("#deploy")
        config.no_aggressive_dhcp = flag("#no-aggressive-dhcp")
        config.no_reset_before_dhcp = flag("#no-reset-dhcp")

        timeouts = (
            ("#wait-timeout", "wait_timeout", "Invalid wait timeout - must be an integer"),
            ("#deploy-timeout", "deploy_timeout", "Invalid deploy timeout - must be an integer"),
        )
        for selector, attr, complaint in timeouts:
            raw = self.query_one(selector, Input).value
            try:
                parsed = int(raw)
            except ValueError:
                self.app.notify(complaint, severity="warning")
            else:
                setattr(config, attr, parsed)

    def sync_from_config(self, config: SimConfig) -> None:
        """Load *config* into the widgets, then refresh the dependent widgets."""

        def put_text(selector: str, value: str) -> None:
            self.query_one(selector, Input).value = value

        def put_flag(selector: str, value: bool) -> None:
            self.query_one(selector, LabeledSwitch).value = value

        put_text("#ngc-api-key", config.ngc_api_key)
        put_flag("#use-public-air", not config.use_internal)
        put_flag("#auto-configure", config.auto_configure)
        put_text("#git-token", config.git_token)
        put_text("#config-manager-repo", config.config_manager_repo)
        put_text("#config-manager-ref", config.config_manager_ref)
        put_text("#cumulus-version", config.cumulus_version)
        for size in _SIZES:
            self.query_one(f"#size-{size}", RadioButton).value = config.size == size
        put_flag("#deploy", config.deploy)
        put_flag("#no-aggressive-dhcp", config.no_aggressive_dhcp)
        put_flag("#no-reset-dhcp", config.no_reset_before_dhcp)
        put_text("#wait-timeout", str(config.wait_timeout))
        put_text("#deploy-timeout", str(config.deploy_timeout))
        self._update_autocfg_fields()
        self._update_build_mode_hint()

    def get_status(self, config: SimConfig) -> str:
        """Return ``[*]`` when an NGC API key is set, otherwise ``[!]``."""
        return "[*]" if config.ngc_api_key else "[!]"
class TopologyScreen(Container):
    """Select the topology source, template plugins, names, and server mode.

    Template plugin rows are rebuilt from ``self._config.template_plugin_paths``;
    row widget ids embed the row's position in that list
    (``template-plugin-<idx>-path`` / ``template-plugin-<idx>-remove``).
    """

    def __init__(self, config: SimConfig, **kwargs: object) -> None:
        super().__init__(**kwargs)
        self._config = config
        # True while sync_from_config runs, so on_select_changed ignores events.
        self._syncing = False

    def compose(self) -> ComposeResult:
        """Yield the topology widgets, pre-filled from the stored config."""
        yield Label("Topology", classes="section-title")
        yield Label("─" * 40, classes="section-divider")

        yield Label("Pre-built Config", classes="field-label")
        yield Select(
            [(config.label, config.id) for config in PREBUILT_CONFIGS],
            prompt="Custom / manual",
            allow_blank=True,
            id="prebuilt-config",
        )
        yield Label(
            "Selecting a preset replaces the wizard fields but keeps your save file unchanged.",
            classes="field-hint",
        )

        yield Label("─" * 40, classes="section-divider")
        yield Label("Mock Topology", classes="subsection-label")
        yield LabeledSwitch(
            "Build AIR topology from development/mock_topology context",
            value=self._config.run_mock_topology_job,
            id="run-mock-topology-job",
        )
        with Vertical(id="mock-topology-fields"):
            yield Label("Blueprint", classes="field-label")
            yield Input(
                value=self._config.mock_blueprint,
                placeholder="air_superpod",
                id="mock-blueprint",
            )
            yield Label("Deployment Name", classes="field-label")
            yield Input(
                value=self._config.deployment_name,
                placeholder="demo",
                id="deployment-name",
            )
            yield Label("Mock Topology Path", classes="field-label")
            yield Input(
                value=self._config.mock_topology_path,
                placeholder="development/mock_topology",
                id="mock-topology-path",
            )

        with Vertical(id="direct-topology-fields"):
            yield Label("AIR Topology YAML", classes="field-label")
            with Horizontal(classes="field-row"):
                yield Input(
                    value=self._config.topology_path,
                    placeholder="samples/custom_air_topology.yaml",
                    id="topology-path",
                )
                yield Button("Browse", id="browse-topology", variant="default")

        yield Label("─" * 40, classes="section-divider")
        yield Label("Template Plugins", classes="subsection-label")
        yield Label(
            "Paths to template plugin directories or .tar.gz files paired with this topology",
            classes="field-hint",
        )
        yield Button("+ Add Template Plugin", id="add-template-plugin", classes="add-button")
        yield Vertical(id="template-plugin-list")

        yield Label("Simulation Name (leave blank to auto-generate)", classes="field-label")
        yield Input(
            value=self._config.simulation_name,
            placeholder="NVCM-E2E-SUPERPOD-DEMO",
            id="sim-name",
        )

        yield Label("OOB Management Server Name", classes="field-label")
        yield Input(
            value=self._config.oob_server_name,
            id="oob-server-name",
        )

        yield Label("─" * 40, classes="section-divider")
        yield Label("Server Mode", classes="field-label")
        with RadioSet(id="server-mode"):
            yield RadioButton(
                "Use existing server (e.g. oob-mgmt-server)",
                id="mode-existing",
                value=self._config.server_mode == "use-existing",
            )
            yield RadioButton(
                "Create new server node attached to a switch",
                id="mode-create",
                value=self._config.server_mode == "create-new",
            )

        with Vertical(id="attach-fields"):
            yield Label("Switch Name", classes="field-label")
            yield Input(
                value=self._config.attach_switch,
                placeholder="leaf1-gp1-smn1-hfa01",
                id="attach-switch",
            )
            yield Label("Switch Interface", classes="field-label")
            yield Input(
                value=self._config.attach_interface,
                placeholder="swp48",
                id="attach-interface",
            )

    def on_mount(self) -> None:
        """Apply initial visibility and build the template plugin rows."""
        self._update_attach_fields()
        self._update_topology_fields()
        self._rebuild_template_plugins()

    def on_radio_set_changed(self, event: RadioSet.Changed) -> None:
        """Toggle the attach fields when the server mode changes."""
        if event.radio_set.id == "server-mode":
            self._update_attach_fields()

    def on_labeled_switch_changed(self, event: LabeledSwitch.Changed) -> None:
        """Swap mock/direct topology fields when the mock switch flips."""
        if event.labeled_switch.id == "run-mock-topology-job":
            self._update_topology_fields()

    def on_select_changed(self, event: Select.Changed) -> None:
        """Load the chosen pre-built config and hand it to the app."""
        if event.select.id != "prebuilt-config" or self._syncing:
            return
        if event.value == Select.BLANK:
            return
        config = load_prebuilt_config(str(event.value))
        app = self.app
        if hasattr(app, "apply_prebuilt_config"):
            app.apply_prebuilt_config(config)
            self.app.notify("Loaded pre-built config")

    def _update_attach_fields(self) -> None:
        """Show the attach fields only in create-new server mode."""
        mode = "create-new" if self.query_one("#mode-create", RadioButton).value else "use-existing"
        self.query_one("#attach-fields").display = mode == "create-new"

    def _update_topology_fields(self) -> None:
        """Show either the mock-topology fields or the direct YAML fields."""
        use_mock = self.query_one("#run-mock-topology-job", LabeledSwitch).value
        self.query_one("#mock-topology-fields").display = use_mock
        self.query_one("#direct-topology-fields").display = not use_mock

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dispatch the Browse, Add Template Plugin, and per-row Remove buttons."""
        button_id = event.button.id or ""
        if button_id == "browse-topology":
            self._pick_topology()
        elif button_id == "add-template-plugin":
            self._collect_template_plugins()
            self._pick_template_plugin()
        elif button_id.startswith("template-plugin-") and button_id.endswith("-remove"):
            try:
                idx = int(button_id.split("-")[2])
            except (ValueError, IndexError):
                return
            self._remove_template_plugin(idx)

    @work
    async def _pick_topology(self) -> None:
        """Open a file picker and copy the chosen path into the topology input."""
        picked = await self.app.push_screen_wait(
            FileOpen(
                title="Select Topology YAML",
                filters=Filters(("YAML", lambda p: p.suffix in {".yaml", ".yml"})),
            )
        )
        if picked:
            self.query_one("#topology-path", Input).value = str(picked)

    def _rebuild_template_plugins(self) -> None:
        """Replace the plugin rows with one row per stored path."""
        container = self.query_one("#template-plugin-list", Vertical)
        container.remove_children()
        for idx, path in enumerate(self._config.template_plugin_paths):
            row = Container(classes="template-plugin-row")
            row.compose_add_child(
                Input(
                    value=path,
                    placeholder="development/air_sim/template_plugins/my-plugin",
                    id=f"template-plugin-{idx}-path",
                )
            )
            row.compose_add_child(
                Button(
                    "Remove",
                    variant="error",
                    id=f"template-plugin-{idx}-remove",
                    classes="remove-button",
                )
            )
            container.mount(row)

    @work
    async def _pick_template_plugin(self) -> None:
        """Open a directory picker and append the chosen directory as a plugin."""
        picked = await self.app.push_screen_wait(
            SelectDirectory(title="Select template plugin directory")
        )
        if picked is None:
            return
        self._collect_template_plugins()
        self._config.template_plugin_paths.append(str(picked))
        self._rebuild_template_plugins()

    def _read_template_plugin_rows(self) -> list[str]:
        """Return one value per stored plugin path, in row order, blanks included.

        A row whose Input cannot be found keeps its stored value instead of
        being dropped, so a lookup miss never discards configured paths.
        """
        rows: list[str] = []
        for idx, stored in enumerate(self._config.template_plugin_paths):
            try:
                rows.append(self.query_one(f"#template-plugin-{idx}-path", Input).value.strip())
            except NoMatches:
                rows.append(stored)
        return rows

    def _remove_template_plugin(self, idx: int) -> None:
        """Remove the row at position *idx* and rebuild the list.

        Blank rows are filtered only after the removal: filtering first would
        shift positions and make the button's index point at a different row.
        """
        rows = self._read_template_plugin_rows()
        if 0 <= idx < len(rows):
            del rows[idx]
        self._config.template_plugin_paths = [row for row in rows if row]
        self._rebuild_template_plugins()

    def _collect_template_plugins(self) -> None:
        """Store the current row values on the config, dropping blank rows."""
        self._config.template_plugin_paths = [
            row for row in self._read_template_plugin_rows() if row
        ]

    def write_to_config(self, config: SimConfig) -> None:
        """Copy the widget values, including plugin paths, into *config*."""
        self._collect_template_plugins()
        config.topology_path = self.query_one("#topology-path", Input).value.strip()
        config.run_mock_topology_job = self.query_one("#run-mock-topology-job", LabeledSwitch).value
        config.mock_blueprint = self.query_one("#mock-blueprint", Input).value.strip()
        config.deployment_name = self.query_one("#deployment-name", Input).value.strip()
        config.mock_topology_path = self.query_one("#mock-topology-path", Input).value.strip()
        config.template_plugin_paths = list(self._config.template_plugin_paths)
        config.simulation_name = self.query_one("#sim-name", Input).value.strip()
        config.oob_server_name = self.query_one("#oob-server-name", Input).value.strip()
        config.server_mode = (
            "create-new" if self.query_one("#mode-create", RadioButton).value else "use-existing"
        )
        config.attach_switch = self.query_one("#attach-switch", Input).value.strip()
        config.attach_interface = self.query_one("#attach-interface", Input).value.strip()

    def sync_from_config(self, config: SimConfig) -> None:
        """Load *config* into the widgets, then refresh field visibility."""
        self._syncing = True
        try:
            self.query_one("#topology-path", Input).value = config.topology_path
            self.query_one(
                "#run-mock-topology-job", LabeledSwitch
            ).value = config.run_mock_topology_job
            self.query_one("#mock-blueprint", Input).value = config.mock_blueprint
            self.query_one("#deployment-name", Input).value = config.deployment_name
            self.query_one("#mock-topology-path", Input).value = config.mock_topology_path
            self._config.template_plugin_paths = list(config.template_plugin_paths)
            self._rebuild_template_plugins()
            self.query_one("#sim-name", Input).value = config.simulation_name
            self.query_one("#oob-server-name", Input).value = config.oob_server_name
            self.query_one("#mode-existing", RadioButton).value = (
                config.server_mode == "use-existing"
            )
            self.query_one("#mode-create", RadioButton).value = config.server_mode == "create-new"
            self.query_one("#attach-switch", Input).value = config.attach_switch
            self.query_one("#attach-interface", Input).value = config.attach_interface
        finally:
            self._syncing = False
        self._update_attach_fields()
        self._update_topology_fields()

    def get_status(self, config: SimConfig) -> str:
        """Return ``[*]`` when the selected topology source is fully specified."""
        if config.run_mock_topology_job:
            if config.mock_blueprint and config.deployment_name and config.mock_topology_path:
                return "[*]"
            return "[!]"
        if config.topology_path:
            return "[*]"
        return "[!]"
) for field_name, label, optional, keys in _K8S_GROUPS: grp: K8sSecretGroup = getattr(s.k8s, field_name) diff --git a/installer/tests/__init__.py b/installer/tests/__init__.py index e69de29..bd904e8 100644 --- a/installer/tests/__init__.py +++ b/installer/tests/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/installer/tests/air_sim/test_context_topology.py b/installer/tests/air_sim/test_context_topology.py new file mode 100644 index 0000000..7803498 --- /dev/null +++ b/installer/tests/air_sim/test_context_topology.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _devices_by_name(site_design: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """Index the site design's device records by their ``name`` field."""
    indexed: dict[str, dict[str, Any]] = {}
    for record in site_design["devices"]:
        indexed[record["name"]] = record
    return indexed


def test_air_trial_builds_devices_and_explicit_eth0_macs() -> None:
    """air_trial yields at least six devices and an eth0 MAC for every Cumulus device."""
    design = build_site_design_from_mock_context("air_trial", "demo")

    by_name = _devices_by_name(design)
    assert "oob-mgmt-server" in by_name
    assert len(by_name) >= 6

    cumulus = set()
    for record in design["devices"]:
        if "Cumulus" in record.get("platform", ""):
            cumulus.add(record["name"])
    assert cumulus

    macs = {}
    for iface in design["interfaces"]:
        if iface["name"] == "eth0" and iface["device"] in cumulus:
            macs[iface["device"]] = iface.get("mac_address")
    assert set(macs) == cumulus
    assert all(isinstance(value, str) and value for value in macs.values())


def test_air_superpod_builds_non_empty_topology_source() -> None:
    """air_superpod yields non-empty sections and includes the OOB management server."""
    design = build_site_design_from_mock_context("air_superpod", "demo")

    assert design["devices"]
    assert design["interfaces"]
    assert design["ip_addresses"]
    assert design["cabling_assignments"]["connections"]
    assert "oob-mgmt-server" in _devices_by_name(design)
def test_install_config_uses_mock_topology_without_ingest_or_template_plugin_paths() -> None:
    """The generated content holds only the mock-topology job and its post-deploy run."""
    sim = SimConfig(
        mock_blueprint="air_trial",
        deployment_name="demo",
        mock_topology_path="development/mock_topology",
    )

    generated = generate_air_sim_install_config(
        sim,
        site_name="air-demo",
        lb_allowed_prefixes=["172.18.255.0/24"],
    )

    content = generated["content"]
    expected_input = json.dumps({"blueprint": "air_trial", "deployment_name": "demo"})
    assert content["template_plugins"] == []
    assert content["jobs"] == [{"path": f"{CONFIG_MANAGER_REMOTE_DIR}/development/mock_topology"}]
    assert content["run_after_deploy"] == [
        {
            "job": "mock_topology.jobs.mock_topology_design.MockTopologyDesign",
            "input": expected_input,
        }
    ]
    serialized = json.dumps(content).lower()
    assert "ingest" not in serialized
    assert "superpod-template" not in serialized


def test_build_content_jobs_appends_extra_jobs() -> None:
    """Extra job paths and post-deploy runs follow the mock-topology entries."""
    sim = SimConfig(
        run_mock_topology_job=True,
        mock_blueprint="air_superpod",
        deployment_name="demo",
        mock_topology_path="development/mock_topology",
        extra_job_paths=["development/custom_jobs"],
        extra_run_after_deploy=[{"job": "custom.jobs.RunDemo", "input": {"name": "demo"}}],
    )

    jobs, post_deploy = build_content_jobs(sim)

    expected_jobs = [
        {"path": f"{CONFIG_MANAGER_REMOTE_DIR}/{relative}"}
        for relative in ("development/mock_topology", "development/custom_jobs")
    ]
    assert jobs == expected_jobs
    first, second = post_deploy[0], post_deploy[1]
    assert first["job"] == "mock_topology.jobs.mock_topology_design.MockTopologyDesign"
    assert second == {
        "job": "custom.jobs.RunDemo",
        "input": json.dumps({"name": "demo"}),
    }


def test_custom_jobs_do_not_infer_mock_topology() -> None:
    """With the mock job off, only the custom entries are emitted, unchanged."""
    sim = SimConfig(
        run_mock_topology_job=False,
        extra_job_paths=["/opt/custom/jobs"],
        extra_run_after_deploy=[{"job": "custom.jobs.RunDemo", "input": ""}],
    )

    jobs, post_deploy = build_content_jobs(sim)

    assert jobs == [{"path": "/opt/custom/jobs"}]
    assert post_deploy == [{"job": "custom.jobs.RunDemo", "input": ""}]


def test_template_plugin_paths_are_included_without_generation() -> None:
    """Relative plugin paths gain the remote prefix; absolute ones pass through."""
    sim = SimConfig(
        template_plugin_paths=[
            "development/template_plugins/demo",
            "/opt/external/template-plugin.tar.gz",
        ]
    )
    expected = [
        {"path": f"{CONFIG_MANAGER_REMOTE_DIR}/development/template_plugins/demo"},
        {"path": "/opt/external/template-plugin.tar.gz"},
    ]

    assert build_template_plugins(sim) == expected

    generated = generate_air_sim_install_config(
        sim,
        site_name="air-demo",
        lb_allowed_prefixes=["172.18.255.0/24"],
    )
    assert generated["content"]["template_plugins"] == expected


def test_prebuilt_demos_include_static_template_plugin() -> None:
    """Both pre-built demos reference the default demo template plugin path."""
    plugin_path = DEFAULT_AIR_DEMO_TEMPLATE_PLUGIN_PATH.as_posix()
    remote_entry = {"path": f"{CONFIG_MANAGER_REMOTE_DIR}/{plugin_path}"}

    for config_id in ("air-trial", "superpod"):
        loaded = load_prebuilt_config(config_id)
        assert loaded.template_plugin_paths == [plugin_path]
        assert build_template_plugins(loaded) == [remote_entry]


def test_demo_template_plugin_is_static_and_public_named() -> None:
    """The demo plugin has a pyproject, no scripts directory, and the expected text."""
    root = PROJECT_ROOT / DEFAULT_AIR_DEMO_TEMPLATE_PLUGIN_PATH

    assert (root / "pyproject.toml").is_file()
    assert not (root / "scripts").exists()

    chunks = []
    for entry in root.rglob("*"):
        if entry.is_file():
            chunks.append(entry.read_text())
    combined = "\n".join(chunks)
    assert "generate_template_plugin" not in combined
    assert "kiwi" not in combined.lower()
    assert 'dhcp_servers("nvcm", true)' in combined
+"""Tests for AIR sim orchestrator topology resolution.""" + +from __future__ import annotations + +import pytest + +from nv_config_manager_installer.air_sim.orchestrator import SimOrchestrator +from nv_config_manager_installer.air_sim.sim_config import SimConfig + + +class _Callback: + def on_step(self, step_id, status, message=""): + pass + + def on_log(self, line): + pass + + def on_ssh_ready(self, host, port): + pass + + def on_deploy_started(self, host, port): + pass + + def on_complete(self, success, host="", port=0): + pass + + +def test_resolve_topology_prefers_direct_path() -> None: + cfg = SimConfig(topology_path="/tmp/direct.yaml", run_mock_topology_job=True) + orchestrator = SimOrchestrator(cfg, _Callback()) + + assert orchestrator._resolve_topology_path(cfg) == "/tmp/direct.yaml" + + +def test_resolve_topology_generates_from_mock_context(monkeypatch: pytest.MonkeyPatch) -> None: + calls = [] + + def fake_write_site_design_from_mock_context(blueprint: str, deployment_name: str) -> str: + calls.append((blueprint, deployment_name)) + return "/tmp/generated.yaml" + + monkeypatch.setattr( + "nv_config_manager_installer.air_sim.orchestrator.write_site_design_from_mock_context", + fake_write_site_design_from_mock_context, + ) + cfg = SimConfig( + topology_path="", + run_mock_topology_job=True, + mock_blueprint="air_trial", + deployment_name="demo", + ) + orchestrator = SimOrchestrator(cfg, _Callback()) + + assert orchestrator._resolve_topology_path(cfg) == "/tmp/generated.yaml" + assert calls == [("air_trial", "demo")] + + +def test_resolve_topology_requires_direct_path_for_custom_flows() -> None: + cfg = SimConfig(topology_path="", run_mock_topology_job=False) + orchestrator = SimOrchestrator(cfg, _Callback()) + + with pytest.raises(RuntimeError, match="topology_path is required"): + orchestrator._resolve_topology_path(cfg) diff --git a/installer/tests/air_sim/test_tui_launch.py b/installer/tests/air_sim/test_tui_launch.py new file mode 100644 index 
0000000..8f7ca1e --- /dev/null +++ b/installer/tests/air_sim/test_tui_launch.py @@ -0,0 +1,194 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Textual tests for the AIR simulation launch screen.""" + +from __future__ import annotations + +import pytest +from textual.widgets import Tabs + +from nv_config_manager_installer.air_sim.constants import NVCM_BOX_PASSWORD +from nv_config_manager_installer.air_sim.sim_config import SimConfig +from nv_config_manager_installer.tui.air_sim.app import NVCMAirSimApp +from nv_config_manager_installer.tui.air_sim.screens.launch import ( + LaunchScreen, + _FollowLog, + _LogViewerWidget, +) + +PUBLIC_AIR_WORKER = "eb515e50.workers.ngc.air.nvidia.com" + + +class ClipboardAirSimApp(NVCMAirSimApp): + """Test app that records clipboard writes.""" + + copied_text: str | None = None + + def copy_to_clipboard(self, text: str) -> None: + self.copied_text = text + + +@pytest.mark.asyncio +async def test_ssh_copy_button_and_command_bar_copy_command() -> None: + app = ClipboardAirSimApp(config=SimConfig(ngc_api_key="nvapi-test")) + command = f"sshpass -p {NVCM_BOX_PASSWORD} ssh -p 17117 nvcm@example.air" + + async with app.run_test(size=(180, 70)) as pilot: + app.switch_section("launch") + await pilot.pause(0.1) + + launch = app.query_one("#screen-launch", LaunchScreen) + 
launch._show_ssh_command(command) + await pilot.pause(0.1) + + await pilot.click("#copy-ssh") + await pilot.pause(0.1) + assert app.copied_text == command + + app.copied_text = None + await pilot.click("#ssh-cmd") + await pilot.pause(0.1) + assert app.copied_text == command + + +@pytest.mark.asyncio +async def test_access_panel_copy_button_and_panel_body_copy_command() -> None: + app = ClipboardAirSimApp(config=SimConfig(ngc_api_key="nvapi-test")) + + async with app.run_test(size=(180, 70)) as pilot: + app.switch_section("launch") + await pilot.pause(0.1) + + launch = app.query_one("#screen-launch", LaunchScreen) + launch._show_proxy_panel(PUBLIC_AIR_WORKER, 17117) + await pilot.pause(0.1) + + await pilot.click("#copy-ssh-unix") + await pilot.pause(0.1) + assert app.copied_text is not None + assert f"sshpass -p {NVCM_BOX_PASSWORD}" in app.copied_text + assert PUBLIC_AIR_WORKER in app.copied_text + + app.copied_text = None + await pilot.click("#cmd-ssh-unix") + await pilot.pause(0.1) + assert app.copied_text is not None + assert f"sshpass -p {NVCM_BOX_PASSWORD}" in app.copied_text + assert PUBLIC_AIR_WORKER in app.copied_text + + +@pytest.mark.asyncio +async def test_log_tabs_keep_independent_log_widgets() -> None: + app = ClipboardAirSimApp(config=SimConfig(ngc_api_key="nvapi-test")) + + async with app.run_test(size=(180, 70)) as pilot: + app.switch_section("launch") + await pilot.pause(0.1) + + launch = app.query_one("#screen-launch", LaunchScreen) + viewer = launch.query_one("#log-viewer", _LogViewerWidget) + viewer.append_line("deploy line 1", "deploy") + viewer.append_line("deploy line 2", "deploy") + viewer.add_tab("dhcp", "DHCP") + viewer.append_line("dhcp line 1", "dhcp") + await pilot.pause(0.1) + + await pilot.click("#log-tab-dhcp") + await pilot.pause(0.1) + + deploy_log = viewer._logs["deploy"] + dhcp_log = viewer._logs["dhcp"] + assert deploy_log is not dhcp_log + assert deploy_log.display is False + assert dhcp_log.display is True + assert 
dhcp_log.line_count == 1 + + await pilot.click("#log-tab-deploy") + await pilot.pause(0.1) + + assert deploy_log.display is True + assert dhcp_log.display is False + assert deploy_log.line_count == 2 + + +@pytest.mark.asyncio +async def test_deploy_log_scrollback_pauses_following() -> None: + app = ClipboardAirSimApp(config=SimConfig(ngc_api_key="nvapi-test")) + + async with app.run_test(size=(180, 70)) as pilot: + app.switch_section("launch") + await pilot.pause(0.1) + + launch = app.query_one("#screen-launch", LaunchScreen) + viewer = launch.query_one("#log-viewer", _LogViewerWidget) + for i in range(120): + viewer.append_line(f"deploy line {i:03d} " + ("x" * 120), "deploy") + await pilot.pause(0.2) + + log = viewer.query_one("#log-output", _FollowLog) + assert log.max_scroll_y > 0 + log.scroll_end(animate=False, immediate=True, x_axis=False) + await pilot.pause(0.1) + + bottom = log.scroll_y + log.scroll_to(y=max(0, bottom - 8), animate=False, force=True, immediate=True) + await pilot.pause(0.1) + scrolled_y = log.scroll_y + assert scrolled_y < bottom + assert log.following is False + + viewer.append_line("deploy line after manual scroll", "deploy") + await pilot.pause(0.1) + + assert log.scroll_y == scrolled_y + assert log.following is False + + +@pytest.mark.asyncio +async def test_log_flood_does_not_block_tabs_or_save_key(tmp_path) -> None: + app = ClipboardAirSimApp( + config=SimConfig(ngc_api_key="nvapi-test"), + config_path=tmp_path / "air-sim.yaml", + ) + + async with app.run_test(size=(180, 70)) as pilot: + app.switch_section("launch") + await pilot.pause(0.1) + + launch = app.query_one("#screen-launch", LaunchScreen) + viewer = launch.query_one("#log-viewer", _LogViewerWidget) + viewer.add_tab("dhcp", "DHCP") + viewer.add_tab("ztp", "ZTP") + ssh_command = f"sshpass -p {NVCM_BOX_PASSWORD} ssh -p 17117 nvcm@{PUBLIC_AIR_WORKER}" + launch._show_ssh_command(ssh_command) + launch._show_proxy_panel(PUBLIC_AIR_WORKER, 17117) + await pilot.pause(0.1) + + await 
pilot.click("#log-tab-ztp") + await pilot.pause(0.1) + assert viewer._active_tab == "ztp" + + for i in range(2000): + launch.enqueue_log_line(f"ztp line {i:04d}", "ztp") + + await pilot.click("#log-tab-access") + await pilot.click("#copy-ssh") + await pilot.press("f2") + await pilot.pause(0.2) + + assert viewer._active_tab == "access" + assert app.copied_text == ssh_command + assert app.config_path.exists() + assert app.query_one("#log-tabs", Tabs).active == "log-tab-access" diff --git a/installer/tests/conftest.py b/installer/tests/conftest.py index 1fa3b97..bee3ef7 100644 --- a/installer/tests/conftest.py +++ b/installer/tests/conftest.py @@ -1,3 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from unittest.mock import patch import pytest diff --git a/installer/tests/test_cli.py b/installer/tests/test_cli.py index 684f727..11cb6e0 100644 --- a/installer/tests/test_cli.py +++ b/installer/tests/test_cli.py @@ -1,10 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for nv_config_manager_installer.cli -- Click command validation.""" from __future__ import annotations import tempfile +import tomllib from pathlib import Path from click.testing import CliRunner @@ -72,16 +85,35 @@ def test_validate_missing_sites(self): def test_version(self): runner = CliRunner() - result = runner.invoke(main, ["--version"]) + result = runner.invoke(main, ["--version"], prog_name="nv-config-manager-installer") assert result.exit_code == 0 assert "nv-config-manager-installer" in result.output + def test_short_alias_script_is_registered(self): + pyproject = Path(__file__).resolve().parents[1] / "pyproject.toml" + data = tomllib.loads(pyproject.read_text()) + scripts = data["project"]["scripts"] + assert scripts["nvcm-installer"] == scripts["nv-config-manager-installer"] + def test_generate_values_help(self): runner = CliRunner() result = runner.invoke(main, ["generate-values", "--help"]) assert result.exit_code == 0 assert "--chart-dir" in result.output + def test_air_sim_help(self): + runner = CliRunner() + result = runner.invoke(main, ["air-sim", "--help"]) + assert result.exit_code == 0 + assert "init" in result.output + assert "deploy" in result.output + + def test_air_sim_deploy_help(self): + runner = CliRunner() + result = runner.invoke(main, ["air-sim", "deploy", "--help"]) + assert result.exit_code == 0 + assert "--config" in result.output + class TestDeployCommand: def test_deploy_missing_config(self): diff --git a/installer/tests/test_conditional_fields.py b/installer/tests/test_conditional_fields.py index 
0b6455a..097fcc9 100644 --- a/installer/tests/test_conditional_fields.py +++ b/installer/tests/test_conditional_fields.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for conditional field visibility in TUI screens.""" from __future__ import annotations diff --git a/installer/tests/test_deployer.py b/installer/tests/test_deployer.py index 51e2bed..fd69978 100644 --- a/installer/tests/test_deployer.py +++ b/installer/tests/test_deployer.py @@ -1,10 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. """Tests for nv_config_manager_installer.deployer -- step sequencing, callbacks, and re-run detection.""" from __future__ import annotations import subprocess +import sys import tempfile from pathlib import Path from unittest.mock import MagicMock, patch @@ -19,7 +32,10 @@ StepStatus, _get_image_digest_tag, _hash_content_dir, + _parallel_build_limit, + _ParallelCommand, _RerunState, + _run_logged_parallel, ) from nv_config_manager_installer.schema import ( ClusterConfig, @@ -362,6 +378,152 @@ def test_custom_options(self): assert opts.kind_cluster == "test-cluster" +class TestImageBuilds: + def test_parallel_build_limit_defaults_and_env_override(self, monkeypatch): + monkeypatch.delenv("NVCM_DOCKER_BUILD_PARALLELISM", raising=False) + assert _parallel_build_limit(6) >= 1 + assert _parallel_build_limit(6) <= 4 + + monkeypatch.setenv("NVCM_DOCKER_BUILD_PARALLELISM", "2") + assert _parallel_build_limit(6) == 2 + + monkeypatch.setenv("NVCM_DOCKER_BUILD_PARALLELISM", "99") + assert _parallel_build_limit(6) == 6 + + monkeypatch.setenv("NVCM_DOCKER_BUILD_PARALLELISM", "not-an-int") + assert _parallel_build_limit(6) >= 1 + + def test_run_logged_parallel_prefixes_logs_and_reports_progress(self): + step = DeployStep("build-images", "Build local images") + callback = RecordingCallback() + commands = [ + _ParallelCommand( + "one", + [ + sys.executable, + "-c", + ( + "import time; " + "print('one start', flush=True); " + "time.sleep(0.4); " + "print('one done', flush=True)" + ), + ], + timeout=5, + ), + _ParallelCommand( + "two", + [ + sys.executable, + "-c", + ( + "import time; " + "print('two start', flush=True); " + "time.sleep(0.4); " + "print('two done', flush=True)" + ), + ], + timeout=5, + ), + ] + + _run_logged_parallel(commands, step, callback, max_parallel=2, progress_interval=0.05) + + one_start = callback.logs.index("[one] one start") + two_start = 
callback.logs.index("[two] two start") + one_done = callback.logs.index("[one] one done") + + assert one_start < one_done + assert two_start < one_done + assert any( + "[one] running" in line and "latest: one start" in line for line in callback.logs + ) + assert any(line.startswith("[one] completed in ") for line in callback.logs) + assert any(line.startswith("[two] completed in ") for line in callback.logs) + + def test_build_images_runs_parallel_builds_and_tags(self, monkeypatch): + parallel_calls: list[tuple[list[_ParallelCommand], int]] = [] + run_commands: list[list[str]] = [] + + def fake_run_logged_parallel(commands, step, callback, *, max_parallel, **kwargs): + parallel_calls.append((commands, max_parallel)) + for command in commands: + callback.on_log(f"[{command.label}] completed in 0s") + + def fake_digest(image: str) -> str: + return f"sha-{image.removeprefix('nv-config-manager-')[:8]}" + + def fake_run(cmd, **kwargs): + run_commands.append(cmd) + return MagicMock(returncode=0, stdout="", stderr="") + + monkeypatch.setenv("NVCM_DOCKER_BUILD_PARALLELISM", "2") + monkeypatch.delenv("BUILDX_BUILDER", raising=False) + monkeypatch.setattr( + "nv_config_manager_installer.deployer._run_logged_parallel", + fake_run_logged_parallel, + ) + monkeypatch.setattr( + "nv_config_manager_installer.deployer._get_image_digest_tag", + fake_digest, + ) + monkeypatch.setattr("nv_config_manager_installer.deployer._run", fake_run) + + deployer = Deployer( + _make_config(), + DeployOptions(build_images=True), + RecordingCallback(), + ) + deployer._build_images() + + commands, max_parallel = parallel_calls[0] + assert max_parallel == 2 + assert len(commands) == 6 + assert all( + command.cmd[:4] == ["docker", "build", "--provenance=false", "--progress=plain"] + for command in commands + ) + assert all("--load" not in command.cmd for command in commands) + assert all(command.timeout == 900 for command in commands) + assert all(command.env and command.env["DOCKER_BUILDKIT"] == "1" 
for command in commands) + assert len(run_commands) == 6 + assert all(cmd[:2] == ["docker", "tag"] for cmd in run_commands) + assert deployer._local_image_tags["nv-config-manager-ui"].startswith("sha-") + + def test_build_images_loads_buildx_container_outputs(self, monkeypatch): + parallel_calls: list[list[_ParallelCommand]] = [] + + def fake_run_logged_parallel(commands, step, callback, *, max_parallel, **kwargs): + parallel_calls.append(commands) + for command in commands: + callback.on_log(f"[{command.label}] completed in 0s") + + monkeypatch.setenv("BUILDX_BUILDER", "ci-builder") + monkeypatch.setattr( + "nv_config_manager_installer.deployer._run_logged_parallel", + fake_run_logged_parallel, + ) + monkeypatch.setattr( + "nv_config_manager_installer.deployer._get_image_digest_tag", + lambda image: "", + ) + + deployer = Deployer( + _make_config(), + DeployOptions(build_images=True), + RecordingCallback(), + ) + deployer._build_images() + + commands = parallel_calls[0] + assert len(commands) == 6 + assert all("--load" in command.cmd for command in commands) + assert all(command.cmd[:3] == ["docker", "buildx", "build"] for command in commands) + assert all( + command.env and command.env["BUILDX_BUILDER"] == "ci-builder" for command in commands + ) + + class TestContentHashing: def test_deterministic_hash(self): with tempfile.TemporaryDirectory() as d: diff --git a/installer/tests/test_helm_values.py b/installer/tests/test_helm_values.py index 31b19af..78ccee2 100644 --- a/installer/tests/test_helm_values.py +++ b/installer/tests/test_helm_values.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for nv_config_manager_installer.helm_values -- Helm values generation.""" from __future__ import annotations diff --git a/installer/tests/test_operator_versions.py b/installer/tests/test_operator_versions.py index b65cde7..5580805 100644 --- a/installer/tests/test_operator_versions.py +++ b/installer/tests/test_operator_versions.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for shared operator version manifest loading.""" from __future__ import annotations diff --git a/installer/tests/test_registry_client.py b/installer/tests/test_registry_client.py index 52d2056..10f16d5 100644 --- a/installer/tests/test_registry_client.py +++ b/installer/tests/test_registry_client.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for nv_config_manager_installer.registry_client -- Docker V2 tag listing.""" from __future__ import annotations diff --git a/installer/tests/test_schema.py b/installer/tests/test_schema.py index b2c60e3..14ca7b3 100644 --- a/installer/tests/test_schema.py +++ b/installer/tests/test_schema.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Tests for nv_config_manager_installer.schema -- config validation and serialization.""" from __future__ import annotations diff --git a/installer/tests/test_secrets.py b/installer/tests/test_secrets.py index e0a1c42..157abce 100644 --- a/installer/tests/test_secrets.py +++ b/installer/tests/test_secrets.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for nv_config_manager_installer.secrets -- secret generation and ESO config building.""" from __future__ import annotations @@ -221,7 +233,7 @@ def test_all_default_path_groups(self): assert "keys" in paths[group] # Optional groups disabled by default - for group in ("slack", "air", "jira", "cnpgBackup"): + for group in ("slack", "jira", "cnpgBackup"): assert group not in paths, f"{group} should be disabled by default" def test_custom_path_preserves_default_keys(self): diff --git a/installer/tests/test_template_scanner.py b/installer/tests/test_template_scanner.py index 964f463..1518e6f 100644 --- a/installer/tests/test_template_scanner.py +++ b/installer/tests/test_template_scanner.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Tests for the template_scanner module.""" from __future__ import annotations diff --git a/installer/tests/test_tui.py b/installer/tests/test_tui.py index a69d21d..2c5f297 100644 --- a/installer/tests/test_tui.py +++ b/installer/tests/test_tui.py @@ -1,5 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Textual pilot tests for the TUI app.""" from __future__ import annotations diff --git a/installer/uv.lock b/installer/uv.lock index 99bd256..c894b47 100644 --- a/installer/uv.lock +++ b/installer/uv.lock @@ -261,49 +261,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] -[[package]] -name = "nv-config-manager-installer" -version = "1.0.0" -source = { editable = "." } -dependencies = [ - { name = "click" }, - { name = "kubernetes" }, - { name = "pydantic" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "textual" }, - { name = "textual-fspicker" }, -] - -[package.dev-dependencies] -dev = [ - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pytest-cov" }, - { name = "responses" }, - { name = "ruff" }, -] - -[package.metadata] -requires-dist = [ - { name = "click", specifier = ">=8.1.7" }, - { name = "kubernetes", specifier = ">=32.0.0" }, - { name = "pydantic", specifier = ">=2.10.2" }, - { name = "pyyaml", specifier = ">=6.0.2" }, - { name = "requests", specifier = ">=2.32.2" }, - { name = "textual", specifier = ">=3.1.0" }, - { name = "textual-fspicker", specifier = ">=1.0.0" }, -] - -[package.metadata.requires-dev] -dev = [ - { name = "pytest", specifier = ">=9.0.2" }, - { name = "pytest-asyncio", specifier = ">=1.3.0" }, - { name = "pytest-cov", specifier = ">=6.0.0" }, - { name = "responses", specifier = ">=0.25.0" }, - { name = "ruff", specifier = ">=0.15.6" }, -] - [[package]] name = "kubernetes" version = "35.0.0" @@ -374,6 +331,65 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 
9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "nv-air-sdk" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "python-dotenv" }, + { name = "requests" }, + { name = "urllib3" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/a5/8de39a9f9cb47e51cafb1956eca8a21872b5e76f7f8c50f55745987e69e0/nv_air_sdk-1.4.0-py3-none-any.whl", hash = "sha256:3b87f9d744e666fd64531c7c5b7efefd269b14a78bfd66dfa4ee19db356681cd", size = 156473, upload-time = "2026-05-26T18:05:43.23Z" }, +] + +[[package]] +name = "nv-config-manager-installer" +version = "1.0.0" +source = { editable = "." } +dependencies = [ + { name = "click" }, + { name = "kubernetes" }, + { name = "nv-air-sdk" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "textual" }, + { name = "textual-fspicker" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "responses" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.1.7" }, + { name = "kubernetes", specifier = ">=32.0.0" }, + { name = "nv-air-sdk", specifier = "==1.4.0" }, + { name = "pydantic", specifier = ">=2.10.2" }, + { name = "pyyaml", specifier = ">=6.0.2" }, + { name = "requests", specifier = ">=2.32.2" }, + { name = "textual", specifier = ">=3.1.0" }, + { name = "textual-fspicker", specifier = ">=1.0.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=9.0.2" }, + { name = "pytest-asyncio", specifier = ">=1.3.0" }, + { name = "pytest-cov", specifier = ">=6.0.0" }, + { name = "responses", specifier = ">=0.25.0" }, + { name = "ruff", specifier = ">=0.15.6" }, +] + [[package]] name = "oauthlib" version = "3.3.1" @@ -586,6 +602,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" diff --git a/pyproject.toml b/pyproject.toml index eefe835..a13572d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,8 +89,6 @@ dependencies = [ "brotli>=1.1.0", # Slack "slack-sdk>=3.35.0", - # AIR SDK (NVIDIA) - "air-sdk>=2.16.0", # Logging "python-json-logger>=2.0.7", # Markdown @@ -228,7 +226,6 @@ module = [ "py_markdown_table.*", "ruamel.*", "brotli.*", - "air_sdk.*", "netaddr.*", "pynautobot.*", "macaddress", diff --git a/scripts/add_spdx_headers.py b/scripts/add_spdx_headers.py index 5321fdb..3d13bcb 100755 --- a/scripts/add_spdx_headers.py +++ b/scripts/add_spdx_headers.py @@ -80,8 +80,8 @@ "scripts", "components/nautobot", "components/network-templates", - "development/mock_topology", - "installer/src", + "development/", + "installer/", ] JS_TS_DIRS = [ diff --git a/src/nv_config_manager/temporal/api/parameter_v1.py b/src/nv_config_manager/temporal/api/parameter_v1.py index ae703ce..9d67f70 100644 --- 
a/src/nv_config_manager/temporal/api/parameter_v1.py +++ b/src/nv_config_manager/temporal/api/parameter_v1.py @@ -19,7 +19,6 @@ from fastapi import APIRouter, HTTPException, Query from pydantic import BaseModel -from nv_config_manager.temporal.client.air import AirClient, Simulation from nv_config_manager.temporal.client.nautobot import NautobotClient from nv_config_manager.temporal.common.mixins.device import NetworkDeviceData, Platform from nv_config_manager.temporal.ngc.activities.diagnostics import get_available_commands @@ -394,16 +393,6 @@ async def get_diagnostics_commands( return [CommandEntry(name=name, description=desc) for name, desc in sorted(seen.items())] -@router.get( - "/simulations", - summary="Get AIR Simulations", -) -async def get_simulations() -> list[Simulation]: - """Return a list of NVIDIA Config Manager-managed AIR simulations.""" - air_client = AirClient() - return air_client.list_simulations() - - @router.get("/device/{device_id}/secrets") async def get_device_secrets(device_id: str) -> list[Secret]: """Return a list of secrets available in device config context. diff --git a/src/nv_config_manager/temporal/client/air.py b/src/nv_config_manager/temporal/client/air.py deleted file mode 100644 index 663827e..0000000 --- a/src/nv_config_manager/temporal/client/air.py +++ /dev/null @@ -1,318 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""AIR SDK Client wrapper.""" - -from __future__ import annotations - -import socket -import time -from typing import Any - -import requests -from air_sdk import AirApi as AirApiV1 -from air_sdk.v2 import AirApi -from pydantic import BaseModel - -from nv_config_manager.common.config import load_config -from nv_config_manager.common.log import LogCategory, get_logger - -logger = get_logger(__name__, category=LogCategory.TEMPORAL_ACTIVITY) - - -class AirDevice(BaseModel): - """Model representing an AIR device in a simulation. - - Attributes: - id: Unique identifier for the device - name: Name of the device - worker_ip: IP address of the device's worker - api_port: Port number for the device's API - """ - - id: str - name: str - worker_ip: str - api_port: int - - -class Simulation(BaseModel): - """Model representing an AIR simulation.""" - - id: str - name: str - state: str - - -class AirClient: - """Client wrapper for AIR SDK interactions.""" - - def __init__(self) -> None: - """Initialize the AIR client with authenticated API clients.""" - self.cfg = load_config() - self.v1_client, self.v2_client = self._get_clients() - - def _resolve_hostname(self, hostname: str) -> str: - """Resolve a hostname to an IP address if it's not already an IP. - - Args: - hostname: The hostname or IP address to resolve - - Returns: - The resolved IP address - - Raises: - socket.gaierror: If the hostname cannot be resolved - """ - try: - # Check if it's already an IP address - socket.inet_aton(hostname) - return hostname - except OSError: - # If not an IP, resolve the hostname - return socket.gethostbyname(hostname) - - def _get_clients(self) -> tuple[AirApiV1, AirApi]: - """Get AIR API clients for both v1 and v2 APIs. 
- - Returns: - A tuple containing the v1 and v2 AIR API clients - - Raises: - requests.exceptions.RequestException: If authentication fails - """ - ssa_client_id = self.cfg["temporal.air"]["ssa_client_id"] - ssa_client_secret = self.cfg["temporal.air"]["ssa_client_secret"] - url = "https://tkpfg13ml3wy1hpcurczo5m2f0qxoxhifu4h7erevvo.ssa.nvidia.com/token" - headers = {"Content-Type": "application/x-www-form-urlencoded"} - data = {"grant_type": "client_credentials", "scope": "api-access"} - response = requests.post( - url, auth=(ssa_client_id, ssa_client_secret), headers=headers, data=data - ) - response.raise_for_status() - token = response.json()["access_token"] - v1_client = AirApiV1( - api_url=self.cfg["temporal.air"]["air_api_url"], - username=ssa_client_id, - bearer_token=token, - ) - v2_client = AirApi( - api_url=self.cfg["temporal.air"]["air_api_url"], - username=ssa_client_id, - bearer_token=token, - ) - return v1_client, v2_client - - def create_simulation(self, simulation_name: str, topology: dict[str, Any]) -> str: - """Create a new AIR simulation with the specified topology. - - Args: - simulation_name: Name of the simulation - topology: Topology configuration - - Returns: - The simulation ID - - Raises: - air_sdk.exceptions.AirApiError: If simulation creation fails - """ - org_id = self.cfg["temporal.air"]["org_id"] - simulation = self.v2_client.simulations.create_from( - simulation_name, - "JSON", - topology, - org_id, - ) - return simulation.id # type: ignore[no-any-return] - - def prepare_simulation_nodes(self, simulation_id: str) -> None: - """Prepare simulation nodes by creating eth0 interfaces and setting up NVIDIA Config Manager accounts. - - This method: - 1. Creates eth0 outbound interfaces for all nodes that don't have one - 2. Creates NVIDIA Config Manager accounts with default passwords for all nodes - 3. 
Resets all nodes to apply the changes - - Args: - simulation_id: ID of the simulation to prepare - - Raises: - air_sdk.exceptions.AirApiError: If node preparation fails - """ - # Create Eth0 Outbound interfaces for all nodes - simulation = self.v2_client.simulations.get(simulation_id) - for node in self.v2_client.nodes.list(simulation=simulation): - has_eth0 = False - for iface in self.v2_client.interfaces.list(node=node): - if iface.name == "eth0": - has_eth0 = True - break - if not has_eth0: - logger.info(f"Creating eth0 interface for {node.name}") - self.v2_client.interfaces.create( - name="eth0", - node=node, - interface_type="OOB_INTF", - link_up=True, - outbound=True, - ) - - # Create NVIDIA Config Manager Account for all nodes - default_username = self.cfg["temporal.air"]["air_node_user"] - default_password = self.cfg["temporal.air"]["air_node_password"] - all_nodes = self.v1_client.simulation_nodes.list(simulation=simulation_id) - for node in all_nodes: - node.create_instructions( - data=f"nv set system aaa user {default_username} password {default_password}", - executor="shell", - ) - node.create_instructions( - data=f"nv set system aaa user {default_username} role system-admin", - executor="shell", - ) - node.create_instructions(data="nv config apply -y", executor="shell") - node.control(action="reset") - - def start_simulation(self, simulation_id: str) -> None: - """Start a simulation and wait for it to be fully loaded. 
- - Args: - simulation_id: ID of the simulation to start - - Raises: - air_sdk.exceptions.AirApiError: If simulation start fails - """ - simulation = self.v2_client.simulations.get(simulation_id) - if simulation.state not in ["LOADING", "LOADED"]: - self.v1_client.simulation.control(simulation_id, "load") - - while simulation.state != "LOADED": - time.sleep(5) - simulation = self.v2_client.simulations.get(simulation_id) - - def create_simulation_node_services(self, simulation_id: str) -> list[AirDevice]: - """Create HTTPS services for all nodes in a simulation. - - This method: - 1. Creates HTTPS services on eth0 interfaces for all nodes - 2. Resolves hostnames to IP addresses - 3. Returns a list of configured devices with their connection details - - Args: - simulation_id: ID of the simulation - - Returns: - List of configured devices with their connection details - - Raises: - air_sdk.exceptions.AirApiError: If service creation fails - socket.gaierror: If hostname resolution fails - """ - service_map = {} - devices = [] - # Check if any services already exist from previous attempts - existing_services = self.v1_client.services.list(simulation=simulation_id) - for service in existing_services: - service_map[service.interface.id] = service - - for node in self.v2_client.nodes.list(simulation=simulation_id): - for iface in self.v2_client.interfaces.list(node=node): - if iface.name == "eth0": - service = service_map.get(iface.id) - if service is None: - service_name = f"{node.name} HTTPS" - dest_port = 8765 - service = self.v2_client.services.create( - name=service_name, - interface=iface, - dest_port=dest_port, - service_type="https", - ) - # Resolve the hostname to IP if needed - if not service.host: - raise ValueError(f"Service {service_name} has no host") - worker_ip = self._resolve_hostname(service.host) - devices.append( - AirDevice( - id=node.id, - name=node.name, - worker_ip=worker_ip, - api_port=service.src_port, - ) - ) - return devices - - def 
wait_for_simulation_node(self, node: AirDevice) -> None: - """Wait for a simulation node to be ready and accessible. - - This method: - 1. Waits for the node to be in RUNNING state - 2. Attempts to connect to the node's API - 3. Rebuilds the node if connection fails - - Args: - node: The node to wait for - - Raises: - requests.exceptions.RequestException: If API connection fails - air_sdk.exceptions.AirApiError: If node rebuild fails - """ - default_username = self.cfg["temporal.air"]["air_node_user"] - default_password = self.cfg["temporal.air"]["air_node_password"] - session = requests.Session() - session.auth = (default_username, default_password) - session.verify = False - - while True: - air_node = self.v2_client.nodes.get(node.id) - while air_node.state != "RUNNING": - logger.info(f"Node {node.name} is not yet running, waiting for 5 seconds") - time.sleep(5) - air_node.refresh() - # Attempt NVUE Query after node has time to build/rebuild - time.sleep(30) - try: - rsp = session.get( - f"https://{node.worker_ip}:{node.api_port}/nvue_v1/system/api", - timeout=30, - ) - rsp.raise_for_status() - return - except Exception as e: - logger.warning(f"Error querying {node.name}, rebuilding: {e}") - # v1_node = self.v1_client.simulation_nodes.get(simulation_node_id=node.id) - # v1_node.control(action="rebuild") - - def delete_simulation(self, simulation_id: str) -> None: - """Delete an AIR simulation. 
- - Args: - simulation_id: ID of the simulation to delete - - Raises: - air_sdk.exceptions.AirApiError: If simulation deletion fails - """ - self.v2_client.simulations.delete(simulation_id) - - def list_simulations(self) -> list[Simulation]: - """List all simulations managed by NVIDIA Config Manager.""" - simulations = self.v2_client.simulations.list() - return [ - Simulation( - id=sim.id, - name=sim.title, - state=sim.state, - ) - for sim in simulations - ] diff --git a/src/nv_config_manager/temporal/ngc/activities/__init__.py b/src/nv_config_manager/temporal/ngc/activities/__init__.py index b8a814b..eafa366 100644 --- a/src/nv_config_manager/temporal/ngc/activities/__init__.py +++ b/src/nv_config_manager/temporal/ngc/activities/__init__.py @@ -14,17 +14,6 @@ # limitations under the License. """NGC Network Activities.""" -from nv_config_manager.temporal.ngc.activities.air import ( - create_simulation, - create_simulation_node_services, - delete_simulation, - generate_air_topology_for_location, - generate_minimal_topology_for_site, - prepare_simulation_nodes, - start_simulation, - validate_configuration_against_air_device, - wait_for_simulation_node, -) from nv_config_manager.temporal.ngc.activities.backup import ( load_running_configuration, persist_config_backup, @@ -220,15 +209,6 @@ validate_rendered_password_change, check_recorded_config_drift, wait_reboot, - generate_air_topology_for_location, - create_simulation, - prepare_simulation_nodes, - start_simulation, - create_simulation_node_services, - wait_for_simulation_node, - delete_simulation, - generate_minimal_topology_for_site, - validate_configuration_against_air_device, get_mlnx_os_version, download_mlnx_os, install_mlnx_os, diff --git a/src/nv_config_manager/temporal/ngc/activities/air.py b/src/nv_config_manager/temporal/ngc/activities/air.py deleted file mode 100644 index 36b5c58..0000000 --- a/src/nv_config_manager/temporal/ngc/activities/air.py +++ /dev/null @@ -1,459 +0,0 @@ -# 
SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""AIR Simulation Activities.""" - -from __future__ import annotations - -import hashlib -from io import StringIO -from typing import Any - -from pydantic import BaseModel -from ruamel.yaml import YAML -from ruamel.yaml.error import YAMLError -from temporalio import activity - -from nv_config_manager.common.log import LogCategory, get_logger -from nv_config_manager.temporal.client.air import AirClient, AirDevice -from nv_config_manager.temporal.client.device import ConfigSyntaxException, CumulusConnection -from nv_config_manager.temporal.client.nautobot import NautobotClient - -logger = get_logger(__name__, category=LogCategory.TEMPORAL_ACTIVITY) - - -class CreateSimulationInput(BaseModel): - """Input for create_simulation activity.""" - - simulation_name: str - topology: dict[str, Any] - - -class CreateSimulationOutput(BaseModel): - """Output for create_simulation activity.""" - - simulation_id: str - - -class PrepareSimulationNodesInput(BaseModel): - """Input for prepare_simulation_nodes activity.""" - - simulation_id: str - - -class StartSimulationInput(BaseModel): - """Input for start_simulation activity.""" - - simulation_id: str - - -class CreateSimulationNodeServicesInput(BaseModel): - """Input for create_simulation_node_services activity.""" - - simulation_id: str - - -class 
CreateSimulationNodeServicesOutput(BaseModel): - """Output for create_simulation_node_services activity.""" - - devices: list[AirDevice] - - -class WaitForSimulationNodeInput(BaseModel): - """Input for wait_for_simulation_node activity.""" - - node: AirDevice - - -class DeleteSimulationInput(BaseModel): - """Input for delete_simulation activity.""" - - simulation_id: str - - -@activity.defn -def create_simulation( - input: CreateSimulationInput, -) -> CreateSimulationOutput: - """Create a new AIR simulation with the specified topology. - - Args: - input: CreateSimulationInput containing simulation name and topology - - Returns: - CreateSimulationOutput containing the simulation ID - - Raises: - air_sdk.exceptions.AirApiError: If simulation creation fails - """ - client = AirClient() - simulation_id = client.create_simulation(input.simulation_name, input.topology) - return CreateSimulationOutput(simulation_id=simulation_id) - - -@activity.defn -def prepare_simulation_nodes(input: PrepareSimulationNodesInput) -> None: - """Prepare simulation nodes by creating eth0 interfaces and setting up NVIDIA Config Manager accounts. - - This activity: - 1. Creates eth0 outbound interfaces for all nodes that don't have one - 2. Creates NVIDIA Config Manager accounts with default passwords for all nodes - 3. Resets all nodes to apply the changes - - Args: - input: PrepareSimulationNodesInput containing the simulation ID - - Raises: - air_sdk.exceptions.AirApiError: If node preparation fails - """ - client = AirClient() - client.prepare_simulation_nodes(input.simulation_id) - - -@activity.defn -def start_simulation(input: StartSimulationInput) -> None: - """Start a simulation and wait for it to be fully loaded. 
- - Args: - input: StartSimulationInput containing the simulation ID - - Raises: - air_sdk.exceptions.AirApiError: If simulation start fails - """ - client = AirClient() - client.start_simulation(input.simulation_id) - - -@activity.defn -def create_simulation_node_services( - input: CreateSimulationNodeServicesInput, -) -> CreateSimulationNodeServicesOutput: - """Create HTTPS services for all nodes in a simulation. - - This activity: - 1. Creates HTTPS services on eth0 interfaces for all nodes - 2. Resolves hostnames to IP addresses - 3. Returns a list of configured devices with their connection details - - Args: - input: CreateSimulationNodeServicesInput containing the simulation ID - - Returns: - CreateSimulationNodeServicesOutput containing the list of configured devices - - Raises: - air_sdk.exceptions.AirApiError: If service creation fails - socket.gaierror: If hostname resolution fails - """ - client = AirClient() - devices = client.create_simulation_node_services(input.simulation_id) - return CreateSimulationNodeServicesOutput(devices=devices) - - -@activity.defn -def wait_for_simulation_node(input: WaitForSimulationNodeInput) -> None: - """Wait for a simulation node to be ready and accessible. - - This activity: - 1. Waits for the node to be in RUNNING state - 2. Attempts to connect to the node's API - 3. Rebuilds the node if connection fails - - Args: - input: WaitForSimulationNodeInput containing the node to wait for - - Raises: - requests.exceptions.RequestException: If API connection fails - air_sdk.exceptions.AirApiError: If node rebuild fails - """ - client = AirClient() - client.wait_for_simulation_node(input.node) - - -@activity.defn -def delete_simulation(input: DeleteSimulationInput) -> None: - """Delete an AIR simulation. 
- - Args: - input: DeleteSimulationInput containing the simulation ID - - Raises: - air_sdk.exceptions.AirApiError: If simulation deletion fails - """ - client = AirClient() - client.delete_simulation(input.simulation_id) - - -class ConfigTestInput(BaseModel): - """Input for test_configuration_against_air_device activity.""" - - node: AirDevice - config: str - - -class ConfigTestOutput(BaseModel): - """Output for test_configuration_against_air_device activity.""" - - error: str | None - - -def _sanitize_config(config: str, air_user: str, air_password: str) -> str: - """Sanitize a configuration string by removing sensitive information.""" - # NVUE (cumulus/nvos) only for now - # Communication with AIR does not happen over our private VPN tunnels - # nor do we have a way to validate that the node we're sending config to - # is genuinely from AIR, therefore we should strip any production secrets - # prior to testing the configuration against the node - try: - config_obj = YAML().load(config) - # Replace any passwords or hashed passwords with dummy data - dummy_password = "DuMMyP4SSW0RD!" 
- dummy_hash = hashlib.sha512(dummy_password.encode()).hexdigest() - - def _replace_passwords(obj: dict[str, Any] | list[Any]) -> None: - if isinstance(obj, dict): - for key, value in obj.items(): - if key in ["password", "hashed-password", "secret"]: - obj[key] = dummy_hash if key == "hashed-password" else dummy_password - elif isinstance(value, dict | list): - _replace_passwords(value) - elif isinstance(obj, list): - for item in obj: - if isinstance(item, dict | list): - _replace_passwords(item) - - _replace_passwords(config_obj) - - # Blow away any local user accounts - # and replace with air user - if config_obj[0]["set"]["system"]["aaa"]["user"]: - config_obj[0]["set"]["system"]["aaa"]["user"] = { - air_user: { - "password": air_password, - "role": "system-admin", - } - } - - yaml_config_stream = StringIO() - YAML().dump(config_obj, yaml_config_stream) - return yaml_config_stream.getvalue() - except (KeyError, IndexError, YAMLError) as exc: - raise ConfigSyntaxException("Invalid yaml loaded from the Config Store.") from exc - - -@activity.defn -def validate_configuration_against_air_device( - input: ConfigTestInput, -) -> ConfigTestOutput: - """Test a configuration against an AIR device.""" - # For now we're only supporting Cumulus - client = AirClient() - air_user = client.cfg["temporal.air"]["air_node_user"] - air_password = client.cfg["temporal.air"]["air_node_password"] - connection = CumulusConnection( - input.node.worker_ip, input.node.api_port, air_user, air_password - ) - sanitized_config = _sanitize_config(input.config, air_user, air_password) - error = None - try: - connection.perform_candidate_diff(sanitized_config) - except Exception as e: - error = str(e) - return ConfigTestOutput(error=error) - - -@activity.defn -def generate_air_topology_for_location(location_id: str) -> dict[str, Any]: - """Generate AIR topology for a location.""" - # TODO: GraphQL query to get requisite data - return { - "oob": False, - "nodes": { - "aggleaf1-gp1-smn1-sitea": 
{ - "memory": 4096, - "os": "cumulus-vx-5.11.0", - "cpu": 2, - }, - "core1-cp1-smn1-sitea": { - "memory": 4096, - "os": "cumulus-vx-5.11.0", - "cpu": 2, - }, - "leaf1-cp1-smn1-sitea": { - "memory": 4096, - "os": "cumulus-vx-5.11.0", - "cpu": 2, - }, - "spine1-cp1-smn1-sitea": { - "memory": 4096, - "os": "cumulus-vx-5.11.0", - "cpu": 2, - }, - }, - "links": [ - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp1"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp2"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp3"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp4"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp5"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp6"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp7"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp8"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp9"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp10"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp11"}, "unconnected"], - [{"node": "aggleaf1-gp1-smn1-sitea", "interface": "swp12"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp1"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp2"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp3"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp4"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp5"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp6"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp7"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp8"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp9"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", 
"interface": "swp10"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp11"}, "unconnected"], - [{"node": "core1-cp1-smn1-sitea", "interface": "swp12"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp1"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp2"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp3"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp4"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp5"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp6"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp7"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp8"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp9"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp10"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp11"}, "unconnected"], - [{"node": "leaf1-cp1-smn1-sitea", "interface": "swp12"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp1"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp2"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp3"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp4"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp5"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp6"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp7"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp8"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp9"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp10"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp11"}, "unconnected"], - [{"node": "spine1-cp1-smn1-sitea", "interface": "swp12"}, "unconnected"], - ], - } - - 
-class MinimalTopologyInput(BaseModel): - """Minimal Topology Input.""" - - site_name: str - - -class MinimalTopologyOutput(BaseModel): - topology: dict[str, Any] - node_map: dict[str, str] - - -@activity.defn -async def generate_minimal_topology_for_site( - activity_input: MinimalTopologyInput, -) -> MinimalTopologyOutput: - """Generate minimal topology for a site.""" - managed_devices = [] - query = """ -query($location: [String]!) { - devices(nv_config_manager_device_status: true, platform: "Cumulus Linux", location: $location) { - id - name - config_context - role { - name - } - device_type { - model - } - interfaces { - name - type - } - configmanagerdevicestatus { - intended_config { - path - } - } - } -} -""" - client = NautobotClient() - async with client: - response = await client.graphql_query( - query, {"location": [activity_input.site_name]}, timeout=30 - ) - for device in response["data"]["devices"]: - if not device["configmanagerdevicestatus"]["intended_config"]: - continue - # Virtual interfaces are not needed for the topology - # We want eth0 to be created as part of the simulation bringup - # to set it up properly as the outbound interface for landing - # the API service - physical_interfaces = { - interface["name"] - for interface in device["interfaces"] - if interface["type"] != "VIRTUAL" and interface["name"] != "eth0" - } - managed_devices.append( - { - "id": device["id"], - "name": device["name"], - "role": device["role"]["name"], - "image": device["config_context"]["intended-firmware"]["version"], - "model": device["device_type"]["model"], - "interfaces": physical_interfaces, - } - ) - - # Group devices to smallest set of AIR nodes - device_groups = {} - node_map = {} - for device in managed_devices: - key = "-".join((device["model"], device["role"], device["image"])).replace(".", "-") - if key not in device_groups: - device_groups[key] = { - "interfaces": set(), - "image": device["image"], - "devices": [], - } - 
device_groups[key]["interfaces"] |= device["interfaces"] - device_groups[key]["devices"].append(device) - node_map[device["id"]] = key - topology: dict[str, Any] = {"oob": False, "nodes": {}, "links": []} - - # Create a node for each device group - for group_key, group_data in device_groups.items(): - # Create a representative node name from the group key - node_name = group_key - - # Add node with specified resources - mem = 4 * 1024 - topology["nodes"][node_name] = { - "memory": mem, - "cpu": 4, - "os": f"cumulus-vx-{group_data['image']}", - } - - # Add unconnected interfaces for each interface in the group - for interface in group_data["interfaces"]: - topology["links"].append([{"node": node_name, "interface": interface}, "unconnected"]) - - return MinimalTopologyOutput(topology=topology, node_map=node_map) diff --git a/src/nv_config_manager/temporal/ngc/workflows/__init__.py b/src/nv_config_manager/temporal/ngc/workflows/__init__.py index dbb57b0..cfe584e 100644 --- a/src/nv_config_manager/temporal/ngc/workflows/__init__.py +++ b/src/nv_config_manager/temporal/ngc/workflows/__init__.py @@ -14,12 +14,6 @@ # limitations under the License. 
"""NGC Workflow Definitions.""" -from nv_config_manager.temporal.ngc.workflows.air import ( - AIRCreateBlueprintSimulationWorkflow, - AIRCreateSimulationWorkflow, - AIRDeleteSimulationWorkflow, - AIRValidateSiteWorkflow, -) from nv_config_manager.temporal.ngc.workflows.backup import BackupWorkflow from nv_config_manager.temporal.ngc.workflows.bmc import RedfishProvisioningWorkflow from nv_config_manager.temporal.ngc.workflows.cable_validation import ( @@ -94,10 +88,6 @@ SwitchOSUpgradeWorkflow, ReprovisionWorkflow, InfinibandCableValidationWorkflow, - AIRCreateBlueprintSimulationWorkflow, - AIRCreateSimulationWorkflow, - AIRDeleteSimulationWorkflow, - AIRValidateSiteWorkflow, InfinibandMlnxOSUpgradeWorkflow, ValidateHardwareWorkflow, DevicePasswordRotationWorkflow, diff --git a/src/nv_config_manager/temporal/ngc/workflows/air.py b/src/nv_config_manager/temporal/ngc/workflows/air.py deleted file mode 100644 index e39e488..0000000 --- a/src/nv_config_manager/temporal/ngc/workflows/air.py +++ /dev/null @@ -1,668 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""AIR Simulation Workflow Definitions.""" - -import asyncio -import urllib.parse -from datetime import timedelta -from typing import Any - -from py_markdown_table.markdown_table import markdown_table -from pydantic import BaseModel -from temporalio import workflow -from temporalio.common import RetryPolicy -from temporalio.exceptions import ApplicationError - -from nv_config_manager.temporal.common.decorators.workflow import run_nv_config_manager_workflow -from nv_config_manager.temporal.common.mixins.archive import ArchiveMixin -from nv_config_manager.temporal.common.mixins.metadata import WorkflowMetadataMixin -from nv_config_manager.temporal.common.mixins.stage import ( - StageInput, - StageMixin, - StageOutput, - stage_executor, -) - -with workflow.unsafe.imports_passed_through(): - from nv_config_manager.temporal.ngc.activities.air import ( - AirDevice, - ConfigTestInput, - CreateSimulationInput, - CreateSimulationNodeServicesInput, - DeleteSimulationInput, - MinimalTopologyInput, - PrepareSimulationNodesInput, - StartSimulationInput, - WaitForSimulationNodeInput, - create_simulation, - create_simulation_node_services, - delete_simulation, - generate_air_topology_for_location, - generate_minimal_topology_for_site, - prepare_simulation_nodes, - start_simulation, - validate_configuration_against_air_device, - wait_for_simulation_node, - ) - from nv_config_manager.temporal.ngc.activities.deploy import ( - load_intended_configuration, - ) - from nv_config_manager.temporal.ngc.activities.nautobot import ( - GetNetworkDevicesInput, - get_network_devices, - ) - -DEFAULT_ACTIVITY_RETRY_POLICY = RetryPolicy(maximum_attempts=3) -DEFAULT_ACTIVITY_TIMEOUT = timedelta(minutes=5) - - -async def _setup_simulation(name: str, topology: dict[str, Any]) -> tuple[str, list[AirDevice]]: - """Create a new AIR simulation.""" - create_sim_result = await workflow.execute_activity( - create_simulation, - CreateSimulationInput( - simulation_name=name, - topology=topology, - ), - 
retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - # Prepare simulation nodes - await workflow.execute_activity( - prepare_simulation_nodes, - PrepareSimulationNodesInput(simulation_id=create_sim_result.simulation_id), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - # Start the simulation - await workflow.execute_activity( - start_simulation, - StartSimulationInput(simulation_id=create_sim_result.simulation_id), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - # Create node services and get device info - services_result = await workflow.execute_activity( - create_simulation_node_services, - CreateSimulationNodeServicesInput(simulation_id=create_sim_result.simulation_id), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - # Wait for all nodes to be ready in parallel - await asyncio.gather( - *[ - workflow.execute_activity( - wait_for_simulation_node, - WaitForSimulationNodeInput(node=device), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - for device in services_result.devices - ] - ) - - return create_sim_result.simulation_id, services_result.devices - - -class AIRCreateSimulationInput(BaseModel): - """AIR Workflow Input Definition.""" - - name: str - topology: dict[str, Any] - user: str - - -@workflow.defn -class AIRCreateSimulationWorkflow(WorkflowMetadataMixin, StageMixin, ArchiveMixin): - """AIR network simulation creation workflow for testing and validation.""" - - # Workflow metadata - workflow_description = "Create AIR network simulation from topology for configuration testing" - workflow_input_class = AIRCreateSimulationInput - workflow_api_endpoint = "/ngc/air_create_simulation" - workflow_namespace = "ngc" - - def __init__(self) -> None: - """Initialize workflow.""" - 
StageMixin.__init__(self) - self.define_stage( - name="setup_simulation", - description="Create and populate a new AIR simulation.", - requires_approval=False, - depends_on=[], - ) - - class SetupSimulationStageInput(StageInput): - """Setup Simulation Stage Input.""" - - topology: dict[str, Any] - name: str - user: str - - class SetupSimulationStageOutput(StageOutput): - """Setup Simulation Stage Output.""" - - simulation_id: str - devices: list[AirDevice] - - @stage_executor("setup_simulation") - async def setup_simulation( - self, stage_input: SetupSimulationStageInput - ) -> SetupSimulationStageOutput: - """Create and populate a new AIR simulation.""" - simulation_name = f"{stage_input.name} ({stage_input.user})" - simulation_id, devices = await _setup_simulation(simulation_name, stage_input.topology) - - return AIRCreateSimulationWorkflow.SetupSimulationStageOutput( - simulation_id=simulation_id, - devices=devices, - display=f"Created simulation with ID: {simulation_id}", - ) - - @run_nv_config_manager_workflow - async def run(self, workflow_input: AIRCreateSimulationInput) -> str: # type: ignore[override, ty:invalid-method-override] - """Execute AIR simulation workflow.""" - self.set_input(workflow_input) - - setup_output = await self.setup_simulation( - AIRCreateSimulationWorkflow.SetupSimulationStageInput( - name=workflow_input.name, - topology=workflow_input.topology, - user=workflow_input.user, - ) - ) - - await self.archive_results() - return setup_output.simulation_id - - -class AIRCreateBlueprintSimulationInput(BaseModel): - """AIR Workflow Input Definition.""" - - blueprint_name: str - user: str | None - user_domain: str | None - - -@workflow.defn -class AIRCreateBlueprintSimulationWorkflow(WorkflowMetadataMixin, StageMixin, ArchiveMixin): - """AIR blueprint simulation creation workflow for standardized network testing.""" - - # Workflow metadata - workflow_description = ( - "Create AIR simulation from blueprint template for standardized network 
testing" - ) - workflow_input_class = AIRCreateBlueprintSimulationInput - workflow_api_endpoint = "/ngc/air_create_blueprint_simulation" - workflow_namespace = "ngc" - - def __init__(self) -> None: - """Initialize workflow.""" - StageMixin.__init__(self) - self.define_stage( - name="create_cerebro_location", - description="Create Cerebro location from blueprint.", - requires_approval=False, - depends_on=[], - ) - self.define_stage( - name="setup_simulation", - description="Create and populate a new AIR simulation.", - requires_approval=False, - depends_on=["create_cerebro_location"], - ) - self.define_stage( - name="configure_devices", - description="Configure devices in the simulation.", - requires_approval=False, - depends_on=["setup_simulation"], - ) - - class CreateCerebroLocationStageInput(StageInput): - """Create Cerebro Location Stage Input.""" - - blueprint_name: str - - class CreateCerebroLocationStageOutput(StageOutput): - """Create Cerebro Location Stage Output.""" - - location_id: str - - @stage_executor("create_cerebro_location") - async def create_cerebro_location( - self, stage_input: CreateCerebroLocationStageInput - ) -> CreateCerebroLocationStageOutput: - """Create Cerebro location from blueprint.""" - # TODO: Implement activity call to create location - location_id = "placeholder" - return AIRCreateBlueprintSimulationWorkflow.CreateCerebroLocationStageOutput( - location_id=location_id, - display=f"Created location with ID: {location_id}", - ) - - class SetupSimulationStageInput(StageInput): - """Setup Simulation Stage Input.""" - - user: str | None - user_domain: str | None - location_id: str - - class SetupSimulationStageOutput(StageOutput): - """Setup Simulation Stage Output.""" - - simulation_id: str - - @stage_executor("setup_simulation") - async def setup_simulation( - self, stage_input: SetupSimulationStageInput - ) -> SetupSimulationStageOutput: - """Create and populate a new AIR simulation.""" - topology = await workflow.execute_activity( 
- generate_air_topology_for_location, - stage_input.location_id, - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - simulation_name = f"{stage_input.user}-{stage_input.location_id}" - - simulation_id, _ = await _setup_simulation(simulation_name, topology) - - return AIRCreateBlueprintSimulationWorkflow.SetupSimulationStageOutput( - simulation_id=simulation_id, - display=f"Created simulation with ID: {simulation_id}", - ) - - class ConfigureDevicesStageInput(StageInput): - """Configure Devices Stage Input.""" - - location_id: str - - class ConfigureDevicesStageOutput(StageOutput): - """Configure Devices Stage Output.""" - - configured_devices: list[str] - - @stage_executor("configure_devices") - async def configure_devices( - self, stage_input: ConfigureDevicesStageInput - ) -> ConfigureDevicesStageOutput: - """Configure devices in the simulation.""" - # TODO: Implement activity call to configure devices - configured_devices = ["device1", "device2"] - return AIRCreateBlueprintSimulationWorkflow.ConfigureDevicesStageOutput( - configured_devices=configured_devices, - display=f"Configured devices: {', '.join(configured_devices)}", - ) - - @run_nv_config_manager_workflow - async def run(self, workflow_input: AIRCreateBlueprintSimulationInput) -> list[str]: # type: ignore[override, ty:invalid-method-override] - """Execute AIR simulation workflow.""" - if not workflow_input.user: - raise ApplicationError("Missing user for workflow attribution.") - self.set_input(workflow_input) - - create_location_output = await self.create_cerebro_location( - AIRCreateBlueprintSimulationWorkflow.CreateCerebroLocationStageInput( - blueprint_name=workflow_input.blueprint_name, - ) - ) - - await self.setup_simulation( - AIRCreateBlueprintSimulationWorkflow.SetupSimulationStageInput( - user=workflow_input.user, - user_domain=workflow_input.user_domain, - location_id=create_location_output.location_id, - ) - ) - - configure_output = await 
self.configure_devices( - AIRCreateBlueprintSimulationWorkflow.ConfigureDevicesStageInput( - location_id=create_location_output.location_id, - ) - ) - - await self.archive_results() - - return configure_output.configured_devices - - -class AIRDeleteInput(BaseModel): - """AIR Delete Workflow Input Definition.""" - - simulation_id: str - - -@workflow.defn -class AIRDeleteSimulationWorkflow(WorkflowMetadataMixin, StageMixin, ArchiveMixin): - """AIR simulation cleanup workflow for resource management.""" - - # Workflow metadata - workflow_description = "Delete AIR simulation and clean up associated resources" - workflow_input_class = AIRDeleteInput - workflow_api_endpoint = "/ngc/air_delete" - workflow_namespace = "ngc" - - def __init__(self) -> None: - """Initialize workflow.""" - StageMixin.__init__(self) - self.define_stage( - name="delete_simulation", - description="Delete an AIR simulation.", - requires_approval=False, - depends_on=[], - ) - - class DeleteSimulationStageInput(StageInput): - """Delete Simulation Stage Input.""" - - simulation_id: str - - class DeleteSimulationStageOutput(StageOutput): - """Delete Simulation Stage Output.""" - - success: bool - - @stage_executor("delete_simulation") - async def delete_simulation( - self, stage_input: DeleteSimulationStageInput - ) -> DeleteSimulationStageOutput: - """Delete an AIR simulation.""" - await workflow.execute_activity( - delete_simulation, - DeleteSimulationInput(simulation_id=stage_input.simulation_id), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - return AIRDeleteSimulationWorkflow.DeleteSimulationStageOutput( - success=True, - display=f"Successfully deleted simulation with ID: {stage_input.simulation_id}", - ) - - @run_nv_config_manager_workflow - async def run(self, workflow_input: AIRDeleteInput) -> bool: # type: ignore[override, ty:invalid-method-override] - """Execute AIR simulation delete workflow.""" - self.set_input(workflow_input) - - 
delete_output = await self.delete_simulation( - AIRDeleteSimulationWorkflow.DeleteSimulationStageInput( - simulation_id=workflow_input.simulation_id, - ) - ) - - await self.archive_results() - - return delete_output.success - - -class AIRValidateSiteInput(BaseModel): - """AIR Validate Site Workflow Input Definition.""" - - site_name: str - user: str - - -@workflow.defn -class AIRValidateSiteWorkflow(WorkflowMetadataMixin, StageMixin, ArchiveMixin): - """AIR site validation workflow for network configuration testing.""" - - # Workflow metadata - workflow_description = "Validate site network configuration using AIR simulation environment" - workflow_input_class = AIRValidateSiteInput - workflow_api_endpoint = "/ngc/air_validate_site" - workflow_namespace = "ngc" - - def __init__(self) -> None: - """Initialize workflow.""" - StageMixin.__init__(self) - self.define_stage( - name="generate_minimal_topology", - description="Generate minimal topology for testing rendered configs.", - requires_approval=False, - depends_on=[], - ) - self.define_stage( - name="setup_simulation", - description="Create and populate a new AIR simulation.", - requires_approval=False, - depends_on=["generate_minimal_topology"], - ) - self.define_stage( - name="configure_devices", - description="Configure and validate devices in the simulation.", - requires_approval=False, - depends_on=["setup_simulation"], - ) - self.define_stage( - name="delete_simulation", - description="Delete the AIR simulation.", - requires_approval=False, - depends_on=["configure_devices"], - ) - - class GenerateMinimalTopologyStageInput(StageInput): - """Generate Minimal Topology Stage Input.""" - - site_name: str - - class GenerateMinimalTopologyStageOutput(StageOutput): - """Generate Minimal Topology Stage Output.""" - - topology: dict[str, Any] - node_map: dict[str, str] # Maps nv-config-manager device to AIR node - - @stage_executor("generate_minimal_topology") - async def generate_minimal_topology( - self, 
stage_input: GenerateMinimalTopologyStageInput - ) -> GenerateMinimalTopologyStageOutput: - """Generate minimal topology for testing rendered configs.""" - output = await workflow.execute_activity( - generate_minimal_topology_for_site, - MinimalTopologyInput(site_name=stage_input.site_name), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - display = f"Generated minimal topology for {stage_input.site_name}\n\n" - display += f"Representing {len(output.node_map)} devices with {len(output.topology['nodes'])} nodes" - return AIRValidateSiteWorkflow.GenerateMinimalTopologyStageOutput( - topology=output.topology, - node_map=output.node_map, - display=display, - ) - - class SetupSimulationStageInput(StageInput): - """Setup Simulation Stage Input.""" - - topology: dict[str, Any] - site_name: str - user: str - - class SetupSimulationStageOutput(StageOutput): - """Setup Simulation Stage Output.""" - - simulation_id: str - devices: dict[str, AirDevice] - - @stage_executor("setup_simulation") - async def setup_simulation( - self, stage_input: SetupSimulationStageInput - ) -> SetupSimulationStageOutput: - """Create and populate a new AIR simulation.""" - simulation_name = f"{stage_input.site_name} Validation ({stage_input.user})" - - simulation_id, devices = await _setup_simulation(simulation_name, stage_input.topology) - - return AIRValidateSiteWorkflow.SetupSimulationStageOutput( - simulation_id=simulation_id, - devices={device.name: device for device in devices}, - display=f"Created validation simulation with ID: {simulation_id}", - ) - - class ConfigureDevicesStageInput(StageInput): - """Configure Devices Stage Input.""" - - device_to_node_map: dict[str, str] - node_map: dict[str, AirDevice] - - class ConfigureDevicesStageOutput(StageOutput): - """Configure Devices Stage Output.""" - - failed_devices: list[dict[str, str]] # Maps device name to error message - - @stage_executor("configure_devices") - async def 
configure_devices( - self, stage_input: ConfigureDevicesStageInput - ) -> ConfigureDevicesStageOutput: - """Configure and validate devices in the simulation.""" - failed_devices = [] - - # This is a very long running stage, update the display to show progress - # after each device, final output will be replaced with the result - stage_progress_display = ( - f"In Progress:0/{len(stage_input.device_to_node_map)} devices configured..." - ) - stage = self.get_stage_by_name("configure_devices") - stage.output = AIRValidateSiteWorkflow.ConfigureDevicesStageOutput( - failed_devices=[], - display=stage_progress_display, - ) - - network_device_results = await workflow.execute_activity( - get_network_devices, - GetNetworkDevicesInput(device_ids=list(stage_input.device_to_node_map.keys())), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - device_map = {device.id: device for device in network_device_results.devices} - - for i, (device_id, air_node_name) in enumerate(stage_input.device_to_node_map.items()): - network_device = device_map[device_id] - - # Load intended configuration - content, commit, url = await workflow.execute_activity( - load_intended_configuration, - network_device, - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - # Diff configuration - air_node = stage_input.node_map[air_node_name] - test_result = await workflow.execute_activity( - validate_configuration_against_air_device, - ConfigTestInput(node=air_node, config=content), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - if test_result.error: - encoded_url = urllib.parse.quote(url, safe="/:") - failed_devices.append( - { - "Device": network_device.name, - "Error Message": test_result.error, - "Intended Config": f"[startup.yaml]({encoded_url})", - } - ) - stage_progress_display = ( - f"In Progress: {i + 1}/{len(stage_input.device_to_node_map)} 
devices configured..." - ) - stage.output = AIRValidateSiteWorkflow.ConfigureDevicesStageOutput( - failed_devices=failed_devices, - display=stage_progress_display, - ) - - if failed_devices: - display = ( - markdown_table(failed_devices) - .set_params(quote=False, row_sep="markdown") - .get_markdown() - ) - else: - display = "All devices configured successfully." - - return AIRValidateSiteWorkflow.ConfigureDevicesStageOutput( - failed_devices=failed_devices, - display=display, - ) - - class DeleteSimulationStageInput(StageInput): - """Delete Simulation Stage Input.""" - - simulation_id: str - - class DeleteSimulationStageOutput(StageOutput): - """Delete Simulation Stage Output.""" - - success: bool - - @stage_executor("delete_simulation") - async def delete_simulation( - self, stage_input: DeleteSimulationStageInput - ) -> DeleteSimulationStageOutput: - """Delete an AIR simulation.""" - await workflow.execute_activity( - delete_simulation, - DeleteSimulationInput(simulation_id=stage_input.simulation_id), - retry_policy=DEFAULT_ACTIVITY_RETRY_POLICY, - start_to_close_timeout=DEFAULT_ACTIVITY_TIMEOUT, - ) - - return AIRValidateSiteWorkflow.DeleteSimulationStageOutput( - success=True, - display=f"Successfully deleted simulation with ID: {stage_input.simulation_id}", - ) - - @run_nv_config_manager_workflow - async def run(self, workflow_input: AIRValidateSiteInput) -> list[dict[str, str]]: # type: ignore[override, ty:invalid-method-override] - """Execute AIR test rendered configs workflow.""" - self.set_input(workflow_input) - - topology_output = await self.generate_minimal_topology( - AIRValidateSiteWorkflow.GenerateMinimalTopologyStageInput( - site_name=workflow_input.site_name, - ) - ) - - setup_output = await self.setup_simulation( - AIRValidateSiteWorkflow.SetupSimulationStageInput( - topology=topology_output.topology, - site_name=workflow_input.site_name, - user=workflow_input.user, - ) - ) - - configure_output = await self.configure_devices( - 
AIRValidateSiteWorkflow.ConfigureDevicesStageInput( - node_map=setup_output.devices, - device_to_node_map=topology_output.node_map, - ) - ) - - await self.delete_simulation( - AIRValidateSiteWorkflow.DeleteSimulationStageInput( - simulation_id=setup_output.simulation_id, - ) - ) - - await self.archive_results() - - return configure_output.failed_devices diff --git a/src/tests/conftest.py b/src/tests/conftest.py index 6403ce1..8db4597 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -99,14 +99,6 @@ [temporal.api] cors_origins = https://config-manager.example.com -[temporal.air] -ssa_client_id = DUMMY -ssa_client_secret = DUMMY -org_id = DUMMY -air_api_url = https://air-api.example.com/api/ -air_node_user = nv-config-manager-air-user -air_node_password = nv-config-manager-air-password - [dhcp.kea] server = localhost port = 8000 diff --git a/src/tests/development/test_mock_topology_context.py b/src/tests/development/test_mock_topology_context.py new file mode 100644 index 0000000..a4d79d5 --- /dev/null +++ b/src/tests/development/test_mock_topology_context.py @@ -0,0 +1,116 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Validation tests for development mock topology source data.""" + +import json +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parents[3] +MOCK_TOPOLOGY_CONTEXT = PROJECT_ROOT / "development" / "mock_topology" / "context" +MOCK_TOPOLOGY_DESIGNS = PROJECT_ROOT / "development" / "mock_topology" / "jobs" / "designs" + + +def test_cumulus_mock_devices_define_explicit_ztp_identifiers() -> None: + missing_eth0_mac = [] + missing_serial = [] + + for path in sorted(MOCK_TOPOLOGY_CONTEXT.glob("*/devices/*.json")): + with path.open() as f: + device = json.load(f).get("data", {}).get("device") or {} + + platform_name = (device.get("platform") or {}).get("name", "") + if "Cumulus" not in platform_name: + continue + + if not device.get("serial"): + missing_serial.append(f"{path.name}:{device.get('name')}") + + eth0 = next( + (interface for interface in device.get("interfaces", []) if interface.get("name") == "eth0"), + None, + ) + if not eth0 or not eth0.get("mac_address"): + missing_eth0_mac.append(f"{path.name}:{device.get('name')}") + + assert missing_serial == [] + assert missing_eth0_mac == [] + + +def test_air_trial_cumulus_serials_match_eth0_mac_addresses() -> None: + mismatches = [] + + for path in sorted((MOCK_TOPOLOGY_CONTEXT / "air_trial" / "devices").glob("*.json")): + with path.open() as f: + device = json.load(f).get("data", {}).get("device") or {} + + platform_name = (device.get("platform") or {}).get("name", "") + if "Cumulus" not in platform_name: + continue + + eth0 = next( + (interface for interface in device.get("interfaces", []) if interface.get("name") == "eth0"), + {}, + ) + if device.get("serial") != eth0.get("mac_address"): + mismatches.append(f"{path.name}:{device.get('name')}") + + assert mismatches == [] + + +def test_cumulus_mock_devices_define_intended_firmware() -> None: + missing_firmware = [] + + for path in sorted(MOCK_TOPOLOGY_CONTEXT.glob("*/devices/*.json")): + with path.open() as f: + device = 
json.load(f).get("data", {}).get("device") or {} + + platform_name = (device.get("platform") or {}).get("name", "") + if "Cumulus" not in platform_name: + continue + + firmware = ( + device.get("config_context", {}).get("intended-firmware", {}).get("version") + ) + if not firmware: + missing_firmware.append(f"{path.name}:{device.get('name')}") + + assert missing_firmware == [] + + +def test_mock_topology_templates_quote_string_identifiers() -> None: + interfaces_template = (MOCK_TOPOLOGY_DESIGNS / "interfaces.yaml.j2").read_text() + devices_template = (MOCK_TOPOLOGY_DESIGNS / "devices.yaml.j2").read_text() + + assert 'mac_address: "{{ intf.mac_address }}"' in interfaces_template + assert 'serial: "{{ device.serial }}"' in devices_template + assert "local_config_context_data" in devices_template + + +def test_ip_address_templates_use_topology_namespace() -> None: + ip_addresses_template = (MOCK_TOPOLOGY_DESIGNS / "ip_addresses.yaml.j2").read_text() + + assert '"!create_or_update:parent__namespace__name": {{ global_defaults.namespace }}' in ip_addresses_template + + +def test_role_design_does_not_replace_content_type_memberships() -> None: + roles_template = (MOCK_TOPOLOGY_DESIGNS / "roles.yaml.j2").read_text() + + assert "content_types:" not in roles_template + + +def test_managed_devices_template_allows_devices_without_platform() -> None: + managed_devices_template = (MOCK_TOPOLOGY_DESIGNS / "managed_devices.yaml.j2").read_text() + + assert "device.platform is defined" in managed_devices_template diff --git a/src/tests/temporal/api/test_parameter.py b/src/tests/temporal/api/test_parameter.py index 8b03a25..5616d20 100644 --- a/src/tests/temporal/api/test_parameter.py +++ b/src/tests/temporal/api/test_parameter.py @@ -12,13 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from unittest.mock import MagicMock, patch - from aioresponses import aioresponses from fastapi.testclient import TestClient from nv_config_manager.temporal.api.main import app -from nv_config_manager.temporal.client.air import Simulation V2_SITES = { "data": {"locations": [{"id": "ddadde54-cbdd-4fa5-94ce-ca649b7e2aa8", "name": "SITEA"}]} @@ -222,26 +219,3 @@ def test_status_without_content_type(): {"id": "status-uuid-2", "name": "Provisioned"}, {"id": "status-uuid-3", "name": "Decommissioned"}, ] - - -@patch("nv_config_manager.temporal.api.parameter_v1.AirClient") -def test_simulations(mock_air_client): - """Test the simulations parameter endpoint.""" - # Use actual Simulation model objects - sim1 = Simulation(id="sim-12345-abcde", name="Test Simulation 1", state="LOADED") - sim2 = Simulation(id="sim-67890-fghij", name="Test Simulation 2", state="RUNNING") - - mock_client_instance = MagicMock() - mock_air_client.return_value = mock_client_instance - mock_client_instance.list_simulations.return_value = [sim1, sim2] - - client = TestClient(app) - rsp = client.get("/v1/parameter/simulations") - - expected_response = [ - {"id": "sim-12345-abcde", "name": "Test Simulation 1", "state": "LOADED"}, - {"id": "sim-67890-fghij", "name": "Test Simulation 2", "state": "RUNNING"}, - ] - - assert rsp.json() == expected_response - mock_client_instance.list_simulations.assert_called_once() diff --git a/src/tests/temporal/ngc/activities/test_air.py b/src/tests/temporal/ngc/activities/test_air.py deleted file mode 100644 index bf398e2..0000000 --- a/src/tests/temporal/ngc/activities/test_air.py +++ /dev/null @@ -1,102 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from unittest.mock import patch - -from ruamel.yaml import YAML - -from nv_config_manager.temporal.client.air import AirDevice -from nv_config_manager.temporal.ngc.activities.air import ( - ConfigTestInput, - validate_configuration_against_air_device, -) - - -def test_test_configuration_against_air_device(): - config = """ -- set: - system: - aaa: - tacacs: - server: - '1': - host: 1.1.1.1 - secret: tacacs-secret - user: - cumulus: - hashed-password: cumulus - role: system-admin - svc-ngc-cfa-nv-config-manager: - hashed-password: svc-ngc-cfa-nv-config-manager - role: system-admin - vrf: - default: - router: - bgp: - peer-group: - PEERGROUP: - password: bgppassword -""" - - expected_sanitized_config = """ -- set: - system: - aaa: - tacacs: - server: - '1': - host: 1.1.1.1 - secret: DuMMyP4SSW0RD! - user: - nv-config-manager-air-integration: - password: nv-config-manager-air-integration-password - role: system-admin - vrf: - default: - router: - bgp: - peer-group: - PEERGROUP: - password: DuMMyP4SSW0RD! 
-""" - - with ( - patch("nv_config_manager.temporal.ngc.activities.air.CumulusConnection") as mock_connection, - patch("nv_config_manager.temporal.ngc.activities.air.AirClient") as mock_air_client, - ): - # Mock the AirClient configuration - mock_client_instance = mock_air_client.return_value - mock_client_instance.cfg = { - "temporal.air": { - "air_node_user": "nv-config-manager-air-integration", - "air_node_password": "nv-config-manager-air-integration-password", - } - } - - mock_connection.return_value.perform_candidate_diff.return_value = None - result = validate_configuration_against_air_device( - ConfigTestInput( - node=AirDevice(id="1", name="test", worker_ip="192.168.1.1", api_port=8000), - config=config, - ) - ) - assert result.error is None - - mock_connection.return_value.perform_candidate_diff.assert_called_once() - actual_config = mock_connection.return_value.perform_candidate_diff.call_args[0][0] - expected_obj = YAML().load(expected_sanitized_config) - actual_obj = YAML().load(actual_config) - assert actual_obj == expected_obj, ( - f"Config mismatch:\nExpected: {expected_obj}\nActual: {actual_obj}" - ) diff --git a/src/tests/temporal/ngc/workflows/test_air_workflow.py b/src/tests/temporal/ngc/workflows/test_air_workflow.py deleted file mode 100644 index e12d7c7..0000000 --- a/src/tests/temporal/ngc/workflows/test_air_workflow.py +++ /dev/null @@ -1,615 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import urllib.parse -import uuid -from concurrent.futures import ThreadPoolExecutor -from datetime import timedelta -from unittest.mock import patch - -import pytest -from temporalio import activity -from temporalio.client import WorkflowHandle -from temporalio.common import RetryPolicy -from temporalio.worker import Worker - -from nv_config_manager.temporal.common.mixins.device import NetworkDeviceData -from nv_config_manager.temporal.ngc.activities.air import ( - AirDevice, - ConfigTestInput, - ConfigTestOutput, - CreateSimulationInput, - CreateSimulationNodeServicesInput, - CreateSimulationNodeServicesOutput, - CreateSimulationOutput, - DeleteSimulationInput, - MinimalTopologyInput, - MinimalTopologyOutput, - PrepareSimulationNodesInput, - StartSimulationInput, - WaitForSimulationNodeInput, -) -from nv_config_manager.temporal.ngc.activities.nats import PublishNatsInput -from nv_config_manager.temporal.ngc.activities.nautobot import ( - GetNetworkDevicesInput, - GetNetworkDevicesOutput, -) -from nv_config_manager.temporal.ngc.workflows.air import ( - AIRCreateBlueprintSimulationInput, - AIRCreateBlueprintSimulationWorkflow, - AIRCreateSimulationInput, - AIRCreateSimulationWorkflow, - AIRDeleteInput, - AIRDeleteSimulationWorkflow, - AIRValidateSiteInput, - AIRValidateSiteWorkflow, -) - -# Test-specific retry policy and timeout -TEST_RETRY_POLICY = RetryPolicy(maximum_attempts=1) -TEST_TIMEOUT = timedelta(seconds=10) - - -@activity.defn(name="generate_air_topology_for_location") -async def mock_generate_air_topology_for_location(location_id: str) -> dict: - """Mock topology generation activity.""" - # Updated to a more realistic topology structure - return { - "oob": False, - "nodes": { - f"{location_id}_leaf1": { - "memory": 2048, - "os": "cumulus-vx-5.10.0", - "cpu": 1, - "type": "switch", # Adding type as it's common in AIR - }, - f"{location_id}_spine1": { 
- "memory": 4096, - "os": "cumulus-vx-5.11.0", - "cpu": 2, - "type": "switch", # Adding type - }, - f"{location_id}_server1": { - "memory": 8192, - "os": "ubuntu-20.04", - "cpu": 4, - "type": "server", # Adding type - }, - }, - "links": [ - [ - {"node": f"{location_id}_leaf1", "interface": "swp1s1"}, - {"node": f"{location_id}_spine1", "interface": "swp1s1"}, - ], - [ - {"node": f"{location_id}_leaf1", "interface": "swp2s1"}, - {"node": f"{location_id}_server1", "interface": "eth0"}, - ], - [{"node": f"{location_id}_spine1", "interface": "swp2s1"}, "unconnected"], - [{"node": f"{location_id}_server1", "interface": "eth1"}, "unconnected"], - ], - } - - -@activity.defn(name="create_simulation") -async def mock_create_simulation( - input: CreateSimulationInput, -) -> CreateSimulationOutput: - """Mock simulation creation activity.""" - return CreateSimulationOutput(simulation_id="mock_simulation_id") - - -@activity.defn(name="prepare_simulation_nodes") -async def mock_prepare_simulation_nodes(input: PrepareSimulationNodesInput) -> None: - """Mock node preparation activity.""" - pass - - -@activity.defn(name="start_simulation") -async def mock_start_simulation(input: StartSimulationInput) -> None: - """Mock simulation start activity.""" - pass - - -@activity.defn(name="create_simulation_node_services") -async def mock_create_simulation_node_services( - input: CreateSimulationNodeServicesInput, -) -> CreateSimulationNodeServicesOutput: - """Mock node services creation activity.""" - return CreateSimulationNodeServicesOutput( - devices=[ - AirDevice( - id="node1", - name="node1", - worker_ip="10.0.0.1", - api_port=8765, - ), - AirDevice( - id="node2", - name="node2", - worker_ip="10.0.0.2", - api_port=8765, - ), - ] - ) - - -@activity.defn(name="wait_for_simulation_node") -async def mock_wait_for_simulation_node(input: WaitForSimulationNodeInput) -> None: - """Mock node wait activity.""" - pass - - -@activity.defn(name="delete_simulation") -async def 
mock_delete_simulation(input: DeleteSimulationInput) -> None: - """Mock simulation deletion activity.""" - pass - - -@activity.defn(name="get_network_devices") -async def mock_get_network_devices( - activity_input: GetNetworkDevicesInput, -) -> GetNetworkDevicesOutput: - """Mock get_network_devices activity. Dynamically sets device name.""" - - devices = [] - for device_id in activity_input.device_ids: - devices.append( - NetworkDeviceData( - id=device_id, - name=device_id, - role="mock_role", - platform="cumulus-linux", - site="SITEA", - device_type="sn4200", - primary_ip4="10.0.0.1", - primary_ip6=None, - ) - ) - - return GetNetworkDevicesOutput(devices=devices) - - -@activity.defn(name="load_intended_configuration") -async def mock_load_intended_configuration( - input: NetworkDeviceData, -) -> tuple[str, str, str]: - """Mock load_intended_configuration activity. Returns dynamic URL.""" - # The input.name here is now the AirDevice name (e.g., "switch1") - # due to changes in mock_get_network_device - url = f"https://git.nvidia.com/mock_site/{input.name}/startup.yaml" - return "mock config for " + input.name, "mock_commit_for_" + input.name, url - - -@activity.defn(name="validate_configuration_against_air_device") -async def mock_validate_configuration_against_air_device( - input: ConfigTestInput, -) -> ConfigTestOutput: - """Mock test_configuration_against_air_device activity.""" - return ConfigTestOutput(error=None) - - -@activity.defn(name="generate_minimal_topology_for_site") -async def mock_generate_minimal_topology_for_site( - input: MinimalTopologyInput, -) -> MinimalTopologyOutput: - """Mock generate_minimal_topology_for_site activity.""" - topology_dict = { - "oob": True, # Minimal topology for config testing might use OOB - "nodes": { - "node1": { - "memory": 1024, - "os": "cumulus-vx-5.9.0", # Example OS - "cpu": 1, - "type": "switch", # Standard type for AIR nodes - }, - "node2": { - "memory": 1024, - "os": "cumulus-vx-5.9.0", - "cpu": 1, - "type": 
"switch", - }, - }, - "links": [ - # For minimal config testing, nodes might be largely isolated - # or have very specific, simple connections if inter-device config is tested. - # Often, they are tested as standalone. - [ - {"node": "node1", "interface": "eth0"}, - "unconnected", - ], # Management interface - [{"node": "node1", "interface": "swp1"}, "unconnected"], - [ - {"node": "node2", "interface": "eth0"}, - "unconnected", - ], # Management interface - [{"node": "node2", "interface": "swp1"}, "unconnected"], - ], - } - # 2 devices mapped to the same node - node_map_dict = { - "switch1": "node1", - "switch2": "node2", - "switch3": "node2", - } - return MinimalTopologyOutput(topology=topology_dict, node_map=node_map_dict) - - -@activity.defn( - name="validate_configuration_against_air_device" -) # Give it a unique name if registered globally -async def mock_validate_config_with_error(input: ConfigTestInput) -> ConfigTestOutput: - """Mock validate_configuration_against_air_device that returns an error for a specific node.""" - if input.node.name == "node1": - return ConfigTestOutput(error="Syntax Error! 
Invalid command found.") - return ConfigTestOutput(error=None) - - -@activity.defn(name="publish_nats") -async def mock_publish_nats(activity_input: PublishNatsInput) -> None: - """Mock publish nats activity.""" - return None - - -@pytest.mark.asyncio -@patch("nv_config_manager.temporal.common.mixins.stage.workflow.time", return_value=float(0)) -@patch( - "nv_config_manager.temporal.ngc.workflows.air.DEFAULT_ACTIVITY_RETRY_POLICY", - return_value=TEST_RETRY_POLICY, -) -@patch("nv_config_manager.temporal.ngc.workflows.air.timedelta", return_value=TEST_TIMEOUT) -async def test_execute_workflow(mock_time_delta, mock_retry_policy, mock_time, env): - """Test the AIR workflow execution.""" - task_queue_name = str(uuid.uuid4()) - async with Worker( - env.client, - task_queue=task_queue_name, - workflows=[AIRCreateBlueprintSimulationWorkflow], - activities=[ - mock_generate_air_topology_for_location, - mock_create_simulation, - mock_prepare_simulation_nodes, - mock_start_simulation, - mock_create_simulation_node_services, - mock_wait_for_simulation_node, - mock_publish_nats, - ], - activity_executor=ThreadPoolExecutor(100), - ): - input = AIRCreateBlueprintSimulationInput( - blueprint_name="test_blueprint", - user="test_user", - user_domain="nvidia.com", - ) - workflow_id = str(uuid.uuid4()) - handle: WorkflowHandle = await env.client.start_workflow( - AIRCreateBlueprintSimulationWorkflow.run, - input, - id=workflow_id, - task_queue=task_queue_name, - run_timeout=timedelta(minutes=10), - ) - - result = await handle.result() - - assert result == ["device1", "device2"] - assert await handle.query("stages") == [ - { - "approval_threshold": 0, - "approvers": [], - "child_workflows": [], - "depends_on": [], - "description": "Create Cerebro location from blueprint.", - "execution_time": 0.0, - "input": {"blueprint_name": "test_blueprint"}, - "name": "create_cerebro_location", - "output": { - "location_id": "placeholder", - "display": "Created location with ID: placeholder", - }, 
- "rejecters": [], - "requires_approval": False, - "retry_count": 0, - "retryable": True, - "state": "COMPLETE", - "state_history": [ - {"state": "NOT_STARTED", "time": "1970-01-01T00:00:00+00:00"}, - {"state": "IN_PROGRESS", "time": "1970-01-01T00:00:00+00:00"}, - {"state": "COMPLETE", "time": "1970-01-01T00:00:00+00:00"}, - ], - "traceback": None, - }, - { - "approval_threshold": 0, - "approvers": [], - "child_workflows": [], - "depends_on": ["create_cerebro_location"], - "description": "Create and populate a new AIR simulation.", - "execution_time": 0.0, - "input": { - "user": "test_user", - "user_domain": "nvidia.com", - "location_id": "placeholder", - }, - "name": "setup_simulation", - "output": { - "simulation_id": "mock_simulation_id", - "display": "Created simulation with ID: mock_simulation_id", - }, - "rejecters": [], - "requires_approval": False, - "retry_count": 0, - "retryable": True, - "state": "COMPLETE", - "state_history": [ - {"state": "NOT_STARTED", "time": "1970-01-01T00:00:00+00:00"}, - {"state": "IN_PROGRESS", "time": "1970-01-01T00:00:00+00:00"}, - {"state": "COMPLETE", "time": "1970-01-01T00:00:00+00:00"}, - ], - "traceback": None, - }, - { - "approval_threshold": 0, - "approvers": [], - "child_workflows": [], - "depends_on": ["setup_simulation"], - "description": "Configure devices in the simulation.", - "execution_time": 0.0, - "input": {"location_id": "placeholder"}, - "name": "configure_devices", - "output": { - "configured_devices": ["device1", "device2"], - "display": "Configured devices: device1, device2", - }, - "rejecters": [], - "requires_approval": False, - "retry_count": 0, - "retryable": True, - "state": "COMPLETE", - "state_history": [ - {"state": "NOT_STARTED", "time": "1970-01-01T00:00:00+00:00"}, - {"state": "IN_PROGRESS", "time": "1970-01-01T00:00:00+00:00"}, - {"state": "COMPLETE", "time": "1970-01-01T00:00:00+00:00"}, - ], - "traceback": None, - }, - ] - - -@pytest.mark.asyncio 
-@patch("nv_config_manager.temporal.common.mixins.stage.workflow.time", return_value=float(0)) -@patch( - "nv_config_manager.temporal.ngc.workflows.air.DEFAULT_ACTIVITY_RETRY_POLICY", - return_value=TEST_RETRY_POLICY, -) -@patch("nv_config_manager.temporal.ngc.workflows.air.timedelta", return_value=TEST_TIMEOUT) -async def test_air_create_simulation_workflow(mock_time_delta, mock_retry_policy, mock_time, env): - """Test the AIRCreateSimulationWorkflow execution.""" - task_queue_name = str(uuid.uuid4()) - async with Worker( - env.client, - task_queue=task_queue_name, - workflows=[AIRCreateSimulationWorkflow], - activities=[ - mock_create_simulation, - mock_prepare_simulation_nodes, - mock_start_simulation, - mock_create_simulation_node_services, - mock_wait_for_simulation_node, - mock_publish_nats, - ], - activity_executor=ThreadPoolExecutor(100), - ): - input_data = AIRCreateSimulationInput( - name="test_simulation", - topology={ - "oob": False, - "nodes": { - "node1": { - "memory": 2048, - "os": "cumulus-vx-5.10.0", - "cpu": 1, - "type": "switch", - }, - }, - "links": [], - }, - user="test_user", - ) - workflow_id = str(uuid.uuid4()) - handle: WorkflowHandle = await env.client.start_workflow( - AIRCreateSimulationWorkflow.run, - input_data, - id=workflow_id, - task_queue=task_queue_name, - run_timeout=timedelta(minutes=10), - ) - - result = await handle.result() - assert result == "mock_simulation_id" - - stages = await handle.query("stages") - assert len(stages) == 1 - assert stages[0]["name"] == "setup_simulation" - assert stages[0]["state"] == "COMPLETE" - assert stages[0]["output"]["simulation_id"] == "mock_simulation_id" - assert len(stages[0]["output"]["devices"]) == 2 - - -@pytest.mark.asyncio -@patch("nv_config_manager.temporal.common.mixins.stage.workflow.time", return_value=float(0)) -async def test_air_delete_simulation_workflow(mock_time, env): - """Test the AIRDeleteSimulationWorkflow execution.""" - task_queue_name = str(uuid.uuid4()) - async with 
Worker( - env.client, - task_queue=task_queue_name, - workflows=[AIRDeleteSimulationWorkflow], - activities=[mock_delete_simulation, mock_publish_nats], - activity_executor=ThreadPoolExecutor(100), - ): - input_data = AIRDeleteInput(simulation_id="mock_sim_id_to_delete") - workflow_id = str(uuid.uuid4()) - handle: WorkflowHandle = await env.client.start_workflow( - AIRDeleteSimulationWorkflow.run, - input_data, - id=workflow_id, - task_queue=task_queue_name, - run_timeout=timedelta(minutes=10), - ) - - result = await handle.result() - assert result is True - - stages = await handle.query("stages") - assert len(stages) == 1 - assert stages[0]["name"] == "delete_simulation" - assert stages[0]["state"] == "COMPLETE" - assert stages[0]["output"]["success"] is True - - -@pytest.mark.asyncio -@patch("nv_config_manager.temporal.common.mixins.stage.workflow.time", return_value=float(0)) -@patch( - "nv_config_manager.temporal.ngc.workflows.air.DEFAULT_ACTIVITY_RETRY_POLICY", - return_value=TEST_RETRY_POLICY, -) -@patch("nv_config_manager.temporal.ngc.workflows.air.timedelta", return_value=TEST_TIMEOUT) -async def test_air_validate_site_workflow(mock_time_delta, mock_retry_policy, mock_time, env): - """Test the AIRValidateSiteWorkflow execution.""" - task_queue_name = str(uuid.uuid4()) - async with Worker( - env.client, - task_queue=task_queue_name, - workflows=[AIRValidateSiteWorkflow], - activities=[ - mock_generate_minimal_topology_for_site, - mock_create_simulation, - mock_prepare_simulation_nodes, - mock_start_simulation, - mock_create_simulation_node_services, - mock_wait_for_simulation_node, - mock_get_network_devices, - mock_load_intended_configuration, - mock_validate_configuration_against_air_device, - mock_delete_simulation, - mock_publish_nats, - ], - activity_executor=ThreadPoolExecutor(100), - ): - input_data = AIRValidateSiteInput(site_name="test_site", user="test_user") - workflow_id = str(uuid.uuid4()) - handle: WorkflowHandle = await 
env.client.start_workflow( - AIRValidateSiteWorkflow.run, - input_data, - id=workflow_id, - task_queue=task_queue_name, - run_timeout=timedelta(minutes=10), - ) - - result = await handle.result() - assert result == [] - - stages = await handle.query("stages") - assert len(stages) == 4 - assert stages[0]["name"] == "generate_minimal_topology" - assert stages[0]["state"] == "COMPLETE" - assert stages[1]["name"] == "setup_simulation" - assert stages[1]["state"] == "COMPLETE" - assert stages[2]["name"] == "configure_devices" - assert stages[2]["state"] == "COMPLETE" - assert stages[2]["output"]["failed_devices"] == [] - assert stages[3]["name"] == "delete_simulation" - assert stages[3]["state"] == "COMPLETE" - assert stages[3]["output"]["success"] is True - - -@pytest.mark.asyncio -@patch("nv_config_manager.temporal.common.mixins.stage.workflow.time", return_value=float(0)) -@patch( - "nv_config_manager.temporal.ngc.workflows.air.DEFAULT_ACTIVITY_RETRY_POLICY", - return_value=TEST_RETRY_POLICY, -) -@patch("nv_config_manager.temporal.ngc.workflows.air.timedelta", return_value=TEST_TIMEOUT) -async def test_air_validate_site_workflow_with_config_error( - mock_time_delta, mock_retry_policy, mock_time, env -): - """Test the AIRValidateSiteWorkflow when a device has a configuration error.""" - task_queue_name = str(uuid.uuid4()) - async with Worker( - env.client, - task_queue=task_queue_name, - workflows=[AIRValidateSiteWorkflow], - activities=[ - mock_generate_minimal_topology_for_site, - mock_create_simulation, - mock_prepare_simulation_nodes, - mock_start_simulation, - mock_create_simulation_node_services, - mock_wait_for_simulation_node, - mock_get_network_devices, - mock_load_intended_configuration, - mock_validate_config_with_error, # Specific mock for this test - mock_delete_simulation, - mock_publish_nats, - ], - activity_executor=ThreadPoolExecutor(100), - ): - input_data = AIRValidateSiteInput(site_name="test_site_error", user="test_user") - workflow_id = 
str(uuid.uuid4()) - handle: WorkflowHandle = await env.client.start_workflow( - AIRValidateSiteWorkflow.run, - input_data, - id=workflow_id, - task_queue=task_queue_name, - run_timeout=timedelta(minutes=10), - ) - - result = await handle.result() - - expected_failed_device_name = "switch1" - expected_error_message = "Syntax Error! Invalid command found." - # URL needs to be constructed carefully based on mock_load_intended_configuration and then URL encoded by the workflow - raw_url_for_failed_device = ( - f"https://git.nvidia.com/mock_site/{expected_failed_device_name}/startup.yaml" - ) - encoded_url_for_failed_device = urllib.parse.quote(raw_url_for_failed_device, safe="/:") - - expected_failure = { - "Device": expected_failed_device_name, - "Error Message": expected_error_message, - "Intended Config": f"[startup.yaml]({encoded_url_for_failed_device})", - } - - assert result == [expected_failure] - - stages = await handle.query("stages") - assert len(stages) == 4 - assert stages[0]["name"] == "generate_minimal_topology" - assert stages[0]["state"] == "COMPLETE" - assert stages[1]["name"] == "setup_simulation" - assert stages[1]["state"] == "COMPLETE" - assert stages[2]["name"] == "configure_devices" - assert stages[2]["state"] == "COMPLETE" - assert stages[2]["output"]["failed_devices"] == [expected_failure] - assert stages[3]["name"] == "delete_simulation" - assert stages[3]["state"] == "COMPLETE" - assert stages[3]["output"]["success"] is True - - expected_display_table = """ -| Device| Error Message | Intended Config | -|-------|------------------------------------|---------------------------------------------------------------------| -|switch1|Syntax Error! 
Invalid command found.|[startup.yaml](https://git.nvidia.com/mock_site/switch1/startup.yaml)| -""" - assert stages[2]["output"]["display"].strip() == expected_display_table.strip() diff --git a/ui/src/app/workflows/aircreatesimulationworkflow/form/air-create-simulation-form.tsx b/ui/src/app/workflows/aircreatesimulationworkflow/form/air-create-simulation-form.tsx deleted file mode 100644 index 0e0b352..0000000 --- a/ui/src/app/workflows/aircreatesimulationworkflow/form/air-create-simulation-form.tsx +++ /dev/null @@ -1,164 +0,0 @@ -"use client"; -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import * as React from "react"; -import { zodResolver } from "@hookform/resolvers/zod"; -import { useSearchParams } from "next/navigation"; -import { useForm } from "react-hook-form"; -import { z } from "zod"; -import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; -import { Form } from "@/components/ui/form"; -import { WorkflowFormField } from "@/components/forms/formfield"; -import { startWorkflow } from "@/lib/utils"; -import { toast } from "@/components/ui/use-toast"; -import { AIRCreateSimulationWorkflowInput } from "@/types/data-table.types"; - -const AIRCreateSimulationWorkflowFormSchema = z.object({ - name: z.string().trim().min(1, { message: "Name is required" }), - topology: z - .string() - .trim() - .min(1, { message: "Topology JSON is required" }) - .refine( - (val) => { - try { - JSON.parse(val); - return true; - } catch { - return false; - } - }, - { message: "Must be valid JSON" } - ), -}); - -export type AIRCreateSimulationWorkflowFormSchema = z.infer< - typeof AIRCreateSimulationWorkflowFormSchema ->; - -export const AIRCreateSimulationWorkflowForm = () => { - const [isSubmitting, setIsSubmitting] = React.useState(false); - - const searchParams = useSearchParams(); - const queryName = searchParams && searchParams.get("name"); - const queryTopology = searchParams && searchParams.get("topology"); - - const form = useForm({ - resolver: zodResolver(AIRCreateSimulationWorkflowFormSchema), - defaultValues: { - name: queryName || "", - topology: queryTopology || "", - }, - }); - - const onSubmit = async ( - data: z.infer - ) => { - setIsSubmitting(true); - - const topologyObject = JSON.parse(data.topology); - const submissionData: AIRCreateSimulationWorkflowInput = { - name: data.name, - topology: topologyObject, - }; - - await startWorkflow( - "/v1/workflow/ngc/air_create_simulation", - submissionData - ).catch((error) => { - toast({ - variant: "destructive", - title: 
"Workflow Failed", - description: error, - }); - }); - setIsSubmitting(false); - }; - - const formatJSON = () => { - const currentValue = form.getValues("topology"); - if (currentValue.trim()) { - try { - const parsed = JSON.parse(currentValue); - const formatted = JSON.stringify(parsed, null, 2); - form.setValue("topology", formatted); - toast({ - title: "JSON Formatted", - description: "Your JSON has been prettified!", - }); - } catch (error) { - console.error(error); - toast({ - variant: "destructive", - title: "Invalid JSON", - description: "Cannot format invalid JSON. Please check your syntax.", - }); - } - } - }; - - return ( -
- - - AIR Create Simulation - - -
- - -
-
- - -
- -
- - - -
-
-
- ); -}; diff --git a/ui/src/app/workflows/aircreatesimulationworkflow/form/loading.tsx b/ui/src/app/workflows/aircreatesimulationworkflow/form/loading.tsx deleted file mode 100644 index c39f536..0000000 --- a/ui/src/app/workflows/aircreatesimulationworkflow/form/loading.tsx +++ /dev/null @@ -1,24 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import * as React from "react"; -import { WorkflowFormSkeleton } from "@/components/loading"; - -const AIRCreateSimulationWorkflowFormLoading: React.FC = () => { - return ; -}; - -export default AIRCreateSimulationWorkflowFormLoading; diff --git a/ui/src/app/workflows/aircreatesimulationworkflow/form/page.tsx b/ui/src/app/workflows/aircreatesimulationworkflow/form/page.tsx deleted file mode 100644 index a3454e5..0000000 --- a/ui/src/app/workflows/aircreatesimulationworkflow/form/page.tsx +++ /dev/null @@ -1,21 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import { AIRCreateSimulationWorkflowForm } from "./air-create-simulation-form"; - -export default function AIRCreateSimulationWorkflowPage() { - return ; -} diff --git a/ui/src/app/workflows/airdeletesimulationworkflow/form/air-delete-simulation-form.tsx b/ui/src/app/workflows/airdeletesimulationworkflow/form/air-delete-simulation-form.tsx deleted file mode 100644 index 49baee3..0000000 --- a/ui/src/app/workflows/airdeletesimulationworkflow/form/air-delete-simulation-form.tsx +++ /dev/null @@ -1,145 +0,0 @@ -"use client"; -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import * as React from "react"; -import { zodResolver } from "@hookform/resolvers/zod"; -import { useSearchParams } from "next/navigation"; -import { useForm } from "react-hook-form"; -import { z } from "zod"; -import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; -import { Form } from "@/components/ui/form"; -import { WorkflowFormField } from "@/components/forms/formfield"; -import { startWorkflow } from "@/lib/utils"; -import { toast } from "@/components/ui/use-toast"; -import { AIRDeleteSimulationWorkflowInput } from "@/types/data-table.types"; -import { useSimulations } from "@/hooks"; - -const AIRDeleteSimulationWorkflowFormSchema = z.object({ - simulation_id: z - .string() - .trim() - .min(1, { message: "Simulation ID is required" }), -}); - -export type AIRDeleteSimulationWorkflowFormSchema = z.infer< - typeof AIRDeleteSimulationWorkflowFormSchema ->; - -export const AIRDeleteSimulationWorkflowForm = () => { - const [isSubmitting, setIsSubmitting] = React.useState(false); - const [isManualSimulationChange, setIsManualSimulationChange] = - React.useState(false); - - const searchParams = useSearchParams(); - const querySimulationId = searchParams && searchParams.get("simulation_id"); - - const { - simulations, - isLoading: simulationsIsLoading, - } = useSimulations(); - - const form = useForm({ - resolver: zodResolver(AIRDeleteSimulationWorkflowFormSchema), - defaultValues: { - simulation_id: querySimulationId || "", - }, - }); - - // Handle URL param prefill - React.useEffect(() => { - if (querySimulationId && !isManualSimulationChange) { - const isSimulationValid = simulations.some( - (simulation) => simulation.value === querySimulationId - ); - - if (isSimulationValid) { - if (form.getValues("simulation_id") !== querySimulationId) { - form.setValue("simulation_id", querySimulationId); - } - } else { - if (form.getValues("simulation_id") !== "") { - 
form.setValue("simulation_id", ""); - } - } - } - }, [querySimulationId, simulations, form, isManualSimulationChange]); - - const handleSimulationChange = (newSimulation: string | string[]) => { - setIsManualSimulationChange(true); - - if (Array.isArray(newSimulation)) { - form.setValue("simulation_id", newSimulation[0]); - } else { - form.setValue("simulation_id", newSimulation); - } - }; - - const onSubmit = async ( - data: z.infer - ) => { - setIsSubmitting(true); - - const submissionData: AIRDeleteSimulationWorkflowInput = { - simulation_id: data.simulation_id, - }; - - await startWorkflow("/v1/workflow/ngc/air_delete", submissionData).catch( - (error) => { - toast({ - variant: "destructive", - title: "Workflow Failed", - description: error.message || String(error), - }); - } - ); - setIsSubmitting(false); - }; - - return ( -
- - - AIR Delete Simulation - - -
- - handleSimulationChange(value)} - /> - - - -
-
-
- ); -}; diff --git a/ui/src/app/workflows/airdeletesimulationworkflow/form/loading.tsx b/ui/src/app/workflows/airdeletesimulationworkflow/form/loading.tsx deleted file mode 100644 index f614e15..0000000 --- a/ui/src/app/workflows/airdeletesimulationworkflow/form/loading.tsx +++ /dev/null @@ -1,24 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import * as React from "react"; -import { WorkflowFormSkeleton } from "@/components/loading"; - -const AIRDeleteSimulationWorkflowFormLoading: React.FC = () => { - return ; -}; - -export default AIRDeleteSimulationWorkflowFormLoading; diff --git a/ui/src/app/workflows/airdeletesimulationworkflow/form/page.tsx b/ui/src/app/workflows/airdeletesimulationworkflow/form/page.tsx deleted file mode 100644 index fca8b54..0000000 --- a/ui/src/app/workflows/airdeletesimulationworkflow/form/page.tsx +++ /dev/null @@ -1,21 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import { AIRDeleteSimulationWorkflowForm } from "./air-delete-simulation-form"; - -export default function AIRDeleteSimulationWorkflowPage() { - return ; -} diff --git a/ui/src/app/workflows/airvalidatesiteworkflow/form/air-validates-site-workflow-form.tsx b/ui/src/app/workflows/airvalidatesiteworkflow/form/air-validates-site-workflow-form.tsx deleted file mode 100644 index 232dcf2..0000000 --- a/ui/src/app/workflows/airvalidatesiteworkflow/form/air-validates-site-workflow-form.tsx +++ /dev/null @@ -1,145 +0,0 @@ -"use client"; -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import * as React from "react"; -import { zodResolver } from "@hookform/resolvers/zod"; -import { useSearchParams } from "next/navigation"; -import { useForm } from "react-hook-form"; -import { z } from "zod"; -import { Button } from "@/components/ui/button"; -import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; -import { Form } from "@/components/ui/form"; -import { useEnvData } from "@/hooks"; -import { WorkflowFormField } from "@/components/forms/formfield"; -import { startWorkflow } from "@/lib/utils"; -import { toast } from "@/components/ui/use-toast"; -import { AIRValidateSiteWorkflowInput } from "@/types/data-table.types"; - -const AIRValidateSiteWorkflowFormSchema = z.object({ - site: z.string().trim().min(1, { message: "Site is required" }), -}); - -export type AIRValidateSiteWorkflowFormSchema = z.infer< - typeof AIRValidateSiteWorkflowFormSchema ->; - -export const AIRValidateSiteWorkflowForm = () => { - const [isSubmitting, setIsSubmitting] = React.useState(false); - const [isManualSiteChange, setIsManualSiteChange] = React.useState(false); - const { - data: { siteData: sites }, - errors: { siteError }, - isLoading: { siteIsLoading }, - } = useEnvData(); - - const searchParams = useSearchParams(); - const querySite = searchParams && searchParams.get("site"); - - const form = useForm({ - resolver: zodResolver(AIRValidateSiteWorkflowFormSchema), - defaultValues: { - site: querySite || "", - }, - }); - - if (siteError) console.error(`Failed to query devices: ${siteError}`); - - const onSubmit = async ( - data: z.infer - ) => { - setIsSubmitting(true); - - const submissionData: AIRValidateSiteWorkflowInput = { - site_name: data.site, - }; - - await startWorkflow( - "/v1/workflow/ngc/air_validate_site", - submissionData - ).catch((error) => { - toast({ - variant: "destructive", - title: "Workflow Failed", - description: error, - }); - }); - setIsSubmitting(false); - }; - - React.useEffect(() => { - if (querySite && 
!isManualSiteChange) { - const isSiteValid = sites.some((option) => option.key === querySite); - const siteId = sites.find((option) => option.key === querySite)?.value; - - if (!isSiteValid) { - if (form.getValues("site") !== "") { - form.setValue("site", ""); // Clear site if invalid - } - } else { - if (siteId && form.getValues("site") !== siteId) { - form.setValue("site", siteId); // Set valid site from URL - } - } - } - }, [querySite, sites, form, isManualSiteChange]); - - const handleSiteChange = (newSite: string | string[]) => { - setIsManualSiteChange(true); - - if (Array.isArray(newSite)) { - form.setValue("site", newSite[0]); - } else { - form.setValue("site", newSite); - } - }; - - const SiteField = () => { - return ( - handleSiteChange(value)} - /> - ); - }; - - return ( -
- - - AIR Validate Site - - -
- - - - - -
-
-
- ); -}; diff --git a/ui/src/app/workflows/airvalidatesiteworkflow/form/loading.tsx b/ui/src/app/workflows/airvalidatesiteworkflow/form/loading.tsx deleted file mode 100644 index 383952c..0000000 --- a/ui/src/app/workflows/airvalidatesiteworkflow/form/loading.tsx +++ /dev/null @@ -1,24 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import * as React from "react"; -import { WorkflowFormSkeleton } from "@/components/loading"; - -const AIRValidateSiteWorkflowFormLoading: React.FC = () => { - return ; -}; - -export default AIRValidateSiteWorkflowFormLoading; diff --git a/ui/src/app/workflows/airvalidatesiteworkflow/form/page.tsx b/ui/src/app/workflows/airvalidatesiteworkflow/form/page.tsx deleted file mode 100644 index 912e867..0000000 --- a/ui/src/app/workflows/airvalidatesiteworkflow/form/page.tsx +++ /dev/null @@ -1,25 +0,0 @@ -"use client"; -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import { AIRValidateSiteWorkflowForm } from "./air-validates-site-workflow-form"; - -const AIRValidateSiteWorkflowFormPage = () => { - return ; -}; - -export default AIRValidateSiteWorkflowFormPage; diff --git a/ui/src/config/site.ts b/ui/src/config/site.ts index 53bf0ec..fdf44bd 100644 --- a/ui/src/config/site.ts +++ b/ui/src/config/site.ts @@ -110,21 +110,6 @@ export const siteConfig = { slug: "reprovisionworkflow", enabled: true, }, - { - title: "AIR Validate Site", - slug: "airvalidatesiteworkflow", - enabled: true, - }, - { - title: "AIR Create Simulation", - slug: "aircreatesimulationworkflow", - enabled: true, - }, - { - title: "AIR Delete Simulation", - slug: "airdeletesimulationworkflow", - enabled: true, - }, { title: "Switch OS Upgrade", slug: "switchosupgradeworkflow", diff --git a/ui/src/hooks/index.ts b/ui/src/hooks/index.ts index ce6c8ac..5f0157d 100644 --- a/ui/src/hooks/index.ts +++ b/ui/src/hooks/index.ts @@ -16,7 +16,6 @@ */ export { default as useEnvData } from "./useEnvData"; export { default as useDevices } from "./useDevices"; -export { default as useSimulations } from "./useSimulations"; export { default as useCommandCatalog } from "./useCommandCatalog"; export { default as useCommandCatalogGrouped } from "./useCommandCatalogGrouped"; export type { CommandGroup } from "./useCommandCatalogGrouped"; diff --git a/ui/src/hooks/useSimulations.ts b/ui/src/hooks/useSimulations.ts deleted file mode 100644 index 4e77f44..0000000 --- a/ui/src/hooks/useSimulations.ts +++ /dev/null @@ -1,62 +0,0 @@ -/* - * 
SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import useSWR from "swr"; -import { fetcher } from "@/lib/fetcher"; -import { useRuntimeConfig } from "@/config/runtime"; -import { sanitizeUrl } from "@/lib/utils"; -import { Option } from "@/types/workflow-form.types"; - -interface SimulationData { - id: string; - name: string; - state: string; -} - -interface UseSimulationsReturn { - simulations: Option[]; - error: unknown; - isLoading: boolean; -} - -const useSimulations = (): UseSimulationsReturn => { - const { config } = useRuntimeConfig(); - const apiURL = config?.workflowApiUrl; - - const { data, error, isLoading } = useSWR( - apiURL ? 
sanitizeUrl(`${apiURL}/v1/parameter/simulations`) : null, - fetcher - ); - - const transformToOptions = (simulations: SimulationData[]): Option[] => { - if (!simulations || !Array.isArray(simulations)) { - return []; - } - - return simulations.map((simulation) => ({ - key: simulation.name, // This is what users see in the dropdown - value: simulation.id, // This is what gets submitted as the selection - })); - }; - - return { - simulations: transformToOptions(data) || [], - error, - isLoading, - }; -}; - -export default useSimulations; diff --git a/ui/src/mocks/data/airSimulations.ts b/ui/src/mocks/data/airSimulations.ts deleted file mode 100644 index 5c11e75..0000000 --- a/ui/src/mocks/data/airSimulations.ts +++ /dev/null @@ -1,28 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -export const AIR_SIMULATIONS_MOCK_DATA = [ - { - id: "4dce8367-aaea-4965-924e-34647be0a630", - name: "SITEA Validation", - state: "LOADING", - }, - { - id: "1a048166-42f0-4da9-82e9-761a10ddb0e3", - name: "test", - state: "LOADED", - }, -]; diff --git a/ui/src/mocks/data/index.ts b/ui/src/mocks/data/index.ts index 3858cdf..0d9d95c 100644 --- a/ui/src/mocks/data/index.ts +++ b/ui/src/mocks/data/index.ts @@ -17,4 +17,3 @@ export * from "./devicesData"; export * from "./workflows"; export * from "./formData"; -export * from "./airSimulations"; diff --git a/ui/src/mocks/handlers/airHandlers.ts b/ui/src/mocks/handlers/airHandlers.ts deleted file mode 100644 index 52f04c0..0000000 --- a/ui/src/mocks/handlers/airHandlers.ts +++ /dev/null @@ -1,145 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -import { delay, http, HttpResponse } from "msw"; -import { sanitizeUrl } from "@/lib/utils"; -import { mockApiURL as apiURL } from "@/config/mockApiUrl"; -import { - AIRValidateSiteWorkflowInput, - AIRDeleteSimulationWorkflowInput, - AIRCreateSimulationWorkflowInput, -} from "@/types/data-table.types"; -import { FORBIDDEN_SITE_ID, AIR_SIMULATIONS_MOCK_DATA } from "@/mocks/data"; - -export const airHandlers = [ - http.get(sanitizeUrl(`${apiURL}/v1/parameter/simulations`), async () => { - return HttpResponse.json(AIR_SIMULATIONS_MOCK_DATA, { status: 200 }); - }), - - http.post( - sanitizeUrl(`${apiURL}/v1/workflow/ngc/air_validate_site`), - async ({ request }) => { - const body = (await request.json()) as AIRValidateSiteWorkflowInput; - - if (!body.site_name) { - return HttpResponse.json( - { error: "Missing required fields" }, - { status: 400 } - ); - } - - if (body.site_name === FORBIDDEN_SITE_ID) { - return HttpResponse.json( - { - error: "Forbidden: You do not have permission to run this workflow", - }, - { status: 403 } - ); - } - - await delay(2500); - - return HttpResponse.json( - { - id: body.site_name, - href: `https://url-to-temporal.com/namespaces/default/workflows/${body.site_name}`, - submitted_data: body, - }, - { status: 201 } - ); - } - ), - - http.post( - sanitizeUrl(`${apiURL}/v1/workflow/ngc/air_create_simulation`), - async ({ request }) => { - const body = (await request.json()) as AIRCreateSimulationWorkflowInput; - - if (!body.name || !body.topology) { - return HttpResponse.json( - { error: "Missing required fields" }, - { status: 400 } - ); - } - - // Check for forbidden simulation names (using site ID as example) - if (body.name === FORBIDDEN_SITE_ID) { - console.log("FORBIDDEN_SITE_ID", body.name); - return HttpResponse.json( - { - error: "Forbidden: You do not have permission to run this workflow", - }, - { status: 403 } - ); - } - - await delay(2500); - - return HttpResponse.json( - { - id: body.name, - href: 
`https://url-to-temporal.com/namespaces/default/workflows/${body.name}`, - submitted_data: body, - }, - { status: 201 } - ); - } - ), - - http.post("/v1/workflow/ngc/air_delete", async ({ request }) => { - const body = (await request.json()) as AIRDeleteSimulationWorkflowInput; - - if (body.simulation_id === FORBIDDEN_SITE_ID) { - return new HttpResponse( - JSON.stringify({ - error: "Forbidden: You do not have permission to run this workflow", - }), - { - status: 403, - headers: { - "Content-Type": "application/json", - }, - } - ); - } - - if (!body.simulation_id) { - return new HttpResponse( - JSON.stringify({ error: "Missing required fields" }), - { - status: 400, - headers: { - "Content-Type": "application/json", - }, - } - ); - } - - return new HttpResponse( - JSON.stringify({ - id: body.simulation_id, - href: `https://url-to-temporal.com/namespaces/default/workflows/${body.simulation_id}`, - submitted_data: body, - }), - { - status: 201, - headers: { - "Content-Type": "application/json", - }, - } - ); - }), -]; diff --git a/ui/src/mocks/handlers/index.ts b/ui/src/mocks/handlers/index.ts index bddaa83..b750a32 100644 --- a/ui/src/mocks/handlers/index.ts +++ b/ui/src/mocks/handlers/index.ts @@ -29,7 +29,6 @@ import { deployHandlers } from "./deployHandlers"; import { ibValidationHandlers } from "./ibValidationHandlers"; import { ibOsUpgradeHandlers } from "./ibOsUpgradeHandler"; import { reprovisionHandlers } from "./reprovisionHandlers"; -import { airHandlers } from "./airHandlers"; import { switchOsUpgradeHandlers } from "./switchOsUpgradeHandlers"; import { cumulusHardwareValidationHandlers } from "./cumulusHardwareValidationHandler"; import { multiDeployHandlers } from "./multiDeployHandlers"; @@ -49,7 +48,6 @@ export * from "./deployHandlers"; export * from "./ibValidationHandlers"; export * from "./ibOsUpgradeHandler"; export * from "./reprovisionHandlers"; -export * from "./airHandlers"; export * from "./switchOsUpgradeHandlers"; export * from 
"./cumulusHardwareValidationHandler"; export * from "./multiDeployHandlers"; @@ -70,7 +68,6 @@ export const handlers = [ ...ibValidationHandlers, ...ibOsUpgradeHandlers, ...reprovisionHandlers, - ...airHandlers, ...switchOsUpgradeHandlers, ...cumulusHardwareValidationHandlers, ...multiDeployHandlers, diff --git a/ui/src/mocks/handlers/workflowHandlers.ts b/ui/src/mocks/handlers/workflowHandlers.ts index 801d2fa..420cd41 100644 --- a/ui/src/mocks/handlers/workflowHandlers.ts +++ b/ui/src/mocks/handlers/workflowHandlers.ts @@ -40,9 +40,6 @@ export const workflowTypes = [ "ReprovisionWorkflow", "SwitchOsUpgradeWorkflow", "CumulusHardwareValidationWorkflow", - "AIRCreateSimulationWorkflow", - "AIRValidateSiteWorkflow", - "AIRDeleteSimulationWorkflow", ]; export const workflowFetchingHandlers = [ diff --git a/ui/src/types/data-table.types.ts b/ui/src/types/data-table.types.ts index d8b2c69..3ea4e50 100644 --- a/ui/src/types/data-table.types.ts +++ b/ui/src/types/data-table.types.ts @@ -71,19 +71,6 @@ export type ConnectedDeviceMetadataWorkflowInput = { device_id: string; }; -export type AIRValidateSiteWorkflowInput = { - site_name: string; -}; - -export type AIRCreateSimulationWorkflowInput = { - name: string; - topology: object; -}; - -export type AIRDeleteSimulationWorkflowInput = { - simulation_id: string; -}; - export type PortLLDPInfoWorkflowInput = | { device_id: string; diff --git a/ui/tests/e2e/airCreateSimulationForm.spec.ts b/ui/tests/e2e/airCreateSimulationForm.spec.ts deleted file mode 100644 index 5647a9c..0000000 --- a/ui/tests/e2e/airCreateSimulationForm.spec.ts +++ /dev/null @@ -1,348 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import { expect } from "@playwright/test"; -import { test, TEST_TIMEOUT } from "./shared/utils"; -import { FORBIDDEN_SITE_ID } from "@/mocks/data"; - -// Sample data for testing -const SAMPLE_NAME = "test-simulation"; -const SAMPLE_TOPOLOGY = JSON.stringify({ - devices: [ - { - name: "device1", - type: "switch", - interfaces: ["Ethernet1/1", "Ethernet1/2"], - }, - ], -}); - -test.describe("AIR Create Simulation Workflow Form", () => { - test.beforeEach(async ({ page }) => { - await page.goto("/workflows/aircreatesimulationworkflow/form"); - }); - - test("renders form with correct title", async ({ page }) => { - const title = await page.getByRole("heading", { - name: "AIR Create Simulation", - }); - await expect(title).toBeVisible(); - }); - - test("displays validation error when no fields are filled", async ({ - page, - }) => { - await page.getByRole("button", { name: "Submit" }).click(); - - await expect(page.getByText("Name is required")).toBeVisible(); - await expect(page.getByText("Topology JSON is required")).toBeVisible(); - }); - - test("displays validation error when form information is incomplete", async ({ - page, - }) => { - // Fill only name field - await page.getByLabel("Simulation Name").fill(SAMPLE_NAME); - await page.getByRole("button", { name: "Submit" }).click(); - - // Check for validation error - await expect(page.getByText("Topology JSON is required")).toBeVisible(); - - // Clear and fill only topology field - await page.reload(); - await page.locator('textarea[name="topology"]').fill(SAMPLE_TOPOLOGY); - await 
page.getByRole("button", { name: "Submit" }).click(); - - // Check for validation error - await expect(page.getByText("Name is required")).toBeVisible(); - }); - - test("displays validation error for invalid JSON", async ({ page }) => { - // Fill form with invalid JSON - await page.getByLabel("Simulation Name").fill(SAMPLE_NAME); - await page.locator('textarea[name="topology"]').fill("invalid json"); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Check for validation error - await expect(page.getByText("Must be valid JSON")).toBeVisible(); - }); - - test("formats valid JSON when format button is clicked", async ({ page }) => { - // Fill form with unformatted JSON - const unformattedJSON = - '{"devices":[{"name":"device1","type":"switch","interfaces":["Ethernet1/1","Ethernet1/2"]}]}'; - await page.locator('textarea[name="topology"]').fill(unformattedJSON); - - // Click format button - await page.getByRole("button", { name: "Format JSON" }).click(); - - // NOTE: While not ideal, firefox has a weird bug where the toast notification is not visible unless we force a viewport adjustment. 
- const successTitle = page.locator("div.text-sm.font-semibold", { - hasText: "JSON Formatted", - }); - const successMessage = page.locator("div.text-sm.opacity-90", { - hasText: "Your JSON has been prettified!", - }); - - await expect(successTitle).toBeVisible({ timeout: TEST_TIMEOUT }); - await expect(successMessage).toBeVisible({ timeout: TEST_TIMEOUT }); - - // Verify JSON is formatted - const formattedValue = await page - .locator('textarea[name="topology"]') - .inputValue(); - expect(formattedValue).toBe( - JSON.stringify(JSON.parse(unformattedJSON), null, 2) - ); - }); - - test("verifies JSON formatting preserves data structure", async ({ - page, - }) => { - // Create a complex JSON structure - const complexJSON = { - devices: [ - { - name: "device1", - type: "switch", - interfaces: ["Ethernet1/1", "Ethernet1/2"], - config: { - hostname: "switch1", - vlans: [1, 2, 3], - features: { - lldp: true, - cdp: false, - }, - }, - }, - { - name: "device2", - type: "router", - interfaces: ["GigabitEthernet0/1", "GigabitEthernet0/2"], - config: { - hostname: "router1", - routing: { - ospf: true, - bgp: false, - }, - }, - }, - ], - topology: { - connections: [ - { - from: "device1.Ethernet1/1", - to: "device2.GigabitEthernet0/1", - }, - ], - }, - }; - - // Fill form with unformatted JSON - const unformattedJSON = JSON.stringify(complexJSON); - await page.locator('textarea[name="topology"]').fill(unformattedJSON); - - // Click format button - await page.getByRole("button", { name: "Format JSON" }).click(); - - // NOTE: While not ideal, firefox has a weird bug where the toast notification is not visible unless we force a viewport adjustment. 
- const successTitle = page.locator("div.text-sm.font-semibold", { - hasText: "JSON Formatted", - }); - const successMessage = page.locator("div.text-sm.opacity-90", { - hasText: "Your JSON has been prettified!", - }); - - await expect(successTitle).toBeVisible({ timeout: TEST_TIMEOUT }); - await expect(successMessage).toBeVisible({ timeout: TEST_TIMEOUT }); - - // Get the formatted value - const formattedValue = await page - .locator('textarea[name="topology"]') - .inputValue(); - - // Parse both the original and formatted JSON to verify they are equivalent - const originalParsed = JSON.parse(unformattedJSON); - const formattedParsed = JSON.parse(formattedValue); - - // Verify the data structure is preserved - expect(formattedParsed).toEqual(originalParsed); - - // Verify the formatting (should have proper indentation) - expect(formattedValue).toContain(' "devices": ['); - expect(formattedValue).toContain(' "name": "device1"'); - expect(formattedValue).toContain(' "config": {'); - expect(formattedValue).toContain(' "hostname": "switch1"'); - }); - - test("shows error toast when formatting invalid JSON", async ({ page }) => { - // Fill form with invalid JSON - const topologyInput = page.locator('textarea[name="topology"]'); - await expect(topologyInput).toBeVisible({ timeout: TEST_TIMEOUT }); - await topologyInput.fill("invalid json"); - await expect(topologyInput).toHaveValue("invalid json", { - timeout: TEST_TIMEOUT, - }); - - // Click format button - await page.getByRole("button", { name: "Format JSON" }).click(); - - // NOTE: While not ideal, firefox has a weird bug where the toast notification is not visible unless we force a viewport adjustment. - const errorTitle = page.locator("div.text-sm.font-semibold", { - hasText: "Invalid JSON", - }); - const errorMessage = page.locator("div.text-sm.opacity-90", { - hasText: "Cannot format invalid JSON. 
Please check your syntax.", - }); - - await expect(errorTitle).toBeVisible({ timeout: TEST_TIMEOUT }); - await expect(errorMessage).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("submits form with valid data correctly", async ({ page }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_create_simulation"); - }); - - // Fill form with valid data - await page.getByLabel("Simulation Name").fill(SAMPLE_NAME); - await page.locator('textarea[name="topology"]').fill(SAMPLE_TOPOLOGY); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data - expect(requestData).toEqual({ - name: SAMPLE_NAME, - topology: JSON.parse(SAMPLE_TOPOLOGY), - }); - - // Wait for navigation to confirm submission completed - await expect( - page.getByRole("heading", { name: "Workflow Details" }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("loads form data from URL parameters", async ({ page }) => { - // Navigate with URL parameters - await page.goto( - `/workflows/aircreatesimulationworkflow/form?name=${SAMPLE_NAME}&topology=${encodeURIComponent( - SAMPLE_TOPOLOGY - )}` - ); - - // Verify the form is pre-populated with URL parameter values - await expect(page.getByLabel("Simulation Name")).toHaveValue(SAMPLE_NAME); - await expect(page.locator('textarea[name="topology"]')).toHaveValue( - SAMPLE_TOPOLOGY - ); - }); - - test("disables form during submission", async ({ page }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_create_simulation"); - }); - - // Fill form with valid data - await page.getByLabel("Simulation Name").fill(SAMPLE_NAME); - await 
page.locator('textarea[name="topology"]').fill(SAMPLE_TOPOLOGY); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data - expect(requestData).toEqual({ - name: SAMPLE_NAME, - topology: JSON.parse(SAMPLE_TOPOLOGY), - }); - - // Verify all form elements are disabled during submission - await expect(page.getByLabel("Simulation Name")).toBeDisabled(); - await expect(page.locator('textarea[name="topology"]')).toBeDisabled(); - await expect( - page.getByRole("button", { name: "Format JSON" }) - ).toBeDisabled(); - await expect( - page.getByRole("button", { name: "Submitting..." }) - ).toBeDisabled(); - }); - - test("displays error toast when workflow is forbidden", async ({ page }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_create_simulation"); - }); - - // Fill form with valid data - const nameInput = page.getByLabel("Simulation Name"); - const topologyInput = page.locator('textarea[name="topology"]'); - await expect(nameInput).toBeVisible({ timeout: TEST_TIMEOUT }); - await nameInput.fill(FORBIDDEN_SITE_ID); - await topologyInput.fill(SAMPLE_TOPOLOGY); - await expect(nameInput).toHaveValue(FORBIDDEN_SITE_ID, { - timeout: TEST_TIMEOUT, - }); - await expect(topologyInput).toHaveValue(SAMPLE_TOPOLOGY, { - timeout: TEST_TIMEOUT, - }); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data - expect(requestData).toEqual({ - name: FORBIDDEN_SITE_ID, - topology: JSON.parse(SAMPLE_TOPOLOGY), - }); - - // NOTE: While not ideal, firefox has a weird bug where the toast 
notification is not visible unless we force a viewport adjustment. - const errorTitle = page.locator("div.text-sm.font-semibold", { - hasText: "Workflow Failed", - }); - const errorMessage = page.locator("div.text-sm.opacity-90", { - hasText: "Forbidden: You do not have permission to run this workflow", - }); - - await expect(errorTitle).toBeVisible({ timeout: TEST_TIMEOUT }); - await expect(errorMessage).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("handles empty JSON string in format function", async ({ page }) => { - // Clear the topology field - await page.locator('textarea[name="topology"]').fill(""); - - // Click format button - await page.getByRole("button", { name: "Format JSON" }).click(); - - // Verify no toast is shown and field remains empty - await expect( - page.getByText("Your JSON has been prettified!") - ).not.toBeVisible(); - await expect(page.locator('textarea[name="topology"]')).toHaveValue(""); - }); -}); diff --git a/ui/tests/e2e/airDeleteSimulationForm.spec.ts b/ui/tests/e2e/airDeleteSimulationForm.spec.ts deleted file mode 100644 index e7fb0c3..0000000 --- a/ui/tests/e2e/airDeleteSimulationForm.spec.ts +++ /dev/null @@ -1,241 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -import { expect } from "@playwright/test"; -import { test, TEST_TIMEOUT } from "./shared/utils"; -import { FORBIDDEN_SITE_ID } from "@/mocks/data"; - -// Sample data for testing - matching what's available in the mock -const SAMPLE_SIMULATION_ID = "test-simulation-123"; -const SAMPLE_SIMULATION_NAME = "Test Simulation 123"; -const FORBIDDEN_SIMULATION_NAME = "Forbidden Simulation"; - -test.describe("AIR Delete Simulation Workflow Form", () => { - test.beforeEach(async ({ page }) => { - await page.goto("/workflows/airdeletesimulationworkflow/form"); - }); - - test("renders form with correct title", async ({ page }) => { - const title = await page.getByRole("heading", { - name: "AIR Delete Simulation", - }); - await expect(title).toBeVisible(); - }); - - test("displays validation error when no simulation is selected", async ({ - page, - }) => { - await page.getByRole("button", { name: "Submit" }).click(); - - await expect(page.getByText("Simulation ID is required")).toBeVisible(); - }); - - test("submits form with valid simulation selection correctly", async ({ - page, - }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_delete"); - }); - - // Wait for simulations to load and select one - await page.getByRole("button", { name: "Select a Simulation..." 
}).click(); - await page.getByRole("option", { name: SAMPLE_SIMULATION_NAME }).click(); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data - expect(requestData).toEqual({ - simulation_id: SAMPLE_SIMULATION_ID, - }); - - // Wait for navigation to confirm submission completed - await expect( - page.getByRole("heading", { name: "Workflow Details" }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("loads form data from URL parameters", async ({ page }) => { - // Navigate with URL parameters - await page.goto( - `/workflows/airdeletesimulationworkflow/form?simulation_id=${SAMPLE_SIMULATION_ID}` - ); - - // Wait for the form to load and check that the correct simulation is selected - await expect(page.getByText(SAMPLE_SIMULATION_NAME)).toBeVisible(); - }); - - test("disables form during submission", async ({ page }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_delete"); - }); - - // Wait for simulations to load and select one - await page.getByRole("button", { name: "Select a Simulation..." }).click(); - await page.getByRole("option", { name: SAMPLE_SIMULATION_NAME }).click(); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data - expect(requestData).toEqual({ - simulation_id: SAMPLE_SIMULATION_ID, - }); - - // Verify submit button shows submitting state - await expect( - page.getByRole("button", { name: "Submitting..." 
}) - ).toBeDisabled(); - }); - - test("displays error toast when workflow is forbidden", async ({ page }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_delete"); - }); - - // Wait for simulations to load and select the forbidden one - await page.getByRole("button", { name: "Select a Simulation..." }).click(); - await page.getByRole("option", { name: FORBIDDEN_SIMULATION_NAME }).click(); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data - expect(requestData).toEqual({ - simulation_id: FORBIDDEN_SITE_ID, - }); - - // NOTE: While not ideal, firefox has a weird bug where the toast notification is not visible unless we force a viewport adjustment. - const errorTitle = page.locator("div.text-sm.font-semibold", { - hasText: "Workflow Failed", - }); - const errorMessage = page.locator("div.text-sm.opacity-90", { - hasText: "Forbidden: You do not have permission to run this workflow", - }); - - await expect(errorTitle).toBeVisible({ timeout: TEST_TIMEOUT }); - await expect(errorMessage).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("submits form with URL parameters without changes", async ({ page }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_delete"); - }); - - // Navigate with URL parameters - await page.goto( - `/workflows/airdeletesimulationworkflow/form?simulation_id=${SAMPLE_SIMULATION_ID}` - ); - - // Wait for the correct simulation to be pre-selected - await expect(page.getByText(SAMPLE_SIMULATION_NAME)).toBeVisible(); - - // Submit the form directly without making any changes - await page.getByRole("button", { name: "Submit" 
}).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data contains the URL parameter values - expect(requestData).toEqual({ - simulation_id: SAMPLE_SIMULATION_ID, - }); - - // Wait for navigation to confirm submission completed - await expect( - page.getByRole("heading", { name: "Workflow Details" }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("loads from URL parameters then do manual changes before submission", async ({ - page, - }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_delete"); - }); - - // Navigate with initial URL parameters - await page.goto( - `/workflows/airdeletesimulationworkflow/form?simulation_id=${SAMPLE_SIMULATION_ID}` - ); - - // Wait for the initial selection to load - await expect(page.getByText(SAMPLE_SIMULATION_NAME)).toBeVisible(); - - // Change the simulation selection - await page.getByRole("button", { name: SAMPLE_SIMULATION_NAME }).click(); - await page - .getByRole("option", { name: "SITEA Validation" }) - .click(); - - // Submit the form with the modified values - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data matches the manually changed values - expect(requestData).toEqual({ - simulation_id: "4dce8367-aaea-4965-924e-34647be0a630", - }); - - // Wait for navigation to confirm submission completed - await expect( - page.getByRole("heading", { name: "Workflow Details" }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("can search and filter simulations in dropdown", async ({ page }) => { - // Open the dropdown - await page.getByRole("button", { name: 
"Select a Simulation..." }).click(); - - // Type in the search box to filter - await page.getByPlaceholder("Search Simulation").fill("SITEA"); - - // Verify that only matching items are shown - await expect( - page.getByRole("option", { name: "SITEA Validation" }) - ).toBeVisible(); - await expect( - page.getByRole("option", { name: SAMPLE_SIMULATION_NAME }) - ).not.toBeVisible(); - - // Clear search and verify all options are back - await page.getByPlaceholder("Search Simulation").fill(""); - await expect( - page.getByRole("option", { name: SAMPLE_SIMULATION_NAME }) - ).toBeVisible(); - await expect( - page.getByRole("option", { name: "SITEA Validation" }) - ).toBeVisible(); - }); -}); diff --git a/ui/tests/e2e/airValidateSiteForm.spec.ts b/ui/tests/e2e/airValidateSiteForm.spec.ts deleted file mode 100644 index 03bb1bd..0000000 --- a/ui/tests/e2e/airValidateSiteForm.spec.ts +++ /dev/null @@ -1,310 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -import { expect } from "@playwright/test"; -import { SITES_LIST, FORBIDDEN_SITE_ID } from "@/mocks/data"; -import { test, TEST_TIMEOUT } from "./shared/utils"; - -test.describe("AIR Validate Site Workflow Form", () => { - test.beforeEach(async ({ page }) => { - await page.goto("/workflows/airvalidatesiteworkflow/form"); - }); - - test("renders form with correct title", async ({ page }) => { - const title = await page.getByRole("heading", { - name: "AIR Validate Site", - }); - await expect(title).toBeVisible(); - }); - - test("displays validation error when no site is selected", async ({ - page, - }) => { - await page.getByRole("button", { name: "Submit" }).click(); - - await expect(page.getByText("Site is required")).toBeVisible(); - }); - - test("submits form with correct data to API", async ({ page }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_validate_site"); - }); - - const site = SITES_LIST.pdx01; - - // Fill form with site - await page.getByRole("button", { name: "Site" }).click(); - await page.getByRole("dialog").getByText(site).click(); - // Click outside to close dropdown - await page.getByRole("heading", { name: "AIR Validate Site" }).click(); - - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data - expect(requestData).toEqual({ - site_name: site, - }); - - await expect( - page.getByRole("heading", { name: "Workflow Details" }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("loads site from URL parameters", async ({ page }) => { - const site = SITES_LIST.pdx01; - - // Navigate with site URL parameter - await page.goto(`/workflows/airvalidatesiteworkflow/form?site=${site}`); - - // Verify the form is pre-populated with URL 
parameter value - await expect(page.getByRole("button").getByText(site)).toBeVisible({ - timeout: TEST_TIMEOUT, - }); - }); - - test("submits form directly from URL parameters without changes", async ({ - page, - }) => { - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_validate_site"); - }); - - const site = SITES_LIST.pdx01; - - // Navigate with site URL parameter - await page.goto(`/workflows/airvalidatesiteworkflow/form?site=${site}`); - - // Verify the form is pre-populated - await expect(page.getByRole("button").getByText(site)).toBeVisible({ - timeout: TEST_TIMEOUT, - }); - - // Submit without making any changes - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data contains the URL parameter value - expect(requestData).toEqual({ - site_name: site, - }); - - await expect( - page.getByRole("heading", { name: "Workflow Details" }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("loads form with URL parameters and performs manual changes", async ({ - page, - }) => { - const initialSite = SITES_LIST.pdx01; - const newSite = SITES_LIST.rno1; - - // Navigate with initial site URL parameter - await page.goto( - `/workflows/airvalidatesiteworkflow/form?site=${initialSite}` - ); - - // Verify the form is pre-populated with URL parameter value - await expect(page.getByRole("button").getByText(initialSite)).toBeVisible({ - timeout: TEST_TIMEOUT, - }); - - // Change the site manually - await page - .getByRole("button") - .getByText(initialSite, { exact: true }) - .click(); - await page.getByRole("dialog").getByText(newSite).click(); - // Click outside to close dropdown - await page.getByRole("heading", { name: "AIR Validate Site" }).click(); - - // Verify the 
form is updated with the new value - await expect( - page.getByRole("button").getByText(newSite, { exact: true }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - - // Set up a listener for the request - const requestPromise = page.waitForRequest((request) => { - return request.url().includes("/v1/workflow/ngc/air_validate_site"); - }); - - // Submit the form with the manually changed value - await page.getByRole("button", { name: "Submit" }).click(); - - // Get the request before navigation completes - const request = await requestPromise; - const requestData = JSON.parse((await request.postData()) || "{}"); - - // Verify the request data contains the manually changed value, not the URL parameter value - expect(requestData).toEqual({ - site_name: newSite, - }); - - await expect( - page.getByRole("heading", { name: "Workflow Details" }) - ).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("clears site field when invalid site is provided in URL params", async ({ - page, - }) => { - // Navigate with an invalid site parameter - const invalidSite = "nonexistent-site"; - await page.goto( - `/workflows/airvalidatesiteworkflow/form?site=${invalidSite}` - ); - - // Allow time for validation logic to run - await page.waitForTimeout(500); - - // Check that site field is empty (cleared) - await expect( - page.getByRole("button", { name: "Select a Site" }) - ).toBeVisible(); - - // Verify site dropdown works properly after clearing invalid value - await page.getByRole("button", { name: "Site" }).click(); - await expect(page.getByRole("dialog")).toBeVisible(); - await page.getByRole("dialog").getByText(SITES_LIST.pdx01).click(); - - // Verify the site was selected correctly - await expect( - page.getByRole("button", { name: SITES_LIST.pdx01 }) - ).toBeVisible(); - }); - - test("shows error for forbidden site", async ({ page }) => { - // Fill form with forbidden site - await page.getByRole("button", { name: "Site" }).click(); - await 
page.getByRole("dialog").getByText(FORBIDDEN_SITE_ID).click(); - // Click outside to close dropdown - await page.getByRole("heading", { name: "AIR Validate Site" }).click(); - - // Submit the form - await page.getByRole("button", { name: "Submit" }).click(); - - // Wait for and verify the error toast appears - const errorTitle = page.locator("div.text-sm.font-semibold", { - hasText: "Workflow Failed", - }); - const errorMessage = page.locator("div.text-sm.opacity-90", { - hasText: "Forbidden: You do not have permission to run this workflow", - }); - - await expect(errorTitle).toBeVisible({ timeout: TEST_TIMEOUT }); - await expect(errorMessage).toBeVisible({ timeout: TEST_TIMEOUT }); - }); - - test("form disables during submission", async ({ page }) => { - const site = SITES_LIST.pdx01; - - // Fill the form - await page.getByRole("button", { name: "Site" }).click(); - await page.getByRole("dialog").getByText(site).click(); - // Click outside to close dropdown - await page.getByRole("heading", { name: "AIR Validate Site" }).click(); - - // Submit the form - await page.getByRole("button", { name: "Submit" }).click(); - - // Verify submit button shows "Submitting..." and is disabled - await expect( - page.getByRole("button", { name: "Submitting..." 
}) - ).toBeDisabled(); - - // Verify site field is also disabled during submission - await expect( - page.getByRole("button", { name: site, exact: true }) - ).toBeDisabled(); - }); - - test("can clear site selection and reselect", async ({ page }) => { - const firstSite = SITES_LIST.pdx01; - const secondSite = SITES_LIST.rno1; - - // Select first site - await page.getByRole("button", { name: "Site" }).click(); - await page.getByRole("dialog").getByText(firstSite).click(); - // Click outside to close dropdown - await page.getByRole("heading", { name: "AIR Validate Site" }).click(); - - // Verify first site is selected - await expect(page.getByRole("button", { name: firstSite })).toBeVisible(); - await expect(page.getByRole("button", { name: "Submit" })).toBeEnabled(); - - // Clear the selection by clicking the X button - await page - .locator(".flex.items-center.self-stretch") - .filter({ has: page.locator("svg.lucide.lucide-x.size-4") }) - .first() - .click(); - - // Verify site field is cleared - await expect( - page.getByRole("button", { name: "Select a Site" }) - ).toBeVisible(); - - // Select second site - await page.getByRole("button", { name: "Site" }).click(); - await page.getByRole("dialog").getByText(secondSite).click(); - // Click outside to close dropdown - await page.getByRole("heading", { name: "AIR Validate Site" }).click(); - - // Verify second site is selected - await expect(page.getByRole("button", { name: secondSite })).toBeVisible(); - }); - - test("keyboard navigation works in site dropdown", async ({ page }) => { - // Open dropdown with keyboard - await page.getByRole("button", { name: "Site" }).focus(); - await page.keyboard.press("Enter"); - - // Verify dropdown is open - await expect(page.getByRole("dialog")).toBeVisible(); - - // Navigate with arrow keys and select with Enter - await page.keyboard.press("ArrowDown"); - await page.keyboard.press("Enter"); - - // Verify a site was selected (first option) - await 
expect(page.getByRole("button", { name: "Submit" })).toBeEnabled(); - }); - - test("displays loading state when sites are loading", async ({ page }) => { - // This test verifies the loading state is handled properly - // The loading state should be brief but visible during initial load - await page.goto("/workflows/airvalidatesiteworkflow/form"); - - // Check that the form renders without errors during loading - await expect( - page.getByRole("heading", { name: "AIR Validate Site" }) - ).toBeVisible(); - - // Eventually the site dropdown should be available - await expect( - page.getByRole("button", { name: /Site|Select a Site/ }) - ).toBeVisible({ timeout: 10000 }); - }); -}); diff --git a/ui/tests/e2e/shared/apiMocks.ts b/ui/tests/e2e/shared/apiMocks.ts index 0769fcf..7908686 100644 --- a/ui/tests/e2e/shared/apiMocks.ts +++ b/ui/tests/e2e/shared/apiMocks.ts @@ -75,10 +75,7 @@ export async function setupApiMocks(page: Page) { await mockIbOsUpgradeEndpoint(page); await mockInfinibandCableValidationEndpoint(page); await mockReprovisionEndpoint(page); - await mockAirValidateSiteEndpoint(page); await mockSwitchOsUpgradeEndpoint(page); - await mockAirCreateSimulationEndpoint(page); - await mockAirDeleteSimulationEndpoint(page); await mockCumulusHardwareValidationEndpoint(page); await mockMultiDeployEndpoint(page); @@ -90,7 +87,6 @@ export async function setupApiMocks(page: Page) { await mockDeviceTypesEndpoint(page); await mockDevicesEndpoint(page); await mockPasswordUsersEndpoint(page); - await mockSimulationsEndpoint(page); // Workflow listing endpoints await mockWorkflowTypesEndpoint(page); @@ -753,42 +749,6 @@ export async function mockReprovisionEndpoint(page: Page) { }); } -export async function mockAirValidateSiteEndpoint(page: Page) { - await page.route(`**/v1/workflow/ngc/air_validate_site`, async (route) => { - const request = route.request(); - const body = JSON.parse((await request.postData()) || "{}"); - - if (body.site_name === FORBIDDEN_SITE_ID) { - 
await route.fulfill({ - status: 403, - json: { - error: "Forbidden: You do not have permission to run this workflow", - }, - }); - return; - } - - if (!body.site_name) { - await route.fulfill({ - status: 400, - json: { error: "Missing required fields" }, - }); - return; - } - - await delay(100); - - await route.fulfill({ - status: 201, - json: { - id: body.site_name, - href: `https://url-to-temporal.com/namespaces/default/workflows/${body.site_name}`, - submitted_data: body, - }, - }); - }); -} - export async function mockSwitchOsUpgradeEndpoint(page: Page) { await page.route(`**/v1/workflow/ngc/switch_os_upgrade`, async (route) => { const request = route.request(); @@ -825,81 +785,6 @@ export async function mockSwitchOsUpgradeEndpoint(page: Page) { }); } -export async function mockAirCreateSimulationEndpoint(page: Page) { - await page.route( - `**/v1/workflow/ngc/air_create_simulation`, - async (route) => { - const request = route.request(); - const body = JSON.parse((await request.postData()) || "{}"); - - if (body.name === FORBIDDEN_SITE_ID) { - await route.fulfill({ - status: 403, - json: { - error: "Forbidden: You do not have permission to run this workflow", - }, - }); - return; - } - - if (!body.name || !body.topology) { - await route.fulfill({ - status: 400, - json: { error: "Missing required fields" }, - }); - return; - } - - await delay(100); - - await route.fulfill({ - status: 201, - json: { - id: body.name, - href: `https://url-to-temporal.com/namespaces/default/workflows/${body.name}`, - submitted_data: body, - }, - }); - } - ); -} - -export async function mockAirDeleteSimulationEndpoint(page: Page) { - await page.route(`**/v1/workflow/ngc/air_delete`, async (route) => { - const request = route.request(); - const body = JSON.parse((await request.postData()) || "{}"); - - if (body.simulation_id === FORBIDDEN_SITE_ID) { - await route.fulfill({ - status: 403, - json: { - error: "Forbidden: You do not have permission to run this workflow", - }, - }); - 
return; - } - - if (!body.simulation_id) { - await route.fulfill({ - status: 400, - json: { error: "Missing required fields" }, - }); - return; - } - - await delay(100); - - await route.fulfill({ - status: 201, - json: { - id: body.simulation_id, - href: `https://url-to-temporal.com/namespaces/default/workflows/${body.simulation_id}`, - submitted_data: body, - }, - }); - }); -} - // Data fetching endpoints export async function mockSitesEndpoint(page: Page) { await page.route(`**/v1/parameter/location*`, async (route) => { @@ -1022,37 +907,6 @@ export async function mockPasswordUsersEndpoint(page: Page) { }); } -export async function mockSimulationsEndpoint(page: Page) { - await page.route(`**/v1/parameter/simulations`, async (route) => { - const simulationsData = [ - { - id: "4dce8367-aaea-4965-924e-34647be0a630", - name: "SITEA Validation", - state: "LOADING", - }, - { - id: "1a048166-42f0-4da9-82e9-761a10ddb0e3", - name: "test", - state: "LOADED", - }, - { - id: "test-simulation-123", - name: "Test Simulation 123", - state: "LOADED", - }, - { - id: FORBIDDEN_SITE_ID, - name: "Forbidden Simulation", - state: "LOADED", - }, - ]; - - await route.fulfill({ - json: simulationsData, - }); - }); -} - // Workflow listing endpoints export async function mockWorkflowTypesEndpoint(page: Page) { const workflowTypes = [ @@ -1075,9 +929,6 @@ export async function mockWorkflowTypesEndpoint(page: Page) { "ReprovisionWorkflow", "SwitchOsUpgradeWorkflow", "CumulusHardwareValidationWorkflow", - "AIRCreateSimulationWorkflow", - "AIRValidateSiteWorkflow", - "AIRDeleteSimulationWorkflow", ]; await page.route(`**/v1/workflow/types`, async (route) => { diff --git a/uv.lock b/uv.lock index ad9a715..db89e8d 100644 --- a/uv.lock +++ b/uv.lock @@ -213,19 +213,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = 
"sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" }, ] -[[package]] -name = "air-sdk" -version = "2.21.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "python-dateutil" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b2/7b/82a87a17d158109efc98cd69c379045d83818a304c1ec7c9e93078e9dd2a/air_sdk-2.21.1.tar.gz", hash = "sha256:6394b89da7c415e90e9c15ed01c467e1ad1382d69b592b26b21e8fc16009ce73", size = 59202, upload-time = "2025-07-08T15:27:49.592Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/b6/644c275126c49b855f47c6593aa982d2b1a272f2e701d0406273a96f31ed/air_sdk-2.21.1-py3-none-any.whl", hash = "sha256:5caffcc6b00f4e9136b8ce2d3aba013cd82366ab9664634e0d672d2073e63aca", size = 101629, upload-time = "2025-07-08T15:27:48.569Z" }, -] - [[package]] name = "alembic" version = "1.18.4" @@ -2051,7 +2038,6 @@ dependencies = [ { name = "aioboto3" }, { name = "aiohttp", extra = ["speedups"] }, { name = "aiohttp-retry" }, - { name = "air-sdk" }, { name = "alembic" }, { name = "asyncpg" }, { name = "boto3" }, @@ -2156,7 +2142,6 @@ requires-dist = [ { name = "aioboto3", specifier = ">=13.1.1" }, { name = "aiohttp", extras = ["speedups"], specifier = ">=3.13.4" }, { name = "aiohttp-retry", specifier = ">=2.9.1" }, - { name = "air-sdk", specifier = ">=2.16.0" }, { name = "alembic", specifier = ">=1.13.1" }, { name = "asyncpg", specifier = ">=0.29.0" }, { name = "boto3", specifier = ">=1.35.42" },