Skip to content

Commit b3c0d4b

Browse files
feat: enable site clusters to run Nautobot Celery workers with mTLS
Sites need to run background task processing locally to reduce cross-cluster latency and to scale worker capacity independently. Workers connect back to the global PostgreSQL and Redis, so cross-cluster connections require stronger authentication than passwords alone.

Adds a site-scoped ArgoCD Application that deploys only the Celery worker portion of the Nautobot Helm chart. The web server, Redis, and PostgreSQL remain on the global cluster.

All cross-cluster connections use end-to-end mTLS:
- nautobot_config.py gains conditional SSL/mTLS logic for both PostgreSQL (NAUTOBOT_DB_SSLMODE) and Redis (auto-detected from the mounted CA cert)
- nautobot-worker component values disable everything except Celery
- envoy-configs gateway template supports gatewayPort on TLS passthrough listeners for non-443 ports (5432, 6379)
- envoy-configs schema adds gatewayPort to the tls route type
- Deploy guide documents the full architecture, step-by-step site onboarding, certificate infrastructure, and troubleshooting
1 parent a946f2c commit b3c0d4b

8 files changed

Lines changed: 729 additions & 3 deletions

File tree

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{{- if eq (include "understack.isEnabled" (list $.Values.site "nautobot_worker")) "true" }}
2+
---
3+
apiVersion: argoproj.io/v1alpha1
4+
kind: Application
5+
metadata:
6+
name: {{ printf "%s-%s" $.Release.Name "nautobot-worker" }}
7+
finalizers:
8+
- resources-finalizer.argocd.argoproj.io
9+
annotations:
10+
argocd.argoproj.io/compare-options: ServerSideDiff=true,IncludeMutationWebhook=true
11+
spec:
12+
destination:
13+
namespace: nautobot
14+
server: {{ $.Values.cluster_server }}
15+
project: understack
16+
sources:
17+
- chart: nautobot
18+
helm:
19+
fileParameters:
20+
- name: nautobot.config
21+
path: $understack/components/nautobot/nautobot_config.py
22+
ignoreMissingValueFiles: true
23+
releaseName: nautobot-worker
24+
valueFiles:
25+
- $understack/components/nautobot-worker/values.yaml
26+
- $deploy/{{ include "understack.deploy_path" $ }}/nautobot-worker/values.yaml
27+
repoURL: https://nautobot.github.io/helm-charts/
28+
targetRevision: 2.5.6
29+
- path: components/nautobot-worker
30+
ref: understack
31+
repoURL: {{ include "understack.understack_url" $ }}
32+
targetRevision: {{ include "understack.understack_ref" $ }}
33+
- path: {{ include "understack.deploy_path" $ }}/nautobot-worker
34+
ref: deploy
35+
repoURL: {{ include "understack.deploy_url" $ }}
36+
targetRevision: {{ include "understack.deploy_ref" $ }}
37+
syncPolicy:
38+
automated:
39+
prune: true
40+
selfHeal: true
41+
managedNamespaceMetadata:
42+
annotations:
43+
argocd.argoproj.io/sync-options: Delete=false
44+
syncOptions:
45+
- CreateNamespace=true
46+
- ServerSideApply=true
47+
- RespectIgnoreDifferences=true
48+
- ApplyOutOfSyncOnly=true
49+
{{- end }}

charts/argocd-understack/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,12 @@ site:
556556
# @default -- false
557557
enabled: false
558558

559+
# -- Nautobot Celery workers (site-level, connects to global Nautobot)
560+
nautobot_worker:
561+
# -- Enable/disable deploying Nautobot workers at the site level
562+
# @default -- false
563+
enabled: false
564+
559565
# -- SNMP exporter for network device monitoring
560566
snmp_exporter:
561567
# -- Enable/disable deploying SNMP exporter

components/envoy-configs/templates/gw-external.yaml.tpl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,11 @@ spec:
3535
{{- range .Values.routes.tls }}
3636
{{- $listenerName := .name | default (index (splitList "." .fqdn) 0) }}
3737
- name: {{ $listenerName }}
38-
port: {{ $.Values.gateways.external.port | default 443 }}
38+
port: {{ .gatewayPort | default ($.Values.gateways.external.port | default 443) }}
3939
protocol: TLS
4040
hostname: {{ .fqdn | quote }}
4141
tls:
4242
mode: Passthrough
43-
certificateRefs:
44-
- name: {{ $listenerName }}-tls
4543
allowedRoutes:
4644
namespaces:
4745
{{- if .selector }}
@@ -52,6 +50,7 @@ spec:
5250
from: {{ .from | default "All" }}
5351
{{- end }}
5452
{{- end }}
53+
5554
{{- if .Values.gateways.external.serviceAnnotations }}
5655
infrastructure:
5756
parametersRef:

components/envoy-configs/values.schema.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,12 @@
180180
"type": "string",
181181
"description": "Namespace where the httproute will be installed (same as backend service)"
182182
},
183+
"gatewayPort": {
184+
"type": "integer",
185+
"minimum": 1,
186+
"maximum": 65535,
187+
"description": "Port exposed on the gateway for this TLS passthrough listener. Defaults to the external gateway port (443) if not specified."
188+
},
183189
"service": {
184190
"type": "object",
185191
"description": "Kubernetes service backend configuration for the route",
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Nautobot Worker (site-level)
2+
#
3+
# Deploys only Celery workers that connect back to the global Nautobot
4+
# database and Redis. The web server is disabled because it lives on
5+
# the global cluster. Redis and PostgreSQL are disabled because the
6+
# workers reach the global instances over the network.
7+
---
8+
9+
# Disable the Nautobot web server — workers only
10+
nautobot:
11+
enabled: false
12+
replicaCount: 0
13+
14+
db:
15+
engine: "django.db.backends.postgresql"
16+
# Override in deploy repo values to point at the global CNPG service
17+
host: ""
18+
port: 5432
19+
name: "app"
20+
user: "app"
21+
existingSecret: "nautobot-db"
22+
existingSecretPasswordKey: "password"
23+
24+
django:
25+
existingSecret: nautobot-django
26+
27+
superUser:
28+
enabled: false
29+
30+
redis:
31+
# Override in deploy repo values to point at the global Redis service
32+
host: ""
33+
port: 6379
34+
ssl: false
35+
username: ""
36+
37+
celery:
38+
enabled: true
39+
concurrency: 2
40+
replicaCount: 1
41+
extraEnvVarsSecret:
42+
- nautobot-django
43+
livenessProbe:
44+
initialDelaySeconds: 60
45+
periodSeconds: 120
46+
timeoutSeconds: 60
47+
readinessProbe:
48+
initialDelaySeconds: 60
49+
periodSeconds: 120
50+
timeoutSeconds: 60
51+
52+
# Disable celery beat — scheduling runs on the global cluster only
53+
workers:
54+
beat:
55+
enabled: false
56+
57+
# Do not deploy local Redis — use the global instance
58+
redis:
59+
enabled: false
60+
61+
# Do not deploy local PostgreSQL — use the global CNPG instance
62+
postgresql:
63+
enabled: false
64+
65+
ingress:
66+
enabled: false
67+
68+
metrics:
69+
enabled: false

components/nautobot/nautobot_config.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,55 @@
6464
if DATABASES["default"]["ENGINE"].endswith("mysql"): # noqa F405
6565
DATABASES["default"]["OPTIONS"] = {"charset": "utf8mb4"} # noqa F405
6666

67+
# SSL/mTLS options for PostgreSQL connections.
#
# NAUTOBOT_DB_SSLMODE selects the TLS verification mode. Only "verify-ca" and
# "verify-full" are acted on here: for those modes the client certificate,
# private key, and CA root certificate must all exist at the configured paths,
# otherwise configuration loading aborts with FileNotFoundError.
#
# NOTE(review): any other value (including "require") leaves the connection
# options untouched -- confirm that silently ignoring it is intended.
_db_sslcert = os.getenv("NAUTOBOT_DB_SSLCERT", "/etc/nautobot/mtls/tls.crt")
_db_sslkey = os.getenv("NAUTOBOT_DB_SSLKEY", "/etc/nautobot/mtls/tls.key")
_db_sslrootcert = os.getenv("NAUTOBOT_DB_SSLROOTCERT", "/etc/nautobot/mtls/ca.crt")
_db_sslmode = os.getenv("NAUTOBOT_DB_SSLMODE", "")

if _db_sslmode in ("verify-ca", "verify-full"):
    # Fail fast with a message naming the offending setting rather than
    # letting the database driver report a missing file at connect time.
    for _path, _label in (
        (_db_sslcert, "NAUTOBOT_DB_SSLCERT"),
        (_db_sslkey, "NAUTOBOT_DB_SSLKEY"),
        (_db_sslrootcert, "NAUTOBOT_DB_SSLROOTCERT"),
    ):
        if not os.path.isfile(_path):
            raise FileNotFoundError(
                f"SSL certificate file required by {_label} not found: {_path}"
            )

    # Merge into any OPTIONS that are already configured instead of replacing
    # the whole mapping, so unrelated connection options are preserved.
    DATABASES["default"].setdefault("OPTIONS", {}).update(  # noqa F405
        {
            "sslmode": _db_sslmode,
            "sslcert": _db_sslcert,
            "sslkey": _db_sslkey,
            "sslrootcert": _db_sslrootcert,
        }
    )
91+
92+
# SSL/mTLS options for Redis connections.
#
# The client TLS material below is applied whenever the CA certificate file is
# present at the configured path; no environment flag is consulted in this
# block. It tells the Python Redis client (cache connections and the Celery
# broker/result backend) *which* certificates to use for mutual TLS.
#
# NOTE(review): the rediss:// URL scheme is presumably selected elsewhere
# (e.g. via the Helm value `nautobot.redis.ssl` / NAUTOBOT_REDIS_SSL) --
# confirm that TLS URLs and the mounted certificates are always enabled
# together, since these keyword arguments are only meaningful for TLS
# connections.
import ssl as _ssl  # noqa: E402

_redis_ca = os.getenv("NAUTOBOT_REDIS_SSL_CA_CERTS", "/etc/nautobot/mtls/ca.crt")
_redis_cert = os.getenv("NAUTOBOT_REDIS_SSL_CERTFILE", "/etc/nautobot/mtls/tls.crt")
_redis_key = os.getenv("NAUTOBOT_REDIS_SSL_KEYFILE", "/etc/nautobot/mtls/tls.key")

if os.path.isfile(_redis_ca):
    # Mirror the PostgreSQL checks: a missing client certificate or key should
    # fail at configuration load with a clear message, not as an opaque TLS
    # error on the first Redis connection.
    for _path, _label in (
        (_redis_cert, "NAUTOBOT_REDIS_SSL_CERTFILE"),
        (_redis_key, "NAUTOBOT_REDIS_SSL_KEYFILE"),
    ):
        if not os.path.isfile(_path):
            raise FileNotFoundError(
                f"SSL certificate file required by {_label} not found: {_path}"
            )

    _redis_ssl_kwargs = {
        "ssl_cert_reqs": _ssl.CERT_REQUIRED,
        "ssl_ca_certs": _redis_ca,
        "ssl_certfile": _redis_cert,
        "ssl_keyfile": _redis_key,
    }

    # Cache connections: add the TLS arguments to the connection pool kwargs,
    # keeping whatever pool options are already configured.
    CACHES["default"].setdefault("OPTIONS", {})  # noqa F405
    CACHES["default"]["OPTIONS"].setdefault("CONNECTION_POOL_KWARGS", {})  # noqa F405
    CACHES["default"]["OPTIONS"]["CONNECTION_POOL_KWARGS"].update(_redis_ssl_kwargs)  # noqa F405

    # Each setting gets its own copy so that a consumer mutating one mapping
    # cannot silently alter the others.
    CELERY_BROKER_USE_SSL = dict(_redis_ssl_kwargs)  # noqa F405
    CELERY_REDIS_BACKEND_USE_SSL = dict(_redis_ssl_kwargs)  # noqa F405

    # Extend any broker transport options that are already defined in this
    # module's namespace rather than replacing them with an "ssl"-only mapping.
    _broker_transport_options = dict(
        globals().get("CELERY_BROKER_TRANSPORT_OPTIONS") or {}
    )
    _broker_transport_options["ssl"] = dict(_redis_ssl_kwargs)
    CELERY_BROKER_TRANSPORT_OPTIONS = _broker_transport_options  # noqa F405
115+
67116
# This key is used for secure generation of random numbers and strings. It must never be exposed outside of this file.
68117
# For optimal security, SECRET_KEY should be at least 50 characters in length and contain a mix of letters, numbers, and
69118
# symbols. Nautobot will not run without this defined. For more information, see

0 commit comments

Comments
 (0)