Skip to content

Commit b5e37b0

Browse files
feat: enable site clusters to run Nautobot Celery workers locally
Nautobot currently runs entirely on the global cluster, including its Celery workers. Sites that generate heavy background task load have no way to offload that processing closer to where the work originates, and a single global worker pool becomes a bottleneck as sites scale. This adds a site-scoped ArgoCD Application that deploys only the Celery worker portion of the Nautobot helm chart. The web server, Redis, and PostgreSQL are all disabled because they remain on the global cluster — site workers connect back to those shared services. This lets operators scale worker capacity per-site independently, run queue-specific workers closer to the hardware they manage, and reduce cross-cluster task latency for site-driven automation.
1 parent 912ab35 commit b5e37b0

11 files changed

Lines changed: 429 additions & 0 deletions

File tree

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{{- if eq (include "understack.isEnabled" (list $.Values.site "nautobot_worker")) "true" }}
2+
---
# Argo CD Application that installs the Nautobot Helm chart under the release
# name "nautobot-worker". It is rendered only when the understack.isEnabled
# helper returns "true" for the site-scoped "nautobot_worker" component.
3+
apiVersion: argoproj.io/v1alpha1
4+
kind: Application
5+
metadata:
6+
# Application name is "<helm release name>-nautobot-worker".
name: {{ printf "%s-%s" $.Release.Name "nautobot-worker" }}
7+
finalizers:
8+
- resources-finalizer.argocd.argoproj.io
9+
annotations:
10+
argocd.argoproj.io/compare-options: ServerSideDiff=true,IncludeMutationWebhook=true
11+
spec:
12+
# Workloads land in the "nautobot" namespace of the cluster given by
# the cluster_server value.
destination:
13+
namespace: nautobot
14+
server: {{ $.Values.cluster_server }}
15+
project: understack
16+
# Three sources: the upstream Nautobot chart, plus two git sources whose
# "ref" names (understack, deploy) are what the $understack/... and
# $deploy/... paths below refer to.
sources:
17+
- chart: nautobot
18+
helm:
19+
# Feeds the shared nautobot_config.py from components/nautobot into the
# chart as the nautobot.config parameter.
fileParameters:
20+
- name: nautobot.config
21+
path: $understack/components/nautobot/nautobot_config.py
22+
# A values file missing from either repo is tolerated rather than failing
# the sync (the per-site deploy values file is listed last).
ignoreMissingValueFiles: true
23+
releaseName: nautobot-worker
24+
valueFiles:
25+
- $understack/components/nautobot-worker/values.yaml
26+
- $deploy/{{ include "understack.deploy_path" $ }}/nautobot-worker/values.yaml
27+
repoURL: https://nautobot.github.io/helm-charts/
28+
targetRevision: 2.5.6
29+
- path: components/nautobot-worker
30+
ref: understack
31+
repoURL: {{ include "understack.understack_url" $ }}
32+
targetRevision: {{ include "understack.understack_ref" $ }}
33+
- path: {{ include "understack.deploy_path" $ }}/nautobot-worker
34+
ref: deploy
35+
repoURL: {{ include "understack.deploy_url" $ }}
36+
targetRevision: {{ include "understack.deploy_ref" $ }}
37+
syncPolicy:
38+
# Fully automated sync: prune removed resources and revert manual drift.
automated:
39+
prune: true
40+
selfHeal: true
41+
# Annotates the managed namespace with Delete=false.
# NOTE(review): presumably so that deleting this Application does not
# remove the "nautobot" namespace - confirm against Argo CD sync-option docs.
managedNamespaceMetadata:
42+
annotations:
43+
argocd.argoproj.io/sync-options: Delete=false
44+
syncOptions:
45+
- CreateNamespace=true
46+
- ServerSideApply=true
47+
- RespectIgnoreDifferences=true
48+
- ApplyOutOfSyncOnly=true
49+
{{- end }}

charts/argocd-understack/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,12 @@ site:
556556
# @default -- false
557557
enabled: false
558558

559+
# This toggle is the "nautobot_worker" component that the site-level
# nautobot-worker Application template checks via understack.isEnabled.
# -- Nautobot Celery workers (site-level, connects to global Nautobot)
560+
nautobot_worker:
561+
# -- Enable/disable deploying Nautobot workers at the site level
562+
# @default -- false
563+
enabled: false
564+
559565
# -- SNMP exporter for network device monitoring
560566
snmp_exporter:
561567
# -- Enable/disable deploying SNMP exporter

components/envoy-configs/templates/gw-external.yaml.tpl

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,39 @@ spec:
5252
from: {{ .from | default "All" }}
5353
{{- end }}
5454
{{- end }}
55+
# One plain-TCP gateway listener per entry in routes.tcp. The listener is
# named after the entry's listenerName and listens on its gatewayPort.
# allowedRoutes: when the entry has a selector, "from" is forced to Selector
# and the entry's own "from" is ignored; otherwise "from" defaults to All.
# NOTE(review): the selector value is rendered verbatim with toYaml. Gateway
# API defines allowedRoutes.namespaces.selector as a LabelSelector
# (matchLabels / matchExpressions), while values.schema.json describes
# selector as flat string key-value pairs - confirm the rendered shape is
# accepted by the API server.
{{- range .Values.routes.tcp }}
56+
- name: {{ .listenerName }}
57+
port: {{ .gatewayPort }}
58+
protocol: TCP
59+
allowedRoutes:
60+
namespaces:
61+
{{- if .selector }}
62+
from: Selector
63+
selector:
64+
{{- .selector | toYaml | nindent 12 }}
65+
{{- else }}
66+
from: {{ .from | default "All" }}
67+
{{- end }}
68+
{{- end }}
69+
# One TLS listener per entry in routes.tlsTerminatedTcp. TLS is terminated at
# the gateway (mode: Terminate) for the entry's fqdn, using the certificate
# reference named "<listenerName>-tls".
# NOTE(review): nothing in this template creates that "<listenerName>-tls"
# object; it is presumably issued elsewhere - confirm.
# allowedRoutes: when the entry has a selector, "from" is forced to Selector
# and the entry's own "from" is ignored; otherwise "from" defaults to All.
# NOTE(review): the selector value is rendered verbatim with toYaml. Gateway
# API defines allowedRoutes.namespaces.selector as a LabelSelector
# (matchLabels / matchExpressions), while values.schema.json describes
# selector as flat string key-value pairs - confirm the rendered shape is
# accepted by the API server.
{{- range .Values.routes.tlsTerminatedTcp }}
70+
- name: {{ .listenerName }}
71+
port: {{ .gatewayPort }}
72+
protocol: TLS
73+
hostname: {{ .fqdn | quote }}
74+
tls:
75+
mode: Terminate
76+
certificateRefs:
77+
- name: {{ .listenerName }}-tls
78+
allowedRoutes:
79+
namespaces:
80+
{{- if .selector }}
81+
from: Selector
82+
selector:
83+
{{- .selector | toYaml | nindent 12 }}
84+
{{- else }}
85+
from: {{ .from | default "All" }}
86+
{{- end }}
87+
{{- end }}
5588
{{- if .Values.gateways.external.serviceAnnotations }}
5689
infrastructure:
5790
parametersRef:
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{{- range .Values.routes.tcp }}
2+
---
# TCPRoute for one routes.tcp entry. It attaches to the external gateway's
# listener whose name equals the entry's listenerName and forwards to the
# entry's backend service and port.
# The route is named after the entry's "name", falling back to the backend
# service name. It is created in the entry's namespace, or "envoy-gateway"
# when no namespace is given.
# NOTE(review): the routes.tlsTerminatedTcp template uses the same default
# name and namespace, so an entry in each list pointing at the same service
# without an explicit "name" would produce two TCPRoutes with one name -
# confirm this cannot happen or set "name" explicitly.
3+
apiVersion: gateway.networking.k8s.io/v1alpha2
4+
kind: TCPRoute
5+
metadata:
6+
{{- if .name }}
7+
name: {{ .name }}
8+
{{- else }}
9+
name: {{ .service.name }}
10+
{{- end }}
11+
namespace: {{ .namespace | default "envoy-gateway" }}
12+
labels:
13+
{{- include "envoy-configs.labels" $ | nindent 4 }}
14+
spec:
15+
# sectionName pins the route to the single listener generated for this entry.
parentRefs:
16+
- name: {{ $.Values.gateways.external.name }}
17+
namespace: {{ $.Values.gateways.external.namespace }}
18+
sectionName: {{ .listenerName }}
19+
# The backend namespace is only emitted when the entry sets one; it is then
# the same namespace the route itself is created in.
rules:
20+
- backendRefs:
21+
- name: {{ .service.name }}
22+
{{- with .namespace }}
23+
namespace: {{ . }}
24+
{{- end }}
25+
port: {{ .service.port }}
26+
{{- end }}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{{- range .Values.routes.tlsTerminatedTcp }}
2+
---
# TCPRoute for one routes.tlsTerminatedTcp entry. It attaches to the external
# gateway's listener whose name equals the entry's listenerName and forwards
# to the entry's backend service and port.
# The route is named after the entry's "name", falling back to the backend
# service name. It is created in the entry's namespace, or "envoy-gateway"
# when no namespace is given.
# NOTE(review): the routes.tcp template uses the same default name and
# namespace, so an entry in each list pointing at the same service without an
# explicit "name" would produce two TCPRoutes with one name - confirm this
# cannot happen or set "name" explicitly.
3+
apiVersion: gateway.networking.k8s.io/v1alpha2
4+
kind: TCPRoute
5+
metadata:
6+
{{- if .name }}
7+
name: {{ .name }}
8+
{{- else }}
9+
name: {{ .service.name }}
10+
{{- end }}
11+
namespace: {{ .namespace | default "envoy-gateway" }}
12+
labels:
13+
{{- include "envoy-configs.labels" $ | nindent 4 }}
14+
spec:
15+
# sectionName pins the route to the single listener generated for this entry.
parentRefs:
16+
- name: {{ $.Values.gateways.external.name }}
17+
namespace: {{ $.Values.gateways.external.namespace }}
18+
sectionName: {{ .listenerName }}
19+
# The backend namespace is only emitted when the entry sets one; it is then
# the same namespace the route itself is created in.
rules:
20+
- backendRefs:
21+
- name: {{ .service.name }}
22+
{{- with .namespace }}
23+
namespace: {{ . }}
24+
{{- end }}
25+
port: {{ .service.port }}
26+
{{- end }}

components/envoy-configs/values.schema.json

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,151 @@
224224
],
225225
"additionalProperties": false
226226
}
227+
},
228+
"tcp": {
229+
"type": "array",
230+
"description": "TCP routes for non-HTTP services (e.g., PostgreSQL, Redis)",
231+
"items": {
232+
"type": "object",
233+
"properties": {
234+
"name": {
235+
"type": "string",
236+
"description": "Name identifier for the TCPRoute resource"
237+
},
238+
"listenerName": {
239+
"type": "string",
240+
"description": "Name of the TCP listener created on the gateway for this route; the generated TCPRoute attaches to it via parentRefs.sectionName"
241+
},
242+
"gatewayPort": {
243+
"type": "integer",
244+
"minimum": 1,
245+
"maximum": 65535,
246+
"description": "Port exposed on the gateway for this TCP route"
247+
},
248+
"namespace": {
249+
"type": "string",
250+
"description": "Namespace of the backend service"
251+
},
252+
"service": {
253+
"type": "object",
254+
"description": "Kubernetes service backend configuration",
255+
"properties": {
256+
"name": {
257+
"type": "string",
258+
"description": "Name of the Kubernetes service"
259+
},
260+
"port": {
261+
"type": "integer",
262+
"minimum": 1,
263+
"maximum": 65535,
264+
"description": "Port of the backend service"
265+
}
266+
},
267+
"required": [
268+
"name",
269+
"port"
270+
],
271+
"additionalProperties": false
272+
},
273+
"selector": {
274+
"type": "object",
275+
"description": "Kubernetes-style label selector (key-value pairs)",
276+
"additionalProperties": {
277+
"type": "string"
278+
}
279+
},
280+
"from": {
281+
"type": "string",
282+
"enum": [
283+
"Same",
284+
"All",
285+
"Selector"
286+
],
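287+
"description": "Which namespaces may attach routes to this listener (rendered as allowedRoutes.namespaces.from); defaults to All and is ignored when selector is set"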
288+
}
289+
},
290+
"required": [
291+
"listenerName",
292+
"gatewayPort",
293+
"service"
294+
],
295+
"additionalProperties": false
296+
}
297+
},
298+
"tlsTerminatedTcp": {
299+
"type": "array",
300+
"description": "TLS-terminated TCP routes. Envoy terminates TLS from the client and forwards plaintext to the backend. Used for encrypting cross-cluster TCP traffic (e.g., Redis) without requiring server-side TLS.",
301+
"items": {
302+
"type": "object",
303+
"properties": {
304+
"name": {
305+
"type": "string",
306+
"description": "Name identifier for the TCPRoute resource"
307+
},
308+
"listenerName": {
309+
"type": "string",
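310+
"description": "Name of the TLS listener created on the gateway for this route; the generated TCPRoute attaches to it via parentRefs.sectionName, and the listener certificate is referenced as <listenerName>-tls"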
311+
},
312+
"fqdn": {
313+
"type": "string",
314+
"description": "FQDN for the TLS listener (used for SNI and cert generation)"
315+
},
316+
"gatewayPort": {
317+
"type": "integer",
318+
"minimum": 1,
319+
"maximum": 65535,
320+
"description": "Port exposed on the gateway for this route"
321+
},
322+
"namespace": {
323+
"type": "string",
324+
"description": "Namespace of the backend service"
325+
},
326+
"service": {
327+
"type": "object",
328+
"description": "Kubernetes service backend configuration",
329+
"properties": {
330+
"name": {
331+
"type": "string",
332+
"description": "Name of the Kubernetes service"
333+
},
334+
"port": {
335+
"type": "integer",
336+
"minimum": 1,
337+
"maximum": 65535,
338+
"description": "Port of the backend service"
339+
}
340+
},
341+
"required": [
342+
"name",
343+
"port"
344+
],
345+
"additionalProperties": false
346+
},
347+
"selector": {
348+
"type": "object",
349+
"description": "Kubernetes-style label selector (key-value pairs)",
350+
"additionalProperties": {
351+
"type": "string"
352+
}
353+
},
354+
"from": {
355+
"type": "string",
356+
"enum": [
357+
"Same",
358+
"All",
359+
"Selector"
360+
],
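361+
"description": "Which namespaces may attach routes to this listener (rendered as allowedRoutes.namespaces.from); defaults to All and is ignored when selector is set"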
362+
}
363+
},
364+
"required": [
365+
"listenerName",
366+
"fqdn",
367+
"gatewayPort",
368+
"service"
369+
],
370+
"additionalProperties": false
371+
}
227372
}
228373
}
229374
}

components/envoy-configs/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@ gateways: {}
22
# Route lists consumed by the chart templates; every list defaults to empty,
# so no listeners or routes are rendered unless a deployment overrides them.
routes:
33
http: []
44
tls: []
5+
# Plain TCP routes for non-HTTP services: each entry yields a TCP listener on
# the external gateway plus a TCPRoute to the backend service.
tcp: []
6+
# TCP routes where the gateway terminates TLS and forwards plaintext to the
# backend: each entry yields a TLS (mode: Terminate) listener plus a TCPRoute.
tlsTerminatedTcp: []
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
# Kustomization that declares no resources.
# NOTE(review): presumably a placeholder so the directory is a valid source
# path for Argo CD - confirm which directory this file lives in.
2+
apiVersion: kustomize.config.k8s.io/v1beta1
3+
kind: Kustomization
4+
5+
resources: []
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Nautobot Worker (site-level)
2+
#
3+
# Deploys only Celery workers that connect back to the global Nautobot
4+
# database and Redis. The web server is disabled because it lives on
5+
# the global cluster. Redis and PostgreSQL are disabled because the
6+
# workers reach the global instances over the network.
7+
#
8+
# The deploy repo for each site MUST provide:
9+
# - ExternalSecrets for nautobot-django, nautobot-redis, nautobot-db,
10+
# nautobot-custom-env, and dockerconfigjson-github-com
11+
# - values.yaml overrides for nautobot.db.host and nautobot.redis.host
12+
# pointing to the global cluster endpoints
#
# NOTE(review): only the nautobot-db and nautobot-django secrets are referenced
# by name in this file. nautobot-redis, nautobot-custom-env and
# dockerconfigjson-github-com are presumably wired up by the per-site deploy
# values - confirm, otherwise the workers will not receive them.
13+
---
14+
15+
# Disable the Nautobot web server — workers only
16+
nautobot:
17+
enabled: false
18+
# Zero replicas is set in addition to enabled: false.
replicaCount: 0
19+
20+
# Database connection settings. host is left empty here on purpose and must
# be overridden per site; the password is read from key "password" of the
# existing secret "nautobot-db".
db:
21+
engine: "django.db.backends.postgresql"
22+
# Override in deploy repo values to point at the global CNPG service
23+
host: ""
24+
port: 5432
25+
name: "app"
26+
user: "app"
27+
existingSecret: "nautobot-db"
28+
existingSecretPasswordKey: "password"
29+
30+
# Django settings come from the existing secret "nautobot-django".
django:
31+
existingSecret: nautobot-django
32+
33+
# Superuser handling is switched off for the site deployment.
superUser:
34+
enabled: false
35+
36+
redis:
37+
# Override in deploy repo values to point at the global Redis service.
38+
# ssl: true because Envoy Gateway terminates TLS on the Redis port —
39+
# the worker connects via rediss:// and Envoy forwards plaintext
40+
# to Redis locally on the global cluster.
41+
host: ""
42+
port: 6379
43+
ssl: true
44+
username: ""
45+
46+
# Celery worker settings: workers are enabled with a single replica.
# NOTE(review): concurrency is presumably the number of worker processes per
# pod - confirm against the Nautobot chart's values reference.
celery:
47+
enabled: true
48+
concurrency: 2
49+
replicaCount: 1
# The nautobot-django secret is additionally listed as a source of extra
# environment variables for the workers.
50+
extraEnvVarsSecret:
51+
- nautobot-django
# Both probes use the same timings: first check after 60s, then every 120s,
# with a 60s timeout per check.
52+
livenessProbe:
53+
initialDelaySeconds: 60
54+
periodSeconds: 120
55+
timeoutSeconds: 60
56+
readinessProbe:
57+
initialDelaySeconds: 60
58+
periodSeconds: 120
59+
timeoutSeconds: 60
60+
61+
# Disable celery beat — scheduling runs on the global cluster only
62+
workers:
63+
beat:
64+
enabled: false
65+
66+
# Do not deploy local Redis — use the global instance
67+
redis:
68+
enabled: false
69+
70+
# Do not deploy local PostgreSQL — use the global CNPG instance
71+
postgresql:
72+
enabled: false
73+
74+
# Ingress is disabled in this deployment.
ingress:
75+
enabled: false
76+
77+
# Metrics are disabled in this deployment.
metrics:
78+
enabled: false

0 commit comments

Comments
 (0)