# Source: imran-siddique/agent-governance-toolkit (main) - packages/agent-mesh/charts/agentmesh/values.yaml
# Default values for agentmesh
# Production-grade defaults for enterprise deployment.

# -- Global configuration shared across all components
global:
  # -- Kubernetes namespace for deployment (string)
  namespace: agentmesh
  # -- Docker registry secrets for pulling private images (list of secret names)
  imagePullSecrets: []
  # Quoted so the version is always read as a string, never as a number.
  # -- Default image tag applied to all components unless overridden via `<component>.image.tag` (string, semver)
  imageTag: "0.3.0"
  # TLS settings for traffic between the agentmesh components.
  tls:
    # -- Enable TLS for inter-component communication (bool)
    enabled: true
    # NOTE(review): nothing in this file creates the Secret; presumably it
    # must exist in the target namespace before install - confirm in templates.
    # -- Name of the Kubernetes Secret containing the TLS cert and key (string)
    certSecretName: agentmesh-tls
  # SPIFFE/SPIRE workload-identity settings (disabled by default).
  spiffe:
    # -- Enable SPIFFE-based workload identity (bool)
    enabled: false
    # -- SPIFFE trust domain, e.g. "agentmesh.local" (string)
    trustDomain: agentmesh.local
    # -- Path to the SPIRE agent UNIX socket (string, file path)
    socketPath: /run/spire/sockets/agent.sock

# NOTE(review): an empty string presumably keeps the chart's default naming;
# confirm against the chart's name helpers.
# -- Override the chart name used in generated resource names (string)
nameOverride: ""
# -- Fully override the release-derived name used in generated resource names (string)
fullnameOverride: ""

# -- Service account configuration
serviceAccount:
  # -- Create a dedicated ServiceAccount for agentmesh pods (bool)
  create: true
  # -- Annotations to add to the ServiceAccount, e.g. for cloud IAM role bindings (map)
  annotations: {}
  # -- Explicit ServiceAccount name; auto-generated from the release name if empty (string)
  name: ""

# -- Pod-level security context applied to all pods
podSecurityContext:
  # -- Require containers to run as non-root (bool)
  runAsNonRoot: true
  # NOTE(review): runAsGroup is not set here, so the primary GID is left to
  # the image/runtime default - confirm this is intended.
  # -- UID for the container process (int)
  runAsUser: 1000
  # -- Supplemental GID applied to mounted volumes (int)
  fsGroup: 1000
  # Seccomp profile applied to every container in the pod.
  seccompProfile:
    # -- Seccomp profile type: RuntimeDefault, Localhost, or Unconfined (string)
    type: RuntimeDefault

# -- Container-level security context applied to all containers
securityContext:
  # Linux capability adjustments; the default drops every capability and adds none.
  capabilities:
    # -- Linux capabilities to drop (list)
    drop:
      - ALL
  # -- Mount the root filesystem as read-only (bool)
  readOnlyRootFilesystem: true
  # -- Prevent privilege escalation via setuid/setgid (bool)
  allowPrivilegeEscalation: false

# ---------------------------------------------------------------------------
# Trust Engine - validates agent identity and issues trust tokens
# ---------------------------------------------------------------------------
trustEngine:
  # -- Number of Trust Engine replicas (int, >= 1)
  replicas: 2
  # Container image coordinates for the Trust Engine.
  image:
    # -- Container image repository (string)
    repository: ghcr.io/microsoft/agentmesh/trust-engine
    # -- Image tag; defaults to global.imageTag when empty (string)
    tag: ""
    # -- Image pull policy: Always, IfNotPresent, or Never (string)
    pullPolicy: IfNotPresent
  # Kubernetes Service fronting the Trust Engine pods.
  service:
    # -- Service type: ClusterIP, NodePort, or LoadBalancer (string)
    type: ClusterIP
    # -- Primary service port (int, 1-65535)
    port: 8443
    # -- Prometheus metrics port (int, 1-65535)
    metricsPort: 9090
  # CPU/memory requests and limits for the Trust Engine container.
  resources:
    requests:
      # -- Minimum CPU allocation (string, Kubernetes resource quantity)
      cpu: 100m
      # -- Minimum memory allocation (string, Kubernetes resource quantity)
      memory: 256Mi
    limits:
      # -- Maximum CPU allocation (string, Kubernetes resource quantity)
      cpu: 500m
      # -- Maximum memory allocation (string, Kubernetes resource quantity)
      memory: 512Mi
  # Liveness probe: HTTP GET against /healthz on the port named `http`.
  # NOTE(review): no `scheme` is set, so Kubernetes probes over plain HTTP
  # (the httpGet default) although global.tls.enabled is true; also confirm
  # the pod template declares a containerPort named `http`.
  livenessProbe:
    httpGet:
      # -- HTTP path requested by the liveness probe (string)
      path: /healthz
      # -- Container port name or number targeted by the liveness probe (string or int)
      port: http
    # -- Seconds to wait before the first liveness check (int)
    initialDelaySeconds: 30
    # -- Interval between liveness checks (int, seconds)
    periodSeconds: 10
    # -- Timeout for each liveness check (int, seconds)
    timeoutSeconds: 5
    # -- Consecutive failures before the container is restarted (int)
    failureThreshold: 3
  # Readiness probe: HTTP GET against /readyz on the port named `http`.
  readinessProbe:
    httpGet:
      # -- HTTP path requested by the readiness probe (string)
      path: /readyz
      # -- Container port name or number targeted by the readiness probe (string or int)
      port: http
    # -- Seconds to wait before the first readiness check (int)
    initialDelaySeconds: 10
    # -- Interval between readiness checks (int, seconds)
    periodSeconds: 5
    # -- Timeout for each readiness check (int, seconds)
    timeoutSeconds: 3
    # -- Consecutive failures before marking pod unready (int)
    failureThreshold: 3
  # -- Node labels for pod scheduling (map)
  nodeSelector: {}
  # -- Tolerations for pod scheduling (list)
  tolerations: []
  # -- Affinity rules for pod scheduling (map)
  affinity: {}

# ---------------------------------------------------------------------------
# Policy Server - evaluates governance policies against agent actions
# ---------------------------------------------------------------------------
policyServer:
  # -- Number of Policy Server replicas (int, >= 1)
  replicas: 2
  # Container image coordinates for the Policy Server.
  image:
    # -- Container image repository (string)
    repository: ghcr.io/microsoft/agentmesh/policy-server
    # -- Image tag; defaults to global.imageTag when empty (string)
    tag: ""
    # -- Image pull policy: Always, IfNotPresent, or Never (string)
    pullPolicy: IfNotPresent
  # Kubernetes Service fronting the Policy Server pods.
  service:
    # -- Service type: ClusterIP, NodePort, or LoadBalancer (string)
    type: ClusterIP
    # -- Primary service port (int, 1-65535)
    port: 8444
    # -- Prometheus metrics port (int, 1-65535)
    metricsPort: 9091
  # CPU/memory requests and limits for the Policy Server container.
  resources:
    requests:
      # -- Minimum CPU allocation (string, Kubernetes resource quantity)
      cpu: 100m
      # -- Minimum memory allocation (string, Kubernetes resource quantity)
      memory: 256Mi
    limits:
      # -- Maximum CPU allocation (string, Kubernetes resource quantity)
      cpu: 500m
      # -- Maximum memory allocation (string, Kubernetes resource quantity)
      memory: 512Mi
  # NOTE(review): securityContext.readOnlyRootFilesystem is true in this file,
  # so this path presumably has to be backed by a mounted volume - confirm in
  # the deployment template.
  # -- Mount path for YAML policy files inside the container (string, absolute path)
  policyMountPath: /etc/agentmesh/policies
  # Liveness probe: HTTP GET against /healthz on the port named `http`.
  # NOTE(review): no `scheme` is set, so Kubernetes probes over plain HTTP
  # (the httpGet default) although global.tls.enabled is true; also confirm
  # the pod template declares a containerPort named `http`.
  livenessProbe:
    httpGet:
      # -- HTTP path requested by the liveness probe (string)
      path: /healthz
      # -- Container port name or number targeted by the liveness probe (string or int)
      port: http
    # -- Seconds to wait before the first liveness check (int)
    initialDelaySeconds: 30
    # -- Interval between liveness checks (int, seconds)
    periodSeconds: 10
    # -- Timeout for each liveness check (int, seconds)
    timeoutSeconds: 5
    # -- Consecutive failures before the container is restarted (int)
    failureThreshold: 3
  # Readiness probe: HTTP GET against /readyz on the port named `http`.
  readinessProbe:
    httpGet:
      # -- HTTP path requested by the readiness probe (string)
      path: /readyz
      # -- Container port name or number targeted by the readiness probe (string or int)
      port: http
    # -- Seconds to wait before the first readiness check (int)
    initialDelaySeconds: 10
    # -- Interval between readiness checks (int, seconds)
    periodSeconds: 5
    # -- Timeout for each readiness check (int, seconds)
    timeoutSeconds: 3
    # -- Consecutive failures before marking pod unready (int)
    failureThreshold: 3
  # -- Node labels for pod scheduling (map)
  nodeSelector: {}
  # -- Tolerations for pod scheduling (list)
  tolerations: []
  # -- Affinity rules for pod scheduling (map)
  affinity: {}

# ---------------------------------------------------------------------------
# Audit Collector - captures and stores agent interaction audit logs
# ---------------------------------------------------------------------------
auditCollector:
  # NOTE(review): this is the only component defaulting to a single replica,
  # and its volume is ReadWriteOnce. autoscaling.enabled is true with
  # minReplicas: 2 "for all components", and podDisruptionBudget.minAvailable
  # is 1 - confirm how the templates reconcile these for this workload.
  # -- Number of Audit Collector replicas (int, >= 1)
  replicas: 1
  # Container image coordinates for the Audit Collector.
  image:
    # -- Container image repository (string)
    repository: ghcr.io/microsoft/agentmesh/audit-collector
    # -- Image tag; defaults to global.imageTag when empty (string)
    tag: ""
    # -- Image pull policy: Always, IfNotPresent, or Never (string)
    pullPolicy: IfNotPresent
  # Kubernetes Service fronting the Audit Collector pods.
  service:
    # -- Service type: ClusterIP, NodePort, or LoadBalancer (string)
    type: ClusterIP
    # -- Primary service port (int, 1-65535)
    port: 8445
    # -- Prometheus metrics port (int, 1-65535)
    metricsPort: 9092
  # CPU/memory requests and limits for the Audit Collector container.
  resources:
    requests:
      # -- Minimum CPU allocation (string, Kubernetes resource quantity)
      cpu: 100m
      # -- Minimum memory allocation (string, Kubernetes resource quantity)
      memory: 256Mi
    limits:
      # -- Maximum CPU allocation (string, Kubernetes resource quantity)
      cpu: 500m
      # -- Maximum memory allocation (string, Kubernetes resource quantity)
      memory: 512Mi
  # Persistent volume claim settings for stored audit logs.
  persistence:
    # -- Enable persistent storage for audit logs (bool)
    enabled: true
    # -- StorageClass name; empty uses the cluster default (string)
    storageClass: ""
    # -- Persistent volume size (string, Kubernetes resource quantity, e.g. "10Gi")
    size: 10Gi
    # -- PVC access modes (list)
    accessModes:
      - ReadWriteOnce
  # -- Number of days to retain audit logs before automatic cleanup (int)
  retentionDays: 90
  # Liveness probe: HTTP GET against /healthz on the port named `http`.
  # NOTE(review): no `scheme` is set, so Kubernetes probes over plain HTTP
  # (the httpGet default) although global.tls.enabled is true; also confirm
  # the pod template declares a containerPort named `http`.
  livenessProbe:
    httpGet:
      # -- HTTP path requested by the liveness probe (string)
      path: /healthz
      # -- Container port name or number targeted by the liveness probe (string or int)
      port: http
    # -- Seconds to wait before the first liveness check (int)
    initialDelaySeconds: 30
    # -- Interval between liveness checks (int, seconds)
    periodSeconds: 10
    # -- Timeout for each liveness check (int, seconds)
    timeoutSeconds: 5
    # -- Consecutive failures before the container is restarted (int)
    failureThreshold: 3
  # Readiness probe: HTTP GET against /readyz on the port named `http`.
  readinessProbe:
    httpGet:
      # -- HTTP path requested by the readiness probe (string)
      path: /readyz
      # -- Container port name or number targeted by the readiness probe (string or int)
      port: http
    # -- Seconds to wait before the first readiness check (int)
    initialDelaySeconds: 10
    # -- Interval between readiness checks (int, seconds)
    periodSeconds: 5
    # -- Timeout for each readiness check (int, seconds)
    timeoutSeconds: 3
    # -- Consecutive failures before marking pod unready (int)
    failureThreshold: 3
  # -- Node labels for pod scheduling (map)
  nodeSelector: {}
  # -- Tolerations for pod scheduling (list)
  tolerations: []
  # -- Affinity rules for pod scheduling (map)
  affinity: {}

# ---------------------------------------------------------------------------
# API Gateway - external entry point for agent traffic
# ---------------------------------------------------------------------------
apiGateway:
  # -- Number of API Gateway replicas (int, >= 1)
  replicas: 2
  # Container image coordinates for the API Gateway.
  image:
    # -- Container image repository (string)
    repository: ghcr.io/microsoft/agentmesh/api-gateway
    # -- Image tag; defaults to global.imageTag when empty (string)
    tag: ""
    # -- Image pull policy: Always, IfNotPresent, or Never (string)
    pullPolicy: IfNotPresent
  # Kubernetes Service fronting the API Gateway pods; the only Service in this
  # file that defaults to LoadBalancer rather than ClusterIP.
  service:
    # -- Service type; LoadBalancer exposes the gateway externally (string)
    type: LoadBalancer
    # NOTE(review): pods run as UID 1000 with all capabilities dropped (see
    # podSecurityContext / securityContext), so the container itself should
    # not bind 443 - confirm the Service targets a non-privileged containerPort.
    # -- External HTTPS port (int, 1-65535)
    port: 443
    # -- Prometheus metrics port (int, 1-65535)
    metricsPort: 9093
  # CPU/memory requests and limits for the API Gateway container.
  resources:
    requests:
      # -- Minimum CPU allocation (string, Kubernetes resource quantity)
      cpu: 100m
      # -- Minimum memory allocation (string, Kubernetes resource quantity)
      memory: 256Mi
    limits:
      # -- Maximum CPU allocation (string, Kubernetes resource quantity)
      cpu: 500m
      # -- Maximum memory allocation (string, Kubernetes resource quantity)
      memory: 512Mi
  # -- Maximum API requests per minute per client before throttling (int)
  rateLimitPerMinute: 1000
  # Liveness probe: HTTP GET against /healthz on the port named `http`.
  # NOTE(review): no `scheme` is set, so Kubernetes probes over plain HTTP
  # (the httpGet default) although global.tls.enabled is true; also confirm
  # the pod template declares a containerPort named `http`.
  livenessProbe:
    httpGet:
      # -- HTTP path requested by the liveness probe (string)
      path: /healthz
      # -- Container port name or number targeted by the liveness probe (string or int)
      port: http
    # -- Seconds to wait before the first liveness check (int)
    initialDelaySeconds: 30
    # -- Interval between liveness checks (int, seconds)
    periodSeconds: 10
    # -- Timeout for each liveness check (int, seconds)
    timeoutSeconds: 5
    # -- Consecutive failures before the container is restarted (int)
    failureThreshold: 3
  # Readiness probe: HTTP GET against /readyz on the port named `http`.
  readinessProbe:
    httpGet:
      # -- HTTP path requested by the readiness probe (string)
      path: /readyz
      # -- Container port name or number targeted by the readiness probe (string or int)
      port: http
    # -- Seconds to wait before the first readiness check (int)
    initialDelaySeconds: 10
    # -- Interval between readiness checks (int, seconds)
    periodSeconds: 5
    # -- Timeout for each readiness check (int, seconds)
    timeoutSeconds: 3
    # -- Consecutive failures before marking pod unready (int)
    failureThreshold: 3
  # -- Node labels for pod scheduling (map)
  nodeSelector: {}
  # -- Tolerations for pod scheduling (list)
  tolerations: []
  # -- Affinity rules for pod scheduling (map)
  affinity: {}

# ---------------------------------------------------------------------------
# Monitoring - Prometheus metrics collection
# ---------------------------------------------------------------------------
monitoring:
  # Metrics exposure; each component's port is set by its own `service.metricsPort`.
  prometheus:
    # -- Enable Prometheus metrics endpoints on all components (bool)
    enabled: true
    # -- How often Prometheus scrapes metrics (string, duration, e.g. "15s")
    scrapeInterval: 15s
  # Optional ServiceMonitor resource; disabled by default.
  serviceMonitor:
    # -- Create a Prometheus ServiceMonitor resource (bool; requires prometheus-operator)
    enabled: false
    # -- Namespace for the ServiceMonitor; empty uses the release namespace (string)
    namespace: ""
    # -- Extra labels to match the Prometheus selector (map)
    additionalLabels: {}

# ---------------------------------------------------------------------------
# Autoscaling (HPA) - Horizontal Pod Autoscaler
# ---------------------------------------------------------------------------
# NOTE(review): these bounds are shared by every component, while each
# component also sets its own `replicas` (auditCollector defaults to 1, below
# minReplicas). Presumably the HPA takes over replica counts when enabled -
# confirm in the templates.
autoscaling:
  # -- Enable HPA for all components (bool)
  enabled: true
  # -- Minimum number of replicas the HPA may scale down to (int, >= 1)
  minReplicas: 2
  # -- Maximum number of replicas the HPA may scale up to (int, >= minReplicas)
  maxReplicas: 10
  # -- Target average CPU utilization that triggers scaling (int, percent 1-100)
  targetCPUUtilizationPercentage: 70

# ---------------------------------------------------------------------------
# Pod Disruption Budget
# ---------------------------------------------------------------------------
# NOTE(review): with minAvailable: 1, a workload running a single pod cannot
# be evicted voluntarily (e.g. during a node drain). auditCollector.replicas
# defaults to 1 - confirm whether a PDB is rendered for it.
podDisruptionBudget:
  # -- Enable PDB to protect availability during voluntary disruptions (bool)
  enabled: true
  # -- Minimum pods that must remain available during disruptions (int or percentage string, e.g. "50%")
  minAvailable: 1

# ---------------------------------------------------------------------------
# Network Policy
# ---------------------------------------------------------------------------
# NOTE(review): the actual allow rules are not visible in this file; they
# presumably live in the chart templates. NetworkPolicy objects only take
# effect when the cluster's network plugin enforces them.
networkPolicy:
  # -- Enable Kubernetes NetworkPolicy to restrict pod-to-pod traffic (bool)
  enabled: true