# manager.yaml (forked from llm-d/llm-d-workload-variant-autoscaler)
apiVersion: v1
kind: Namespace
metadata:
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: workload-variant-autoscaler
    app.kubernetes.io/managed-by: kustomize
  name: system
---
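# NOTE: the placeholder names below ("system", "controller-manager") are
# normally rewritten at build time by the kustomize namespace/namePrefix
# transformers in the default overlay of a kubebuilder-style layout.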
apiVersion: apps/v1
kind: Deployment
metadata:
  name: controller-manager
  namespace: system
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: workload-variant-autoscaler
    app.kubernetes.io/managed-by: kustomize
spec:
  selector:
    matchLabels:
      control-plane: controller-manager
      app.kubernetes.io/name: workload-variant-autoscaler
  template:
    metadata:
      annotations:
        kubectl.kubernetes.io/default-container: manager
      labels:
        control-plane: controller-manager
        app.kubernetes.io/name: workload-variant-autoscaler
    spec:
      # TODO(user): Uncomment the following code to configure the nodeAffinity expression
      # according to the platforms which are supported by your solution.
      # It is considered best practice to support multiple architectures. You can
      # build your manager image using the makefile target docker-buildx.
      # affinity:
      #   nodeAffinity:
      #     requiredDuringSchedulingIgnoredDuringExecution:
      #       nodeSelectorTerms:
      #         - matchExpressions:
      #           - key: kubernetes.io/arch
      #             operator: In
      #             values:
      #               - amd64
      #               - arm64
      #               - ppc64le
      #               - s390x
      #           - key: kubernetes.io/os
      #             operator: In
      #             values:
      #               - linux
      securityContext:
        # Projects are configured by default to adhere to the "restricted" Pod Security Standards.
        # This ensures that deployments meet the highest security requirements for Kubernetes.
        # For more details, see: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
        runAsNonRoot: true
        seccompProfile:
          type: RuntimeDefault
      containers:
      - command:
        - /manager
        args:
        - --leader-elect=true
        - --health-probe-bind-address=:8081
        # Set watch-namespace explicitly to the empty string for consistent
        # behaviour, because the default value diverges between upstream and
        # midstream/downstream.
        - --watch-namespace=
        # Leader election timeout configuration (optional; defaults shown below).
        # Uncomment and adjust these values if you need to tune for your environment:
        # - --leader-election-lease-duration=60s
        # - --leader-election-renew-deadline=50s
        # - --leader-election-retry-period=10s
        # - --rest-client-timeout=60s
        image: controller:latest
        imagePullPolicy: IfNotPresent
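        # NOTE: "controller:latest" is a placeholder image reference; in a
        # kubebuilder-style project it is usually substituted at deploy time
        # (e.g. via `kustomize edit set image`, driven by the Makefile's IMG
        # variable). The env block below sources most runtime settings from the
        # ConfigMap named by CONFIG_MAP_NAME, so each configMapKeyRef entry
        # must reference that same object.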
        env:
        - name: LOG_LEVEL
          value: "debug" # or "info", "warn", "error"
        - name: CONFIG_MAP_NAME
          value: workload-variant-autoscaler-wva-variantautoscaling-config
        - name: PROMETHEUS_BASE_URL
          valueFrom:
            configMapKeyRef:
              name: workload-variant-autoscaler-wva-variantautoscaling-config
              key: PROMETHEUS_BASE_URL
        - name: PROMETHEUS_TLS_INSECURE_SKIP_VERIFY
          valueFrom:
            configMapKeyRef:
              name: workload-variant-autoscaler-wva-variantautoscaling-config
              key: PROMETHEUS_TLS_INSECURE_SKIP_VERIFY
        - name: PROMETHEUS_TOKEN_PATH
          value: "/var/run/secrets/kubernetes.io/serviceaccount/token"
        - name: WVA_SCALE_TO_ZERO
          valueFrom:
            configMapKeyRef:
              name: workload-variant-autoscaler-wva-variantautoscaling-config
              key: WVA_SCALE_TO_ZERO
        - name: WVA_LIMITED_MODE
          valueFrom:
            configMapKeyRef:
              name: workload-variant-autoscaler-wva-variantautoscaling-config
              key: WVA_LIMITED_MODE
        - name: WVA_NODE_SELECTOR
          valueFrom:
            configMapKeyRef:
              name: workload-variant-autoscaler-wva-variantautoscaling-config
              key: WVA_NODE_SELECTOR
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        # Saturation scaling ConfigMap name (must match the kustomize namePrefix + base name).
        - name: SATURATION_CONFIG_MAP_NAME
          value: "workload-variant-autoscaler-saturation-scaling-config"
        name: manager
        ports: []
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - "ALL"
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8081
          initialDelaySeconds: 15
          periodSeconds: 20
        readinessProbe:
          httpGet:
            path: /readyz
            port: 8081
          initialDelaySeconds: 5
          periodSeconds: 10
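        # The /healthz and /readyz endpoints are the standard controller-runtime
        # health and readiness handlers, served on the port configured by
        # --health-probe-bind-address above (:8081).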
        # TODO(user): Configure the resources accordingly based on the project requirements.
        # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
        resources:
          limits:
            cpu: "1"
            memory: 1Gi
          requests:
            cpu: 10m
            memory: 256Mi
        volumeMounts:
        - name: prometheus-client-certs
          mountPath: /etc/prometheus-certs
          readOnly: true
        - name: epp-metrics-token
          mountPath: /var/run/secrets/epp-metrics
          readOnly: true
      volumes:
      - name: prometheus-client-certs
        secret:
          secretName: prometheus-client-cert
          optional: true
          items:
          - key: ca.crt
            path: ca.crt
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
      - name: epp-metrics-token
        secret:
          secretName: epp-metrics-token
          defaultMode: 420
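      # defaultMode 420 is decimal for octal 0644 (owner read/write, group and
      # world read). The prometheus-client-cert Secret is marked optional, so
      # the pod still starts when no mTLS material is provisioned.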
      serviceAccountName: controller-manager
      terminationGracePeriodSeconds: 10
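# A minimal sketch of how this manifest is typically applied, assuming the
# standard kubebuilder layout where this file lives at config/manager/manager.yaml
# and is composed by config/default:
#
#   # via the scaffolded Makefile (IMG replaces the controller:latest placeholder)
#   make deploy IMG=<registry>/workload-variant-autoscaler:<tag>
#
#   # or directly with kustomize
#   kustomize build config/default | kubectl apply -f -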