-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsetup.sh
More file actions
executable file
·581 lines (490 loc) · 17.2 KB
/
setup.sh
File metadata and controls
executable file
·581 lines (490 loc) · 17.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
#!/bin/bash
set -euo pipefail

# ANSI escape sequences consumed by the log helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration: every value may be overridden from the environment.
: "${CLUSTER_NAME:=kind-yunikarp}"
: "${KARPENTER_VERSION:=v1.8.0}"
# Set INSTALL_GATEWAY to "true" to install Gateway API and kgateway for
# unified UI access. If false (default), use direct kubectl port-forward
# to YuniKorn and Grafana.
: "${INSTALL_GATEWAY:=false}"
# Helper functions
# Print an informational message to stdout, prefixed with a green [INFO] tag.
# Arguments: $1 - message text (backslash escapes in it are expanded).
log_info() {
  printf '%b\n' "${GREEN}[INFO]${NC} $1"
}
# Print a warning to stderr, prefixed with a yellow [WARN] tag.
# Warnings are diagnostics, so they are kept off stdout.
# Arguments: $1 - message text (backslash escapes in it are expanded).
log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $1" >&2
}
# Print an error to stderr, prefixed with a red [ERROR] tag.
# Errors are diagnostics, so they are kept off stdout.
# Arguments: $1 - message text (backslash escapes in it are expanded).
log_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Check prerequisites.
# Verifies that every required command-line tool can be found on PATH.
# Logs the missing tools and exits the script with status 1 if any are absent.
# NOTE: git, curl and base64 are also invoked later in this script but are
# not verified here.
check_prerequisites() {
  log_info "Checking prerequisites..."

  local -a missing_tools=()
  # Keep the loop variable local so it does not leak into the global scope.
  local tool
  for tool in kind kubectl helm go controller-gen helm-docs golangci-lint actionlint ko; do
    if ! command -v "$tool" &> /dev/null; then
      missing_tools+=("$tool")
    fi
  done

  if [ ${#missing_tools[@]} -ne 0 ]; then
    log_error "Missing required tools: ${missing_tools[*]}"
    log_error "Please install them before running this script"
    exit 1
  fi

  log_info "All prerequisites met!"
}
# Step 1: Create Kind cluster.
# Creates a Kind cluster named $CLUSTER_NAME with one control-plane node and
# one worker node. If a cluster with exactly that name already exists, only
# its kubeconfig is exported and creation is skipped.
create_kind_cluster() {
  log_info "Creating Kind cluster: $CLUSTER_NAME"

  # Capture the listing first, then match the name literally and as a whole
  # line: the name must not be interpreted as a regex, and a pipeline into
  # `grep -q` can fail spuriously under `set -o pipefail`.
  local existing_clusters
  existing_clusters=$(kind get clusters 2>/dev/null) || existing_clusters=""
  if grep -Fxq -- "$CLUSTER_NAME" <<<"$existing_clusters"; then
    log_warn "Cluster $CLUSTER_NAME already exists. Skipping creation."
    kind export kubeconfig --name "$CLUSTER_NAME"
    return 0
  fi

  kind create cluster --name "$CLUSTER_NAME" --config=- <<EOF
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
EOF

  log_info "Cluster created successfully!"
  kubectl cluster-info --context "kind-${CLUSTER_NAME}"
}
# Step 2: Label worker node.
# Blocks (up to 120s) until node "${CLUSTER_NAME}-worker" reports Ready, then
# sets the label node-role=real on it, overwriting any existing value.
label_worker_node() {
  log_info "Labeling worker node..."

  local worker_node="${CLUSTER_NAME}-worker"
  kubectl wait --for=condition=Ready "node/${worker_node}" --timeout=120s
  kubectl label node "${worker_node}" node-role=real --overwrite

  log_info "Worker node labeled!"
}
# Step 3: Install Gateway API.
# Applies the upstream Gateway API v1.4.0 "standard-install" manifest to the
# current kubectl context.
install_gateway_api() {
  local release="v1.4.0"
  local manifest_url="https://github.com/kubernetes-sigs/gateway-api/releases/download/${release}/standard-install.yaml"

  log_info "Installing Gateway API ${release}..."
  kubectl apply -f "${manifest_url}"
  log_info "Gateway API installed successfully!"
}
# Step 4: Install YuniKorn.
# Adds the YuniKorn Helm repository, installs the "yunikorn" release into the
# yunikorn namespace unless a release with exactly that name is already
# listed, then patches the scheduler and admission-controller deployments
# with a node affinity that excludes nodes carrying the kwok.x-k8s.io/node
# label.
install_yunikorn() {
  log_info "Installing YuniKorn scheduler..."

  helm repo add yunikorn https://apache.github.io/yunikorn-release
  helm repo update

  # Match the release name exactly; a substring match on the full table can
  # be satisfied by an unrelated release whose name or chart mentions yunikorn.
  local releases
  releases=$(helm list -n yunikorn --short 2>/dev/null) || releases=""
  if grep -Fxq -- yunikorn <<<"$releases"; then
    log_warn "YuniKorn already installed. Skipping installation."
  else
    helm install yunikorn yunikorn/yunikorn \
      --namespace yunikorn \
      --create-namespace \
      --wait
    log_info "YuniKorn installed successfully!"
  fi

  log_info "Configuring YuniKorn components to avoid KWOK nodes..."

  local -a deployments=(yunikorn-scheduler yunikorn-admission-controller)
  local deployment

  # Waiting is best-effort: a timeout is reported but does not stop the patch.
  for deployment in "${deployments[@]}"; do
    kubectl wait --for=condition=available --timeout=120s \
      "deployment/${deployment}" -n yunikorn \
      || log_warn "Deployment ${deployment} not available after 120s, patching anyway"
  done

  # JSON patch shared by both deployments: require nodes that do NOT have
  # the kwok.x-k8s.io/node label.
  local affinity_patch
  affinity_patch='[
  {
    "op": "add",
    "path": "/spec/template/spec/affinity",
    "value": {
      "nodeAffinity": {
        "requiredDuringSchedulingIgnoredDuringExecution": {
          "nodeSelectorTerms": [{
            "matchExpressions": [{
              "key": "kwok.x-k8s.io/node",
              "operator": "DoesNotExist"
            }]
          }]
        }
      }
    }
  }
]'

  for deployment in "${deployments[@]}"; do
    kubectl patch deployment "${deployment}" -n yunikorn --type='json' \
      -p="${affinity_patch}"
  done

  log_info "YuniKorn components configured!"
}
# Step 5: Install KWOK.
# Adds the KWOK Helm repository and installs two releases into kube-system:
# "kwok" (values from ./kwok-helm-karpenter-config.yaml, resolved relative to
# the current working directory) and "kwok-stage". Installation is skipped
# only when BOTH releases are already listed, so an earlier run that stopped
# after the first release is completed on re-run. Finally waits up to 120s
# for the kwok-controller deployment to become available.
install_kwok() {
  log_info "Installing KWOK..."

  helm repo add kwok https://kwok.sigs.k8s.io/charts/
  helm repo update

  local releases
  releases=$(helm list -n kube-system --short 2>/dev/null) || releases=""
  if grep -Fxq -- kwok <<<"$releases" \
    && grep -Fxq -- kwok-stage <<<"$releases"; then
    log_warn "KWOK already installed. Skipping installation."
  else
    # `helm upgrade --install` is idempotent, so re-running it for a release
    # that already exists is safe.
    helm upgrade --install kwok kwok/kwok \
      --namespace kube-system \
      -f kwok-helm-karpenter-config.yaml \
      --wait
    helm upgrade --install kwok-stage kwok/stage-fast \
      --namespace kube-system \
      --wait
    log_info "KWOK installed successfully!"
  fi

  # Verify KWOK is running
  kubectl wait --for=condition=available --timeout=120s \
    deployment/kwok-controller -n kube-system
  log_info "KWOK controller is running!"
}
# Step 6: Install Karpenter with KWOK provider.
# No-op when a "karpenter" deployment already exists in kube-system.
# Otherwise clones the upstream repository into ./karpenter (if absent),
# checks out $KARPENTER_VERSION, runs the repository's Prometheus install
# script and `make apply-with-kind`, then returns to the parent directory.
# Exports KWOK_REPO and KIND_CLUSTER_NAME for the make invocation.
install_karpenter() {
  log_info "Installing Karpenter with KWOK provider..."

  # Nothing to do when the deployment is already present.
  if kubectl get deployment karpenter -n kube-system &>/dev/null; then
    log_warn "Karpenter already installed. Skipping installation."
    return 0
  fi

  local checkout_dir="karpenter"
  if [[ ! -d "${checkout_dir}" ]]; then
    log_info "Cloning Karpenter repository..."
    git clone https://github.com/kubernetes-sigs/karpenter.git
  fi
  cd "${checkout_dir}"

  # Pin the working tree to the requested version; fall back to
  # `git checkout` when `git switch` does not succeed.
  git fetch --tags
  if ! git switch --detach "$KARPENTER_VERSION" 2>/dev/null; then
    git checkout "$KARPENTER_VERSION"
  fi

  # Prometheus is installed first (required by Karpenter).
  log_info "Installing Prometheus..."
  ./hack/install-prometheus.sh

  log_info "Building and installing Karpenter (this may take a few minutes)..."
  export KWOK_REPO=kind.local KIND_CLUSTER_NAME="${CLUSTER_NAME}"
  make apply-with-kind

  cd ..
  log_info "Karpenter installed successfully!"
}
# Step 6.5: Configure instance types for KWOK.
# If ./instance-types.json exists, publishes it as ConfigMap
# kube-system/karpenter-instance-types and patches the karpenter deployment
# so the "controller" container mounts it read-only and receives
# INSTANCE_TYPES_FILE_PATH pointing at the mounted file; then waits for the
# rollout. When the file is absent, logs a warning and returns 0.
configure_instance_types() {
  log_info "Configuring instance types for KWOK provider..."

  if [ ! -f "instance-types.json" ]; then
    log_warn "instance-types.json not found, using default instance types"
    return 0
  fi

  # Render the ConfigMap client-side and apply it, so re-runs update the
  # existing object instead of failing on "already exists".
  kubectl create configmap -n kube-system karpenter-instance-types \
    --from-file=instance-types.json \
    --dry-run=client -o yaml | kubectl apply -f -

  log_info "Patching Karpenter deployment to use custom instance types..."

  # Strategic merge patch adding the volume, volumeMount and env var.
  # The YAML nesting below is significant.
  kubectl patch deployment karpenter -n kube-system --type=strategic --patch '
spec:
  template:
    spec:
      volumes:
      - name: instance-types
        configMap:
          name: karpenter-instance-types
      containers:
      - name: controller
        volumeMounts:
        - name: instance-types
          mountPath: /etc/karpenter/instance-types
          readOnly: true
        env:
        - name: INSTANCE_TYPES_FILE_PATH
          value: /etc/karpenter/instance-types/instance-types.json
'

  log_info "Waiting for Karpenter to restart with new configuration..."
  kubectl rollout status deployment/karpenter -n kube-system --timeout=120s

  log_info "Instance types configured successfully!"
}
# Step 7: Configure Karpenter NodePool and NodeClass.
# Applies a NodePool named "default" (amd64 / linux / spot, 720h expiry,
# cpu limit 10000, consolidation after 10s) that references a KWOKNodeClass
# named "default", and the KWOKNodeClass itself.
configure_karpenter() {
  log_info "Configuring Karpenter NodePool and NodeClass..."

  # The manifest contains no shell expansions, so the delimiter is quoted.
  # The YAML nesting below is significant.
  kubectl apply -f - <<'EOF'
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: default
spec:
  template:
    spec:
      requirements:
        - key: kubernetes.io/arch
          operator: In
          values: ["amd64"]
        - key: kubernetes.io/os
          operator: In
          values: ["linux"]
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["spot"]
      nodeClassRef:
        name: default
        kind: KWOKNodeClass
        group: karpenter.kwok.sh
      expireAfter: 720h # 30 days
  limits:
    cpu: 10000
  disruption:
    consolidationPolicy: WhenEmptyOrUnderutilized
    consolidateAfter: 10s
---
apiVersion: karpenter.kwok.sh/v1alpha1
kind: KWOKNodeClass
metadata:
  name: default
EOF

  log_info "Karpenter configured!"
}
# Step 8: Install kgateway.
# Installs the kgateway-crds and kgateway charts (v2.1.1) into the
# kgateway-system namespace. Skipped only when a release named exactly
# "kgateway" is already listed; the presence of "kgateway-crds" alone does
# not count, so a run that failed after the CRDs step is completed on re-run.
install_kgateway() {
  local chart_version="v2.1.1"
  log_info "Installing kgateway ${chart_version}..."

  local releases
  releases=$(helm list -n kgateway-system --short 2>/dev/null) || releases=""
  if grep -Fxq -- kgateway <<<"$releases"; then
    log_warn "kgateway already installed. Skipping installation."
    return 0
  fi

  # Both installs use `helm upgrade -i`, which is safe to repeat.
  log_info "Installing kgateway CRDs..."
  helm upgrade -i --create-namespace --namespace kgateway-system \
    --version "${chart_version}" \
    kgateway-crds \
    oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds

  log_info "Installing kgateway..."
  helm upgrade -i --create-namespace --namespace kgateway-system \
    --version "${chart_version}" \
    kgateway \
    oci://cr.kgateway.dev/kgateway-dev/charts/kgateway

  log_info "kgateway installed successfully!"
}
# Step 9: Configure Gateway and HTTPRoutes for UIs
configure_gateway_routes() {
log_info "Configuring Gateway and HTTPRoutes for YuniKorn and Grafana..."
# Wait for kgateway to be ready
kubectl wait --for=condition=available --timeout=120s \
deployment/kgateway -n kgateway-system || log_warn "kgateway deployment may not be ready yet"
# Create GatewayParameters with tmp volume for Envoy
# Note: The tmp volume is required because the proxy pod uses readOnlyRootFilesystem
log_info "Creating GatewayParameters with tmp volume..."
cat <<EOF | kubectl apply -f -
apiVersion: gateway.kgateway.dev/v1alpha1
kind: GatewayParameters
metadata:
name: kgateway-params
namespace: kgateway-system
spec:
kube:
podTemplate:
extraVolumes:
- name: tmp
emptyDir: {}
envoyContainer:
extraVolumeMounts:
- name: tmp
mountPath: /tmp
---
EOF
log_info "Creating Gateway and HTTPRoutes..."
cat <<EOF | kubectl apply -f -
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
name: ui-gateway
namespace: kgateway-system
spec:
gatewayClassName: kgateway
infrastructure:
parametersRef:
group: gateway.kgateway.dev
kind: GatewayParameters
name: kgateway-params
listeners:
- name: http
protocol: HTTP
port: 80
allowedRoutes:
namespaces:
from: All
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: yunikorn-route
namespace: yunikorn
spec:
parentRefs:
- name: ui-gateway
namespace: kgateway-system
hostnames:
- "yunikorn.localhost"
rules:
- matches:
- path:
type: PathPrefix
value: /
backendRefs:
- name: yunikorn-service
port: 9889
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: grafana-route
namespace: monitoring
spec:
parentRefs:
- name: ui-gateway
namespace: kgateway-system
hostnames:
- "grafana.localhost"
rules:
- matches:
- path:
type: PathPrefix
value: /
backendRefs:
- name: prometheus-grafana
port: 80
EOF
log_info "Gateway routes configured!"
}
# Step 10: Taint control-plane node.
# Applies the CriticalAddonsOnly:NoSchedule taint to node
# "${CLUSTER_NAME}-control-plane". The step is best-effort: a failure is
# reported as a warning and the function still returns 0, but success is
# only announced when the taint command actually succeeded.
taint_control_plane() {
  log_info "Tainting control-plane node..."

  if kubectl taint nodes "${CLUSTER_NAME}-control-plane" \
    CriticalAddonsOnly:NoSchedule \
    --overwrite=true; then
    log_info "Control-plane tainted!"
  else
    log_warn "Taint may already exist"
  fi
}
# Step 11: Import Grafana dashboard.
# Best-effort upload of dashboards/cluster-overview.json to Grafana through a
# temporary port-forward on localhost:3000. Every failure along the way
# (Grafana not ready, password or pod lookup failing, dashboard file missing,
# upload rejected) is logged as a warning and the function returns 0, so this
# optional step cannot abort the rest of the setup.
import_grafana_dashboard() {
  log_info "Importing Grafana dashboard..."

  local dashboard_file="dashboards/cluster-overview.json"
  local grafana_password grafana_pod pf_pid

  log_info "Waiting for Grafana to be ready..."
  kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=grafana -n monitoring --timeout=180s || {
    log_warn "Grafana pod may not be ready yet, continuing anyway..."
  }

  # Look up the admin password. Running the lookup as an `if` condition keeps
  # `set -e` from terminating the script when the secret is not there.
  if ! grafana_password=$(kubectl get secret prometheus-grafana -n monitoring -o jsonpath="{.data.admin-password}" | base64 -d) \
    || [ -z "$grafana_password" ]; then
    log_warn "Could not read Grafana admin password, skipping dashboard import"
    return 0
  fi

  # The jsonpath lookup can fail when no pod matches; treat that the same as
  # an empty result.
  grafana_pod=$(kubectl get pods -n monitoring -l app.kubernetes.io/name=grafana -o jsonpath='{.items[0].metadata.name}') || grafana_pod=""
  if [ -z "$grafana_pod" ]; then
    log_warn "Could not find Grafana pod, skipping dashboard import"
    return 0
  fi

  # Create dashboards directory in the repository if it doesn't exist
  mkdir -p dashboards

  if [ ! -f "$dashboard_file" ]; then
    log_warn "Dashboard file not found at dashboards/cluster-overview.json, skipping import"
    return 0
  fi

  # Port-forward to Grafana in the background
  kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80 > /dev/null 2>&1 &
  pf_pid=$!

  # Give the port-forward time to start listening.
  sleep 5

  # Credentials are passed with -u rather than embedded in the URL, so
  # characters such as '@', '/' or ':' in the password cannot corrupt the URL.
  log_info "Uploading dashboard to Grafana..."
  if curl -f -s -X POST \
    -u "admin:${grafana_password}" \
    -H "Content-Type: application/json" \
    -d "@${dashboard_file}" \
    http://localhost:3000/api/dashboards/import \
    > /dev/null 2>&1; then
    log_info "Dashboard import complete!"
  else
    log_warn "Failed to import dashboard, you can import it manually later"
  fi

  # Stop the port-forward and reap it; errors are ignored because the
  # process may already have exited.
  kill "$pf_pid" 2>/dev/null || true
  wait "$pf_pid" 2>/dev/null || true
}
# Main execution.
# Runs every setup step in order. The Gateway API / kgateway steps run only
# when INSTALL_GATEWAY is "true". Afterwards prints a summary with the
# cluster context, useful commands and suggested next steps; the UI hints
# differ depending on whether the gateway was installed.
main() {
  log_info "Starting Kind + KWOK + Karpenter + YuniKorn setup..."
  echo ""

  # Core steps, always executed, in this exact order.
  local step
  for step in \
    check_prerequisites \
    create_kind_cluster \
    label_worker_node \
    install_yunikorn \
    install_kwok \
    install_karpenter \
    configure_instance_types \
    configure_karpenter; do
    "$step"
  done

  local gateway_mode=false
  if [[ "$INSTALL_GATEWAY" == "true" ]]; then
    gateway_mode=true
    install_gateway_api
    install_kgateway
    configure_gateway_routes
  fi

  taint_control_plane
  import_grafana_dashboard

  # Pick the UI-access hints and the next-step list for the chosen mode.
  local password_hint=" (Default login: admin / Get password: kubectl get secret prometheus-grafana -n monitoring -o jsonpath='{.data.admin-password}' | base64 -d)"
  local -a access_lines next_lines
  if [[ "$gateway_mode" == "true" ]]; then
    access_lines=(
      " - Access UIs: Run 'kubectl port-forward -n kgateway-system svc/ui-gateway 8080:80'"
      " Then open:"
      " * YuniKorn UI: http://yunikorn.localhost:8080"
      " * Grafana UI: http://grafana.localhost:8080"
      "$password_hint"
    )
    next_lines=(
      " 1. Start the Gateway port-forward in a separate terminal:"
      " kubectl port-forward -n kgateway-system svc/ui-gateway 8080:80"
      " 2. Try deploying an example workload: kubectl apply -f examples/test-deployment.yaml"
      " 3. Watch Karpenter create nodes: kubectl get nodes -w"
      " 4. Check YuniKorn UI: http://yunikorn.localhost:8080"
      " 5. Check Grafana dashboard: http://grafana.localhost:8080"
      " Dashboard: 'Cluster Resource Overview' (auto-imported)"
    )
  else
    access_lines=(
      " - Access YuniKorn UI: kubectl port-forward -n yunikorn svc/yunikorn-service 9889:9889"
      " Then open: http://localhost:9889"
      " - Access Grafana UI: kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80"
      " Then open: http://localhost:3000"
      "$password_hint"
    )
    next_lines=(
      " 1. Start port-forwards in separate terminals:"
      " kubectl port-forward -n yunikorn svc/yunikorn-service 9889:9889"
      " kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80"
      " 2. Try deploying an example workload: kubectl apply -f examples/test-deployment.yaml"
      " 3. Watch Karpenter create nodes: kubectl get nodes -w"
      " 4. Check YuniKorn UI: http://localhost:9889"
      " 5. Check Grafana dashboard: http://localhost:3000"
      " Dashboard: 'Cluster Resource Overview' (auto-imported)"
    )
  fi

  local line
  echo ""
  log_info "=========================================="
  log_info "Setup complete! 🎉"
  log_info "=========================================="
  echo ""
  log_info "Cluster context: kind-${CLUSTER_NAME}"
  log_info ""
  log_info "Useful commands:"
  for line in "${access_lines[@]}"; do
    log_info "$line"
  done
  log_info " - View nodes: kubectl get nodes"
  log_info " - View Karpenter logs: kubectl logs -n kube-system -l app.kubernetes.io/name=karpenter"
  log_info " - View YuniKorn logs: kubectl logs -n yunikorn -l app=yunikorn"
  echo ""
  log_info "Next steps:"
  for line in "${next_lines[@]}"; do
    log_info "$line"
  done
  echo ""
}
# Script entry point: run main, forwarding all command-line arguments.
main "$@"