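# NOTE(review): the lines below look like GitHub web-UI residue captured with
# the workflow; they are kept as comments so they cannot be parsed as YAML.
# Skip to content

# Story/epic 005 us 021 boost activation metrics #138

# Story/epic 005 us 021 boost activation metrics

# Story/epic 005 us 021 boost activation metrics #138

# Workflow file for this run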

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# End-to-end pipeline: builds a Kind cluster with a local registry, runs
# `tilt ci` against it, and then inspects the deployed controller.
name: Tilt CI - End-to-End Deployment

# Runs on pushes to main/development, on pull requests whose base branch
# matches the list below, and on manual dispatch.
on:
  push:
    branches: [main, development]
  pull_request:
    branches: [main, development, "story/**"]
  workflow_dispatch:

# The job only needs read access to the repository contents.
permissions:
  contents: read

jobs:
  tilt-ci:
    runs-on: ubuntu-latest
    env:
      # String flags that mark this as a CI / non-interactive run for scripts.
      CI: "true"
      NON_INTERACTIVE: "true"
    steps:
# Fetch the repository into the runner workspace.
- name: Checkout code
  uses: actions/checkout@v4
# Install the Go toolchain. setup-go enables its own module/build cache by
# default; it is switched off here because the explicit cache step below
# restores the same directories, and restoring them twice causes extraction
# conflicts on the already-populated module cache.
- name: Setup Go
  uses: actions/setup-go@v5
  with:
    go-version: '1.23'
    check-latest: true
    cache: false
# Persist the Go build cache and module cache between runs. The key is derived
# from every go.sum in the repository; restore-keys allows a partial hit from
# an older cache on the same OS.
- name: Cache Go modules
  uses: actions/cache@v4
  with:
    path: |
      ~/.cache/go-build
      ~/go/pkg/mod
    key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
    restore-keys: |
      ${{ runner.os }}-go-
# Install the Helm CLI; "latest" tracks the newest published release.
- name: Setup Helm
  uses: azure/setup-helm@v4
  with:
    version: "latest"
# Verify the tools the job relies on and install the ones that may be missing
# (kubectl, kind). All downloads use `curl -f` so an HTTP error fails the step
# immediately instead of saving the error page as an "executable".
- name: Install dependencies
  run: |
    # Verify pre-installed tools (GitHub Actions runners have these)
    python3 --version
    curl --version

    # Install kubectl (if not already available)
    if ! command -v kubectl &> /dev/null; then
      # Resolve the stable version in its own assignment so that a failed
      # lookup aborts the step rather than producing a malformed URL.
      KUBECTL_VERSION="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
      curl -fLO "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
      chmod +x kubectl
      sudo mv kubectl /usr/local/bin/
    fi

    # Install kind (not available in apt repositories)
    curl -fLo ./kind https://kind.sigs.k8s.io/dl/v0.30.0/kind-linux-amd64
    chmod +x ./kind
    sudo mv ./kind /usr/local/bin/kind
    kind version

    # Verify Docker is available (pre-installed on GitHub Actions runners)
    docker --version
    kubectl version --client

    # Verify Helm is installed
    helm version
# Make sure a registry container named kind-registry is running and published
# on host port 5001.
- name: Start local registry for Kind (localhost:5001)
  run: |
    docker ps -a
    # A failed inspect (e.g. no such container) is treated as "false".
    registry_running="$(docker inspect -f '{{.State.Running}}' kind-registry 2>/dev/null || echo "false")"
    if [[ "${registry_running}" != "true" ]]; then
      # Drop any stopped leftover before starting a fresh container.
      docker rm -f kind-registry 2>/dev/null || true
      docker run -d \
        --restart=always \
        -p 5001:5001 \
        -e REGISTRY_HTTP_ADDR=0.0.0.0:5001 \
        --name kind-registry \
        registry:2
    fi
# Create the Kind cluster from kind-config.yaml and wait for it to settle.
- name: Create Kind cluster (with local registry mirror)
  run: |
    # Uses the kind binary on PATH (v0.30.0 per the install step's download
    # URL), not a version bundled with an action.
    kind create cluster \
      --name kube-startup-cpu-boost \
      --config kind-config.yaml \
      --wait 60s

    echo "Waiting for cluster to be fully ready..."
    # Hard requirement: the step fails if any node is not Ready in time.
    kubectl wait --for=condition=Ready nodes --all --timeout=120s

    # Best effort: a timeout on the component waits below is tolerated.
    echo "Waiting for CoreDNS to be ready..."
    kubectl wait --for=condition=ready pod -n kube-system -l k8s-app=kube-dns --timeout=120s || true
    echo "Waiting for local-path-provisioner to be ready..."
    kubectl wait --for=condition=ready pod -n local-path-storage -l app=local-path-provisioner --timeout=120s || true

    # Print the resulting state for the job log.
    echo "Verifying cluster components..."
    kubectl get nodes
    kubectl get pods -n kube-system
    kubectl get pods -n local-path-storage || true
    echo "✅ Cluster is ready"
# Attach the registry container to the "kind" Docker network so cluster nodes
# can reach it by container name. Errors (for example, already attached) are
# ignored.
- name: Connect registry to kind network
  run: |
    docker network connect kind kind-registry 2>/dev/null || true
    # Show the cluster state after the network change.
    kubectl get nodes
    kubectl get pods -A || true
# Print an overview of the cluster and check its critical components:
#   - fails the step unless EVERY node reports Ready=True;
#   - warns (without failing) when no CoreDNS pod is in the Running phase.
- name: Verify cluster
  run: |
    echo "=== Cluster Info ==="
    kubectl cluster-info
    echo ""
    echo "=== Nodes ==="
    kubectl get nodes
    echo ""
    echo "=== System Pods ==="
    kubectl get pods -n kube-system
    echo ""
    echo "=== All Namespaces ==="
    kubectl get pods -A || true

    # Verify critical components are running
    echo ""
    echo "=== Verifying Critical Components ==="
    # One Ready-condition status per node, space separated. Every entry must
    # be exactly "True"; an empty result means no node was reported at all.
    NODE_READY=$(kubectl get nodes -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}')
    if [ -z "$NODE_READY" ] || echo "$NODE_READY" | tr ' ' '\n' | grep -qvx "True"; then
      echo "❌ ERROR: Not all nodes are ready"
      exit 1
    fi

    # Check if CoreDNS is running (at least one pod). The pods are selected
    # by label and counted via `-o name`; grepping the table output for
    # "kube-dns" never matches because the pod names start with "coredns-".
    COREDNS_RUNNING=$(kubectl get pods -n kube-system -l k8s-app=kube-dns --field-selector=status.phase=Running -o name 2>/dev/null || true)
    if [ -z "$COREDNS_RUNNING" ]; then
      echo "⚠️ Warning: CoreDNS may not be fully ready"
    else
      echo "✅ CoreDNS is running"
    fi
    echo "✅ Cluster verification complete"
# Create the controller namespace and the demo-app namespace.
- name: Create namespaces
  run: |
    # Rendering the manifest client-side and piping it through `kubectl apply`
    # keeps the operation idempotent when a namespace already exists.
    for ns in kube-startup-cpu-boost-system demo; do
      kubectl create namespace "${ns}" --dry-run=client -o yaml | kubectl apply -f -
      echo "✅ Namespace '${ns}' created"
    done
# Publish the local-registry-hosting ConfigMap in kube-public so tooling such
# as Tilt can discover the registry. The original note states that the
# Tiltfile sets default_registry() explicitly, so this is kept for
# compatibility rather than relied on for auto-detection.
- name: Create local registry ConfigMap for Tilt
  run: |
    # Ensure the target namespace exists (idempotent).
    kubectl create namespace kube-public --dry-run=client -o yaml | kubectl apply -f -

    # Feed the manifest to kubectl on stdin.
    cat <<EOF | kubectl apply -f -
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: local-registry-hosting
      namespace: kube-public
    data:
      localRegistryHosting.v1: |
        host: "localhost:5001"
        hostFromClusterNetwork: "kind-registry:5001"
        help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
    EOF
# Install Tilt with the upstream installer script and print its version.
- name: Install Tilt
  run: |
    TILT_INSTALLER_URL="https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh"
    curl -fsSL "${TILT_INSTALLER_URL}" | bash
    tilt version
# Provide PyYAML, which the Tiltfile needs for YAML processing.
- name: Install Python dependencies
  run: |
    # The distro package is used rather than pip, which would fail with an
    # externally-managed-environment error.
    sudo apt-get update -qq
    sudo apt-get install -y python3-yaml

    # Smoke test: the import must succeed.
    python3 -c "import yaml; print('✅ PyYAML installed successfully')"
# Install a JDK and Maven from the distro repositories and print their versions.
- name: Install Java and Maven
  run: |
    sudo apt-get update -qq
    sudo apt-get install -y default-jdk maven

    # Record the installed versions in the job log.
    java -version
    mvn --version
# Persist the local Maven repository between runs, keyed on the demo app's
# pom.xml; restore-keys allows falling back to an older cache on the same OS.
- name: Cache Maven dependencies
  uses: actions/cache@v4
  with:
    key: ${{ runner.os }}-maven-${{ hashFiles('demo-app/pom.xml') }}
    restore-keys: |
      ${{ runner.os }}-maven-
    path: ~/.m2/repository
# Download the Go module dependencies up front.
- name: Get dependencies
  run: |
    go mod download
# Run the `manifests` make target, then list config/crd/bases/ so the job log
# shows what was produced.
- name: Generate CRDs
  run: |
    make manifests
    ls -la config/crd/bases/
# Apply the CRD manifests and block until the API server reports the
# StartupCPUBoost CRD as Established (fails after 60s).
- name: Install CRDs
  run: |
    kubectl apply -f config/crd/bases/
    kubectl wait crd startupcpuboosts.autoscaling.x-k8s.io \
      --for=condition=established \
      --timeout=60s
# Run `tilt ci`: build, push, deploy and wait for resources to become healthy.
# The step is capped at 18 minutes, slightly above Tilt's own 15m timeout, so
# the pre-flight checks have headroom.
- name: Run Tilt CI
  timeout-minutes: 18
  env:
    TILT_HOST: "0.0.0.0"
    CI: "true"
  run: |
    echo "Starting Tilt CI - will build, deploy, and wait for services to be healthy"
    echo "Tilt will:"
    echo " 1. Build Go binary for detected architecture"
    echo " 2. Build Docker image"
    echo " 3. Push image to localhost:5001 registry"
    echo " 4. Deploy via Helm chart"
    echo " 5. Wait for all resources to be ready"
    echo ""
    echo "Note: Optimized for slower CI environments with extended timeouts"

    # Pre-flight checks
    echo "=== Pre-Flight Checks ==="
    echo "Current directory: $(pwd)"
    echo "Go version: $(go version)"
    echo "Docker version: $(docker --version)"
    echo "Kind cluster architecture: $(python3 scripts/detect_kind_architecture.py || echo 'detection failed')"

    # Verify registry is accessible before starting Tilt.
    # -f turns HTTP error responses (4xx/5xx) into a non-zero exit so a
    # misbehaving registry is caught here; -S still prints curl's error.
    echo "Verifying registry is accessible..."
    if ! curl -fsS http://127.0.0.1:5001/v2/; then
      echo "ERROR: Registry not accessible"
      exit 1
    fi

    # Wait a bit for cluster to be fully ready before starting Tilt
    # This helps ensure all cluster components are ready
    echo "Waiting for cluster to be fully ready..."
    sleep 10

    # Verify CRDs are established before Tilt starts (best effort)
    echo "Verifying CRDs are established..."
    kubectl wait --for=condition=established crd startupcpuboosts.autoscaling.x-k8s.io --timeout=60s || true

    # Tilt CI runs in non-interactive mode and waits for resources to be ready
    # Tilt will build the image, push it to the registry, and deploy it
    # Increased timeout to 15m to account for slower CI environments:
    # - Image builds can be slower
    # - Image pulls from registry can be slower
    # - Container startup takes longer
    # - Webhook certificate generation needs time
    # - Leader election may take longer
    # - Network latency in CI is higher
    echo "=== Starting Tilt CI ==="
    tilt ci --timeout 15m
# Diagnostic dump of the controller namespace. Runs even when earlier steps
# failed, and every query is best effort.
- name: Check deployment status
  if: always()
  run: |
    # Nothing to inspect when the API server cannot be reached.
    if ! kubectl cluster-info &>/dev/null; then
      echo "⚠️ Kubernetes cluster not available (workflow may have failed before cluster creation)"
      exit 0
    fi

    NS=kube-startup-cpu-boost-system

    echo "=== Kubernetes Resources ==="
    kubectl get all -n "${NS}" || echo "⚠️ Namespace may not exist"
    echo ""

    echo "=== Controller Manager Logs ==="
    kubectl logs -n "${NS}" -l control-plane=controller-manager --tail=100 || echo "⚠️ Controller manager pod may not exist"
    echo ""

    echo "=== RBAC Resources ==="
    kubectl get rolebinding,clusterrolebinding -n "${NS}" || echo "⚠️ RBAC resources may not exist"
    echo ""

    echo "=== Leader Election Lease ==="
    kubectl get leases -n "${NS}" || echo "⚠️ Leader election lease may not exist"
# Confirm the controller pod is Ready, scan its recent logs for RBAC-style
# errors, and report whether a leader-election lease exists.
- name: Verify controller functionality
  run: |
    NS=kube-startup-cpu-boost-system
    SELECTOR=control-plane=controller-manager

    echo "Waiting for controller to be ready..."
    kubectl wait --for=condition=ready pod -n "${NS}" -l "${SELECTOR}" --timeout=120s

    # Give startup work (cert rotation, webhook registration, etc.) a moment.
    echo "Waiting for controller startup to complete..."
    sleep 5

    echo "Checking for RBAC errors in logs..."
    # Lines mentioning cert-rotation are filtered out: per the original note,
    # those errors are transient during startup and retry automatically.
    controller_logs="$(kubectl logs -n "${NS}" -l "${SELECTOR}" --tail=100)"
    rbac_pattern='(forbidden|error.*rbac|error.*lease)'
    if echo "${controller_logs}" | grep -iE "${rbac_pattern}" | grep -v "cert-rotation"; then
      echo "ERROR: RBAC errors detected in logs"
      echo "${controller_logs}"
      exit 1
    fi
    echo "✅ No RBAC errors detected (ignoring transient cert-rotation errors)"

    echo "Verifying leader election..."
    lease_names="$(kubectl get leases -n "${NS}" -o name)"
    if [ -n "${lease_names}" ]; then
      echo "✅ Leader election lease exists"
      kubectl get leases -n "${NS}"
    else
      # A missing lease is only reported, it does not fail the step.
      echo "WARNING: No leader election lease found"
    fi
    echo "✅ Controller is running successfully"
# Fail unless the API server lists a resource matching "startupcpuboost".
- name: Test CRD recognition
  run: |
    echo "Testing CRD recognition..."
    if ! kubectl api-resources | grep startupcpuboost; then
      echo "ERROR: CRD not recognized"
      exit 1
    fi
    echo "✅ CRD is recognized by Kubernetes API"
# Apply a sample StartupCPUBoost, print its state after a short pause, then
# delete it again. The status read is best effort and is not asserted on.
- name: Test controller reconciliation
  run: |
    BOOST_NAME=test-boost
    BOOST_NS=default

    echo "Creating test StartupCPUBoost resource..."
    kubectl apply -f config/tilt/test-startupcpuboost.yaml
    sleep 5

    echo "Checking resource status..."
    kubectl get startupcpuboost "${BOOST_NAME}" -n "${BOOST_NS}" -o yaml || true

    echo "Cleaning up test resource..."
    kubectl delete startupcpuboost "${BOOST_NAME}" -n "${BOOST_NS}" --ignore-not-found=true
# Publish the ~/.tilt-dev/ directory as an artifact, even on failed runs.
- name: Upload Tilt logs
  if: always()
  uses: actions/upload-artifact@v5
  with:
    name: tilt-ci-logs
    path: |
      ~/.tilt-dev/
    retention-days: 3
# Capture the controller logs to a file and then publish that file.
# Steps run in order, so the file must be written BEFORE the upload step;
# otherwise the upload finds nothing and the artifact is never created.
- name: Save controller logs
  if: always()
  run: |
    # --tail=-1 requests the full log: with a label selector kubectl would
    # otherwise return only the last 10 lines per pod. Failures (no cluster,
    # no pod) are tolerated so the remaining always() steps still run.
    kubectl logs -n kube-startup-cpu-boost-system -l control-plane=controller-manager --tail=-1 > controller-logs.txt 2>&1 || true
- name: Upload controller logs
  if: always()
  uses: actions/upload-artifact@v5
  # A failed upload must not change the job result.
  continue-on-error: true
  with:
    name: controller-logs
    retention-days: 3
    path: |
      controller-logs.txt
# Best-effort teardown of everything the job created. Runs on success and
# failure alike, and no individual failure aborts the step.
- name: Cleanup
  if: always()
  run: |
    echo "Cleaning up resources..."

    # Tear down Tilt-managed resources, but only when the CLI is installed.
    if command -v tilt &>/dev/null; then
      tilt down || true
    fi

    # Delete the Kind cluster, but only when the CLI is installed.
    if command -v kind &>/dev/null; then
      kind delete cluster --name kube-startup-cpu-boost || true
    fi

    # Force-remove the local registry container.
    docker rm -f kind-registry || true

    echo "✅ Cleanup complete"