Story/epic 005 us 021 boost activation metrics #138
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# End-to-end CI: provisions a Kind cluster with a local image registry, lets
# Tilt build, push and deploy the controller, then verifies the deployment.
name: Tilt CI - End-to-End Deployment

# Runs on pushes to main/development, on pull requests targeting main,
# development or any story/** branch, and on manual dispatch.
on:
  push:
    branches:
      - main
      - development
  pull_request:
    branches:
      - main
      - development
      - 'story/**'
  workflow_dispatch:

# The job only needs to read repository contents.
permissions:
  contents: read

jobs:
  tilt-ci:
    runs-on: ubuntu-latest
    env:
      # CI environment flags for scripts (inherited by every step below)
      CI: "true"
      NON_INTERACTIVE: "true"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.23'
          check-latest: true

      - name: Cache Go modules
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/go-build
            ~/go/pkg/mod
          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-go-

      - name: Setup Helm
        uses: azure/setup-helm@v4
        with:
          version: 'latest'

      - name: Install dependencies
        run: |
          # Verify pre-installed tools (GitHub Actions runners have these)
          python3 --version
          curl --version

          # Install kubectl (if not already available).
          # -f makes curl fail on HTTP errors instead of saving an error page
          # that would then be installed as the "binary".
          if ! command -v kubectl &> /dev/null; then
            KUBECTL_VERSION="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
            curl -fsSLO "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
            chmod +x kubectl
            sudo mv kubectl /usr/local/bin/
          fi

          # Install kind (not available in apt repositories)
          curl -fsSLo ./kind https://kind.sigs.k8s.io/dl/v0.30.0/kind-linux-amd64
          chmod +x ./kind
          sudo mv ./kind /usr/local/bin/kind
          kind version

          # Verify Docker is available (pre-installed on GitHub Actions runners)
          docker --version
          kubectl version --client

          # Verify Helm is installed
          helm version

      - name: Start local registry for Kind (localhost:5001)
        run: |
          docker ps -a
          # Reuse a running registry container; otherwise recreate it.
          RUNNING=$(docker inspect -f '{{.State.Running}}' kind-registry 2>/dev/null || echo "false")
          if [ "$RUNNING" != "true" ]; then
            docker rm -f kind-registry 2>/dev/null || true
            docker run -d --restart=always -p 5001:5001 -e REGISTRY_HTTP_ADDR=0.0.0.0:5001 --name kind-registry registry:2
          fi

      - name: Create Kind cluster (with local registry mirror)
        run: |
          # Use the Kind version we installed (v0.30.0) instead of helm/kind-action's bundled version
          kind create cluster --name kube-startup-cpu-boost --config kind-config.yaml --wait 60s

          echo "Waiting for cluster to be fully ready..."
          # Wait for nodes to be ready
          kubectl wait --for=condition=Ready nodes --all --timeout=120s

          # Wait for core system components (best effort: failures are tolerated)
          echo "Waiting for CoreDNS to be ready..."
          kubectl wait --for=condition=ready pod -n kube-system -l k8s-app=kube-dns --timeout=120s || true
          echo "Waiting for local-path-provisioner to be ready..."
          kubectl wait --for=condition=ready pod -n local-path-storage -l app=local-path-provisioner --timeout=120s || true

          echo "Verifying cluster components..."
          kubectl get nodes
          kubectl get pods -n kube-system
          kubectl get pods -n local-path-storage || true
          echo "✅ Cluster is ready"

      - name: Connect registry to kind network
        run: |
          # Ignore the error raised when the registry is already connected.
          docker network connect kind kind-registry 2>/dev/null || true
          kubectl get nodes
          kubectl get pods -A || true

      - name: Verify cluster
        run: |
          echo "=== Cluster Info ==="
          kubectl cluster-info
          echo ""
          echo "=== Nodes ==="
          kubectl get nodes
          echo ""
          echo "=== System Pods ==="
          kubectl get pods -n kube-system
          echo ""
          echo "=== All Namespaces ==="
          kubectl get pods -A || true

          # Verify critical components are running
          echo ""
          echo "=== Verifying Critical Components ==="
          # One Ready status per node, space separated. Fail when the list is
          # empty or when ANY entry is not "True" (a plain `grep -q True`
          # would pass as soon as a single node is Ready).
          NODE_STATUS=$(kubectl get nodes -o jsonpath='{.items[*].status.conditions[?(@.type=="Ready")].status}')
          if [ -z "$NODE_STATUS" ] || echo "$NODE_STATUS" | tr ' ' '\n' | grep -qv '^True$'; then
            echo "❌ ERROR: Not all nodes are ready"
            exit 1
          fi

          # Check if CoreDNS is running (at least one pod); warning only
          if ! kubectl get pods -n kube-system -l k8s-app=kube-dns --field-selector=status.phase=Running 2>/dev/null | grep -q kube-dns; then
            echo "⚠️ Warning: CoreDNS may not be fully ready"
          else
            echo "✅ CoreDNS is running"
          fi
          echo "✅ Cluster verification complete"

      - name: Create namespaces
        run: |
          # Create namespace for controller (idempotent)
          kubectl create namespace kube-startup-cpu-boost-system --dry-run=client -o yaml | kubectl apply -f -
          echo "✅ Namespace 'kube-startup-cpu-boost-system' created"

          # Create demo namespace for demo app (idempotent)
          kubectl create namespace demo --dry-run=client -o yaml | kubectl apply -f -
          echo "✅ Namespace 'demo' created"

      - name: Create local registry ConfigMap for Tilt
        run: |
          # Create ConfigMap so Tilt can detect the local registry
          # Note: Tiltfile explicitly sets default_registry() to avoid relying on auto-detection
          # This ConfigMap is kept for compatibility and to help Tilt understand the registry setup
          kubectl create namespace kube-public --dry-run=client -o yaml | kubectl apply -f -
          kubectl apply -f - <<EOF
          apiVersion: v1
          kind: ConfigMap
          metadata:
            name: local-registry-hosting
            namespace: kube-public
          data:
            localRegistryHosting.v1: |
              host: "localhost:5001"
              hostFromClusterNetwork: "kind-registry:5001"
              help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
          EOF

      - name: Install Tilt
        run: |
          curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash
          tilt version

      - name: Install Python dependencies
        run: |
          # Install PyYAML required by Tiltfile for YAML processing
          # Use apt instead of pip to avoid externally-managed-environment error
          sudo apt-get update -qq
          sudo apt-get install -y python3-yaml
          # Verify installation
          python3 -c "import yaml; print('✅ PyYAML installed successfully')"

      - name: Install Java and Maven
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y default-jdk maven
          java -version
          mvn --version

      - name: Cache Maven dependencies
        uses: actions/cache@v4
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('demo-app/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-

      - name: Get dependencies
        run: go mod download

      - name: Generate CRDs
        run: |
          make manifests
          ls -la config/crd/bases/

      - name: Install CRDs
        run: |
          kubectl apply -f config/crd/bases/
          kubectl wait --for=condition=established crd startupcpuboosts.autoscaling.x-k8s.io --timeout=60s

      - name: Run Tilt CI
        # CI="true" is inherited from the job-level env.
        env:
          TILT_HOST: "0.0.0.0"
        run: |
          echo "Starting Tilt CI - will build, deploy, and wait for services to be healthy"
          echo "Tilt will:"
          echo " 1. Build Go binary for detected architecture"
          echo " 2. Build Docker image"
          echo " 3. Push image to localhost:5001 registry"
          echo " 4. Deploy via Helm chart"
          echo " 5. Wait for all resources to be ready"
          echo ""
          echo "Note: Optimized for slower CI environments with extended timeouts"

          # Pre-flight checks
          echo "=== Pre-Flight Checks ==="
          echo "Current directory: $(pwd)"
          echo "Go version: $(go version)"
          echo "Docker version: $(docker --version)"
          echo "Kind cluster architecture: $(python3 scripts/detect_kind_architecture.py || echo 'detection failed')"

          # Verify registry is accessible before starting Tilt.
          # -f turns HTTP error responses into a non-zero exit so the guard
          # actually triggers; -S keeps the error message visible despite -s.
          echo "Verifying registry is accessible..."
          curl -fsS http://127.0.0.1:5001/v2/ || (echo "ERROR: Registry not accessible" && exit 1)

          # Wait a bit for cluster to be fully ready before starting Tilt
          # This helps ensure all cluster components are ready
          echo "Waiting for cluster to be fully ready..."
          sleep 10

          # Verify CRDs are established before Tilt starts (best effort)
          echo "Verifying CRDs are established..."
          kubectl wait --for=condition=established crd startupcpuboosts.autoscaling.x-k8s.io --timeout=60s || true

          # Tilt CI runs in non-interactive mode and waits for resources to be ready
          # Tilt will build the image, push it to the registry, and deploy it
          # Increased timeout to 15m to account for slower CI environments:
          # - Image builds can be slower
          # - Image pulls from registry can be slower
          # - Container startup takes longer
          # - Webhook certificate generation needs time
          # - Leader election may take longer
          # - Network latency in CI is higher
          echo "=== Starting Tilt CI ==="
          tilt ci --timeout 15m
        # Step limit sits slightly above Tilt's own 15m timeout.
        timeout-minutes: 18

      - name: Check deployment status
        if: always()
        run: |
          # Check if kubectl is configured (cluster exists)
          if ! kubectl cluster-info &>/dev/null; then
            echo "⚠️ Kubernetes cluster not available (workflow may have failed before cluster creation)"
            exit 0
          fi

          echo "=== Kubernetes Resources ==="
          kubectl get all -n kube-startup-cpu-boost-system || echo "⚠️ Namespace may not exist"
          echo ""
          echo "=== Controller Manager Logs ==="
          kubectl logs -n kube-startup-cpu-boost-system -l control-plane=controller-manager --tail=100 || echo "⚠️ Controller manager pod may not exist"
          echo ""
          echo "=== RBAC Resources ==="
          kubectl get rolebinding,clusterrolebinding -n kube-startup-cpu-boost-system || echo "⚠️ RBAC resources may not exist"
          echo ""
          echo "=== Leader Election Lease ==="
          kubectl get leases -n kube-startup-cpu-boost-system || echo "⚠️ Leader election lease may not exist"

      - name: Verify controller functionality
        run: |
          echo "Waiting for controller to be ready..."
          kubectl wait --for=condition=ready pod -n kube-startup-cpu-boost-system -l control-plane=controller-manager --timeout=120s

          # Wait a bit for startup to complete (cert rotation, webhook registration, etc.)
          echo "Waiting for controller startup to complete..."
          sleep 5

          echo "Checking for RBAC errors in logs..."
          # Check for actual RBAC errors, excluding transient cert-rotation errors
          # Cert-rotation errors during startup are normal and retry automatically
          LOGS=$(kubectl logs -n kube-startup-cpu-boost-system -l control-plane=controller-manager --tail=100)
          if echo "$LOGS" | grep -iE "(forbidden|error.*rbac|error.*lease)" | grep -v "cert-rotation"; then
            echo "ERROR: RBAC errors detected in logs"
            echo "$LOGS"
            exit 1
          fi
          echo "✅ No RBAC errors detected (ignoring transient cert-rotation errors)"

          echo "Verifying leader election..."
          # A missing lease only produces a warning, not a failure.
          LEASES=$(kubectl get leases -n kube-startup-cpu-boost-system -o name)
          if [ -z "$LEASES" ]; then
            echo "WARNING: No leader election lease found"
          else
            echo "✅ Leader election lease exists"
            kubectl get leases -n kube-startup-cpu-boost-system
          fi
          echo "✅ Controller is running successfully"

      - name: Test CRD recognition
        run: |
          echo "Testing CRD recognition..."
          kubectl api-resources | grep startupcpuboost || (echo "ERROR: CRD not recognized" && exit 1)
          echo "✅ CRD is recognized by Kubernetes API"

      - name: Test controller reconciliation
        run: |
          echo "Creating test StartupCPUBoost resource..."
          kubectl apply -f config/tilt/test-startupcpuboost.yaml
          sleep 5
          # NOTE(review): the resource is only printed; nothing here asserts
          # on its status, so this step fails only if the apply/delete fails.
          echo "Checking resource status..."
          kubectl get startupcpuboost test-boost -n default -o yaml || true
          echo "Cleaning up test resource..."
          kubectl delete startupcpuboost test-boost -n default --ignore-not-found=true

      - name: Upload Tilt logs
        if: always()
        uses: actions/upload-artifact@v5
        with:
          name: tilt-ci-logs
          retention-days: 3
          path: ~/.tilt-dev/

      # The log file must be written BEFORE the upload step runs, otherwise
      # there is nothing to upload.
      - name: Save controller logs
        if: always()
        run: |
          # --tail=-1: with a label selector kubectl defaults to the last 10
          # lines per pod; request the full log instead.
          kubectl logs -n kube-startup-cpu-boost-system -l control-plane=controller-manager --tail=-1 > controller-logs.txt 2>&1 || true

      - name: Upload controller logs
        if: always()
        uses: actions/upload-artifact@v5
        with:
          name: controller-logs
          retention-days: 3
          path: controller-logs.txt
        continue-on-error: true

      - name: Cleanup
        if: always()
        run: |
          echo "Cleaning up resources..."
          # Cleanup Tilt (if installed and running)
          if command -v tilt &>/dev/null; then
            tilt down || true
          fi

          # Cleanup Kind cluster
          if command -v kind &>/dev/null; then
            kind delete cluster --name kube-startup-cpu-boost || true
          fi

          # Remove the local registry container
          docker rm -f kind-registry || true
          echo "✅ Cleanup complete"