Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 215 additions & 0 deletions .github/actions/setup-e2e-cluster/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Composite action metadata. All inputs are mandatory and none declares a default.
name: Setup E2E Cluster
description: >-
  Provision a k3d cluster with cert-manager, Prometheus, and the operator
  installed.

inputs:
  # Name given to `k3d cluster create` and used as the target of every
  # `k3d image import`.
  cluster-name:
    required: true
    description: 'k3d cluster name'
  # Exported to later steps as KUBECONFIG and written by `k3d kubeconfig write`.
  kubeconfig-path:
    required: true
    description: 'Path to write the kubeconfig file'
  # Forwarded to actions/setup-go.
  go-version:
    required: true
    description: 'Go version to install'
  # Used as the git ref in the URL of the k3d install script.
  k3d-version:
    required: true
    description: 'k3d version'
  # Passed to `k3d cluster create --image`.
  k3s-image:
    required: true
    description: 'k3s container image'
  # Used as the tag of the three cert-manager images and in the release
  # manifest download URL.
  cert-manager-version:
    required: true
    description: 'cert-manager release tag'
  # Pulled, saved to /tmp/prometheus.tar and imported into the cluster.
  prometheus-image:
    required: true
    description: 'Prometheus container image'
  # Passed to `helm install --version` for the prometheus-community chart.
  prometheus-chart-version:
    required: true
    description: 'Prometheus Helm chart version'

# Composite action: the entries under `steps` are declared in the order listed.
runs:
using: composite
steps:
# Local action referenced by a repository-relative path. Presumably it prepares
# a clean DOCKER_CONFIG for the Docker steps that follow — confirm against that
# action's definition.
- uses: ./.github/actions/setup-clean-docker-config

# Sanity check: log the Docker client version and fail early when the buildx
# plugin cannot be executed.
- name: Verify Docker and buildx
  shell: bash -Eeuo pipefail {0}
  run: |
    printf 'Docker version: %s\n' "$(docker version --format '{{.Client.Version}}')"

    # Guard clause: any non-zero exit from `docker buildx version` aborts the step.
    docker buildx version >/dev/null 2>&1 || {
      echo "::error::Docker buildx is not available. Check that cli-plugins are accessible from DOCKER_CONFIG."
      exit 1
    }

    printf 'buildx: %s\n' "$(docker buildx version)"

# Go toolchain for the later `go install` / `go env` calls. The action is pinned
# by commit SHA; the trailing comment records the matching release tag.
- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
  with:
    # Turn on the action's built-in caching.
    cache: true
    go-version: ${{ inputs.go-version }}

# Caches the image tarballs that the pre-pull step writes to /tmp, so repeated
# runs can skip `docker pull` / `docker save` for unchanged images.
- name: Cache E2E container images
  uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
  with:
    path: |
      /tmp/cert-manager-controller.tar
      /tmp/cert-manager-webhook.tar
      /tmp/cert-manager-cainjector.tar
      /tmp/prometheus.tar
    # The key has to change whenever the contents of a tarball would change.
    # The cert-manager tarballs depend on inputs.cert-manager-version, and
    # /tmp/prometheus.tar is produced from inputs.prometheus-image (not from the
    # Helm chart version), so the image reference is what belongs in the key.
    # Keying on the chart version would restore a stale tarball after an image
    # bump that leaves the chart untouched.
    key: e2e-images-${{ runner.os }}-cm${{ inputs.cert-manager-version }}-prom-${{ inputs.prometheus-image }}

# Installs the k3d CLI into $HOME/.local/bin (no sudo) unless a k3d binary is
# already reachable.
- name: Install k3d
  shell: bash -Eeuo pipefail -x {0}
  env:
    # Passed through the environment so the value is never spliced into the
    # script text.
    K3D_VERSION: ${{ inputs.k3d-version }}
  run: |
    install_dir="$HOME/.local/bin"
    mkdir -p "$install_dir"
    # Put the install directory on PATH before probing, so a binary left there
    # by an earlier run is detected and the installer can find what it installs.
    export PATH="$install_dir:$PATH"

    if command -v k3d &>/dev/null; then
      echo "k3d already installed: $(k3d version)"
    else
      # The ref in the URL only selects which revision of install.sh is fetched.
      # The script installs the latest release unless TAG is set, so pin TAG
      # whenever the input looks like a release tag (e.g. v5.8.3). Other refs
      # (such as a branch name) keep the previous "latest" behaviour.
      if [[ "$K3D_VERSION" == v[0-9]* ]]; then
        export TAG="$K3D_VERSION"
      fi
      # -f: fail on HTTP errors instead of piping an error page into bash.
      curl -fsSL "https://raw.githubusercontent.com/k3d-io/k3d/${K3D_VERSION}/install.sh" \
        | K3D_INSTALL_DIR="$install_dir" USE_SUDO=false bash
    fi

# Appends KUBECONFIG to the file named by GITHUB_ENV so that later steps (for
# example the kubectl and helm calls) read the kubeconfig at the requested path.
- name: Prepare isolated kubeconfig
  shell: bash -Eeuo pipefail -x {0}
  run: |
    echo "KUBECONFIG=${{ inputs.kubeconfig-path }}" >> "$GITHUB_ENV"

# Best-effort housekeeping before a new cluster is created: delete k3d clusters
# whose server container has been running for more than an hour, then prune
# Docker build cache and unused Docker resources.
- name: Cleanup stale k3d clusters and Docker resources
  shell: bash -Eeuo pipefail -x {0}
  run: |
    export PATH="$HOME/.local/bin:$PATH"

    for name in $(k3d cluster list -o json 2>/dev/null | jq -r '.[].name // empty'); do
      # Clusters whose server-0 container cannot be inspected are skipped.
      if ! started_at=$(docker inspect --format '{{.State.StartedAt}}' "k3d-${name}-server-0" 2>/dev/null); then
        continue
      fi
      # So are clusters whose start timestamp cannot be parsed by `date -d`.
      if ! started_ts=$(date -d "$started_at" +%s 2>/dev/null); then
        continue
      fi

      age=$(( $(date +%s) - started_ts ))
      # Anything started within the last 3600 seconds is left alone.
      (( age > 3600 )) || continue

      echo "Deleting stale k3d cluster '$name' (age: ${age}s)"
      k3d cluster delete "$name" || true
    done

    # Prune failures are tolerated; they must not fail the job.
    docker builder prune -f || true
    docker system prune -f || true

# Ensures a tarball exists under /tmp for each image that is later imported
# into the cluster. Tarballs already on disk are reused as-is.
- name: Pre-pull cert-manager and Prometheus images
  shell: bash -Eeuo pipefail -x {0}
  run: |
    # ensure_tarball IMAGE TARBALL
    # Pulls IMAGE (linux/amd64) and saves it to TARBALL unless TARBALL exists.
    ensure_tarball() {
      local ref="$1" archive="$2"
      if [[ -f "$archive" ]]; then
        echo "Cached: $archive"
        return 0
      fi
      docker pull --platform linux/amd64 "$ref"
      docker save "$ref" -o "$archive"
    }

    # The three cert-manager components share registry path and tag.
    for component in controller webhook cainjector; do
      ensure_tarball "quay.io/jetstack/cert-manager-${component}:${{ inputs.cert-manager-version }}" "/tmp/cert-manager-${component}.tar"
    done

    ensure_tarball "${{ inputs.prometheus-image }}" /tmp/prometheus.tar

# Creates the k3d cluster (up to three attempts) with traefik and servicelb
# disabled, then writes its kubeconfig to the requested path with mode 600.
# The default kubeconfig is neither updated nor has its context switched.
- name: Create k3d cluster
  shell: bash -Eeuo pipefail -x {0}
  run: |
    export PATH="$HOME/.local/bin:$PATH"

    max_attempts=3
    for attempt in $(seq 1 "$max_attempts"); do
      if k3d cluster create "${{ inputs.cluster-name }}" \
        --image "${{ inputs.k3s-image }}" \
        --kubeconfig-update-default=false \
        --kubeconfig-switch-context=false \
        --k3s-arg "--disable=traefik,servicelb@server:*" \
        --wait --timeout 120s; then
        break
      fi

      # Always remove whatever a failed attempt left behind, including the last
      # one, so no half-created cluster lingers on the runner.
      k3d cluster delete "${{ inputs.cluster-name }}" 2>/dev/null || true

      # On the final attempt fail immediately: announcing a retry and sleeping
      # 10s before giving up would be misleading and wasted time.
      if (( attempt == max_attempts )); then
        echo "::error::k3d cluster creation failed after 3 attempts"
        exit 1
      fi

      echo "::warning::k3d cluster create attempt $attempt failed, retrying in 10s..."
      sleep 10
    done

    k3d kubeconfig write "${{ inputs.cluster-name }}" --output "${{ inputs.kubeconfig-path }}"
    # Restrict the kubeconfig to the owner; it holds cluster credentials.
    chmod 600 "${{ inputs.kubeconfig-path }}"

# Blocks until every node reports the Ready condition, for at most 360 seconds.
- name: Wait for node Ready
  shell: bash -Eeuo pipefail -x {0}
  run: |
    kubectl wait nodes --all \
      --for=condition=Ready \
      --timeout=360s

# Imports the three cert-manager image tarballs from /tmp into the k3d cluster.
- name: Load cert-manager images into cluster
  shell: bash -Eeuo pipefail -x {0}
  run: |
    export PATH="$HOME/.local/bin:$PATH"

    tarballs=(
      /tmp/cert-manager-controller.tar
      /tmp/cert-manager-webhook.tar
      /tmp/cert-manager-cainjector.tar
    )
    k3d image import "${tarballs[@]}" -c "${{ inputs.cluster-name }}"

# Applies the cert-manager release manifest for the requested version, then
# waits (120s each, in sequence) for its three deployments to become Available.
- name: Install cert-manager
  shell: bash -Eeuo pipefail -x {0}
  run: |
    manifest_url="https://github.com/cert-manager/cert-manager/releases/download/${{ inputs.cert-manager-version }}/cert-manager.yaml"
    kubectl apply -f "$manifest_url"

    for deploy in cert-manager cert-manager-webhook cert-manager-cainjector; do
      kubectl wait --for=condition=Available "deployment/${deploy}" -n cert-manager --timeout=120s
    done

# Imports the Prometheus image tarball from /tmp into the k3d cluster.
- name: Load Prometheus image into cluster
  shell: bash -Eeuo pipefail -x {0}
  run: |
    export PATH="$HOME/.local/bin:$PATH"

    prometheus_tarball=/tmp/prometheus.tar
    k3d image import "$prometheus_tarball" -c "${{ inputs.cluster-name }}"

# Installs the prometheus-community/prometheus chart into the `monitoring`
# namespace with persistence, Alertmanager and Pushgateway disabled and a 15s
# scrape interval, waiting up to 5 minutes for the release to become ready.
- name: Install Prometheus
  shell: bash -Eeuo pipefail -x {0}
  env:
    PROMETHEUS_IMAGE: ${{ inputs.prometheus-image }}
    PROMETHEUS_CHART_VERSION: ${{ inputs.prometheus-chart-version }}
  run: |
    # Deploy the same image that was pre-pulled and imported into the cluster.
    # Hard-coding repository and tag here would let them drift from the
    # prometheus-image input, so the preloaded image would go unused and the
    # cluster would pull a different one from the network.
    prom_repo=quay.io/prometheus/prometheus
    prom_tag=v3.4.1

    # Only the last path segment is inspected, so a registry port such as
    # host:5000/... is not mistaken for a tag. Digest references (name@sha256:...)
    # and untagged references cannot be split into repository and tag, so they
    # keep the previous defaults.
    image_name="${PROMETHEUS_IMAGE##*/}"
    if [[ "$image_name" == *:* && "$image_name" != *@* ]]; then
      prom_repo="${PROMETHEUS_IMAGE%:*}"
      prom_tag="${PROMETHEUS_IMAGE##*:}"
    else
      echo "::warning::prometheus-image '${PROMETHEUS_IMAGE}' is not in repository:tag form; using ${prom_repo}:${prom_tag}"
    fi

    helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
    helm repo update
    # --set-string keeps an all-numeric tag from being coerced to a number.
    helm install prometheus prometheus-community/prometheus \
      --version "$PROMETHEUS_CHART_VERSION" \
      --namespace monitoring --create-namespace \
      --set-string server.image.repository="$prom_repo" \
      --set-string server.image.tag="$prom_tag" \
      --set server.persistentVolume.enabled=false \
      --set alertmanager.enabled=false \
      --set prometheus-pushgateway.enabled=false \
      --set server.global.scrape_interval=15s \
      --wait --timeout 5m

# Best-effort wait: polls the Prometheus HTTP API from inside the server pod
# until the container_cpu_usage_seconds_total query returns at least one
# series. The step never fails the job; it only logs a warning on timeout.
- name: Wait for cAdvisor metrics in Prometheus
  shell: bash -Eeuo pipefail -x {0}
  run: |
    echo "Waiting for Prometheus to scrape cAdvisor metrics..."
    PROM_POD=$(kubectl get pods -n monitoring -l app.kubernetes.io/name=prometheus,app.kubernetes.io/component=server -o name | head -1)

    # Without a pod every `kubectl exec` below would fail silently and the loop
    # would burn the whole timeout. Say so and move on instead.
    if [[ -z "$PROM_POD" ]]; then
      echo "WARNING: no Prometheus server pod found in namespace monitoring, skipping cAdvisor wait"
      exit 0
    fi

    max_polls=30
    for i in $(seq 1 "$max_polls"); do
      # Query errors are tolerated; an empty result simply means "not yet".
      result=$(kubectl exec -n monitoring "$PROM_POD" -- \
        wget -qO- 'http://localhost:9090/api/v1/query?query=container_cpu_usage_seconds_total' 2>/dev/null || true)

      # Match inside the shell rather than with `echo | grep -q`. Under pipefail
      # grep -q exits on the first match, a large response can then kill echo
      # with SIGPIPE, and the pipeline reports failure despite the match.
      if [[ "$result" == *'"result":[{'* ]]; then
        echo "cAdvisor metrics available after ${i}x5s"
        break
      fi

      if (( i == max_polls )); then
        echo "WARNING: cAdvisor metrics not found after 150s, proceeding anyway"
        # No point sleeping after the final poll.
        break
      fi
      sleep 5
    done

# Builds the operator image with ko into a local tarball (nothing is pushed)
# for the runner's Go architecture, then imports the tarball into the cluster.
- name: Build and load operator image
  shell: bash -Eeuo pipefail -x {0}
  run: |
    export PATH="$HOME/.local/bin:$PATH"
    go install github.com/google/ko@v0.18.0

    # VERSION, COMMIT and DATE are handed to ko through its environment;
    # presumably the project's ko configuration consumes them — confirm there.
    env \
      VERSION=e2e \
      COMMIT="$(git rev-parse --short HEAD)" \
      DATE="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      KO_DOCKER_REPO=attune \
      ko build ./cmd/manager/ \
        --bare \
        --tags=e2e \
        --platform="linux/$(go env GOARCH)" \
        --tarball=/tmp/attune-e2e.tar \
        --push=false

    k3d image import /tmp/attune-e2e.tar -c "${{ inputs.cluster-name }}"

# Installs the operator chart from ./charts/attune into `attune-system` using
# the locally imported attune:e2e image (pullPolicy Never), waiting up to 3
# minutes for the release to become ready.
- name: Install operator via Helm
  shell: bash -Eeuo pipefail -x {0}
  run: |
    # Chart value overrides, one key=value per entry, applied in this order.
    overrides=(
      image.repository=attune
      image.tag=e2e
      image.pullPolicy=Never
      webhooks.enabled=true
      metrics.enabled=true
      leaderElection.enabled=false
      maxConcurrentReconciles=4
      resources.limits.memory=512Mi
      resources.requests.memory=256Mi
    )

    set_flags=()
    for kv in "${overrides[@]}"; do
      set_flags+=(--set "$kv")
    done

    helm install attune ./charts/attune \
      --namespace attune-system --create-namespace \
      "${set_flags[@]}" \
      --wait --timeout 3m
Loading
Loading