name: Cypress e2e Test
# =============================================================================
# E2E Test Workflow with Cluster Failover and Dynamic Tag Selection
# =============================================================================
#
# TRIGGERS:
# - Automatically after the "Test" workflow completes successfully on a PR
# - Manually via workflow_dispatch (Actions tab → Run workflow)
#
# CLUSTER FAILOVER:
# Primary: dash-e2e-int (checked first via DSC health)
# Secondary: dash-e2e (used if primary is unhealthy)
# Health Check: Logs into the cluster → checks DSC phase, then the Available/Degraded conditions
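#   Illustrative manual check (mirrors the select-cluster job below; assumes an authenticated oc session):
#     oc get datasciencecluster -o jsonpath='{.items[0].status.phase}'   # healthy → "Ready"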
#
# TEST SELECTION:
# Default (always run):
# - @ci-dashboard-set-1
# - @ci-dashboard-set-2
#
# Run additional tests via PR labels:
# Add labels with 'test:' prefix to your PR:
# test:Pipelines → @Pipelines
# test:ModelServing → @ModelServing
# test:Workbenches → @Workbenches
# Any 'test:<TagName>' label maps to '@<TagName>' Cypress grep tag
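#   Example: a PR labeled 'test:Pipelines' and 'test:ModelServing' runs
#   @ci-dashboard-set-1, @ci-dashboard-set-2, @Pipelines, @ModelServing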
#
# Run additional tests manually:
# 1. Go to Actions tab → "Cypress e2e Test" workflow
# 2. Click "Run workflow"
# 3. Enter tags in 'additional_tags' field: @Pipelines,@Workbenches
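#   Equivalent from the GitHub CLI (assuming gh is authenticated for this repo):
#     gh workflow run "Cypress e2e Test" -f additional_tags='@Pipelines,@Workbenches'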
#
# LIMITS:
# - Max 5 additional tags beyond defaults (prevents runner exhaustion)
# - 10 runners shared across 30+ devs
#
# REQUIRED SECRETS:
# PRIMARY: OC_SERVER_PRIMARY, OCP_CONSOLE_URL_PRIMARY, ODH_DASHBOARD_URL_PRIMARY
# SECONDARY: OC_SERVER, OCP_CONSOLE_URL, ODH_DASHBOARD_URL
# AUTH: GITLAB_TOKEN, GITLAB_TEST_VARS_URL, ODH_NAMESPACES
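#   Secrets are set in repo settings or via the GitHub CLI, e.g. (value is a placeholder):
#     gh secret set OC_SERVER_PRIMARY --body "https://api.<primary-cluster>:6443"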
# =============================================================================
on:
workflow_run:
workflows: ["Test"]
types: [completed]
workflow_dispatch:
inputs:
additional_tags:
description: 'Extra test tags (e.g., @Pipelines,@Workbenches)'
required: false
default: ''
type: string
concurrency:
group: e2e-${{ github.event.workflow_run.head_branch || github.ref }}
cancel-in-progress: true
permissions:
contents: read
pull-requests: read
actions: read
statuses: write
env:
NODE_VERSION: 22.x
DO_NOT_TRACK: 1
# =============================================================================
# JOBS
# =============================================================================
jobs:
# ---------------------------------------------------------------------------
# Cluster Selection - Health check with automatic failover
# ---------------------------------------------------------------------------
select-cluster:
if: >-
github.event_name == 'workflow_dispatch' ||
(github.event.workflow_run.event == 'pull_request' &&
github.event.workflow_run.conclusion == 'success')
runs-on: self-hosted
outputs:
cluster_name: ${{ steps.select.outputs.cluster_name }}
steps:
- name: Download test credentials
run: |
echo "🔧 Downloading test credentials for cluster health check..."
curl -fk -H "Authorization: Bearer ${{ secrets.GITLAB_TOKEN }}" \
"${{ secrets.GITLAB_TEST_VARS_URL }}" \
-o /tmp/test-variables.yml
echo "✅ Downloaded test credentials"
- name: Select healthy cluster
id: select
env:
PRIMARY_SERVER: ${{ secrets.OC_SERVER_PRIMARY }}
PRIMARY_DASHBOARD: ${{ secrets.ODH_DASHBOARD_URL_PRIMARY }}
SECONDARY_SERVER: ${{ secrets.OC_SERVER }}
SECONDARY_DASHBOARD: ${{ secrets.ODH_DASHBOARD_URL }}
run: |
# Extract credentials from test-variables.yml
TEST_VARS_FILE="/tmp/test-variables.yml"
OC_USERNAME=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ')
OC_PASSWORD=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ')
echo "::add-mask::$OC_PASSWORD"
echo "::add-mask::$OC_USERNAME"
# Check DSC health by logging in and verifying conditions
check_dsc_health() {
local server_url="$1"
local cluster_name="$2"
[[ -z "$server_url" ]] && echo " ❌ Server URL is empty" && return 1
echo " 🔗 Attempting login to: $server_url"
# Try to login
LOGIN_OUTPUT=$(oc login -u "$OC_USERNAME" -p "$OC_PASSWORD" --server="$server_url" --insecure-skip-tls-verify 2>&1) || true
if ! oc whoami > /dev/null 2>&1; then
echo " ❌ Failed to login to $cluster_name"
echo " 📝 Login output: $LOGIN_OUTPUT" | head -5
return 1
fi
echo " ✅ Login successful"
# Get DSC status with full output for debugging
echo " 🔍 Fetching DataScienceCluster status..."
DSC_JSON=$(oc get datasciencecluster -o json 2>&1)
DSC_EXIT_CODE=$?
if [[ $DSC_EXIT_CODE -ne 0 ]]; then
echo " ❌ Failed to get DSC (exit code: $DSC_EXIT_CODE)"
echo " 📝 Output: $DSC_JSON" | head -5
return 1
fi
if [[ -z "$DSC_JSON" || "$DSC_JSON" == "null" || "$DSC_JSON" == '{"apiVersion":"datasciencecluster.opendatahub.io/v1","items":[],"kind":"List","metadata":{"resourceVersion":""}}' ]]; then
echo " ❌ No DataScienceCluster found on $cluster_name"
return 1
fi
# Print DSC name and status for debugging
DSC_NAME=$(echo "$DSC_JSON" | jq -r '.items[0].metadata.name // "unknown"')
echo " 📦 DSC Name: $DSC_NAME"
# Check phase - this is the most reliable indicator
PHASE=$(echo "$DSC_JSON" | jq -r '.items[0].status.phase // "Unknown"')
echo " 📊 DSC Phase: $PHASE"
# Print all conditions for debugging
echo " 📋 DSC Conditions:"
echo "$DSC_JSON" | jq -r '.items[0].status.conditions[]? | " - \(.type): \(.status) (\(.reason // "no reason"))"' 2>/dev/null || echo " (no conditions found)"
# If phase is Ready, cluster is healthy
if [[ "$PHASE" == "Ready" ]]; then
echo " ✅ DSC is Ready!"
return 0
fi
# Phase not Ready - check conditions for more detail
AVAILABLE=$(echo "$DSC_JSON" | jq -r '.items[0].status.conditions[] | select(.type=="Available") | .status' 2>/dev/null || echo "")
DEGRADED=$(echo "$DSC_JSON" | jq -r '.items[0].status.conditions[] | select(.type=="Degraded") | .status' 2>/dev/null || echo "")
# Fallback: if conditions show healthy even though phase isn't Ready
if [[ "$AVAILABLE" == "True" && "$DEGRADED" != "True" ]]; then
echo " ✅ Conditions look healthy despite phase=$PHASE"
return 0
fi
echo " ❌ DSC not healthy (Phase: $PHASE, Available: $AVAILABLE, Degraded: $DEGRADED)"
return 1
}
echo "🔍 Checking PRIMARY cluster (dash-e2e-int)..."
if check_dsc_health "$PRIMARY_SERVER" "dash-e2e-int"; then
echo "✅ PRIMARY cluster is healthy and ready"
echo "cluster_name=dash-e2e-int" >> $GITHUB_OUTPUT
else
echo ""
echo "⚠️ PRIMARY unavailable or not ready, trying SECONDARY (dash-e2e)..."
if check_dsc_health "$SECONDARY_SERVER" "dash-e2e"; then
echo "✅ SECONDARY cluster is healthy and ready"
echo "cluster_name=dash-e2e" >> $GITHUB_OUTPUT
else
echo ""
echo "❌ All clusters unavailable or unhealthy"
exit 1
fi
fi
# Clean up credentials file
rm -f /tmp/test-variables.yml
# ---------------------------------------------------------------------------
# Status - Set pending status on PR (independent; runs in parallel with cluster selection)
# ---------------------------------------------------------------------------
set-pending-status:
if: >-
github.event_name == 'workflow_dispatch' ||
(github.event.workflow_run.event == 'pull_request' &&
github.event.workflow_run.conclusion == 'success')
runs-on: ubuntu-latest
steps:
- name: Set pending status
env:
GH_TOKEN: ${{ github.token }}
run: |
gh api repos/${{ github.repository }}/statuses/${{ github.event.workflow_run.head_sha || github.sha }} \
-f state=pending \
-f context="Cypress E2E Tests" \
-f description="E2E tests starting..." \
-f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# ---------------------------------------------------------------------------
# Tag Resolution - Build test matrix from defaults + PR labels/input
# ---------------------------------------------------------------------------
get-test-tags:
needs: [select-cluster]
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.build.outputs.matrix }}
source: ${{ steps.build.outputs.source }}
steps:
- name: Get PR labels
id: labels
if: github.event_name == 'workflow_run'
env:
GH_TOKEN: ${{ github.token }}
run: |
# Get PR number - try multiple methods for fork PR compatibility
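# Method 1: workflow_run event payload (pull_requests is empty for fork PRs)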
PR_NUM="${{ github.event.workflow_run.pull_requests[0].number }}"
# Method 2: commits API (works for same-repo PRs)
if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then
PR_NUM=$(gh api "repos/${{ github.repository }}/commits/${{ github.event.workflow_run.head_sha }}/pulls" \
--jq '.[0].number' 2>/dev/null || echo "")
fi
# Method 3: search API (works for fork PRs)
if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then
PR_NUM=$(gh api "search/issues?q=repo:${{ github.repository }}+is:pr+is:open+sha:${{ github.event.workflow_run.head_sha }}" \
--jq '.items[0].number' 2>/dev/null || echo "")
fi
if [[ -n "$PR_NUM" && "$PR_NUM" != "null" ]]; then
LABELS=$(gh api "repos/${{ github.repository }}/issues/$PR_NUM/labels" \
--jq '[.[].name] | join(",")' 2>/dev/null || echo "")
echo "labels=$LABELS" >> $GITHUB_OUTPUT
echo "📋 PR #$PR_NUM labels: $LABELS"
else
echo "⚠️ Could not find PR number for SHA ${{ github.event.workflow_run.head_sha }}"
fi
- name: Build test matrix
id: build
run: |
# Configuration
MAX_EXTRA_TAGS=5 # Limit additional tags to prevent runner exhaustion
# Defaults - these ALWAYS run
TAGS="@ci-dashboard-set-1,@ci-dashboard-set-2"
SOURCE="default"
EXTRA_COUNT=0
# Priority 1: Manual input (workflow_dispatch)
if [[ -n "${{ inputs.additional_tags }}" ]]; then
for tag in $(echo "${{ inputs.additional_tags }}" | tr ',' ' '); do
if [[ $EXTRA_COUNT -lt $MAX_EXTRA_TAGS ]]; then
TAGS="$TAGS,$tag"
EXTRA_COUNT=$((EXTRA_COUNT + 1))
fi
done
SOURCE="manual"
echo "📝 Added manual tags (limit: $MAX_EXTRA_TAGS)"
# Priority 2: PR labels (test:* pattern)
elif [[ -n "${{ steps.labels.outputs.labels }}" ]]; then
for label in $(echo "${{ steps.labels.outputs.labels }}" | tr ',' ' '); do
if [[ "$label" == test:* && $EXTRA_COUNT -lt $MAX_EXTRA_TAGS ]]; then
tag="@${label#test:}"
tag="${tag#@}" # Remove double @
tag="@$tag"
TAGS="$TAGS,$tag"
EXTRA_COUNT=$((EXTRA_COUNT + 1))
SOURCE="pr-labels"
echo "🏷️ Label '$label' → $tag"
fi
done
fi
if [[ $EXTRA_COUNT -ge $MAX_EXTRA_TAGS ]]; then
echo "⚠️ Tag limit reached ($MAX_EXTRA_TAGS max). Some tags were not added."
fi
# Convert to JSON matrix (deduplicated)
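# e.g. "@ci-dashboard-set-1,@ci-dashboard-set-2,@Pipelines" → ["@Pipelines","@ci-dashboard-set-1","@ci-dashboard-set-2"] (jq 'unique' sorts)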
MATRIX=$(echo "$TAGS" | tr ',' '\n' | sort -u | grep -v '^$' | \
sed 's/^[^@]/@&/' | jq -Rc '[., inputs] | unique' | jq -sc 'add | unique')
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
echo "source=$SOURCE" >> $GITHUB_OUTPUT
echo "🧪 Final matrix: $MATRIX (source: $SOURCE)"
# ---------------------------------------------------------------------------
# E2E Tests - Run Cypress tests for each tag in parallel
# ---------------------------------------------------------------------------
e2e-tests:
needs: [select-cluster, set-pending-status, get-test-tags]
runs-on: self-hosted
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
tag: ${{ fromJson(needs.get-test-tags.outputs.matrix) }}
env:
CLUSTER_NAME: ${{ needs.select-cluster.outputs.cluster_name }}
steps:
- name: Calculate unique port for this workflow run
run: |
# Dynamic port allocation for parallel execution
BASE_PORT=$((4000 + (${{ github.run_id }} % 1000) * 5))
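# e.g. run_id 123456789 → 4000 + (789 * 5) = 7945; BASE_PORT spans 4000-8995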
# Add matrix offset to separate concurrent jobs within same PR
if [[ "${{ matrix.tag }}" == *"set-1"* ]]; then
MATRIX_OFFSET=0
elif [[ "${{ matrix.tag }}" == *"set-2"* ]]; then
MATRIX_OFFSET=1
else
MATRIX_OFFSET=2
fi
WEBPACK_PORT=$((BASE_PORT + MATRIX_OFFSET))
# Store port info with run_id for cleanup tracking
PORT_INFO_DIR="/tmp/gha-ports"
mkdir -p "$PORT_INFO_DIR"
echo "${{ github.run_id }}" > "$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
echo "WEBPACK_PORT=$WEBPACK_PORT" >> $GITHUB_ENV
echo "PORT_INFO_FILE=$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" >> $GITHUB_ENV
echo "📍 Using port ${WEBPACK_PORT} for ${{ matrix.tag }} (run_id: ${{ github.run_id }})"
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ github.event.workflow_run.head_sha || github.sha }}
- name: Restore npm dependencies cache
uses: actions/cache/restore@v4
id: npm-cache
with:
path: |
~/.cache/Cypress
**/node_modules
key: ${{ runner.os }}-${{ env.NODE_VERSION }}-all-modules-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-${{ env.NODE_VERSION }}-all-modules-
- name: Setup Node.js ${{ env.NODE_VERSION }}
if: steps.npm-cache.outputs.cache-hit != 'true'
uses: actions/setup-node@v4.3.0
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install dependencies
if: steps.npm-cache.outputs.cache-hit != 'true'
run: npm ci
- name: Restore turbo build artifacts cache
uses: actions/cache/restore@v4
with:
path: ${{ github.workspace }}/.turbo
key: ${{ runner.os }}-${{ env.NODE_VERSION }}-turbo-${{ github.sha }}-e2e
restore-keys: |
${{ runner.os }}-${{ env.NODE_VERSION }}-turbo-
- name: Restore OpenShift CLI tarball cache
uses: actions/cache/restore@v4
id: oc-cache
with:
path: ${{ runner.temp }}/oc.tar.gz
key: ${{ runner.os }}-oc-tarball-${{ env.OC_VERSION || '4.15.0' }}
- name: Download test configuration
run: |
echo "🔧 Downloading test configuration from GitLab..."
curl -fk -H "Authorization: Bearer ${{ secrets.GITLAB_TOKEN }}" \
"${{ secrets.GITLAB_TEST_VARS_URL }}" \
-o ${{ github.workspace }}/packages/cypress/test-variables.yml
echo "✅ Downloaded test configuration"
- name: Login to OpenShift cluster
env:
OC_SERVER_PRIMARY: ${{ secrets.OC_SERVER_PRIMARY }}
OC_SERVER_SECONDARY: ${{ secrets.OC_SERVER }}
run: |
TEST_VARS_FILE="${{ github.workspace }}/packages/cypress/test-variables.yml"
# Extract credentials
OC_USERNAME=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ')
OC_PASSWORD=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ')
echo "::add-mask::$OC_PASSWORD"
echo "::add-mask::$OC_USERNAME"
# Look up server URL based on selected cluster (avoids GitHub secret masking in outputs)
if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then
CLUSTER_URL="$OC_SERVER_PRIMARY"
elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then
CLUSTER_URL="$OC_SERVER_SECONDARY"
else
echo "❌ Unknown or empty CLUSTER_NAME: '$CLUSTER_NAME'" >&2
echo "Expected 'dash-e2e-int' or 'dash-e2e'" >&2
exit 1
fi
if [ -z "$CLUSTER_URL" ]; then
echo "❌ CLUSTER_URL is empty for cluster '$CLUSTER_NAME'" >&2
echo "Check that OC_SERVER_PRIMARY/OC_SERVER secrets are configured" >&2
exit 1
fi
echo "Logging in to OpenShift cluster ($CLUSTER_NAME)..."
# Branch on the login exit status directly; a separate `$?` check is dead code under bash -e
if oc login -u "$OC_USERNAME" -p "$OC_PASSWORD" --server="$CLUSTER_URL" --insecure-skip-tls-verify > /dev/null 2>&1; then
echo "✅ Successfully logged in to $CLUSTER_NAME"
else
echo "❌ Failed to login to OpenShift cluster"
exit 1
fi
echo "KUBECONFIG=$HOME/.kube/config" >> $GITHUB_ENV
- name: Override namespace values
env:
DASHBOARD_URL_PRIMARY: ${{ secrets.ODH_DASHBOARD_URL_PRIMARY }}
DASHBOARD_URL_SECONDARY: ${{ secrets.ODH_DASHBOARD_URL }}
ODH_NAMESPACES: ${{ secrets.ODH_NAMESPACES }}
run: |
TEST_VARS_FILE="${{ github.workspace }}/packages/cypress/test-variables.yml"
# Look up dashboard URL based on selected cluster (secrets passed as step-level env for security)
if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then
DASHBOARD_URL="$DASHBOARD_URL_PRIMARY"
elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then
DASHBOARD_URL="$DASHBOARD_URL_SECONDARY"
else
echo "❌ Unknown or empty CLUSTER_NAME: '$CLUSTER_NAME'" >&2
echo "Expected 'dash-e2e-int' or 'dash-e2e'" >&2
exit 1
fi
if [ -z "$DASHBOARD_URL" ]; then
echo "❌ DASHBOARD_URL is empty for cluster '$CLUSTER_NAME'" >&2
echo "Check that ODH_DASHBOARD_URL_PRIMARY/ODH_DASHBOARD_URL secrets are configured" >&2
exit 1
fi
# Set dashboard URL for selected cluster
sed -i "s|^ODH_DASHBOARD_URL:.*|ODH_DASHBOARD_URL: $DASHBOARD_URL|" "$TEST_VARS_FILE"
if [ -z "$ODH_NAMESPACES" ]; then
echo "⚠️ ODH_NAMESPACES secret not set, skipping namespace override"
exit 0
fi
echo "::add-mask::$ODH_NAMESPACES"
echo "📝 Overriding namespaces with ODH values..."
IFS=',' read -r OPERATOR_NS APPLICATIONS_NS NOTEBOOKS_NS OPERATOR_NAME PROJECT_NAME <<< "$ODH_NAMESPACES"
sed -i "s|^PRODUCT:.*|PRODUCT: ODH|" "$TEST_VARS_FILE"
sed -i "s|^OPERATOR_NAMESPACE:.*|OPERATOR_NAMESPACE: $OPERATOR_NS|" "$TEST_VARS_FILE"
sed -i "s|^APPLICATIONS_NAMESPACE:.*|APPLICATIONS_NAMESPACE: $APPLICATIONS_NS|" "$TEST_VARS_FILE"
sed -i "s|^MONITORING_NAMESPACE:.*|MONITORING_NAMESPACE: $APPLICATIONS_NS|" "$TEST_VARS_FILE"
sed -i "s|^NOTEBOOKS_NAMESPACE:.*|NOTEBOOKS_NAMESPACE: $NOTEBOOKS_NS|" "$TEST_VARS_FILE"
sed -i "s|^OPERATOR_NAME:.*|OPERATOR_NAME: $OPERATOR_NAME|" "$TEST_VARS_FILE"
sed -i "s|^ODH_DASHBOARD_PROJECT_NAME:.*|ODH_DASHBOARD_PROJECT_NAME: $PROJECT_NAME|" "$TEST_VARS_FILE"
echo "✅ Namespace configuration updated"
- name: Set test configuration
run: |
echo "CY_TEST_CONFIG=${{ github.workspace }}/packages/cypress/test-variables.yml" >> $GITHUB_ENV
- name: Start Cypress Server
run: |
echo "🧹 Cleaning up port ${WEBPACK_PORT}..."
PORT_INFO_DIR="/tmp/gha-ports"
PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
CURRENT_RUN_ID="${{ github.run_id }}"
# Check if port is in use
if lsof -i:${WEBPACK_PORT} > /dev/null 2>&1; then
# Check if there's a run_id file for this port
if [ -f "$PORT_INFO_FILE" ]; then
PORT_OWNER_RUN_ID=$(cat "$PORT_INFO_FILE")
if [ "$PORT_OWNER_RUN_ID" != "$CURRENT_RUN_ID" ]; then
echo "⚠️ Port ${WEBPACK_PORT} is owned by different run_id: $PORT_OWNER_RUN_ID"
echo "⚠️ This port is in use by another workflow run - will not kill it"
# Try to find an alternative port
for alt_port in $(seq $((WEBPACK_PORT + 5)) 5 $((WEBPACK_PORT + 50))); do # seq FIRST INCREMENT LAST
if ! lsof -i:${alt_port} > /dev/null 2>&1; then
WEBPACK_PORT=$alt_port
PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
echo "✅ Found alternative port: ${WEBPACK_PORT}"
break
fi
done
else
echo "✅ Port ${WEBPACK_PORT} is owned by this run - safe to clean up"
fi
else
# No run_id file - check if process is from a recent GitHub Actions run
PORT_PID=$(lsof -ti:${WEBPACK_PORT} 2>/dev/null | head -1)
if [ -n "$PORT_PID" ]; then
# Check if process is from a GitHub Actions workflow
if ps -p "$PORT_PID" -o command= 2>/dev/null | grep -q "webpack.*serve\|node.*40[0-9][0-9]"; then
echo "⚠️ Port ${WEBPACK_PORT} in use by potential GHA process (PID: $PORT_PID)"
echo "⚠️ Being cautious - will not kill without run_id confirmation"
# Find alternative port
for alt_port in $(seq $((WEBPACK_PORT + 5)) 5 $((WEBPACK_PORT + 50))); do # seq FIRST INCREMENT LAST
if ! lsof -i:${alt_port} > /dev/null 2>&1; then
WEBPACK_PORT=$alt_port
PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
echo "✅ Found alternative port: ${WEBPACK_PORT}"
break
fi
done
else
echo "⚠️ Port ${WEBPACK_PORT} in use by non-GHA process - cleaning up"
kill -9 "$PORT_PID" 2>/dev/null || true
fi
fi
fi
fi
# Verify port is free with retry logic
RETRY_COUNT=0
while lsof -i:${WEBPACK_PORT} > /dev/null 2>&1; do
RETRY_COUNT=$((RETRY_COUNT + 1))
if [ $RETRY_COUNT -gt 10 ]; then
echo "❌ Port ${WEBPACK_PORT} still in use after cleanup!"
lsof -i:${WEBPACK_PORT}
exit 1
fi
echo "⏳ Retrying cleanup... (attempt $RETRY_COUNT/10)"
sleep 2
done
# Claim the port with our run_id
mkdir -p "$PORT_INFO_DIR"
echo "$CURRENT_RUN_ID" > "$PORT_INFO_FILE"
echo "WEBPACK_PORT=$WEBPACK_PORT" >> $GITHUB_ENV
echo "PORT_INFO_FILE=$PORT_INFO_FILE" >> $GITHUB_ENV
echo "✅ Port ${WEBPACK_PORT} is free and claimed by run_id: $CURRENT_RUN_ID"
echo "🚀 Starting webpack dev server on port ${WEBPACK_PORT} ($CLUSTER_NAME)..."
# Start webpack and filter sensitive output
cd frontend && ODH_PORT=${WEBPACK_PORT} npm run start:dev:ext > /tmp/webpack_${WEBPACK_PORT}.log 2>&1 &
SERVER_PID=$!
echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV
echo "$SERVER_PID" > "$PORT_INFO_DIR/port-${WEBPACK_PORT}.pid"
# Give server time to initialize
sleep 20
# Show filtered webpack status (hide sensitive cluster URLs)
if [ -f /tmp/webpack_${WEBPACK_PORT}.log ]; then
tail -20 /tmp/webpack_${WEBPACK_PORT}.log | \
grep -v "Dashboard host:" | \
grep -v "Proxy created:" | \
grep -v "Logged in as user:" | \
grep -v "Using project:" || true
fi
- name: Wait for Server Ready
run: |
echo "⏳ Waiting for localhost:${WEBPACK_PORT} to be ready..."
npx wait-on http://localhost:${WEBPACK_PORT} --timeout 120000
# Verify the application loads with dashboard content
for i in {1..10}; do
if curl -s -f http://localhost:${WEBPACK_PORT}/ | grep -q "Data Science Projects\|ODH\|Open Data Hub\|Dashboard"; then
echo "✅ Server is ready and application is loaded!"
break
fi
if [ $i -lt 10 ]; then
echo "⏳ Waiting for application to load... (attempt $i/10)"
sleep 8
else
echo "❌ Application failed to load properly after 10 attempts"
exit 1
fi
done
- name: Run E2E Tests
run: |
cd frontend
echo "🧪 Running E2E tests for ${{ matrix.tag }}..."
echo "🚀 Running tests against live dashboard on port ${WEBPACK_PORT}"
echo "📌 Tag source: ${{ needs.get-test-tags.outputs.source }}"
export CY_RESULTS_DIR="${{ github.workspace }}/packages/cypress/results/${{ matrix.tag }}"
mkdir -p "$CY_RESULTS_DIR"
BASE_URL=http://localhost:${WEBPACK_PORT} npm run cypress:run:chrome -- \
--env skipTags="@Bug @Maintain @NonConcurrent",grepTags="${{ matrix.tag }}",grepFilterSpecs=true \
--config video=true,screenshotsFolder="$CY_RESULTS_DIR/screenshots",videosFolder="$CY_RESULTS_DIR/videos"
- name: Upload test results
if: always()
uses: actions/upload-artifact@v4
with:
name: e2e-results-${{ matrix.tag }}
path: |
packages/cypress/results/
packages/cypress/videos/
packages/cypress/screenshots/
retention-days: 7
- name: Log test completion
if: always()
run: |
echo "🏁 E2E Test completed!"
echo "Status: ${{ job.status }}"
echo "Test Tag: ${{ matrix.tag }}"
echo "Cluster: $CLUSTER_NAME"
echo "Run ID: ${{ github.run_id }}"
# ---------------------------------------------------------------------------
# Final Status - Update PR with test results
# ---------------------------------------------------------------------------
set-final-status:
needs: [select-cluster, e2e-tests]
if: >-
always() &&
(github.event_name == 'workflow_dispatch' ||
(github.event.workflow_run.event == 'pull_request' &&
github.event.workflow_run.conclusion == 'success'))
runs-on: ubuntu-latest
steps:
- name: Set final status
env:
GH_TOKEN: ${{ github.token }}
run: |
E2E_RESULT="${{ needs.e2e-tests.result }}"
CLUSTER_RESULT="${{ needs.select-cluster.result }}"
CLUSTER="${{ needs.select-cluster.outputs.cluster_name }}"
echo "📊 Job results: select-cluster=$CLUSTER_RESULT, e2e-tests=$E2E_RESULT"
# Handle cluster selection failure first
if [[ "$CLUSTER_RESULT" == "failure" ]]; then
STATE="failure"
DESC="Cluster health check failed - no healthy cluster available"
elif [[ "$E2E_RESULT" == "success" ]]; then
STATE="success"
DESC="All tests passed on $CLUSTER"
elif [[ "$E2E_RESULT" == "cancelled" ]]; then
STATE="error"
DESC="Tests cancelled"
elif [[ "$E2E_RESULT" == "skipped" && "$CLUSTER_RESULT" == "skipped" ]]; then
# Both skipped means test.yml failed - don't post status
echo "Both jobs skipped (test.yml likely failed) - not posting status"
exit 0
elif [[ "$E2E_RESULT" == "skipped" ]]; then
STATE="failure"
DESC="Tests skipped due to upstream failure"
else
STATE="failure"
DESC="Tests failed on ${CLUSTER:-unknown cluster}"
fi
echo "📝 Posting status: state=$STATE, description=$DESC"
gh api repos/${{ github.repository }}/statuses/${{ github.event.workflow_run.head_sha || github.sha }} \
-f state="$STATE" \
-f context="Cypress E2E Tests" \
-f description="$DESC" \
-f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# ---------------------------------------------------------------------------
# Cleanup - Stop all servers started by this workflow run
# ---------------------------------------------------------------------------
cleanup-server:
needs: [e2e-tests]
runs-on: self-hosted
if: ${{ always() && (github.event_name == 'workflow_dispatch' || (github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success')) }}
steps:
- name: Stop Cypress Servers
run: |
echo "🛑 Stopping webpack dev server for run_id: ${{ github.run_id }}..."
PORT_INFO_DIR="/tmp/gha-ports"
CURRENT_RUN_ID="${{ github.run_id }}"
KILLED_COUNT=0
# Find all port files owned by this run_id
if [ -d "$PORT_INFO_DIR" ]; then
for port_file in "$PORT_INFO_DIR"/port-*.run_id; do
if [ -f "$port_file" ]; then
PORT_OWNER_RUN_ID=$(cat "$port_file")
if [ "$PORT_OWNER_RUN_ID" = "$CURRENT_RUN_ID" ]; then
# Extract port number from filename
PORT=$(basename "$port_file" | sed 's/port-\([0-9]*\)\.run_id/\1/')
PID_FILE="$PORT_INFO_DIR/port-${PORT}.pid"
# Kill process if PID file exists
if [ -f "$PID_FILE" ]; then
PID=$(cat "$PID_FILE")
if ps -p "$PID" > /dev/null 2>&1; then
echo "🛑 Killing process $PID on port $PORT (run_id: $CURRENT_RUN_ID)"
pkill -P "$PID" 2>/dev/null || true
kill "$PID" 2>/dev/null || true
KILLED_COUNT=$((KILLED_COUNT + 1))
fi
fi
# Also kill any process on this port (double-check)
PORT_PID=$(lsof -ti:${PORT} 2>/dev/null | head -1)
if [ -n "$PORT_PID" ]; then
echo "🛑 Killing process $PORT_PID on port $PORT"
pkill -P "$PORT_PID" 2>/dev/null || true
kill "$PORT_PID" 2>/dev/null || true
fi
# Clean up orphaned Chrome processes
ALL_PORT_PIDS=$(lsof -ti:${PORT} 2>/dev/null || true)
if [ -n "$ALL_PORT_PIDS" ]; then
for port_pid in $ALL_PORT_PIDS; do
if ps -p "$port_pid" -o comm= 2>/dev/null | grep -qE "chrome|chromium"; then
echo "🛑 Killing Chrome process $port_pid (using port $PORT)"
pkill -P "$port_pid" 2>/dev/null || true
kill "$port_pid" 2>/dev/null || true
fi
done
fi
# Clean up port info files
rm -f "$port_file" "$PID_FILE"
fi
fi
done
fi
# Clean up stale port files older than 24 hours
find "$PORT_INFO_DIR" -name "*.run_id" -mtime +1 -delete 2>/dev/null || true
find "$PORT_INFO_DIR" -name "*.pid" -mtime +1 -delete 2>/dev/null || true
if [ $KILLED_COUNT -eq 0 ]; then
echo "✅ No processes found for run_id: $CURRENT_RUN_ID"
else
echo "✅ Cleaned up $KILLED_COUNT process(es) for run_id: $CURRENT_RUN_ID"
fi