Cypress e2e Test #186
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Cypress e2e Test | |
| # ============================================================================= | |
| # E2E Test Workflow with Cluster Failover and Smart Test Selection | |
| # ============================================================================= | |
| # | |
| # TRIGGERS: | |
| # - Automatically after "Test" workflow completes on PRs | |
| # - Manually via workflow_dispatch (Actions tab → Run workflow) | |
| # | |
| # CLUSTER FAILOVER: | |
| # Primary: dash-e2e-int (checked first via DSC health) | |
| # Secondary: dash-e2e (used if primary is unhealthy) | |
| # Health Check: Logs into cluster → checks DSC conditions (Available, Degraded, odh-dashboardReady) | |
| # | |
| # TEST SELECTION (priority order): | |
| # Default (always run): | |
| # - @ci-dashboard-regression-tags | |
| # | |
| # 1. Manual input (workflow_dispatch): | |
| # Enter tags in 'additional_tags' field: @Pipelines,@Workbenches | |
| # | |
| # 2. PR labels (test:* pattern): | |
| # Add labels with 'test:' prefix to your PR: | |
| # test:Pipelines → @Pipelines | |
| # test:ModelServing → @ModelServing | |
| # test:Workbenches → @Workbenches | |
| # Any 'test:<TagName>' label maps to '@<TagName>' Cypress grep tag | |
| # | |
| # 3. Auto-detected from PR changes (always additive): | |
| # Turbo detects changed packages → reads "e2eCiTags" from package.json | |
| # Git diff detects changed frontend sub-areas → inline mapping resolves tags | |
| # All auto-detected tags are consolidated into ONE additional matrix job | |
| # | |
| # To add auto-detection for a package: | |
| # Add "e2eCiTags": ["@YourTagCI"] to the package's package.json | |
| # To add auto-detection for a frontend area: | |
| # Add an entry to .github/frontend-ci-tags.json | |
| # | |
| # BFF SUPPORT (Backend-For-Frontend): | |
| # Packages with bffConfig.enabled=true in their package.json are automatically | |
| # detected and started when changes are found. BFFs start after the frontend | |
| # webpack server. See docs/bff-e2e-testing.md for details. | |
| # | |
| # To add BFF support to a package: | |
| # Add "bffConfig" to package.json with enabled, port, healthEndpoint, | |
| # startCommand, and startCommandCluster properties | |
| # | |
| # LIMITS: | |
| # - Max 5 additional tags for labels/manual (prevents runner exhaustion) | |
| # - Auto-detected tags are consolidated into 1 job (no limit needed) | |
| # - 10 runners shared across 30+ devs | |
| # | |
| # REQUIRED SECRETS: | |
| # PRIMARY: OC_SERVER_PRIMARY, OCP_CONSOLE_URL_PRIMARY, ODH_DASHBOARD_URL_PRIMARY | |
| # SECONDARY: OC_SERVER, OCP_CONSOLE_URL, ODH_DASHBOARD_URL | |
| # AUTH: GITLAB_TOKEN, GITLAB_TEST_VARS_URL, ODH_NAMESPACES | |
| # ============================================================================= | |
| on: | |
| workflow_run: | |
| workflows: ["Test"] | |
| types: [completed] | |
| workflow_dispatch: | |
| inputs: | |
| additional_tags: | |
| description: 'Extra test tags (e.g., @Pipelines,@Workbenches)' | |
| required: false | |
| default: '' | |
| type: string | |
| concurrency: | |
| group: e2e-${{ github.event.workflow_run.head_branch || github.ref }} | |
| cancel-in-progress: true | |
| permissions: | |
| contents: read | |
| pull-requests: read | |
| actions: read | |
| statuses: write | |
| env: | |
| NODE_VERSION: 22.x | |
| DO_NOT_TRACK: 1 | |
| # ============================================================================= | |
| # JOBS | |
| # ============================================================================= | |
| jobs: | |
| # --------------------------------------------------------------------------- | |
| # Cluster Selection - Health check with automatic failover | |
| # --------------------------------------------------------------------------- | |
| select-cluster: | |
| # Gate: run on manual dispatch, or only after a successful "Test" workflow run on a PR. | |
| if: >- | |
| github.event_name == 'workflow_dispatch' || | |
| (github.event.workflow_run.event == 'pull_request' && | |
| github.event.workflow_run.conclusion == 'success') | |
| runs-on: self-hosted | |
| outputs: | |
| # Name of the healthy cluster chosen by the step below: dash-e2e-int or dash-e2e. | |
| cluster_name: ${{ steps.select.outputs.cluster_name }} | |
| steps: | |
| - name: Download test credentials | |
| run: | | |
| echo "π§ Downloading test credentials for cluster health check..." | |
| # -f: fail on HTTP errors so a bad token/URL aborts the job. | |
| # NOTE(review): -k skips TLS verification for the internal GitLab host — confirm intentional. | |
| curl -fk -H "Authorization: Bearer ${{ secrets.GITLAB_TOKEN }}" \ | |
| "${{ secrets.GITLAB_TEST_VARS_URL }}" \ | |
| -o /tmp/test-variables.yml | |
| echo "β Downloaded test credentials" | |
| - name: Select healthy cluster | |
| id: select | |
| env: | |
| PRIMARY_SERVER: ${{ secrets.OC_SERVER_PRIMARY }} | |
| PRIMARY_DASHBOARD: ${{ secrets.ODH_DASHBOARD_URL_PRIMARY }} | |
| SECONDARY_SERVER: ${{ secrets.OC_SERVER }} | |
| SECONDARY_DASHBOARD: ${{ secrets.ODH_DASHBOARD_URL }} | |
| run: | | |
| # Extract credentials from test-variables.yml | |
| # (grep/sed scrape of the OCP_ADMIN_USER block; values are masked below before any logging) | |
| TEST_VARS_FILE="/tmp/test-variables.yml" | |
| OC_USERNAME=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ') | |
| OC_PASSWORD=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ') | |
| echo "::add-mask::$OC_PASSWORD" | |
| echo "::add-mask::$OC_USERNAME" | |
| # Check DSC health by logging in and verifying conditions | |
| # Returns 0 (healthy) when the DataScienceCluster phase is "Ready", or as a | |
| # fallback when conditions show Available=True and Degraded!=True; returns 1 otherwise. | |
| # NOTE(review): 'oc login' mutates the shared kubeconfig on this self-hosted runner — | |
| # confirm parallel jobs use isolated KUBECONFIG contexts. | |
| check_dsc_health() { | |
| local server_url="$1" | |
| local cluster_name="$2" | |
| [[ -z "$server_url" ]] && echo " β Server URL is empty" && return 1 | |
| echo " π Attempting login to: $server_url" | |
| # Try to login | |
| LOGIN_OUTPUT=$(oc login -u "$OC_USERNAME" -p "$OC_PASSWORD" --server="$server_url" --insecure-skip-tls-verify 2>&1) || true | |
| if ! oc whoami > /dev/null 2>&1; then | |
| echo " β Failed to login to $cluster_name" | |
| echo " π Login output: $LOGIN_OUTPUT" | head -5 | |
| return 1 | |
| fi | |
| echo " β Login successful" | |
| # Get DSC status with full output for debugging | |
| echo " π Fetching DataScienceCluster status..." | |
| DSC_JSON=$(oc get datasciencecluster -o json 2>&1) | |
| DSC_EXIT_CODE=$? | |
| if [[ $DSC_EXIT_CODE -ne 0 ]]; then | |
| echo " β Failed to get DSC (exit code: $DSC_EXIT_CODE)" | |
| echo " π Output: $DSC_JSON" | head -5 | |
| return 1 | |
| fi | |
| # Empty result, literal null, or an empty List object all mean "no DSC installed". | |
| if [[ -z "$DSC_JSON" || "$DSC_JSON" == "null" || "$DSC_JSON" == '{"apiVersion":"datasciencecluster.opendatahub.io/v1","items":[],"kind":"List","metadata":{"resourceVersion":""}}' ]]; then | |
| echo " β No DataScienceCluster found on $cluster_name" | |
| return 1 | |
| fi | |
| # Print DSC name and status for debugging | |
| DSC_NAME=$(echo "$DSC_JSON" | jq -r '.items[0].metadata.name // "unknown"') | |
| echo " π¦ DSC Name: $DSC_NAME" | |
| # Check phase - this is the most reliable indicator | |
| PHASE=$(echo "$DSC_JSON" | jq -r '.items[0].status.phase // "Unknown"') | |
| echo " π DSC Phase: $PHASE" | |
| # Print all conditions for debugging | |
| echo " π DSC Conditions:" | |
| echo "$DSC_JSON" | jq -r '.items[0].status.conditions[]? | " - \(.type): \(.status) (\(.reason // "no reason"))"' 2>/dev/null || echo " (no conditions found)" | |
| # If phase is Ready, cluster is healthy | |
| if [[ "$PHASE" == "Ready" ]]; then | |
| echo " β DSC is Ready!" | |
| return 0 | |
| fi | |
| # Phase not Ready - check conditions for more detail | |
| AVAILABLE=$(echo "$DSC_JSON" | jq -r '.items[0].status.conditions[] | select(.type=="Available") | .status' 2>/dev/null || echo "") | |
| DEGRADED=$(echo "$DSC_JSON" | jq -r '.items[0].status.conditions[] | select(.type=="Degraded") | .status' 2>/dev/null || echo "") | |
| # Fallback: if conditions show healthy even though phase isn't Ready | |
| if [[ "$AVAILABLE" == "True" && "$DEGRADED" != "True" ]]; then | |
| echo " β Conditions look healthy despite phase=$PHASE" | |
| return 0 | |
| fi | |
| echo " β DSC not healthy (Phase: $PHASE, Available: $AVAILABLE, Degraded: $DEGRADED)" | |
| return 1 | |
| } | |
| # Failover order: PRIMARY first, then SECONDARY; fail the job if neither is healthy. | |
| echo "π Checking PRIMARY cluster (dash-e2e-int)..." | |
| if check_dsc_health "$PRIMARY_SERVER" "dash-e2e-int"; then | |
| echo "β PRIMARY cluster is healthy and ready" | |
| echo "cluster_name=dash-e2e-int" >> $GITHUB_OUTPUT | |
| else | |
| echo "" | |
| echo "β οΈ PRIMARY unavailable or not ready, trying SECONDARY (dash-e2e)..." | |
| if check_dsc_health "$SECONDARY_SERVER" "dash-e2e"; then | |
| echo "β SECONDARY cluster is healthy and ready" | |
| echo "cluster_name=dash-e2e" >> $GITHUB_OUTPUT | |
| else | |
| echo "" | |
| echo "β All clusters unavailable or unhealthy" | |
| exit 1 | |
| fi | |
| fi | |
| # Clean up credentials file | |
| rm -f /tmp/test-variables.yml | |
| # --------------------------------------------------------------------------- | |
| # Status - Set pending status on PR (independent - runs before cluster selection) | |
| # --------------------------------------------------------------------------- | |
| set-pending-status: | |
| # Posts a "pending" commit status early so the PR shows E2E as in-progress | |
| # before cluster selection finishes. Requires the statuses:write permission. | |
| if: >- | |
| github.event_name == 'workflow_dispatch' || | |
| (github.event.workflow_run.event == 'pull_request' && | |
| github.event.workflow_run.conclusion == 'success') | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Set pending status | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Target the triggering run's head SHA; fall back to github.sha on manual dispatch. | |
| gh api repos/${{ github.repository }}/statuses/${{ github.event.workflow_run.head_sha || github.sha }} \ | |
| -f state=pending \ | |
| -f context="Cypress E2E Tests" \ | |
| -f description="E2E tests starting..." \ | |
| -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # --------------------------------------------------------------------------- | |
| # Tag Resolution - Build test matrix from defaults + PR labels/input + auto-detection | |
| # --------------------------------------------------------------------------- | |
| get-test-tags: | |
| needs: [select-cluster] | |
| runs-on: ubuntu-latest | |
| outputs: | |
| # JSON array of Cypress grep tag strings; consumed by the e2e-tests matrix. | |
| matrix: ${{ steps.build.outputs.matrix }} | |
| # Where the tags came from: default / manual / pr-labels / auto-detected (or combined). | |
| source: ${{ steps.build.outputs.source }} | |
| # JSON array of BFF package descriptors (name, dir, port, healthEndpoint, startCommand). | |
| bff_packages: ${{ steps.detect.outputs.bff_packages }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Check out the PR head that triggered the "Test" run (or current SHA on dispatch). | |
| ref: ${{ github.event.workflow_run.head_sha || github.sha }} | |
| # Full history is required for the git diff / turbo base...head comparisons below. | |
| fetch-depth: 0 | |
| - name: Setup Node.js ${{ env.NODE_VERSION }} | |
| uses: actions/setup-node@v4.3.0 | |
| with: | |
| node-version: ${{ env.NODE_VERSION }} | |
| cache: 'npm' | |
| - name: Get PR labels | |
| id: labels | |
| if: github.event_name == 'workflow_run' | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Get PR number - try multiple methods for fork PR compatibility | |
| # Method 1: workflow_run payload (empty for fork PRs) | |
| PR_NUM="${{ github.event.workflow_run.pull_requests[0].number }}" | |
| # Method 2: commits API (works for same-repo PRs) | |
| if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then | |
| PR_NUM=$(gh api "repos/${{ github.repository }}/commits/${{ github.event.workflow_run.head_sha }}/pulls" \ | |
| --jq '.[0].number' 2>/dev/null || echo "") | |
| fi | |
| # Method 3: search API (works for fork PRs) | |
| if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then | |
| PR_NUM=$(gh api "search/issues?q=repo:${{ github.repository }}+is:pr+is:open+sha:${{ github.event.workflow_run.head_sha }}" \ | |
| --jq '.items[0].number' 2>/dev/null || echo "") | |
| fi | |
| if [[ -n "$PR_NUM" && "$PR_NUM" != "null" ]]; then | |
| LABELS=$(gh api "repos/${{ github.repository }}/issues/$PR_NUM/labels" \ | |
| --jq '[.[].name] | join(",")' 2>/dev/null || echo "") | |
| echo "labels=$LABELS" >> $GITHUB_OUTPUT | |
| echo "π PR #$PR_NUM labels: $LABELS" | |
| else | |
| # No PR found: the 'labels' output stays unset, which downstream treats as empty. | |
| echo "β οΈ Could not find PR number for SHA ${{ github.event.workflow_run.head_sha }}" | |
| fi | |
| - name: Detect changed areas | |
| id: detect | |
| run: | | |
| # ================================================================= | |
| # Smart Test Selection: Detect changed areas and resolve CI tags | |
| # | |
| # Layer 1: Turbo detects changed packages (including frontend "//" workspace) | |
| # β reads e2eCiTags from each package.json (self-service, teams opt in) | |
| # Layer 2: For frontend changes detected by Layer 1, git diff identifies | |
| # sub-areas β .github/frontend-ci-tags.json mapping resolves CI tags | |
| # | |
| # Outputs: auto_tags (space-separated grep tags), bff_packages (JSON array). | |
| # ================================================================= | |
| AUTO_TAGS="" | |
| # --- Layer 1: Turbo-based package detection --- | |
| echo "π Running Turbo change detection..." | |
| # Determine base ref for comparison | |
| if [[ "${{ github.event_name }}" == "workflow_run" ]]; then | |
| BASE_SHA="${{ github.event.workflow_run.pull_requests[0].base.sha || 'origin/main' }}" | |
| else | |
| BASE_SHA="origin/main" | |
| fi | |
| HEAD_SHA="${{ github.event.workflow_run.head_sha || github.sha }}" | |
| echo " π Comparing $BASE_SHA...$HEAD_SHA" | |
| # Get changed packages from turbo (uses dependency graph) | |
| # NOTE(review): turbo/jq errors are silenced (2>/dev/null || echo "") — a broken | |
| # turbo invocation silently degrades to "no changes"; confirm this is acceptable. | |
| CHANGED_PACKAGES=$(npx turbo run lint --dry=json --filter="...[$BASE_SHA...$HEAD_SHA]" 2>/dev/null \ | |
| | jq -r '.packages[]' 2>/dev/null || echo "") | |
| if [[ -n "$CHANGED_PACKAGES" ]]; then | |
| echo " π¦ Changed workspaces detected by Turbo:" | |
| echo "$CHANGED_PACKAGES" | while read -r pkg; do echo " - $pkg"; done | |
| # For each changed package, check for e2eCiTags in its package.json | |
| for pkg_dir in packages/*/; do | |
| pkg_name=$(jq -r '.name // empty' "$pkg_dir/package.json" 2>/dev/null) | |
| if echo "$CHANGED_PACKAGES" | grep -qx "$pkg_name"; then | |
| ci_tags=$(jq -r '.e2eCiTags[]? // empty' "$pkg_dir/package.json" 2>/dev/null) | |
| if [[ -n "$ci_tags" ]]; then | |
| for tag in $ci_tags; do | |
| echo " β $pkg_name β $tag" | |
| AUTO_TAGS="$AUTO_TAGS $tag" | |
| done | |
| else | |
| echo " βοΈ $pkg_name (no e2eCiTags β defaults only)" | |
| fi | |
| fi | |
| done | |
| else | |
| echo " βΉοΈ No package changes detected by Turbo" | |
| fi | |
| # --- Layer 2: Frontend sub-area detection --- | |
| # Turbo sees the entire frontend as one workspace. When it changes, | |
| # use git diff to identify which sub-areas were modified. | |
| # Note: Turbo reports the root/frontend workspace as "//" | |
| if echo "$CHANGED_PACKAGES" | grep -qx "//"; then | |
| echo "" | |
| echo "π Frontend changed β detecting sub-areas via git diff..." | |
| # Load frontend directory β CI tag mapping from external JSON file | |
| # To add a new area: edit .github/frontend-ci-tags.json | |
| MAPPING_FILE=".github/frontend-ci-tags.json" | |
| if [[ ! -f "$MAPPING_FILE" ]]; then | |
| echo " β οΈ $MAPPING_FILE not found β skipping frontend sub-area detection" | |
| else | |
| echo " π Loaded mappings from $MAPPING_FILE" | |
| # Get changed frontend files | |
| CHANGED_FILES=$(git diff --name-only "$BASE_SHA"..."$HEAD_SHA" -- frontend/src/ 2>/dev/null || echo "") | |
| if [[ -n "$CHANGED_FILES" ]]; then | |
| # Scan pages/, concepts/, api/, routes/ using the same mapping | |
| for src_dir in pages concepts api routes; do | |
| # First path segment under frontend/src/<src_dir>/ is the area name keyed in the mapping. | |
| DIRS=$(echo "$CHANGED_FILES" | grep "^frontend/src/$src_dir/" | \ | |
| sed "s|^frontend/src/$src_dir/||" | cut -d'/' -f1 | sort -u) | |
| for dir in $DIRS; do | |
| tag=$(jq -r --arg d "$dir" '.[$d] // empty' "$MAPPING_FILE") | |
| if [[ -n "$tag" ]]; then | |
| echo " β $src_dir/$dir β $tag" | |
| AUTO_TAGS="$AUTO_TAGS $tag" | |
| fi | |
| done | |
| done | |
| fi | |
| fi | |
| fi | |
| # Deduplicate auto-detected tags | |
| if [[ -n "$AUTO_TAGS" ]]; then | |
| AUTO_TAGS=$(echo "$AUTO_TAGS" | tr ' ' '\n' | sort -u | tr '\n' ' ' | xargs) | |
| echo "" | |
| echo "π·οΈ Auto-detected CI tags: $AUTO_TAGS" | |
| else | |
| echo "" | |
| echo "βΉοΈ No area-specific CI tags detected β defaults only" | |
| fi | |
| echo "auto_tags=$AUTO_TAGS" >> $GITHUB_OUTPUT | |
| # --- BFF Package Detection --- | |
| # Detect changed packages that have bffConfig.enabled=true | |
| echo "" | |
| echo "π Detecting BFF packages to start..." | |
| BFF_PACKAGES="[]" | |
| if [[ -n "$CHANGED_PACKAGES" ]]; then | |
| for pkg_dir in packages/*/; do | |
| pkg_name=$(jq -r '.name // empty' "$pkg_dir/package.json" 2>/dev/null) | |
| # Check if this package changed | |
| if echo "$CHANGED_PACKAGES" | grep -qx "$pkg_name"; then | |
| # Check if package has bffConfig with enabled=true | |
| bff_enabled=$(jq -r '.bffConfig.enabled // false' "$pkg_dir/package.json" 2>/dev/null) | |
| if [[ "$bff_enabled" == "true" ]]; then | |
| # Defaults: port 4000, health endpoint /api/health; cluster command preferred. | |
| bff_port=$(jq -r '.bffConfig.port // 4000' "$pkg_dir/package.json" 2>/dev/null) | |
| bff_health=$(jq -r '.bffConfig.healthEndpoint // "/api/health"' "$pkg_dir/package.json" 2>/dev/null) | |
| bff_cmd=$(jq -r '.bffConfig.startCommandCluster // .bffConfig.startCommand' "$pkg_dir/package.json" 2>/dev/null) | |
| pkg_dir_name=$(basename "$pkg_dir") | |
| echo " β $pkg_name (port: $bff_port, health: $bff_health)" | |
| # Add to JSON array | |
| BFF_PACKAGES=$(echo "$BFF_PACKAGES" | jq -c --arg name "$pkg_name" --arg dir "$pkg_dir_name" --arg port "$bff_port" --arg health "$bff_health" --arg cmd "$bff_cmd" '. + [{"name": $name, "dir": $dir, "port": ($port | tonumber), "healthEndpoint": $health, "startCommand": $cmd}]') | |
| fi | |
| fi | |
| done | |
| fi | |
| if [[ "$BFF_PACKAGES" == "[]" ]]; then | |
| echo " βΉοΈ No BFF packages detected for this change" | |
| fi | |
| echo "bff_packages=$BFF_PACKAGES" >> $GITHUB_OUTPUT | |
| - name: Build test matrix | |
| id: build | |
| run: | | |
| # Builds the final JSON matrix of Cypress grep tags: | |
| # defaults + (manual input OR PR labels) + one consolidated auto-detected entry. | |
| # Configuration | |
| MAX_EXTRA_TAGS=5 # Limit additional tags to prevent runner exhaustion (for labels/manual only) | |
| # Defaults - these ALWAYS run | |
| TAGS="@ci-dashboard-regression-tags" | |
| SOURCE="default" | |
| EXTRA_COUNT=0 | |
| AUTO_DETECTED_ENTRY="" | |
| # Priority 1: Manual input (workflow_dispatch) | |
| # NOTE(review): ${{ inputs.additional_tags }} and the labels below are interpolated | |
| # directly into this script (script-injection surface); prefer passing via env:. | |
| if [[ -n "${{ inputs.additional_tags }}" ]]; then | |
| for tag in $(echo "${{ inputs.additional_tags }}" | tr ',' ' '); do | |
| if [[ $EXTRA_COUNT -lt $MAX_EXTRA_TAGS ]]; then | |
| TAGS="$TAGS,$tag" | |
| EXTRA_COUNT=$((EXTRA_COUNT + 1)) | |
| fi | |
| done | |
| SOURCE="manual" | |
| echo "π Added manual tags (limit: $MAX_EXTRA_TAGS)" | |
| # Priority 2: PR labels (test:* pattern) | |
| elif [[ -n "${{ steps.labels.outputs.labels }}" ]]; then | |
| for label in $(echo "${{ steps.labels.outputs.labels }}" | tr ',' ' '); do | |
| if [[ "$label" == test:* && $EXTRA_COUNT -lt $MAX_EXTRA_TAGS ]]; then | |
| # Normalize 'test:Foo' or 'test:@Foo' to exactly one leading '@'. | |
| tag="@${label#test:}" | |
| tag="${tag#@}" # Remove double @ | |
| tag="@$tag" | |
| TAGS="$TAGS,$tag" | |
| EXTRA_COUNT=$((EXTRA_COUNT + 1)) | |
| SOURCE="pr-labels" | |
| echo "π·οΈ Label '$label' β $tag" | |
| fi | |
| done | |
| fi | |
| if [[ $EXTRA_COUNT -ge $MAX_EXTRA_TAGS ]]; then | |
| echo "β οΈ Tag limit reached ($MAX_EXTRA_TAGS max). Some tags were not added." | |
| fi | |
| # Priority 3: Auto-detected from PR changes (always additive, consolidated into ONE job) | |
| AUTO_TAGS="${{ steps.detect.outputs.auto_tags }}" | |
| if [[ -n "$AUTO_TAGS" ]]; then | |
| # Remove any auto-detected tags that already appear in manual/label TAGS | |
| # to prevent the same tests running in two separate matrix jobs | |
| EXISTING_TAGS=$(echo "$TAGS" | tr ',' '\n' | sort -u) | |
| FILTERED_AUTO="" | |
| for auto_tag in $AUTO_TAGS; do | |
| if echo "$EXISTING_TAGS" | grep -qx "$auto_tag"; then | |
| echo "βοΈ Skipping $auto_tag from auto-detected (already in manual/label tags)" | |
| else | |
| FILTERED_AUTO="$FILTERED_AUTO $auto_tag" | |
| fi | |
| done | |
| FILTERED_AUTO=$(echo "$FILTERED_AUTO" | xargs) | |
| if [[ -n "$FILTERED_AUTO" ]]; then | |
| # Consolidate remaining auto-detected tags into a single matrix entry | |
| # Cypress grep treats space-separated tags as OR, so one job covers all areas | |
| AUTO_DETECTED_ENTRY="$FILTERED_AUTO" | |
| if [[ "$SOURCE" == "default" ]]; then | |
| SOURCE="auto-detected" | |
| else | |
| SOURCE="$SOURCE+auto-detected" | |
| fi | |
| echo "π€ Auto-detected tags (consolidated into 1 job): $AUTO_DETECTED_ENTRY" | |
| else | |
| echo "βΉοΈ All auto-detected tags already covered by manual/label tags" | |
| fi | |
| fi | |
| # Convert to JSON matrix (deduplicated) | |
| # sed prefixes any entry missing '@'; the jq pair turns the lines into a unique JSON array. | |
| MATRIX=$(echo "$TAGS" | tr ',' '\n' | sort -u | grep -v '^$' | \ | |
| sed 's/^[^@]/@&/' | jq -Rc '[., inputs] | unique' | jq -sc 'add | unique') | |
| # Append the consolidated auto-detected entry as a single matrix item | |
| if [[ -n "$AUTO_DETECTED_ENTRY" ]]; then | |
| MATRIX=$(echo "$MATRIX" | jq -c --arg entry "$AUTO_DETECTED_ENTRY" '. + [$entry] | unique') | |
| fi | |
| # Ensure compact JSON for GitHub Actions output | |
| MATRIX=$(echo "$MATRIX" | jq -c '.') | |
| echo "matrix=$MATRIX" >> $GITHUB_OUTPUT | |
| echo "source=$SOURCE" >> $GITHUB_OUTPUT | |
| echo "π§ͺ Final matrix: $MATRIX (source: $SOURCE)" | |
| # --------------------------------------------------------------------------- | |
| # E2E Tests - Run Cypress tests for each tag in parallel | |
| # --------------------------------------------------------------------------- | |
| e2e-tests: | |
| needs: [select-cluster, set-pending-status, get-test-tags] | |
| runs-on: self-hosted | |
| timeout-minutes: 30 | |
| strategy: | |
| # One matrix job per tag entry; a failing tag must not cancel the others. | |
| fail-fast: false | |
| matrix: | |
| tag: ${{ fromJson(needs.get-test-tags.outputs.matrix) }} | |
| env: | |
| # Healthy cluster chosen by select-cluster (dash-e2e-int or dash-e2e). | |
| CLUSTER_NAME: ${{ needs.select-cluster.outputs.cluster_name }} | |
| steps: | |
| - name: Check Disk Space | |
| run: | | |
| # Measures root-filesystem usage and exports DISK_USAGE / EMERGENCY_CLEANUP | |
| # via GITHUB_ENV for the Emergency Cleanup step. Cleanup triggers at >= 90%. | |
| echo "π Checking available disk space..." | |
| DISK_USAGE=$(df / | tail -1 | awk '{print $5}' | sed 's/%//') | |
| DISK_AVAIL=$(df -h / | tail -1 | awk '{print $4}') | |
| echo "πΎ Disk usage: ${DISK_USAGE}% (${DISK_AVAIL} available)" | |
| echo "DISK_USAGE=$DISK_USAGE" >> $GITHUB_ENV | |
| if [ "$DISK_USAGE" -ge 95 ]; then | |
| echo "β CRITICAL: Disk usage is ${DISK_USAGE}% - will attempt emergency cleanup" | |
| echo "EMERGENCY_CLEANUP=true" >> $GITHUB_ENV | |
| elif [ "$DISK_USAGE" -ge 90 ]; then | |
| echo "β οΈ HIGH: Disk usage is ${DISK_USAGE}% - will attempt aggressive cleanup" | |
| echo "EMERGENCY_CLEANUP=true" >> $GITHUB_ENV | |
| elif [ "$DISK_USAGE" -ge 85 ]; then | |
| echo "β οΈ WARNING: Disk usage is ${DISK_USAGE}% - cleanup recommended" | |
| echo " The cleanup job will run after this workflow completes" | |
| echo "EMERGENCY_CLEANUP=false" >> $GITHUB_ENV | |
| else | |
| echo "β Disk space OK (${DISK_USAGE}% used)" | |
| echo "EMERGENCY_CLEANUP=false" >> $GITHUB_ENV | |
| fi | |
| - name: Emergency Cleanup (if disk space critical) | |
| if: env.EMERGENCY_CLEANUP == 'true' | |
| run: | | |
| # Frees disk on the shared self-hosted runner while protecting workspaces of | |
| # parallel jobs. DISK_USAGE comes from the Check Disk Space step via GITHUB_ENV. | |
| echo "π¨ EMERGENCY CLEANUP - Disk usage: ${DISK_USAGE}%" | |
| RUNNER_USER=$(whoami) | |
| HOME_DIR=$(eval echo "~$RUNNER_USER") | |
| CURRENT_WORK_DIR="${{ github.workspace }}" | |
| # Determine how aggressive to be based on disk usage | |
| if [ "$DISK_USAGE" -ge 95 ]; then | |
| AGE_THRESHOLD=1 # CRITICAL: Clean anything >1 day old | |
| echo "β οΈ CRITICAL MODE: Cleaning files >1 day old" | |
| else | |
| AGE_THRESHOLD=7 # Normal: Clean anything >7 days old | |
| echo "β οΈ AGGRESSIVE MODE: Cleaning files >7 days old" | |
| fi | |
| echo "" | |
| if [ "$DISK_USAGE" -ge 95 ]; then | |
| echo "π‘οΈ PARALLEL-SAFE PROTECTIONS (FAST MODE - disk critically full):" | |
| echo " β Current workspace (this job)" | |
| echo " β Active GitHub Actions Runner.Worker process directories" | |
| echo " β‘ Skipping slow checks (lsof, find) for speed" | |
| else | |
| echo "π‘οΈ PARALLEL-SAFE PROTECTIONS (THOROUGH MODE):" | |
| echo " β Current workspace (this job)" | |
| echo " β Active GitHub Actions Runner.Worker processes" | |
| echo " β Directories with open files (lsof with 5s timeout)" | |
| echo " β Directories accessed in last 10 minutes" | |
| fi | |
| echo "" | |
| # Get list of ALL active work directories from currently running GitHub Actions jobs | |
| # This is the safest way to avoid deleting directories from parallel PRs | |
| echo "π Detecting active work directories from parallel jobs (with timeout)..." | |
| ACTIVE_WORK_DIRS=() | |
| ACTIVE_JOBS=0 | |
| # Use faster method: check for active processes, then only protect their workspace | |
| ACTIVE_PIDS=$(pgrep -f "Runner.Worker" -u "$RUNNER_USER" 2>/dev/null || true) | |
| if [ -n "$ACTIVE_PIDS" ]; then | |
| echo " Found active Runner.Worker processes: $ACTIVE_PIDS" | |
| # Get working directories of active processes using lsof (much faster than find) | |
| for pid in $ACTIVE_PIDS; do | |
| ACTIVE_JOBS=$((ACTIVE_JOBS + 1)) | |
| # Get the CWD of this process | |
| if [ -L "/proc/$pid/cwd" ]; then | |
| WORK_CWD=$(readlink "/proc/$pid/cwd" 2>/dev/null || true) | |
| if [[ "$WORK_CWD" == *"odh-dashboard"* ]]; then | |
| # Extract the odh-dashboard directory path | |
| WORK_DIR=$(echo "$WORK_CWD" | sed 's|/odh-dashboard/.*|/odh-dashboard|') | |
| ACTIVE_WORK_DIRS+=("$WORK_DIR") | |
| echo " π‘οΈ Protected: $WORK_DIR (PID $pid)" | |
| fi | |
| fi | |
| done | |
| fi | |
| echo " Found $ACTIVE_JOBS active runner(s) with ${#ACTIVE_WORK_DIRS[@]} protected work directory(ies)" | |
| # Helper function to check if directory is in use by active runner | |
| # Returns 0 (in use / protected) or 1 (safe to delete). | |
| is_directory_in_use() { | |
| local dir="$1" | |
| # 1. Skip current workspace (absolute must) | |
| if [[ "$dir" == "$CURRENT_WORK_DIR"* ]]; then | |
| return 0 # In use (current job) | |
| fi | |
| # 2. Check if directory is in the active work dirs list (FAST) | |
| for active_dir in "${ACTIVE_WORK_DIRS[@]}"; do | |
| if [[ "$dir" == "$active_dir"* ]]; then | |
| return 0 # In use (active job) | |
| fi | |
| done | |
| # 3. In CRITICAL mode (disk β₯95%), skip slow checks - rely on active work dirs only | |
| if [ "$DISK_USAGE" -ge 95 ]; then | |
| return 1 # Not in active list, safe to delete (fast path) | |
| fi | |
| # 4. Normal mode: Do thorough checks | |
| # Check for ANY processes using this directory (can be slow) | |
| if timeout 5 lsof +D "$dir" 2>/dev/null | grep -q .; then | |
| return 0 # In use (has open files) | |
| fi | |
| # 5. Check if directory was accessed very recently (last 10 minutes only) | |
| if find "$dir" -maxdepth 0 -amin -10 2>/dev/null | grep -q .; then | |
| return 0 # In use (very recent activity) | |
| fi | |
| return 1 # Not in use (safe to delete) | |
| } | |
| echo "" | |
| echo "ποΈ Step 1: Cleaning Go upstream builds (age: >$AGE_THRESHOLD days)..." | |
| UPSTREAM_CLEANED=0 | |
| # FIX: feed the loop via process substitution instead of `find | while` — a | |
| # pipeline runs the loop body in a subshell, so the counter increments were lost. | |
| while read -r upstream_dir; do | |
| # Extract work_dir by going up to odh-dashboard parent | |
| work_dir=$(echo "$upstream_dir" | sed 's|/odh-dashboard/.*|/odh-dashboard|') | |
| if [ -n "$work_dir" ] && ! is_directory_in_use "$work_dir"; then | |
| rm -rf "$upstream_dir" 2>/dev/null && echo " β Cleaned: $upstream_dir" && UPSTREAM_CLEANED=$((UPSTREAM_CLEANED + 1)) || true | |
| fi | |
| done < <(find "$HOME_DIR"/actions-runner*/_work -type d -path "*/packages/*/upstream" -mtime +$AGE_THRESHOLD 2>/dev/null) | |
| echo " π Cleaned: $UPSTREAM_CLEANED upstream build(s)" | |
| echo "" | |
| echo "ποΈ Step 2: Cleaning old work directories (age: >$AGE_THRESHOLD days, with multi-layer safety)..." | |
| CLEANED_COUNT=0 | |
| SKIPPED_COUNT=0 | |
| # FIX: process substitution keeps CLEANED_COUNT/SKIPPED_COUNT updates in this shell | |
| # (previously the `find | while` subshell discarded them and the summary always showed 0). | |
| while read -r work_dir; do | |
| if [ -d "$work_dir" ]; then | |
| if is_directory_in_use "$work_dir"; then | |
| echo " βοΈ Protected (in use): $work_dir" | |
| SKIPPED_COUNT=$((SKIPPED_COUNT + 1)) | |
| else | |
| SIZE_BEFORE=$(du -sh "$work_dir" 2>/dev/null | awk '{print $1}') | |
| if rm -rf "$work_dir" 2>/dev/null; then | |
| echo " β Cleaned $SIZE_BEFORE: $work_dir" | |
| CLEANED_COUNT=$((CLEANED_COUNT + 1)) | |
| fi | |
| fi | |
| fi | |
| done < <(find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d -mtime +$AGE_THRESHOLD 2>/dev/null) | |
| echo " π Cleaned: $CLEANED_COUNT, Protected: $SKIPPED_COUNT" | |
| echo "" | |
| echo "ποΈ Step 3: Cleaning Cypress artifacts (age: >$AGE_THRESHOLD days)..." | |
| SCREENSHOTS_CLEANED=$(find "$HOME_DIR"/actions-runner*/_work -path "*/cypress/results/screenshots/*" -mtime +$AGE_THRESHOLD -delete -print 2>/dev/null | wc -l) | |
| VIDEOS_CLEANED=$(find "$HOME_DIR"/actions-runner*/_work -path "*/cypress/results/videos/*" -mtime +$AGE_THRESHOLD -delete -print 2>/dev/null | wc -l) | |
| echo " β Cleaned $SCREENSHOTS_CLEANED screenshots, $VIDEOS_CLEANED videos" | |
| echo "" | |
| echo "ποΈ Step 4: Cleaning runner logs (age: >$AGE_THRESHOLD days)..." | |
| LOGS_CLEANED=$(find "$HOME_DIR"/actions-runner*/_diag -name "*.log" -mtime +$AGE_THRESHOLD -delete -print 2>/dev/null | wc -l) | |
| echo " β Cleaned $LOGS_CLEANED log files" | |
| echo "" | |
| echo "ποΈ Step 5: Cleaning node_modules in old work dirs (age: >$AGE_THRESHOLD days)..." | |
| find "$HOME_DIR"/actions-runner*/_work -type d -name "node_modules" -mtime +$AGE_THRESHOLD 2>/dev/null | while read nm_dir; do | |
| # Extract work_dir by going up to odh-dashboard parent | |
| work_dir=$(echo "$nm_dir" | sed 's|/odh-dashboard/.*|/odh-dashboard|') | |
| if [ -n "$work_dir" ] && ! is_directory_in_use "$work_dir"; then | |
| SIZE_BEFORE=$(du -sh "$nm_dir" 2>/dev/null | awk '{print $1}') | |
| rm -rf "$nm_dir" 2>/dev/null && echo " β Cleaned $SIZE_BEFORE node_modules: $nm_dir" || true | |
| fi | |
| done | |
| echo "" | |
| echo "ποΈ Step 6: Cleaning .turbo cache in old work dirs (age: >$AGE_THRESHOLD days)..." | |
| find "$HOME_DIR"/actions-runner*/_work -type d -name ".turbo" -mtime +$AGE_THRESHOLD 2>/dev/null | while read turbo_dir; do | |
| # Extract work_dir by going up to odh-dashboard parent | |
| work_dir=$(echo "$turbo_dir" | sed 's|/odh-dashboard/.*|/odh-dashboard|') | |
| if [ -n "$work_dir" ] && ! is_directory_in_use "$work_dir"; then | |
| SIZE_BEFORE=$(du -sh "$turbo_dir" 2>/dev/null | awk '{print $1}') | |
| rm -rf "$turbo_dir" 2>/dev/null && echo " β Cleaned $SIZE_BEFORE .turbo: $turbo_dir" || true | |
| fi | |
| done | |
| echo "" | |
| echo "π Disk usage after emergency cleanup:" | |
| DISK_USAGE_AFTER=$(df / | tail -1 | awk '{print $5}' | sed 's/%//') | |
| DISK_AVAIL_AFTER=$(df -h / | tail -1 | awk '{print $4}') | |
| echo "πΎ Disk usage: ${DISK_USAGE_AFTER}% (${DISK_AVAIL_AFTER} available)" | |
| FREED=$((DISK_USAGE - DISK_USAGE_AFTER)) | |
| if [ "$FREED" -gt 0 ]; then | |
| echo "β Freed: ${FREED}% disk space" | |
| else | |
| echo "β οΈ Freed: 0% disk space (no files met age threshold)" | |
| fi | |
| # Show what's taking up space | |
| echo "" | |
| echo "π Top disk usage on runner:" | |
| du -sh "$HOME_DIR"/actions-runner*/_work/* 2>/dev/null | sort -rh | head -5 || true | |
| # Decide whether to fail or continue | |
| if [ "$DISK_USAGE_AFTER" -ge 95 ]; then | |
| if [ "$FREED" -gt 0 ]; then | |
| echo "β οΈ WARNING: Still at ${DISK_USAGE_AFTER}% after cleanup, but freed ${FREED}%" | |
| echo " Attempting to proceed - job may fail if more space is needed" | |
| else | |
| echo "β CRITICAL: Still at ${DISK_USAGE_AFTER}% after cleanup and freed 0%" | |
| echo " All work directories are either:" | |
| echo " - Currently in use by active jobs" | |
| echo " - Created within the last $AGE_THRESHOLD day(s)" | |
| echo "" | |
| echo "π Diagnosis - Active work directories:" | |
| find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d 2>/dev/null | while read work_dir; do | |
| MTIME=$(stat -f %m "$work_dir" 2>/dev/null || stat -c %Y "$work_dir" 2>/dev/null || echo "0") | |
| AGE_DAYS=$(( ($(date +%s) - MTIME) / 86400 )) | |
| SIZE=$(du -sh "$work_dir" 2>/dev/null | awk '{print $1}') | |
| echo " - $work_dir: $SIZE, age: ${AGE_DAYS} days" | |
| done | |
| echo "" | |
| echo " Manual intervention required on runner $(hostname)" | |
| exit 1 | |
| fi | |
| elif [ "$DISK_USAGE_AFTER" -ge 90 ]; then | |
| echo "β οΈ WARNING: Still at ${DISK_USAGE_AFTER}% after cleanup" | |
| echo " Job will proceed but may fail due to space" | |
| else | |
| echo "β Cleanup successful - proceeding with tests" | |
| fi | |
| - name: Calculate unique port for this workflow run | |
| run: | | |
| # Dynamic port allocation for parallel execution | |
| BASE_PORT=$((4000 + (${{ github.run_id }} % 1000) * 5)) | |
| # Add matrix offset to separate concurrent jobs within same PR | |
| if [[ "${{ matrix.tag }}" == *"set-1"* ]]; then | |
| MATRIX_OFFSET=0 | |
| elif [[ "${{ matrix.tag }}" == *"set-2"* ]]; then | |
| MATRIX_OFFSET=1 | |
| else | |
| MATRIX_OFFSET=2 | |
| fi | |
| WEBPACK_PORT=$((BASE_PORT + MATRIX_OFFSET)) | |
| PORT_INFO_DIR="/tmp/gha-ports" | |
| mkdir -p "$PORT_INFO_DIR" | |
| echo "π Calculated port ${WEBPACK_PORT} for ${{ matrix.tag }} (run_id: ${{ github.run_id }})" | |
| # SAFE port conflict resolution - only clean orphaned processes | |
| if lsof -ti:${WEBPACK_PORT} > /dev/null 2>&1; then | |
| echo "β οΈ Port ${WEBPACK_PORT} is currently in use - checking ownership..." | |
| PORT_OWNER_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" | |
| if [ -f "$PORT_OWNER_FILE" ]; then | |
| OWNER_RUN_ID=$(cat "$PORT_OWNER_FILE") | |
| FILE_AGE=$(($(date +%s) - $(stat -f %m "$PORT_OWNER_FILE" 2>/dev/null || stat -c %Y "$PORT_OWNER_FILE" 2>/dev/null))) | |
| # Only kill if the owning run is OLD (>30 minutes = likely completed/stuck) | |
| if [ "$FILE_AGE" -gt 1800 ]; then | |
| echo "π§Ή Port owned by old run_id $OWNER_RUN_ID (${FILE_AGE}s old) - cleaning up..." | |
| PORT_PID=$(lsof -ti:${WEBPACK_PORT} 2>/dev/null | head -1) | |
| if [ -n "$PORT_PID" ]; then | |
| pkill -P "$PORT_PID" 2>/dev/null || true | |
| kill -9 "$PORT_PID" 2>/dev/null || true | |
| sleep 2 | |
| if lsof -ti:${WEBPACK_PORT} > /dev/null 2>&1; then | |
| echo "β Failed to clean orphaned process - port still in use" | |
| echo " This may require manual intervention on the runner" | |
| exit 1 | |
| else | |
| echo "β Successfully cleaned orphaned process" | |
| fi | |
| fi | |
| else | |
| echo "β οΈ Port owned by recent run_id $OWNER_RUN_ID (${FILE_AGE}s old)" | |
| echo " This is likely an active parallel PR test - DO NOT KILL" | |
| echo "β Port conflict detected - please retry workflow in a few minutes" | |
| exit 1 | |
| fi | |
| else | |
| echo "β οΈ Port in use but no owner info found - checking process age..." | |
| PORT_PID=$(lsof -ti:${WEBPACK_PORT} 2>/dev/null | head -1) | |
| if [ -n "$PORT_PID" ]; then | |
| # Check process start time (macOS: -o etime, Linux: -o etimes) | |
| PROCESS_AGE=$(ps -o etimes= -p "$PORT_PID" 2>/dev/null || echo "unknown") | |
| if [ "$PROCESS_AGE" != "unknown" ] && [ "$PROCESS_AGE" -gt 1800 ]; then | |
| echo "π§Ή Orphaned process (${PROCESS_AGE}s old) - cleaning up..." | |
| pkill -P "$PORT_PID" 2>/dev/null || true | |
| kill -9 "$PORT_PID" 2>/dev/null || true | |
| sleep 2 | |
| else | |
| echo "β Port in use by recent process - may be parallel PR test" | |
| echo " Please retry workflow in a few minutes" | |
| exit 1 | |
| fi | |
| fi | |
| fi | |
| else | |
| echo "β Port ${WEBPACK_PORT} is available" | |
| fi | |
| # Store port info with run_id for cleanup tracking | |
| echo "${{ github.run_id }}" > "$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" | |
| echo "WEBPACK_PORT=$WEBPACK_PORT" >> $GITHUB_ENV | |
| echo "PORT_INFO_FILE=$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" >> $GITHUB_ENV | |
| - name: Cleanup old test artifacts | |
| continue-on-error: true | |
| run: | | |
| echo "π§Ή Cleaning up old test artifacts (>2 days)..." | |
| # Clean old Cypress results/screenshots/videos from workspace (>2 days old) | |
| # Note: find's '-mtime +2' matches files modified strictly more than 2 full | |
| # 24-hour periods ago (i.e. older than ~48-72h), not "2 calendar days". | |
| find ${{ github.workspace }}/packages/cypress/results -type f -mtime +2 -delete 2>/dev/null || true | |
| find ${{ github.workspace }}/packages/cypress/screenshots -type f -mtime +2 -delete 2>/dev/null || true | |
| find ${{ github.workspace }}/packages/cypress/videos -type f -mtime +2 -delete 2>/dev/null || true | |
| # Clean old webpack logs (>2 days old) | |
| find /tmp -name "webpack_*.log" -type f -mtime +2 -delete 2>/dev/null || true | |
| # Note: ~/.cache/Cypress is managed by actions/cache and should not be cleaned here | |
| # to avoid removing the Cypress binary on shared self-hosted runners | |
| # Clean old temporary yaml files (>2 days old) | |
| find /tmp -name "cypress-yaml-*.yaml" -type f -mtime +2 -delete 2>/dev/null || true | |
| # Clean empty directories (must run after the file deletions above) | |
| find ${{ github.workspace }}/packages/cypress/results -type d -empty -delete 2>/dev/null || true | |
| find ${{ github.workspace }}/packages/cypress/screenshots -type d -empty -delete 2>/dev/null || true | |
| find ${{ github.workspace }}/packages/cypress/videos -type d -empty -delete 2>/dev/null || true | |
| echo "β Cleanup complete (non-critical, continued on any errors)" | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ github.event.workflow_run.head_sha || github.sha }} | |
| - name: Restore npm dependencies cache | |
| uses: actions/cache/restore@v4 | |
| id: npm-cache | |
| with: | |
| path: | | |
| ~/.cache/Cypress | |
| **/node_modules | |
| key: ${{ runner.os }}-${{ env.NODE_VERSION }}-all-modules-${{ hashFiles('**/package-lock.json') }} | |
| restore-keys: | | |
| ${{ runner.os }}-${{ env.NODE_VERSION }}-all-modules- | |
| - name: Setup Node.js ${{ env.NODE_VERSION }} | |
| if: steps.npm-cache.outputs.cache-hit != 'true' | |
| uses: actions/setup-node@v4.3.0 | |
| with: | |
| node-version: ${{ env.NODE_VERSION }} | |
| - name: Install dependencies | |
| if: steps.npm-cache.outputs.cache-hit != 'true' | |
| run: npm ci | |
| - name: Restore turbo build artifacts cache | |
| uses: actions/cache/restore@v4 | |
| with: | |
| path: ${{ github.workspace }}/.turbo | |
| key: ${{ runner.os }}-${{ env.NODE_VERSION }}-turbo-${{ github.sha }}-e2e | |
| restore-keys: | | |
| ${{ runner.os }}-${{ env.NODE_VERSION }}-turbo- | |
| - name: Restore OpenShift CLI tarball cache | |
| uses: actions/cache/restore@v4 | |
| id: oc-cache | |
| with: | |
| path: ${{ runner.temp }}/oc.tar.gz | |
| key: ${{ runner.os }}-oc-tarball-${{ env.OC_VERSION || '4.15.0' }} | |
| - name: Download test configuration | |
| run: | | |
| echo "π§ Downloading test configuration from GitLab..." | |
| curl -fk -H "Authorization: Bearer ${{ secrets.GITLAB_TOKEN }}" \ | |
| "${{ secrets.GITLAB_TEST_VARS_URL }}" \ | |
| -o ${{ github.workspace }}/packages/cypress/test-variables.yml | |
| echo "β Downloaded test configuration" | |
| - name: Login to OpenShift cluster | |
| env: | |
| OC_SERVER_PRIMARY: ${{ secrets.OC_SERVER_PRIMARY }} | |
| OC_SERVER_SECONDARY: ${{ secrets.OC_SERVER }} | |
| run: | | |
| TEST_VARS_FILE="${{ github.workspace }}/packages/cypress/test-variables.yml" | |
| # Extract credentials based on test type | |
| if [[ "${{ matrix.tag }}" == "@NonAdmin" ]]; then | |
| echo "π Using non-admin credentials (TEST_USER_3) for @NonAdmin tests" | |
| OC_USERNAME=$(grep -A 10 "^TEST_USER_3:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ') | |
| OC_PASSWORD=$(grep -A 10 "^TEST_USER_3:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ') | |
| else | |
| OC_USERNAME=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ') | |
| OC_PASSWORD=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ') | |
| fi | |
| echo "::add-mask::$OC_PASSWORD" | |
| echo "::add-mask::$OC_USERNAME" | |
| # Look up server URL based on selected cluster (avoids GitHub secret masking in outputs) | |
| if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then | |
| CLUSTER_URL="$OC_SERVER_PRIMARY" | |
| elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then | |
| CLUSTER_URL="$OC_SERVER_SECONDARY" | |
| else | |
| echo "β Unknown or empty CLUSTER_NAME: '$CLUSTER_NAME'" >&2 | |
| echo "Expected 'dash-e2e-int' or 'dash-e2e'" >&2 | |
| exit 1 | |
| fi | |
| if [ -z "$CLUSTER_URL" ]; then | |
| echo "β CLUSTER_URL is empty for cluster '$CLUSTER_NAME'" >&2 | |
| echo "Check that OC_SERVER_PRIMARY/OC_SERVER secrets are configured" >&2 | |
| exit 1 | |
| fi | |
| echo "Logging in to OpenShift cluster ($CLUSTER_NAME)..." | |
| echo "Logging in to OpenShift cluster ($CLUSTER_NAME)..." | |
| # Test the command directly in the 'if': with the default 'bash -e' step | |
| # shell, checking $? after a failed command is dead code because the step | |
| # aborts before the check runs, so the failure message was never printed. | |
| if oc login -u "$OC_USERNAME" -p "$OC_PASSWORD" --server="$CLUSTER_URL" --insecure-skip-tls-verify > /dev/null 2>&1; then | |
| echo "β Successfully logged in to $CLUSTER_NAME" | |
| else | |
| echo "β Failed to login to OpenShift cluster" | |
| exit 1 | |
| fi | |
| echo "KUBECONFIG=$HOME/.kube/config" >> $GITHUB_ENV | |
| - name: Override namespace values | |
| env: | |
| DASHBOARD_URL_PRIMARY: ${{ secrets.ODH_DASHBOARD_URL_PRIMARY }} | |
| DASHBOARD_URL_SECONDARY: ${{ secrets.ODH_DASHBOARD_URL }} | |
| ODH_NAMESPACES: ${{ secrets.ODH_NAMESPACES }} | |
| run: | | |
| TEST_VARS_FILE="${{ github.workspace }}/packages/cypress/test-variables.yml" | |
| # Look up dashboard URL based on selected cluster (secrets passed as step-level env for security) | |
| if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then | |
| DASHBOARD_URL="$DASHBOARD_URL_PRIMARY" | |
| elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then | |
| DASHBOARD_URL="$DASHBOARD_URL_SECONDARY" | |
| else | |
| echo "β Unknown or empty CLUSTER_NAME: '$CLUSTER_NAME'" >&2 | |
| echo "Expected 'dash-e2e-int' or 'dash-e2e'" >&2 | |
| exit 1 | |
| fi | |
| if [ -z "$DASHBOARD_URL" ]; then | |
| echo "β DASHBOARD_URL is empty for cluster '$CLUSTER_NAME'" >&2 | |
| echo "Check that ODH_DASHBOARD_URL_PRIMARY/ODH_DASHBOARD_URL secrets are configured" >&2 | |
| exit 1 | |
| fi | |
| # Mask dashboard URL to prevent exposure in logs | |
| echo "::add-mask::$DASHBOARD_URL" | |
| # Set dashboard URL for selected cluster | |
| sed -i "s|^ODH_DASHBOARD_URL:.*|ODH_DASHBOARD_URL: $DASHBOARD_URL|" "$TEST_VARS_FILE" | |
| # Export dashboard host (without protocol) for webpack ODH_DASHBOARD_HOST | |
| DASHBOARD_HOST=$(echo "$DASHBOARD_URL" | sed -E 's|https?://||' | sed 's|/.*||') | |
| echo "::add-mask::$DASHBOARD_HOST" | |
| echo "DASHBOARD_HOST=$DASHBOARD_HOST" >> $GITHUB_ENV | |
| if [ -z "$ODH_NAMESPACES" ]; then | |
| echo "β οΈ ODH_NAMESPACES secret not set, skipping namespace override" | |
| exit 0 | |
| fi | |
| echo "::add-mask::$ODH_NAMESPACES" | |
| echo "π Overriding namespaces with ODH values..." | |
| IFS=',' read -r OPERATOR_NS APPLICATIONS_NS NOTEBOOKS_NS OPERATOR_NAME PROJECT_NAME <<< "$ODH_NAMESPACES" | |
| sed -i "s|^PRODUCT:.*|PRODUCT: ODH|" "$TEST_VARS_FILE" | |
| sed -i "s|^OPERATOR_NAMESPACE:.*|OPERATOR_NAMESPACE: $OPERATOR_NS|" "$TEST_VARS_FILE" | |
| sed -i "s|^APPLICATIONS_NAMESPACE:.*|APPLICATIONS_NAMESPACE: $APPLICATIONS_NS|" "$TEST_VARS_FILE" | |
| sed -i "s|^MONITORING_NAMESPACE:.*|MONITORING_NAMESPACE: $APPLICATIONS_NS|" "$TEST_VARS_FILE" | |
| sed -i "s|^NOTEBOOKS_NAMESPACE:.*|NOTEBOOKS_NAMESPACE: $NOTEBOOKS_NS|" "$TEST_VARS_FILE" | |
| sed -i "s|^OPERATOR_NAME:.*|OPERATOR_NAME: $OPERATOR_NAME|" "$TEST_VARS_FILE" | |
| sed -i "s|^ODH_DASHBOARD_PROJECT_NAME:.*|ODH_DASHBOARD_PROJECT_NAME: $PROJECT_NAME|" "$TEST_VARS_FILE" | |
| echo "β Namespace configuration updated" | |
| - name: Set test configuration | |
| run: | | |
| echo "CY_TEST_CONFIG=${{ github.workspace }}/packages/cypress/test-variables.yml" >> $GITHUB_ENV | |
| - name: Start Cypress Server | |
| run: | | |
| echo "π§Ή Cleaning up port ${WEBPACK_PORT}..." | |
| PORT_INFO_DIR="/tmp/gha-ports" | |
| PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" | |
| CURRENT_RUN_ID="${{ github.run_id }}" | |
| # Check if port is in use | |
| if lsof -i:${WEBPACK_PORT} > /dev/null 2>&1; then | |
| # Check if there's a run_id file for this port | |
| if [ -f "$PORT_INFO_FILE" ]; then | |
| PORT_OWNER_RUN_ID=$(cat "$PORT_INFO_FILE") | |
| if [ "$PORT_OWNER_RUN_ID" != "$CURRENT_RUN_ID" ]; then | |
| echo "β οΈ Port ${WEBPACK_PORT} is owned by different run_id: $PORT_OWNER_RUN_ID" | |
| echo "β οΈ This port is in use by another workflow run - will not kill it" | |
| # Try to find an alternative port | |
| # GNU seq is 'seq FIRST INCREMENT LAST'. The previous order (FIRST LAST STEP) | |
| # made FIRST > LAST with a positive increment, so seq emitted nothing and the | |
| # alternative-port search never ran. Scan WP+5..WP+50 in steps of 5. | |
| for alt_port in $(seq $((WEBPACK_PORT + 5)) 5 $((WEBPACK_PORT + 50))); do | |
| if ! lsof -i:${alt_port} > /dev/null 2>&1; then | |
| WEBPACK_PORT=$alt_port | |
| PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" | |
| echo "β Found alternative port: ${WEBPACK_PORT}" | |
| break | |
| fi | |
| done | |
| else | |
| echo "β Port ${WEBPACK_PORT} is owned by this run - safe to clean up" | |
| fi | |
| else | |
| # No run_id file - check if process is from a recent GitHub Actions run | |
| PORT_PID=$(lsof -ti:${WEBPACK_PORT} 2>/dev/null | head -1) | |
| if [ -n "$PORT_PID" ]; then | |
| # Check if process is from a GitHub Actions workflow | |
| if ps -p "$PORT_PID" -o command= 2>/dev/null | grep -q "webpack.*serve\|node.*40[0-9][0-9]"; then | |
| echo "β οΈ Port ${WEBPACK_PORT} in use by potential GHA process (PID: $PORT_PID)" | |
| echo "β οΈ Being cautious - will not kill without run_id confirmation" | |
| # Find alternative port | |
| # GNU seq is 'seq FIRST INCREMENT LAST'. The previous order (FIRST LAST STEP) | |
| # made FIRST > LAST with a positive increment, so seq emitted nothing and the | |
| # alternative-port search never ran. Scan WP+5..WP+50 in steps of 5. | |
| for alt_port in $(seq $((WEBPACK_PORT + 5)) 5 $((WEBPACK_PORT + 50))); do | |
| if ! lsof -i:${alt_port} > /dev/null 2>&1; then | |
| WEBPACK_PORT=$alt_port | |
| PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" | |
| echo "β Found alternative port: ${WEBPACK_PORT}" | |
| break | |
| fi | |
| done | |
| else | |
| echo "β οΈ Port ${WEBPACK_PORT} in use by non-GHA process - cleaning up" | |
| kill -9 "$PORT_PID" 2>/dev/null || true | |
| fi | |
| fi | |
| fi | |
| fi | |
| # Verify port is free with retry logic | |
| RETRY_COUNT=0 | |
| while lsof -i:${WEBPACK_PORT} > /dev/null 2>&1; do | |
| RETRY_COUNT=$((RETRY_COUNT + 1)) | |
| if [ $RETRY_COUNT -gt 10 ]; then | |
| echo "β Port ${WEBPACK_PORT} still in use after cleanup!" | |
| lsof -i:${WEBPACK_PORT} | |
| exit 1 | |
| fi | |
| echo "β³ Retrying cleanup... (attempt $RETRY_COUNT/10)" | |
| sleep 2 | |
| done | |
| # Claim the port with our run_id | |
| mkdir -p "$PORT_INFO_DIR" | |
| echo "$CURRENT_RUN_ID" > "$PORT_INFO_FILE" | |
| echo "WEBPACK_PORT=$WEBPACK_PORT" >> $GITHUB_ENV | |
| echo "PORT_INFO_FILE=$PORT_INFO_FILE" >> $GITHUB_ENV | |
| echo "β Port ${WEBPACK_PORT} is free and claimed by run_id: $CURRENT_RUN_ID" | |
| echo "π Starting webpack dev server on port ${WEBPACK_PORT} ($CLUSTER_NAME)..." | |
| # Start webpack with explicit dashboard host (ensures correct proxy target for all tests) | |
| cd frontend && env ODH_DASHBOARD_HOST=${DASHBOARD_HOST} ODH_PORT=${WEBPACK_PORT} npm run start:dev:ext > /tmp/webpack_${WEBPACK_PORT}.log 2>&1 & | |
| SERVER_PID=$! | |
| echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV | |
| echo "$SERVER_PID" > "$PORT_INFO_DIR/port-${WEBPACK_PORT}.pid" | |
| # Give server time to initialize | |
| sleep 20 | |
| # Show filtered webpack status (hide sensitive cluster URLs) | |
| if [ -f /tmp/webpack_${WEBPACK_PORT}.log ]; then | |
| tail -20 /tmp/webpack_${WEBPACK_PORT}.log | \ | |
| grep -v "Dashboard host:" | \ | |
| grep -v "Proxy created:" | \ | |
| grep -v "Logged in as user:" | \ | |
| grep -v "Using project:" || true | |
| fi | |
| - name: Install make for BFF builds | |
| if: needs.get-test-tags.outputs.bff_packages != '[]' | |
| run: | | |
| if ! command -v make &> /dev/null; then | |
| echo "π¦ Installing make..." | |
| sudo dnf install -y make 2>/dev/null || sudo yum install -y make 2>/dev/null || sudo apt-get install -y make 2>/dev/null | |
| else | |
| echo "β make already available: $(make --version | head -1)" | |
| fi | |
| - name: Setup Go for BFF builds | |
| if: needs.get-test-tags.outputs.bff_packages != '[]' | |
| uses: actions/setup-go@v5 | |
| with: | |
| go-version: '1.24' | |
| cache: false | |
| - name: Start BFF Services | |
| if: needs.get-test-tags.outputs.bff_packages != '[]' | |
| env: | |
| BFF_PACKAGES: ${{ needs.get-test-tags.outputs.bff_packages }} | |
| run: | | |
| echo "π Starting BFF services for changed packages..." | |
| BFF_INFO_DIR="/tmp/gha-bff/${{ github.run_id }}" | |
| mkdir -p "$BFF_INFO_DIR" | |
| # Allowlists to prevent command injection from PR code (CWE-94) | |
| ALLOWED_COMMANDS=("make dev-bff-e2e-mock" "make dev-bff-e2e-cluster" "make -C upstream dev-bff-e2e-mock" "make -C upstream dev-bff-e2e-cluster") | |
| ALLOWED_DIRS=("automl" "autorag" "eval-hub" "gen-ai" "maas" "mlflow" "model-registry") | |
| # Parse BFF packages JSON and start each one | |
| echo "$BFF_PACKAGES" | jq -c '.[]' | while read -r bff_config; do | |
| BFF_NAME=$(echo "$bff_config" | jq -r '.name') | |
| BFF_DIR=$(echo "$bff_config" | jq -r '.dir') | |
| BFF_PORT=$(echo "$bff_config" | jq -r '.port') | |
| BFF_HEALTH=$(echo "$bff_config" | jq -r '.healthEndpoint') | |
| BFF_CMD=$(echo "$bff_config" | jq -r '.startCommand') | |
| echo "" | |
| echo "π¦ Starting BFF for $BFF_NAME on port $BFF_PORT..." | |
| # Validate BFF_CMD against allowlist (prevent command injection) | |
| CMD_VALID=false | |
| for allowed in "${ALLOWED_COMMANDS[@]}"; do | |
| if [ "$BFF_CMD" = "$allowed" ]; then | |
| CMD_VALID=true | |
| break | |
| fi | |
| done | |
| if [ "$CMD_VALID" = "false" ]; then | |
| echo " β Rejected untrusted startCommand: '$BFF_CMD'" | |
| echo " Allowed commands: ${ALLOWED_COMMANDS[*]}" | |
| exit 1 | |
| fi | |
| # Validate BFF_DIR against allowlist (prevent path traversal) | |
| DIR_VALID=false | |
| for allowed in "${ALLOWED_DIRS[@]}"; do | |
| if [ "$BFF_DIR" = "$allowed" ]; then | |
| DIR_VALID=true | |
| break | |
| fi | |
| done | |
| if [ "$DIR_VALID" = "false" ]; then | |
| echo " β Rejected untrusted package directory: '$BFF_DIR'" | |
| echo " Allowed directories: ${ALLOWED_DIRS[*]}" | |
| exit 1 | |
| fi | |
| # Validate port is numeric and health endpoint is a safe path | |
| if ! echo "$BFF_PORT" | grep -qE '^[0-9]+$'; then | |
| echo " β Invalid BFF port: '$BFF_PORT' (must be numeric)" | |
| exit 1 | |
| fi | |
| if ! echo "$BFF_HEALTH" | grep -qE '^/[a-zA-Z0-9/_-]+$'; then | |
| echo " β Invalid health endpoint: '$BFF_HEALTH' (must be a simple path)" | |
| exit 1 | |
| fi | |
| # Reuse if already healthy on this port (avoid parallel startup race) | |
| if curl -sf "http://localhost:$BFF_PORT$BFF_HEALTH" > /dev/null 2>&1; then | |
| echo " β»οΈ BFF $BFF_NAME already healthy on $BFF_PORT, reusing existing process" | |
| continue | |
| fi | |
| # Fail fast if port is occupied but unhealthy | |
| if lsof -ti:"$BFF_PORT" > /dev/null 2>&1; then | |
| echo " β Port $BFF_PORT already in use and health check failed" | |
| lsof -i:"$BFF_PORT" || true | |
| exit 1 | |
| fi | |
| # Start BFF in background with configurable port | |
| cd "packages/$BFF_DIR" | |
| E2E_BFF_PORT=$BFF_PORT $BFF_CMD > "/tmp/bff_${BFF_DIR}.log" 2>&1 & | |
| BFF_PID=$! | |
| cd - > /dev/null | |
| # Save PID for cleanup | |
| echo "$BFF_PID" >> "$BFF_INFO_DIR/pids.txt" | |
| echo "$BFF_PORT" >> "$BFF_INFO_DIR/ports.txt" | |
| echo " π PID: $BFF_PID" | |
| # Wait for health check (max 60 seconds) | |
| echo " β³ Waiting for health at localhost:$BFF_PORT$BFF_HEALTH..." | |
| for i in {1..30}; do | |
| if curl -sf "http://localhost:$BFF_PORT$BFF_HEALTH" > /dev/null 2>&1; then | |
| echo " β BFF $BFF_NAME is healthy!" | |
| break | |
| fi | |
| if [ $i -eq 30 ]; then | |
| echo " β BFF $BFF_NAME failed health check after 60 seconds" | |
| echo " π BFF logs:" | |
| tail -50 "/tmp/bff_${BFF_DIR}.log" || true | |
| exit 1 | |
| fi | |
| sleep 2 | |
| done | |
| done | |
| echo "" | |
| echo "β All BFF services started successfully" | |
| - name: Wait for Server Ready | |
| run: | | |
| echo "β³ Waiting for localhost:${WEBPACK_PORT} to be ready..." | |
| npx wait-on http://localhost:${WEBPACK_PORT} --timeout 120000 | |
| # Verify the application loads with dashboard content | |
| for i in {1..10}; do | |
| if curl -s -f http://localhost:${WEBPACK_PORT}/ | grep -q "Data Science Projects\|ODH\|Open Data Hub\|Dashboard"; then | |
| echo "β Server is ready and application is loaded!" | |
| break | |
| fi | |
| if [ $i -lt 10 ]; then | |
| echo "β³ Waiting for application to load... (attempt $i/10)" | |
| sleep 8 | |
| else | |
| echo "β Application failed to load properly after 10 attempts" | |
| exit 1 | |
| fi | |
| done | |
| - name: Run E2E Tests | |
| env: | |
| OC_SERVER_PRIMARY: ${{ secrets.OC_SERVER_PRIMARY }} | |
| OC_SERVER_SECONDARY: ${{ secrets.OC_SERVER }} | |
| run: | | |
| cd frontend | |
| echo "π§ͺ Running E2E tests for ${{ matrix.tag }}..." | |
| echo "π Running tests against live dashboard on port ${WEBPACK_PORT}" | |
| echo "π Tag source: ${{ needs.get-test-tags.outputs.source }}" | |
| export CY_RESULTS_DIR="${{ github.workspace }}/packages/cypress/results/${{ matrix.tag }}" | |
| mkdir -p "$CY_RESULTS_DIR" | |
| # Determine OC_SERVER based on cluster (for oc user switching in tests) | |
| if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then | |
| OC_SERVER="$OC_SERVER_PRIMARY" | |
| elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then | |
| OC_SERVER="$OC_SERVER_SECONDARY" | |
| else | |
| echo "β οΈ Unknown cluster: $CLUSTER_NAME, defaulting to OC_SERVER_PRIMARY" | |
| OC_SERVER="$OC_SERVER_PRIMARY" | |
| fi | |
| # Set IS_NON_ADMIN_RUN flag for non-admin tests to skip admin-only setup hooks | |
| EXTRA_CYPRESS_ENV="OC_SERVER=${OC_SERVER}," | |
| if [[ "${{ matrix.tag }}" == "@NonAdmin" ]]; then | |
| EXTRA_CYPRESS_ENV="${EXTRA_CYPRESS_ENV}IS_NON_ADMIN_RUN=true," | |
| echo "π Running in non-admin mode - admin setup hooks will be skipped" | |
| fi | |
| BASE_URL=http://localhost:${WEBPACK_PORT} npm run cypress:run:chrome -- \ | |
| --env ${EXTRA_CYPRESS_ENV}skipTags="@Bug @Maintain @NonConcurrent",grepTags="${{ matrix.tag }}",grepFilterSpecs=true \ | |
| --config video=true,screenshotsFolder="$CY_RESULTS_DIR/screenshots",videosFolder="$CY_RESULTS_DIR/videos" | |
| - name: Upload test results | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: e2e-results-${{ matrix.tag }} | |
| path: | | |
| packages/cypress/results/ | |
| packages/cypress/videos/ | |
| packages/cypress/screenshots/ | |
| retention-days: 7 | |
| - name: Log test completion | |
| if: always() | |
| run: | | |
| echo "π E2E Test completed!" | |
| echo "Status: ${{ job.status }}" | |
| echo "Test Tag: ${{ matrix.tag }}" | |
| echo "Cluster: $CLUSTER_NAME" | |
| echo "Run ID: ${{ github.run_id }}" | |
| # --------------------------------------------------------------------------- | |
| # Final Status - Update PR with test results | |
| # --------------------------------------------------------------------------- | |
| set-final-status: | |
| needs: [select-cluster, e2e-tests] | |
| if: >- | |
| always() && | |
| (github.event_name == 'workflow_dispatch' || | |
| (github.event.workflow_run.event == 'pull_request' && | |
| github.event.workflow_run.conclusion == 'success')) | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Set final status | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| E2E_RESULT="${{ needs.e2e-tests.result }}" | |
| CLUSTER_RESULT="${{ needs.select-cluster.result }}" | |
| CLUSTER="${{ needs.select-cluster.outputs.cluster_name }}" | |
| echo "π Job results: select-cluster=$CLUSTER_RESULT, e2e-tests=$E2E_RESULT" | |
| # Handle cluster selection failure first | |
| if [[ "$CLUSTER_RESULT" == "failure" ]]; then | |
| STATE="failure" | |
| DESC="Cluster health check failed - no healthy cluster available" | |
| elif [[ "$E2E_RESULT" == "success" ]]; then | |
| STATE="success" | |
| DESC="All tests passed on $CLUSTER" | |
| elif [[ "$E2E_RESULT" == "cancelled" ]]; then | |
| STATE="error" | |
| DESC="Tests cancelled" | |
| elif [[ "$E2E_RESULT" == "skipped" && "$CLUSTER_RESULT" == "skipped" ]]; then | |
| # Both skipped means test.yml failed - don't post status | |
| echo "Both jobs skipped (test.yml likely failed) - not posting status" | |
| exit 0 | |
| elif [[ "$E2E_RESULT" == "skipped" ]]; then | |
| STATE="failure" | |
| DESC="Tests skipped due to upstream failure" | |
| else | |
| STATE="failure" | |
| DESC="Tests failed on ${CLUSTER:-unknown cluster}" | |
| fi | |
| echo "π Posting status: state=$STATE, description=$DESC" | |
| gh api repos/${{ github.repository }}/statuses/${{ github.event.workflow_run.head_sha || github.sha }} \ | |
| -f state="$STATE" \ | |
| -f context="Cypress E2E Tests" \ | |
| -f description="$DESC" \ | |
| -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # --------------------------------------------------------------------------- | |
| # Cleanup - Stop all servers started by this workflow run | |
| # --------------------------------------------------------------------------- | |
| cleanup-server: | |
| needs: [e2e-tests] | |
| runs-on: self-hosted | |
| if: ${{ always() && (github.event_name == 'workflow_dispatch' || (github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success')) }} | |
| steps: | |
| - name: Stop BFF Services | |
| run: | | |
| echo "π Stopping BFF services for run_id: ${{ github.run_id }}..." | |
| BFF_INFO_DIR="/tmp/gha-bff/${{ github.run_id }}" | |
| BFF_KILLED_COUNT=0 | |
| # Check if this workflow started any BFFs | |
| if [ -d "$BFF_INFO_DIR" ]; then | |
| # Kill all BFF processes | |
| if [ -f "$BFF_INFO_DIR/pids.txt" ]; then | |
| while read -r pid; do | |
| if ps -p "$pid" > /dev/null 2>&1; then | |
| echo " π Killing BFF process $pid" | |
| pkill -P "$pid" 2>/dev/null || true | |
| kill "$pid" 2>/dev/null || true | |
| BFF_KILLED_COUNT=$((BFF_KILLED_COUNT + 1)) | |
| fi | |
| done < "$BFF_INFO_DIR/pids.txt" | |
| fi | |
| # Clean up BFF ports | |
| if [ -f "$BFF_INFO_DIR/ports.txt" ]; then | |
| while read -r port; do | |
| BFF_PID=$(lsof -ti:${port} 2>/dev/null | head -1) | |
| if [ -n "$BFF_PID" ]; then | |
| echo " π Killing process on BFF port $port (PID: $BFF_PID)" | |
| pkill -P "$BFF_PID" 2>/dev/null || true | |
| kill "$BFF_PID" 2>/dev/null || true | |
| fi | |
| done < "$BFF_INFO_DIR/ports.txt" | |
| fi | |
| # Clean up info files | |
| rm -rf "$BFF_INFO_DIR" | |
| fi | |
| if [ $BFF_KILLED_COUNT -eq 0 ]; then | |
| echo "β No BFF processes found for run_id: ${{ github.run_id }}" | |
| else | |
| echo "β Cleaned up $BFF_KILLED_COUNT BFF process(es) for run_id: ${{ github.run_id }}" | |
| fi | |
| - name: Stop Cypress Servers | |
| run: | | |
| echo "π Stopping webpack dev server for run_id: ${{ github.run_id }}..." | |
| PORT_INFO_DIR="/tmp/gha-ports" | |
| CURRENT_RUN_ID="${{ github.run_id }}" | |
| KILLED_COUNT=0 | |
| # Find all port files owned by this run_id | |
| if [ -d "$PORT_INFO_DIR" ]; then | |
| for port_file in "$PORT_INFO_DIR"/port-*.run_id; do | |
| if [ -f "$port_file" ]; then | |
| PORT_OWNER_RUN_ID=$(cat "$port_file") | |
| if [ "$PORT_OWNER_RUN_ID" = "$CURRENT_RUN_ID" ]; then | |
| # Extract port number from filename | |
| PORT=$(basename "$port_file" | sed 's/port-\([0-9]*\)\.run_id/\1/') | |
| PID_FILE="$PORT_INFO_DIR/port-${PORT}.pid" | |
| # Kill process if PID file exists | |
| if [ -f "$PID_FILE" ]; then | |
| PID=$(cat "$PID_FILE") | |
| if ps -p "$PID" > /dev/null 2>&1; then | |
| echo "π Killing process $PID on port $PORT (run_id: $CURRENT_RUN_ID)" | |
| pkill -P "$PID" 2>/dev/null || true | |
| kill "$PID" 2>/dev/null || true | |
| KILLED_COUNT=$((KILLED_COUNT + 1)) | |
| fi | |
| fi | |
| # Also kill any process on this port (double-check) | |
| PORT_PID=$(lsof -ti:${PORT} 2>/dev/null | head -1) | |
| if [ -n "$PORT_PID" ]; then | |
| echo "π Killing process $PORT_PID on port $PORT" | |
| pkill -P "$PORT_PID" 2>/dev/null || true | |
| kill "$PORT_PID" 2>/dev/null || true | |
| fi | |
| # Clean up orphaned Chrome processes | |
| ALL_PORT_PIDS=$(lsof -ti:${PORT} 2>/dev/null || true) | |
| if [ -n "$ALL_PORT_PIDS" ]; then | |
| for port_pid in $ALL_PORT_PIDS; do | |
| if ps -p "$port_pid" -o comm= 2>/dev/null | grep -qE "chrome|chromium"; then | |
| echo "π Killing Chrome process $port_pid (using port $PORT)" | |
| pkill -P "$port_pid" 2>/dev/null || true | |
| kill "$port_pid" 2>/dev/null || true | |
| fi | |
| done | |
| fi | |
| # Clean up port info files | |
| rm -f "$port_file" "$PID_FILE" | |
| fi | |
| fi | |
| done | |
| fi | |
| # Clean up stale port files older than 24 hours | |
| find "$PORT_INFO_DIR" -name "*.run_id" -mtime +1 -delete 2>/dev/null || true | |
| find "$PORT_INFO_DIR" -name "*.pid" -mtime +1 -delete 2>/dev/null || true | |
| if [ $KILLED_COUNT -eq 0 ]; then | |
| echo "β No processes found for run_id: $CURRENT_RUN_ID" | |
| else | |
| echo "β Cleaned up $KILLED_COUNT process(es) for run_id: $CURRENT_RUN_ID" | |
| fi | |
| - name: Clean Work Directories and Build Artifacts | |
| run: | | |
| echo "π§Ή Starting comprehensive cleanup (SAFE for parallel jobs)..." | |
| RUNNER_USER=$(whoami) | |
| HOME_DIR=$(eval echo "~$RUNNER_USER") | |
| CLEANED_SPACE=0 | |
| echo "π Disk usage before cleanup:" | |
| df -h / | grep -v Filesystem | |
| # Helper function to check if directory is in use | |
| is_directory_in_use() { | |
| local dir="$1" | |
| # Check if any processes are using this directory | |
| # NOTE(review): 'lsof +D' recurses the whole tree and can be slow on large | |
| # work dirs; stderr is discarded, so a missing/failing lsof reads as "free" | |
| # (fail-open) - confirm that is acceptable before relying on it. | |
| if lsof +D "$dir" 2>/dev/null | grep -qE "node|npm|webpack|chrome|cypress"; then | |
| return 0 # In use | |
| fi | |
| return 1 # Not in use | |
| } | |
| # 1. Clean Go upstream build artifacts (only in old work dirs) | |
| echo "" | |
| echo "ποΈ Cleaning Go upstream builds (in work dirs >7 days, not in use)..." | |
| UPSTREAM_COUNT=0 | |
| # Feed the loop via process substitution rather than piping find into while: | |
| # a pipeline runs the loop in a subshell, so the counter updates | |
| # (UPSTREAM_COUNT, CLEANED_SPACE) were discarded and the summaries below - | |
| # and the final "Total space cleaned" report - always showed 0. | |
| while IFS= read -r work_dir; do | |
| if [ -d "$work_dir" ] && ! is_directory_in_use "$work_dir"; then | |
| UPSTREAM_DIRS=$(find "$work_dir" -type d -path "*/packages/*/upstream" 2>/dev/null || true) | |
| for dir in $UPSTREAM_DIRS; do | |
| if [ -d "$dir" ]; then | |
| SIZE=$(du -sm "$dir" 2>/dev/null | cut -f1) | |
| rm -rf "$dir" 2>/dev/null || true | |
| CLEANED_SPACE=$((CLEANED_SPACE + SIZE)) | |
| UPSTREAM_COUNT=$((UPSTREAM_COUNT + 1)) | |
| fi | |
| done | |
| fi | |
| done < <(find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d -mtime +7 2>/dev/null) | |
| echo " β Cleaned $UPSTREAM_COUNT upstream build directories" | |
| # 2. Clean old work directories (>7 days, not in use) | |
| echo "" | |
| echo "ποΈ Cleaning old work directories (>7 days, not in use)..." | |
| OLD_WORK_COUNT=0 | |
| SKIPPED_COUNT=0 | |
| # Same process-substitution pattern so OLD_WORK_COUNT/SKIPPED_COUNT survive. | |
| while IFS= read -r work_dir; do | |
| if [ -d "$work_dir" ]; then | |
| if is_directory_in_use "$work_dir"; then | |
| echo " βοΈ Skipped (in use): $(basename "$(dirname "$work_dir")")" | |
| SKIPPED_COUNT=$((SKIPPED_COUNT + 1)) | |
| else | |
| SIZE=$(du -sm "$work_dir" 2>/dev/null | cut -f1) | |
| rm -rf "$work_dir" 2>/dev/null || true | |
| CLEANED_SPACE=$((CLEANED_SPACE + SIZE)) | |
| OLD_WORK_COUNT=$((OLD_WORK_COUNT + 1)) | |
| echo " β Cleaned: $(basename "$(dirname "$work_dir")") (~${SIZE}MB)" | |
| fi | |
| fi | |
| done < <(find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d -mtime +7 2>/dev/null) | |
| echo " β Cleaned $OLD_WORK_COUNT old work directories" | |
| if [ "$SKIPPED_COUNT" -gt 0 ]; then | |
| echo " βοΈ Skipped $SKIPPED_COUNT directories (in use by parallel jobs)" | |
| fi | |
| # 3. Clean old runner diagnostic logs (>7 days) | |
| echo "" | |
| echo "ποΈ Cleaning old runner logs (>7 days)..." | |
| LOG_COUNT=$(find "$HOME_DIR"/actions-runner*/_diag -name "*.log" -mtime +7 -type f 2>/dev/null | wc -l) | |
| find "$HOME_DIR"/actions-runner*/_diag -name "*.log" -mtime +7 -delete 2>/dev/null || true | |
| echo " β Cleaned $LOG_COUNT old log files" | |
| # 4. Clean old Cypress artifacts (>7 days only - safe even if parallel jobs running) | |
| echo "" | |
| echo "ποΈ Cleaning old Cypress artifacts (>7 days)..." | |
| ARTIFACT_COUNT=0 | |
| for runner_dir in "$HOME_DIR"/actions-runner*; do | |
| # Screenshots | |
| if [ -d "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/screenshots" ]; then | |
| SCREENSHOTS=$(find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/screenshots" -type f -mtime +7 2>/dev/null | wc -l) | |
| find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/screenshots" -type f -mtime +7 -delete 2>/dev/null || true | |
| ARTIFACT_COUNT=$((ARTIFACT_COUNT + SCREENSHOTS)) | |
| fi | |
| # Videos | |
| if [ -d "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/videos" ]; then | |
| VIDEOS=$(find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/videos" -type f -mtime +7 2>/dev/null | wc -l) | |
| find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/videos" -type f -mtime +7 -delete 2>/dev/null || true | |
| ARTIFACT_COUNT=$((ARTIFACT_COUNT + VIDEOS)) | |
| fi | |
| done | |
| echo " β Cleaned $ARTIFACT_COUNT old Cypress artifacts" | |
| # 5. Check disk space after cleanup | |
| echo "" | |
| echo "π Disk usage after cleanup:" | |
| df -h / | grep -v Filesystem | |
| echo "" | |
| echo "πΎ Total space cleaned: ~${CLEANED_SPACE}MB" | |
| echo "β Cleanup complete!" |