name: Cypress e2e Test
# =============================================================================
# E2E Test Workflow with Cluster Failover and Smart Test Selection
# =============================================================================
#
# TRIGGERS:
# - Automatically after "Test" workflow completes on PRs
# - Manually via workflow_dispatch (Actions tab β†’ Run workflow)
#
# CLUSTER FAILOVER:
# Primary: dash-e2e-int (checked first via DSC health)
# Secondary: dash-e2e (used if primary is unhealthy)
# Health Check: Logs into cluster β†’ checks DSC conditions (Available, Degraded, odh-dashboardReady)
#
# TEST SELECTION (priority order):
# Default (always run):
# - @ci-dashboard-regression-tags
#
# 1. Manual input (workflow_dispatch):
# Enter tags in 'additional_tags' field: @Pipelines,@Workbenches
#
# 2. PR labels (test:* pattern):
# Add labels with 'test:' prefix to your PR:
# test:Pipelines β†’ @Pipelines
# test:ModelServing β†’ @ModelServing
# test:Workbenches β†’ @Workbenches
# Any 'test:<TagName>' label maps to '@<TagName>' Cypress grep tag
#
# 3. Auto-detected from PR changes (always additive):
# Turbo detects changed packages β†’ reads "e2eCiTags" from package.json
# Git diff detects changed frontend sub-areas β†’ inline mapping resolves tags
# All auto-detected tags are consolidated into ONE additional matrix job
#
# To add auto-detection for a package:
# Add "e2eCiTags": ["@YourTagCI"] to the package's package.json
# To add auto-detection for a frontend area:
# Add an entry to .github/frontend-ci-tags.json
#
# BFF SUPPORT (Backend-For-Frontend):
# Packages with bffConfig.enabled=true in their package.json are automatically
# detected and started when changes are found. BFFs start after the frontend
# webpack server. See docs/bff-e2e-testing.md for details.
#
# To add BFF support to a package:
# Add "bffConfig" to package.json with enabled, port, healthEndpoint,
# startCommand, and startCommandCluster properties
#
# LIMITS:
# - Max 5 additional tags for labels/manual (prevents runner exhaustion)
# - Auto-detected tags are consolidated into 1 job (no limit needed)
# - 10 runners shared across 30+ devs
#
# REQUIRED SECRETS:
# PRIMARY: OC_SERVER_PRIMARY, OCP_CONSOLE_URL_PRIMARY, ODH_DASHBOARD_URL_PRIMARY
# SECONDARY: OC_SERVER, OCP_CONSOLE_URL, ODH_DASHBOARD_URL
# AUTH: GITLAB_TOKEN, GITLAB_TEST_VARS_URL, ODH_NAMESPACES
# =============================================================================
# Trigger on completion of the "Test" workflow (PR flow) or manual dispatch.
on:
  workflow_run:
    workflows: ["Test"]
    types: [completed]
  workflow_dispatch:
    inputs:
      additional_tags:
        description: 'Extra test tags (e.g., @Pipelines,@Workbenches)'
        required: false
        default: ''
        type: string

# One in-flight run per branch; newer runs cancel older ones.
concurrency:
  group: e2e-${{ github.event.workflow_run.head_branch || github.ref }}
  cancel-in-progress: true

# Least-privilege token: statuses:write is needed to report commit status.
permissions:
  contents: read
  pull-requests: read
  actions: read
  statuses: write

env:
  NODE_VERSION: 22.x
  DO_NOT_TRACK: 1
# =============================================================================
# JOBS
# =============================================================================
jobs:
  # ---------------------------------------------------------------------------
  # Cluster Selection - Health check with automatic failover
  # ---------------------------------------------------------------------------
  select-cluster:
    # Run on manual dispatch, or after a successful "Test" workflow for a PR.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success')
    runs-on: self-hosted
    outputs:
      cluster_name: ${{ steps.select.outputs.cluster_name }}
    steps:
      - name: Download test credentials
        run: |
          echo "πŸ”§ Downloading test credentials for cluster health check..."
          curl -fk -H "Authorization: Bearer ${{ secrets.GITLAB_TOKEN }}" \
            "${{ secrets.GITLAB_TEST_VARS_URL }}" \
            -o /tmp/test-variables.yml
          echo "βœ… Downloaded test credentials"
      - name: Select healthy cluster
        id: select
        env:
          PRIMARY_SERVER: ${{ secrets.OC_SERVER_PRIMARY }}
          PRIMARY_DASHBOARD: ${{ secrets.ODH_DASHBOARD_URL_PRIMARY }}
          SECONDARY_SERVER: ${{ secrets.OC_SERVER }}
          SECONDARY_DASHBOARD: ${{ secrets.ODH_DASHBOARD_URL }}
        run: |
          # FIX: remove the credentials file on ANY exit path. The previous
          # trailing `rm -f` was unreachable when both clusters failed (exit 1),
          # leaving credentials behind on the shared self-hosted runner.
          trap 'rm -f /tmp/test-variables.yml' EXIT
          # Extract credentials from test-variables.yml
          TEST_VARS_FILE="/tmp/test-variables.yml"
          OC_USERNAME=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ')
          OC_PASSWORD=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ')
          echo "::add-mask::$OC_PASSWORD"
          echo "::add-mask::$OC_USERNAME"
          # Check DSC health by logging in and verifying conditions.
          # NOTE: called from an `if` condition, so `set -e` is suspended inside
          # and the explicit exit-code checks below are effective.
          check_dsc_health() {
            local server_url="$1"
            local cluster_name="$2"
            [[ -z "$server_url" ]] && echo " ❌ Server URL is empty" && return 1
            echo " πŸ”— Attempting login to: $server_url"
            # Try to login
            LOGIN_OUTPUT=$(oc login -u "$OC_USERNAME" -p "$OC_PASSWORD" --server="$server_url" --insecure-skip-tls-verify 2>&1) || true
            if ! oc whoami > /dev/null 2>&1; then
              echo " ❌ Failed to login to $cluster_name"
              echo " πŸ“ Login output: $LOGIN_OUTPUT" | head -5
              return 1
            fi
            echo " βœ… Login successful"
            # Get DSC status with full output for debugging
            echo " πŸ” Fetching DataScienceCluster status..."
            DSC_JSON=$(oc get datasciencecluster -o json 2>&1)
            DSC_EXIT_CODE=$?
            if [[ $DSC_EXIT_CODE -ne 0 ]]; then
              echo " ❌ Failed to get DSC (exit code: $DSC_EXIT_CODE)"
              echo " πŸ“ Output: $DSC_JSON" | head -5
              return 1
            fi
            if [[ -z "$DSC_JSON" || "$DSC_JSON" == "null" || "$DSC_JSON" == '{"apiVersion":"datasciencecluster.opendatahub.io/v1","items":[],"kind":"List","metadata":{"resourceVersion":""}}' ]]; then
              echo " ❌ No DataScienceCluster found on $cluster_name"
              return 1
            fi
            # Print DSC name and status for debugging
            DSC_NAME=$(echo "$DSC_JSON" | jq -r '.items[0].metadata.name // "unknown"')
            echo " πŸ“¦ DSC Name: $DSC_NAME"
            # Check phase - this is the most reliable indicator
            PHASE=$(echo "$DSC_JSON" | jq -r '.items[0].status.phase // "Unknown"')
            echo " πŸ“Š DSC Phase: $PHASE"
            # Print all conditions for debugging
            echo " πŸ“‹ DSC Conditions:"
            echo "$DSC_JSON" | jq -r '.items[0].status.conditions[]? | " - \(.type): \(.status) (\(.reason // "no reason"))"' 2>/dev/null || echo " (no conditions found)"
            # If phase is Ready, cluster is healthy
            if [[ "$PHASE" == "Ready" ]]; then
              echo " βœ… DSC is Ready!"
              return 0
            fi
            # Phase not Ready - check conditions for more detail
            AVAILABLE=$(echo "$DSC_JSON" | jq -r '.items[0].status.conditions[] | select(.type=="Available") | .status' 2>/dev/null || echo "")
            DEGRADED=$(echo "$DSC_JSON" | jq -r '.items[0].status.conditions[] | select(.type=="Degraded") | .status' 2>/dev/null || echo "")
            # Fallback: if conditions show healthy even though phase isn't Ready
            if [[ "$AVAILABLE" == "True" && "$DEGRADED" != "True" ]]; then
              echo " βœ… Conditions look healthy despite phase=$PHASE"
              return 0
            fi
            echo " ❌ DSC not healthy (Phase: $PHASE, Available: $AVAILABLE, Degraded: $DEGRADED)"
            return 1
          }
          echo "πŸ” Checking PRIMARY cluster (dash-e2e-int)..."
          if check_dsc_health "$PRIMARY_SERVER" "dash-e2e-int"; then
            echo "βœ… PRIMARY cluster is healthy and ready"
            echo "cluster_name=dash-e2e-int" >> $GITHUB_OUTPUT
          else
            echo ""
            echo "⚠️ PRIMARY unavailable or not ready, trying SECONDARY (dash-e2e)..."
            if check_dsc_health "$SECONDARY_SERVER" "dash-e2e"; then
              echo "βœ… SECONDARY cluster is healthy and ready"
              echo "cluster_name=dash-e2e" >> $GITHUB_OUTPUT
            else
              echo ""
              echo "❌ All clusters unavailable or unhealthy"
              exit 1
            fi
          fi
          # Credentials file is cleaned up by the EXIT trap above.
# ---------------------------------------------------------------------------
# Status - Set pending status on PR (independent - runs before cluster selection)
# ---------------------------------------------------------------------------
set-pending-status:
if: >-
github.event_name == 'workflow_dispatch' ||
(github.event.workflow_run.event == 'pull_request' &&
github.event.workflow_run.conclusion == 'success')
runs-on: ubuntu-latest
steps:
- name: Set pending status
env:
GH_TOKEN: ${{ github.token }}
run: |
gh api repos/${{ github.repository }}/statuses/${{ github.event.workflow_run.head_sha || github.sha }} \
-f state=pending \
-f context="Cypress E2E Tests" \
-f description="E2E tests starting..." \
-f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# ---------------------------------------------------------------------------
# Tag Resolution - Build test matrix from defaults + PR labels/input + auto-detection
# ---------------------------------------------------------------------------
get-test-tags:
needs: [select-cluster]
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.build.outputs.matrix }}
source: ${{ steps.build.outputs.source }}
bff_packages: ${{ steps.detect.outputs.bff_packages }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ github.event.workflow_run.head_sha || github.sha }}
fetch-depth: 0
- name: Setup Node.js ${{ env.NODE_VERSION }}
uses: actions/setup-node@v4.3.0
with:
node-version: ${{ env.NODE_VERSION }}
cache: 'npm'
- name: Get PR labels
id: labels
if: github.event_name == 'workflow_run'
env:
GH_TOKEN: ${{ github.token }}
run: |
# Get PR number - try multiple methods for fork PR compatibility
PR_NUM="${{ github.event.workflow_run.pull_requests[0].number }}"
# Method 2: commits API (works for same-repo PRs)
if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then
PR_NUM=$(gh api "repos/${{ github.repository }}/commits/${{ github.event.workflow_run.head_sha }}/pulls" \
--jq '.[0].number' 2>/dev/null || echo "")
fi
# Method 3: search API (works for fork PRs)
if [[ -z "$PR_NUM" || "$PR_NUM" == "null" ]]; then
PR_NUM=$(gh api "search/issues?q=repo:${{ github.repository }}+is:pr+is:open+sha:${{ github.event.workflow_run.head_sha }}" \
--jq '.items[0].number' 2>/dev/null || echo "")
fi
if [[ -n "$PR_NUM" && "$PR_NUM" != "null" ]]; then
LABELS=$(gh api "repos/${{ github.repository }}/issues/$PR_NUM/labels" \
--jq '[.[].name] | join(",")' 2>/dev/null || echo "")
echo "labels=$LABELS" >> $GITHUB_OUTPUT
echo "πŸ“‹ PR #$PR_NUM labels: $LABELS"
else
echo "⚠️ Could not find PR number for SHA ${{ github.event.workflow_run.head_sha }}"
fi
- name: Detect changed areas
id: detect
run: |
# =================================================================
# Smart Test Selection: Detect changed areas and resolve CI tags
#
# Layer 1: Turbo detects changed packages (including frontend "//" workspace)
# β†’ reads e2eCiTags from each package.json (self-service, teams opt in)
# Layer 2: For frontend changes detected by Layer 1, git diff identifies
# sub-areas β†’ .github/frontend-ci-tags.json mapping resolves CI tags
# =================================================================
AUTO_TAGS=""
# --- Layer 1: Turbo-based package detection ---
echo "πŸ” Running Turbo change detection..."
# Determine base ref for comparison
if [[ "${{ github.event_name }}" == "workflow_run" ]]; then
BASE_SHA="${{ github.event.workflow_run.pull_requests[0].base.sha || 'origin/main' }}"
else
BASE_SHA="origin/main"
fi
HEAD_SHA="${{ github.event.workflow_run.head_sha || github.sha }}"
echo " πŸ“Œ Comparing $BASE_SHA...$HEAD_SHA"
# Get changed packages from turbo (uses dependency graph)
CHANGED_PACKAGES=$(npx turbo run lint --dry=json --filter="...[$BASE_SHA...$HEAD_SHA]" 2>/dev/null \
| jq -r '.packages[]' 2>/dev/null || echo "")
if [[ -n "$CHANGED_PACKAGES" ]]; then
echo " πŸ“¦ Changed workspaces detected by Turbo:"
echo "$CHANGED_PACKAGES" | while read -r pkg; do echo " - $pkg"; done
# For each changed package, check for e2eCiTags in its package.json
for pkg_dir in packages/*/; do
pkg_name=$(jq -r '.name // empty' "$pkg_dir/package.json" 2>/dev/null)
if echo "$CHANGED_PACKAGES" | grep -qx "$pkg_name"; then
ci_tags=$(jq -r '.e2eCiTags[]? // empty' "$pkg_dir/package.json" 2>/dev/null)
if [[ -n "$ci_tags" ]]; then
for tag in $ci_tags; do
echo " βœ… $pkg_name β†’ $tag"
AUTO_TAGS="$AUTO_TAGS $tag"
done
else
echo " ⏭️ $pkg_name (no e2eCiTags β€” defaults only)"
fi
fi
done
else
echo " ℹ️ No package changes detected by Turbo"
fi
# --- Layer 2: Frontend sub-area detection ---
# Turbo sees the entire frontend as one workspace. When it changes,
# use git diff to identify which sub-areas were modified.
# Note: Turbo reports the root/frontend workspace as "//"
if echo "$CHANGED_PACKAGES" | grep -qx "//"; then
echo ""
echo "πŸ” Frontend changed β€” detecting sub-areas via git diff..."
# Load frontend directory β†’ CI tag mapping from external JSON file
# To add a new area: edit .github/frontend-ci-tags.json
MAPPING_FILE=".github/frontend-ci-tags.json"
if [[ ! -f "$MAPPING_FILE" ]]; then
echo " ⚠️ $MAPPING_FILE not found β€” skipping frontend sub-area detection"
else
echo " πŸ“„ Loaded mappings from $MAPPING_FILE"
# Get changed frontend files
CHANGED_FILES=$(git diff --name-only "$BASE_SHA"..."$HEAD_SHA" -- frontend/src/ 2>/dev/null || echo "")
if [[ -n "$CHANGED_FILES" ]]; then
# Scan pages/, concepts/, api/, routes/ using the same mapping
for src_dir in pages concepts api routes; do
DIRS=$(echo "$CHANGED_FILES" | grep "^frontend/src/$src_dir/" | \
sed "s|^frontend/src/$src_dir/||" | cut -d'/' -f1 | sort -u)
for dir in $DIRS; do
tag=$(jq -r --arg d "$dir" '.[$d] // empty' "$MAPPING_FILE")
if [[ -n "$tag" ]]; then
echo " βœ… $src_dir/$dir β†’ $tag"
AUTO_TAGS="$AUTO_TAGS $tag"
fi
done
done
fi
fi
fi
# Deduplicate auto-detected tags
if [[ -n "$AUTO_TAGS" ]]; then
AUTO_TAGS=$(echo "$AUTO_TAGS" | tr ' ' '\n' | sort -u | tr '\n' ' ' | xargs)
echo ""
echo "🏷️ Auto-detected CI tags: $AUTO_TAGS"
else
echo ""
echo "ℹ️ No area-specific CI tags detected β€” defaults only"
fi
echo "auto_tags=$AUTO_TAGS" >> $GITHUB_OUTPUT
# --- BFF Package Detection ---
# Detect changed packages that have bffConfig.enabled=true
echo ""
echo "πŸ” Detecting BFF packages to start..."
BFF_PACKAGES="[]"
if [[ -n "$CHANGED_PACKAGES" ]]; then
for pkg_dir in packages/*/; do
pkg_name=$(jq -r '.name // empty' "$pkg_dir/package.json" 2>/dev/null)
# Check if this package changed
if echo "$CHANGED_PACKAGES" | grep -qx "$pkg_name"; then
# Check if package has bffConfig with enabled=true
bff_enabled=$(jq -r '.bffConfig.enabled // false' "$pkg_dir/package.json" 2>/dev/null)
if [[ "$bff_enabled" == "true" ]]; then
bff_port=$(jq -r '.bffConfig.port // 4000' "$pkg_dir/package.json" 2>/dev/null)
bff_health=$(jq -r '.bffConfig.healthEndpoint // "/api/health"' "$pkg_dir/package.json" 2>/dev/null)
bff_cmd=$(jq -r '.bffConfig.startCommandCluster // .bffConfig.startCommand' "$pkg_dir/package.json" 2>/dev/null)
pkg_dir_name=$(basename "$pkg_dir")
echo " βœ… $pkg_name (port: $bff_port, health: $bff_health)"
# Add to JSON array
BFF_PACKAGES=$(echo "$BFF_PACKAGES" | jq -c --arg name "$pkg_name" --arg dir "$pkg_dir_name" --arg port "$bff_port" --arg health "$bff_health" --arg cmd "$bff_cmd" '. + [{"name": $name, "dir": $dir, "port": ($port | tonumber), "healthEndpoint": $health, "startCommand": $cmd}]')
fi
fi
done
fi
if [[ "$BFF_PACKAGES" == "[]" ]]; then
echo " ℹ️ No BFF packages detected for this change"
fi
echo "bff_packages=$BFF_PACKAGES" >> $GITHUB_OUTPUT
- name: Build test matrix
id: build
run: |
# Configuration
MAX_EXTRA_TAGS=5 # Limit additional tags to prevent runner exhaustion (for labels/manual only)
# Defaults - these ALWAYS run
TAGS="@ci-dashboard-regression-tags"
SOURCE="default"
EXTRA_COUNT=0
AUTO_DETECTED_ENTRY=""
# Priority 1: Manual input (workflow_dispatch)
if [[ -n "${{ inputs.additional_tags }}" ]]; then
for tag in $(echo "${{ inputs.additional_tags }}" | tr ',' ' '); do
if [[ $EXTRA_COUNT -lt $MAX_EXTRA_TAGS ]]; then
TAGS="$TAGS,$tag"
EXTRA_COUNT=$((EXTRA_COUNT + 1))
fi
done
SOURCE="manual"
echo "πŸ“ Added manual tags (limit: $MAX_EXTRA_TAGS)"
# Priority 2: PR labels (test:* pattern)
elif [[ -n "${{ steps.labels.outputs.labels }}" ]]; then
for label in $(echo "${{ steps.labels.outputs.labels }}" | tr ',' ' '); do
if [[ "$label" == test:* && $EXTRA_COUNT -lt $MAX_EXTRA_TAGS ]]; then
tag="@${label#test:}"
tag="${tag#@}" # Remove double @
tag="@$tag"
TAGS="$TAGS,$tag"
EXTRA_COUNT=$((EXTRA_COUNT + 1))
SOURCE="pr-labels"
echo "🏷️ Label '$label' β†’ $tag"
fi
done
fi
if [[ $EXTRA_COUNT -ge $MAX_EXTRA_TAGS ]]; then
echo "⚠️ Tag limit reached ($MAX_EXTRA_TAGS max). Some tags were not added."
fi
# Priority 3: Auto-detected from PR changes (always additive, consolidated into ONE job)
AUTO_TAGS="${{ steps.detect.outputs.auto_tags }}"
if [[ -n "$AUTO_TAGS" ]]; then
# Remove any auto-detected tags that already appear in manual/label TAGS
# to prevent the same tests running in two separate matrix jobs
EXISTING_TAGS=$(echo "$TAGS" | tr ',' '\n' | sort -u)
FILTERED_AUTO=""
for auto_tag in $AUTO_TAGS; do
if echo "$EXISTING_TAGS" | grep -qx "$auto_tag"; then
echo "⏭️ Skipping $auto_tag from auto-detected (already in manual/label tags)"
else
FILTERED_AUTO="$FILTERED_AUTO $auto_tag"
fi
done
FILTERED_AUTO=$(echo "$FILTERED_AUTO" | xargs)
if [[ -n "$FILTERED_AUTO" ]]; then
# Consolidate remaining auto-detected tags into a single matrix entry
# Cypress grep treats space-separated tags as OR, so one job covers all areas
AUTO_DETECTED_ENTRY="$FILTERED_AUTO"
if [[ "$SOURCE" == "default" ]]; then
SOURCE="auto-detected"
else
SOURCE="$SOURCE+auto-detected"
fi
echo "πŸ€– Auto-detected tags (consolidated into 1 job): $AUTO_DETECTED_ENTRY"
else
echo "ℹ️ All auto-detected tags already covered by manual/label tags"
fi
fi
# Convert to JSON matrix (deduplicated)
MATRIX=$(echo "$TAGS" | tr ',' '\n' | sort -u | grep -v '^$' | \
sed 's/^[^@]/@&/' | jq -Rc '[., inputs] | unique' | jq -sc 'add | unique')
# Append the consolidated auto-detected entry as a single matrix item
if [[ -n "$AUTO_DETECTED_ENTRY" ]]; then
MATRIX=$(echo "$MATRIX" | jq -c --arg entry "$AUTO_DETECTED_ENTRY" '. + [$entry] | unique')
fi
# Ensure compact JSON for GitHub Actions output
MATRIX=$(echo "$MATRIX" | jq -c '.')
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
echo "source=$SOURCE" >> $GITHUB_OUTPUT
echo "πŸ§ͺ Final matrix: $MATRIX (source: $SOURCE)"
# ---------------------------------------------------------------------------
# E2E Tests - Run Cypress tests for each tag in parallel
# ---------------------------------------------------------------------------
e2e-tests:
needs: [select-cluster, set-pending-status, get-test-tags]
runs-on: self-hosted
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
tag: ${{ fromJson(needs.get-test-tags.outputs.matrix) }}
env:
CLUSTER_NAME: ${{ needs.select-cluster.outputs.cluster_name }}
steps:
- name: Check Disk Space
run: |
echo "πŸ“Š Checking available disk space..."
DISK_USAGE=$(df / | tail -1 | awk '{print $5}' | sed 's/%//')
DISK_AVAIL=$(df -h / | tail -1 | awk '{print $4}')
echo "πŸ’Ύ Disk usage: ${DISK_USAGE}% (${DISK_AVAIL} available)"
echo "DISK_USAGE=$DISK_USAGE" >> $GITHUB_ENV
if [ "$DISK_USAGE" -ge 95 ]; then
echo "❌ CRITICAL: Disk usage is ${DISK_USAGE}% - will attempt emergency cleanup"
echo "EMERGENCY_CLEANUP=true" >> $GITHUB_ENV
elif [ "$DISK_USAGE" -ge 90 ]; then
echo "⚠️ HIGH: Disk usage is ${DISK_USAGE}% - will attempt aggressive cleanup"
echo "EMERGENCY_CLEANUP=true" >> $GITHUB_ENV
elif [ "$DISK_USAGE" -ge 85 ]; then
echo "⚠️ WARNING: Disk usage is ${DISK_USAGE}% - cleanup recommended"
echo " The cleanup job will run after this workflow completes"
echo "EMERGENCY_CLEANUP=false" >> $GITHUB_ENV
else
echo "βœ… Disk space OK (${DISK_USAGE}% used)"
echo "EMERGENCY_CLEANUP=false" >> $GITHUB_ENV
fi
- name: Emergency Cleanup (if disk space critical)
if: env.EMERGENCY_CLEANUP == 'true'
run: |
echo "🚨 EMERGENCY CLEANUP - Disk usage: ${DISK_USAGE}%"
RUNNER_USER=$(whoami)
HOME_DIR=$(eval echo "~$RUNNER_USER")
CURRENT_WORK_DIR="${{ github.workspace }}"
# Determine how aggressive to be based on disk usage
if [ "$DISK_USAGE" -ge 95 ]; then
AGE_THRESHOLD=1 # CRITICAL: Clean anything >1 day old
echo "⚠️ CRITICAL MODE: Cleaning files >1 day old"
else
AGE_THRESHOLD=7 # Normal: Clean anything >7 days old
echo "⚠️ AGGRESSIVE MODE: Cleaning files >7 days old"
fi
echo ""
if [ "$DISK_USAGE" -ge 95 ]; then
echo "πŸ›‘οΈ PARALLEL-SAFE PROTECTIONS (FAST MODE - disk critically full):"
echo " βœ“ Current workspace (this job)"
echo " βœ“ Active GitHub Actions Runner.Worker process directories"
echo " ⚑ Skipping slow checks (lsof, find) for speed"
else
echo "πŸ›‘οΈ PARALLEL-SAFE PROTECTIONS (THOROUGH MODE):"
echo " βœ“ Current workspace (this job)"
echo " βœ“ Active GitHub Actions Runner.Worker processes"
echo " βœ“ Directories with open files (lsof with 5s timeout)"
echo " βœ“ Directories accessed in last 10 minutes"
fi
echo ""
# Get list of ALL active work directories from currently running GitHub Actions jobs
# This is the safest way to avoid deleting directories from parallel PRs
echo "πŸ” Detecting active work directories from parallel jobs (with timeout)..."
ACTIVE_WORK_DIRS=()
ACTIVE_JOBS=0
# Use faster method: check for active processes, then only protect their workspace
ACTIVE_PIDS=$(pgrep -f "Runner.Worker" -u "$RUNNER_USER" 2>/dev/null || true)
if [ -n "$ACTIVE_PIDS" ]; then
echo " Found active Runner.Worker processes: $ACTIVE_PIDS"
# Get working directories of active processes using lsof (much faster than find)
for pid in $ACTIVE_PIDS; do
ACTIVE_JOBS=$((ACTIVE_JOBS + 1))
# Get the CWD of this process
if [ -L "/proc/$pid/cwd" ]; then
WORK_CWD=$(readlink "/proc/$pid/cwd" 2>/dev/null || true)
if [[ "$WORK_CWD" == *"odh-dashboard"* ]]; then
# Extract the odh-dashboard directory path
WORK_DIR=$(echo "$WORK_CWD" | sed 's|/odh-dashboard/.*|/odh-dashboard|')
ACTIVE_WORK_DIRS+=("$WORK_DIR")
echo " πŸ›‘οΈ Protected: $WORK_DIR (PID $pid)"
fi
fi
done
fi
echo " Found $ACTIVE_JOBS active runner(s) with ${#ACTIVE_WORK_DIRS[@]} protected work directory(ies)"
# Helper function to check if directory is in use by active runner
is_directory_in_use() {
local dir="$1"
# 1. Skip current workspace (absolute must)
if [[ "$dir" == "$CURRENT_WORK_DIR"* ]]; then
return 0 # In use (current job)
fi
# 2. Check if directory is in the active work dirs list (FAST)
for active_dir in "${ACTIVE_WORK_DIRS[@]}"; do
if [[ "$dir" == "$active_dir"* ]]; then
return 0 # In use (active job)
fi
done
# 3. In CRITICAL mode (disk β‰₯95%), skip slow checks - rely on active work dirs only
if [ "$DISK_USAGE" -ge 95 ]; then
return 1 # Not in active list, safe to delete (fast path)
fi
# 4. Normal mode: Do thorough checks
# Check for ANY processes using this directory (can be slow)
if timeout 5 lsof +D "$dir" 2>/dev/null | grep -q .; then
return 0 # In use (has open files)
fi
# 5. Check if directory was accessed very recently (last 10 minutes only)
if find "$dir" -maxdepth 0 -amin -10 2>/dev/null | grep -q .; then
return 0 # In use (very recent activity)
fi
return 1 # Not in use (safe to delete)
}
echo ""
echo "πŸ—‘οΈ Step 1: Cleaning Go upstream builds (age: >$AGE_THRESHOLD days)..."
UPSTREAM_CLEANED=0
find "$HOME_DIR"/actions-runner*/_work -type d -path "*/packages/*/upstream" -mtime +$AGE_THRESHOLD 2>/dev/null | while read upstream_dir; do
# Extract work_dir by going up to odh-dashboard parent
work_dir=$(echo "$upstream_dir" | sed 's|/odh-dashboard/.*|/odh-dashboard|')
if [ -n "$work_dir" ] && ! is_directory_in_use "$work_dir"; then
rm -rf "$upstream_dir" 2>/dev/null && echo " βœ… Cleaned: $upstream_dir" && UPSTREAM_CLEANED=$((UPSTREAM_CLEANED + 1)) || true
fi
done
echo ""
echo "πŸ—‘οΈ Step 2: Cleaning old work directories (age: >$AGE_THRESHOLD days, with multi-layer safety)..."
CLEANED_COUNT=0
SKIPPED_COUNT=0
find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d -mtime +$AGE_THRESHOLD 2>/dev/null | while read work_dir; do
if [ -d "$work_dir" ]; then
if is_directory_in_use "$work_dir"; then
echo " ⏭️ Protected (in use): $work_dir"
SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
else
SIZE_BEFORE=$(du -sh "$work_dir" 2>/dev/null | awk '{print $1}')
if rm -rf "$work_dir" 2>/dev/null; then
echo " βœ… Cleaned $SIZE_BEFORE: $work_dir"
CLEANED_COUNT=$((CLEANED_COUNT + 1))
fi
fi
fi
done
echo " πŸ“Š Cleaned: $CLEANED_COUNT, Protected: $SKIPPED_COUNT"
echo ""
echo "πŸ—‘οΈ Step 3: Cleaning Cypress artifacts (age: >$AGE_THRESHOLD days)..."
SCREENSHOTS_CLEANED=$(find "$HOME_DIR"/actions-runner*/_work -path "*/cypress/results/screenshots/*" -mtime +$AGE_THRESHOLD -delete -print 2>/dev/null | wc -l)
VIDEOS_CLEANED=$(find "$HOME_DIR"/actions-runner*/_work -path "*/cypress/results/videos/*" -mtime +$AGE_THRESHOLD -delete -print 2>/dev/null | wc -l)
echo " βœ… Cleaned $SCREENSHOTS_CLEANED screenshots, $VIDEOS_CLEANED videos"
echo ""
echo "πŸ—‘οΈ Step 4: Cleaning runner logs (age: >$AGE_THRESHOLD days)..."
LOGS_CLEANED=$(find "$HOME_DIR"/actions-runner*/_diag -name "*.log" -mtime +$AGE_THRESHOLD -delete -print 2>/dev/null | wc -l)
echo " βœ… Cleaned $LOGS_CLEANED log files"
echo ""
echo "πŸ—‘οΈ Step 5: Cleaning node_modules in old work dirs (age: >$AGE_THRESHOLD days)..."
find "$HOME_DIR"/actions-runner*/_work -type d -name "node_modules" -mtime +$AGE_THRESHOLD 2>/dev/null | while read nm_dir; do
# Extract work_dir by going up to odh-dashboard parent
work_dir=$(echo "$nm_dir" | sed 's|/odh-dashboard/.*|/odh-dashboard|')
if [ -n "$work_dir" ] && ! is_directory_in_use "$work_dir"; then
SIZE_BEFORE=$(du -sh "$nm_dir" 2>/dev/null | awk '{print $1}')
rm -rf "$nm_dir" 2>/dev/null && echo " βœ… Cleaned $SIZE_BEFORE node_modules: $nm_dir" || true
fi
done
echo ""
echo "πŸ—‘οΈ Step 6: Cleaning .turbo cache in old work dirs (age: >$AGE_THRESHOLD days)..."
find "$HOME_DIR"/actions-runner*/_work -type d -name ".turbo" -mtime +$AGE_THRESHOLD 2>/dev/null | while read turbo_dir; do
# Extract work_dir by going up to odh-dashboard parent
work_dir=$(echo "$turbo_dir" | sed 's|/odh-dashboard/.*|/odh-dashboard|')
if [ -n "$work_dir" ] && ! is_directory_in_use "$work_dir"; then
SIZE_BEFORE=$(du -sh "$turbo_dir" 2>/dev/null | awk '{print $1}')
rm -rf "$turbo_dir" 2>/dev/null && echo " βœ… Cleaned $SIZE_BEFORE .turbo: $turbo_dir" || true
fi
done
echo ""
echo "πŸ“Š Disk usage after emergency cleanup:"
DISK_USAGE_AFTER=$(df / | tail -1 | awk '{print $5}' | sed 's/%//')
DISK_AVAIL_AFTER=$(df -h / | tail -1 | awk '{print $4}')
echo "πŸ’Ύ Disk usage: ${DISK_USAGE_AFTER}% (${DISK_AVAIL_AFTER} available)"
FREED=$((DISK_USAGE - DISK_USAGE_AFTER))
if [ "$FREED" -gt 0 ]; then
echo "βœ… Freed: ${FREED}% disk space"
else
echo "⚠️ Freed: 0% disk space (no files met age threshold)"
fi
# Show what's taking up space
echo ""
echo "πŸ“Š Top disk usage on runner:"
du -sh "$HOME_DIR"/actions-runner*/_work/* 2>/dev/null | sort -rh | head -5 || true
# Decide whether to fail or continue
if [ "$DISK_USAGE_AFTER" -ge 95 ]; then
if [ "$FREED" -gt 0 ]; then
echo "⚠️ WARNING: Still at ${DISK_USAGE_AFTER}% after cleanup, but freed ${FREED}%"
echo " Attempting to proceed - job may fail if more space is needed"
else
echo "❌ CRITICAL: Still at ${DISK_USAGE_AFTER}% after cleanup and freed 0%"
echo " All work directories are either:"
echo " - Currently in use by active jobs"
echo " - Created within the last $AGE_THRESHOLD day(s)"
echo ""
echo "πŸ” Diagnosis - Active work directories:"
find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d 2>/dev/null | while read work_dir; do
MTIME=$(stat -f %m "$work_dir" 2>/dev/null || stat -c %Y "$work_dir" 2>/dev/null || echo "0")
AGE_DAYS=$(( ($(date +%s) - MTIME) / 86400 ))
SIZE=$(du -sh "$work_dir" 2>/dev/null | awk '{print $1}')
echo " - $work_dir: $SIZE, age: ${AGE_DAYS} days"
done
echo ""
echo " Manual intervention required on runner $(hostname)"
exit 1
fi
elif [ "$DISK_USAGE_AFTER" -ge 90 ]; then
echo "⚠️ WARNING: Still at ${DISK_USAGE_AFTER}% after cleanup"
echo " Job will proceed but may fail due to space"
else
echo "βœ… Cleanup successful - proceeding with tests"
fi
- name: Calculate unique port for this workflow run
run: |
# Dynamic port allocation for parallel execution
BASE_PORT=$((4000 + (${{ github.run_id }} % 1000) * 5))
# Add matrix offset to separate concurrent jobs within same PR
if [[ "${{ matrix.tag }}" == *"set-1"* ]]; then
MATRIX_OFFSET=0
elif [[ "${{ matrix.tag }}" == *"set-2"* ]]; then
MATRIX_OFFSET=1
else
MATRIX_OFFSET=2
fi
WEBPACK_PORT=$((BASE_PORT + MATRIX_OFFSET))
PORT_INFO_DIR="/tmp/gha-ports"
mkdir -p "$PORT_INFO_DIR"
echo "πŸ“ Calculated port ${WEBPACK_PORT} for ${{ matrix.tag }} (run_id: ${{ github.run_id }})"
# SAFE port conflict resolution - only clean orphaned processes
if lsof -ti:${WEBPACK_PORT} > /dev/null 2>&1; then
echo "⚠️ Port ${WEBPACK_PORT} is currently in use - checking ownership..."
PORT_OWNER_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
if [ -f "$PORT_OWNER_FILE" ]; then
OWNER_RUN_ID=$(cat "$PORT_OWNER_FILE")
FILE_AGE=$(($(date +%s) - $(stat -f %m "$PORT_OWNER_FILE" 2>/dev/null || stat -c %Y "$PORT_OWNER_FILE" 2>/dev/null)))
# Only kill if the owning run is OLD (>30 minutes = likely completed/stuck)
if [ "$FILE_AGE" -gt 1800 ]; then
echo "🧹 Port owned by old run_id $OWNER_RUN_ID (${FILE_AGE}s old) - cleaning up..."
PORT_PID=$(lsof -ti:${WEBPACK_PORT} 2>/dev/null | head -1)
if [ -n "$PORT_PID" ]; then
pkill -P "$PORT_PID" 2>/dev/null || true
kill -9 "$PORT_PID" 2>/dev/null || true
sleep 2
if lsof -ti:${WEBPACK_PORT} > /dev/null 2>&1; then
echo "❌ Failed to clean orphaned process - port still in use"
echo " This may require manual intervention on the runner"
exit 1
else
echo "βœ… Successfully cleaned orphaned process"
fi
fi
else
echo "⚠️ Port owned by recent run_id $OWNER_RUN_ID (${FILE_AGE}s old)"
echo " This is likely an active parallel PR test - DO NOT KILL"
echo "❌ Port conflict detected - please retry workflow in a few minutes"
exit 1
fi
else
echo "⚠️ Port in use but no owner info found - checking process age..."
PORT_PID=$(lsof -ti:${WEBPACK_PORT} 2>/dev/null | head -1)
if [ -n "$PORT_PID" ]; then
# Check process start time (macOS: -o etime, Linux: -o etimes)
PROCESS_AGE=$(ps -o etimes= -p "$PORT_PID" 2>/dev/null || echo "unknown")
if [ "$PROCESS_AGE" != "unknown" ] && [ "$PROCESS_AGE" -gt 1800 ]; then
echo "🧹 Orphaned process (${PROCESS_AGE}s old) - cleaning up..."
pkill -P "$PORT_PID" 2>/dev/null || true
kill -9 "$PORT_PID" 2>/dev/null || true
sleep 2
else
echo "❌ Port in use by recent process - may be parallel PR test"
echo " Please retry workflow in a few minutes"
exit 1
fi
fi
fi
else
echo "βœ… Port ${WEBPACK_PORT} is available"
fi
# Store port info with run_id for cleanup tracking
echo "${{ github.run_id }}" > "$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
echo "WEBPACK_PORT=$WEBPACK_PORT" >> $GITHUB_ENV
echo "PORT_INFO_FILE=$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id" >> $GITHUB_ENV
- name: Cleanup old test artifacts
continue-on-error: true
run: |
echo "🧹 Cleaning up old test artifacts (>2 days)..."
# Clean old Cypress results/screenshots/videos from workspace (>2 days old)
find ${{ github.workspace }}/packages/cypress/results -type f -mtime +2 -delete 2>/dev/null || true
find ${{ github.workspace }}/packages/cypress/screenshots -type f -mtime +2 -delete 2>/dev/null || true
find ${{ github.workspace }}/packages/cypress/videos -type f -mtime +2 -delete 2>/dev/null || true
# Clean old webpack logs (>2 days old)
find /tmp -name "webpack_*.log" -type f -mtime +2 -delete 2>/dev/null || true
# Note: ~/.cache/Cypress is managed by actions/cache and should not be cleaned here
# to avoid removing the Cypress binary on shared self-hosted runners
# Clean old temporary yaml files (>2 days old)
find /tmp -name "cypress-yaml-*.yaml" -type f -mtime +2 -delete 2>/dev/null || true
# Clean empty directories
find ${{ github.workspace }}/packages/cypress/results -type d -empty -delete 2>/dev/null || true
find ${{ github.workspace }}/packages/cypress/screenshots -type d -empty -delete 2>/dev/null || true
find ${{ github.workspace }}/packages/cypress/videos -type d -empty -delete 2>/dev/null || true
echo "βœ… Cleanup complete (non-critical, continued on any errors)"
      # Check out the commit under test: for workflow_run triggers this is the
      # triggering PR's head SHA; manual dispatch falls back to the current SHA.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.workflow_run.head_sha || github.sha }}
      # Restore node_modules and the Cypress binary cache keyed on the lockfile;
      # a cache hit lets the job skip Node setup and `npm ci` entirely.
      - name: Restore npm dependencies cache
        uses: actions/cache/restore@v4
        id: npm-cache
        with:
          path: |
            ~/.cache/Cypress
            **/node_modules
          key: ${{ runner.os }}-${{ env.NODE_VERSION }}-all-modules-${{ hashFiles('**/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-${{ env.NODE_VERSION }}-all-modules-
      - name: Setup Node.js ${{ env.NODE_VERSION }}
        if: steps.npm-cache.outputs.cache-hit != 'true'
        uses: actions/setup-node@v4.3.0
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        if: steps.npm-cache.outputs.cache-hit != 'true'
        run: npm ci
      # Restore-only caches: this job never writes them back.
      - name: Restore turbo build artifacts cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ github.workspace }}/.turbo
          key: ${{ runner.os }}-${{ env.NODE_VERSION }}-turbo-${{ github.sha }}-e2e
          restore-keys: |
            ${{ runner.os }}-${{ env.NODE_VERSION }}-turbo-
      - name: Restore OpenShift CLI tarball cache
        uses: actions/cache/restore@v4
        id: oc-cache
        with:
          path: ${{ runner.temp }}/oc.tar.gz
          key: ${{ runner.os }}-oc-tarball-${{ env.OC_VERSION || '4.15.0' }}
      # Fetch test-variables.yml (credentials/config) from GitLab; -f fails the
      # step on an HTTP error, -k skips TLS verification for the internal host.
      - name: Download test configuration
        run: |
          echo "πŸ”§ Downloading test configuration from GitLab..."
          curl -fk -H "Authorization: Bearer ${{ secrets.GITLAB_TOKEN }}" \
            "${{ secrets.GITLAB_TEST_VARS_URL }}" \
            -o ${{ github.workspace }}/packages/cypress/test-variables.yml
          echo "βœ… Downloaded test configuration"
- name: Login to OpenShift cluster
env:
OC_SERVER_PRIMARY: ${{ secrets.OC_SERVER_PRIMARY }}
OC_SERVER_SECONDARY: ${{ secrets.OC_SERVER }}
run: |
TEST_VARS_FILE="${{ github.workspace }}/packages/cypress/test-variables.yml"
# Extract credentials based on test type
if [[ "${{ matrix.tag }}" == "@NonAdmin" ]]; then
echo "πŸ”‘ Using non-admin credentials (TEST_USER_3) for @NonAdmin tests"
OC_USERNAME=$(grep -A 10 "^TEST_USER_3:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ')
OC_PASSWORD=$(grep -A 10 "^TEST_USER_3:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ')
else
OC_USERNAME=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "USERNAME:" | head -1 | sed 's/.*USERNAME: //' | tr -d ' ')
OC_PASSWORD=$(grep -A 10 "^OCP_ADMIN_USER:" "$TEST_VARS_FILE" | grep "PASSWORD:" | head -1 | sed 's/.*PASSWORD: //' | tr -d ' ')
fi
echo "::add-mask::$OC_PASSWORD"
echo "::add-mask::$OC_USERNAME"
# Look up server URL based on selected cluster (avoids GitHub secret masking in outputs)
if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then
CLUSTER_URL="$OC_SERVER_PRIMARY"
elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then
CLUSTER_URL="$OC_SERVER_SECONDARY"
else
echo "❌ Unknown or empty CLUSTER_NAME: '$CLUSTER_NAME'" >&2
echo "Expected 'dash-e2e-int' or 'dash-e2e'" >&2
exit 1
fi
if [ -z "$CLUSTER_URL" ]; then
echo "❌ CLUSTER_URL is empty for cluster '$CLUSTER_NAME'" >&2
echo "Check that OC_SERVER_PRIMARY/OC_SERVER secrets are configured" >&2
exit 1
fi
echo "Logging in to OpenShift cluster ($CLUSTER_NAME)..."
oc login -u "$OC_USERNAME" -p "$OC_PASSWORD" --server="$CLUSTER_URL" --insecure-skip-tls-verify > /dev/null 2>&1
if [ $? -eq 0 ]; then
echo "βœ… Successfully logged in to $CLUSTER_NAME"
else
echo "❌ Failed to login to OpenShift cluster"
exit 1
fi
echo "KUBECONFIG=$HOME/.kube/config" >> $GITHUB_ENV
      # Rewrites test-variables.yml with the dashboard URL for the selected
      # cluster and (optionally) ODH-specific namespace overrides.
      - name: Override namespace values
        env:
          DASHBOARD_URL_PRIMARY: ${{ secrets.ODH_DASHBOARD_URL_PRIMARY }}
          DASHBOARD_URL_SECONDARY: ${{ secrets.ODH_DASHBOARD_URL }}
          ODH_NAMESPACES: ${{ secrets.ODH_NAMESPACES }}
        run: |
          TEST_VARS_FILE="${{ github.workspace }}/packages/cypress/test-variables.yml"
          # Look up dashboard URL based on selected cluster (secrets passed as step-level env for security)
          if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then
            DASHBOARD_URL="$DASHBOARD_URL_PRIMARY"
          elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then
            DASHBOARD_URL="$DASHBOARD_URL_SECONDARY"
          else
            echo "❌ Unknown or empty CLUSTER_NAME: '$CLUSTER_NAME'" >&2
            echo "Expected 'dash-e2e-int' or 'dash-e2e'" >&2
            exit 1
          fi
          if [ -z "$DASHBOARD_URL" ]; then
            echo "❌ DASHBOARD_URL is empty for cluster '$CLUSTER_NAME'" >&2
            echo "Check that ODH_DASHBOARD_URL_PRIMARY/ODH_DASHBOARD_URL secrets are configured" >&2
            exit 1
          fi
          # Mask dashboard URL to prevent exposure in logs
          echo "::add-mask::$DASHBOARD_URL"
          # Set dashboard URL for selected cluster
          sed -i "s|^ODH_DASHBOARD_URL:.*|ODH_DASHBOARD_URL: $DASHBOARD_URL|" "$TEST_VARS_FILE"
          # Export dashboard host (without protocol) for webpack ODH_DASHBOARD_HOST
          DASHBOARD_HOST=$(echo "$DASHBOARD_URL" | sed -E 's|https?://||' | sed 's|/.*||')
          echo "::add-mask::$DASHBOARD_HOST"
          echo "DASHBOARD_HOST=$DASHBOARD_HOST" >> $GITHUB_ENV
          # ODH_NAMESPACES is optional: exit 0 (success) when unset so the step
          # does not fail for deployments that use the default namespaces.
          if [ -z "$ODH_NAMESPACES" ]; then
            echo "⚠️ ODH_NAMESPACES secret not set, skipping namespace override"
            exit 0
          fi
          echo "::add-mask::$ODH_NAMESPACES"
          echo "πŸ“ Overriding namespaces with ODH values..."
          # Secret is a comma-separated 5-tuple:
          # operator_ns,applications_ns,notebooks_ns,operator_name,project_name
          IFS=',' read -r OPERATOR_NS APPLICATIONS_NS NOTEBOOKS_NS OPERATOR_NAME PROJECT_NAME <<< "$ODH_NAMESPACES"
          sed -i "s|^PRODUCT:.*|PRODUCT: ODH|" "$TEST_VARS_FILE"
          sed -i "s|^OPERATOR_NAMESPACE:.*|OPERATOR_NAMESPACE: $OPERATOR_NS|" "$TEST_VARS_FILE"
          sed -i "s|^APPLICATIONS_NAMESPACE:.*|APPLICATIONS_NAMESPACE: $APPLICATIONS_NS|" "$TEST_VARS_FILE"
          sed -i "s|^MONITORING_NAMESPACE:.*|MONITORING_NAMESPACE: $APPLICATIONS_NS|" "$TEST_VARS_FILE"
          sed -i "s|^NOTEBOOKS_NAMESPACE:.*|NOTEBOOKS_NAMESPACE: $NOTEBOOKS_NS|" "$TEST_VARS_FILE"
          sed -i "s|^OPERATOR_NAME:.*|OPERATOR_NAME: $OPERATOR_NAME|" "$TEST_VARS_FILE"
          sed -i "s|^ODH_DASHBOARD_PROJECT_NAME:.*|ODH_DASHBOARD_PROJECT_NAME: $PROJECT_NAME|" "$TEST_VARS_FILE"
          echo "βœ… Namespace configuration updated"
      # Point Cypress at the (now customized) test-variables.yml for later steps.
      - name: Set test configuration
        run: |
          echo "CY_TEST_CONFIG=${{ github.workspace }}/packages/cypress/test-variables.yml" >> $GITHUB_ENV
- name: Start Cypress Server
run: |
echo "🧹 Cleaning up port ${WEBPACK_PORT}..."
PORT_INFO_DIR="/tmp/gha-ports"
PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
CURRENT_RUN_ID="${{ github.run_id }}"
# Check if port is in use
if lsof -i:${WEBPACK_PORT} > /dev/null 2>&1; then
# Check if there's a run_id file for this port
if [ -f "$PORT_INFO_FILE" ]; then
PORT_OWNER_RUN_ID=$(cat "$PORT_INFO_FILE")
if [ "$PORT_OWNER_RUN_ID" != "$CURRENT_RUN_ID" ]; then
echo "⚠️ Port ${WEBPACK_PORT} is owned by different run_id: $PORT_OWNER_RUN_ID"
echo "⚠️ This port is in use by another workflow run - will not kill it"
# Try to find an alternative port
for alt_port in $(seq $((WEBPACK_PORT + 5)) $((WEBPACK_PORT + 50)) 5); do
if ! lsof -i:${alt_port} > /dev/null 2>&1; then
WEBPACK_PORT=$alt_port
PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
echo "βœ… Found alternative port: ${WEBPACK_PORT}"
break
fi
done
else
echo "βœ… Port ${WEBPACK_PORT} is owned by this run - safe to clean up"
fi
else
# No run_id file - check if process is from a recent GitHub Actions run
PORT_PID=$(lsof -ti:${WEBPACK_PORT} 2>/dev/null | head -1)
if [ -n "$PORT_PID" ]; then
# Check if process is from a GitHub Actions workflow
if ps -p "$PORT_PID" -o command= 2>/dev/null | grep -q "webpack.*serve\|node.*40[0-9][0-9]"; then
echo "⚠️ Port ${WEBPACK_PORT} in use by potential GHA process (PID: $PORT_PID)"
echo "⚠️ Being cautious - will not kill without run_id confirmation"
# Find alternative port
for alt_port in $(seq $((WEBPACK_PORT + 5)) $((WEBPACK_PORT + 50)) 5); do
if ! lsof -i:${alt_port} > /dev/null 2>&1; then
WEBPACK_PORT=$alt_port
PORT_INFO_FILE="$PORT_INFO_DIR/port-${WEBPACK_PORT}.run_id"
echo "βœ… Found alternative port: ${WEBPACK_PORT}"
break
fi
done
else
echo "⚠️ Port ${WEBPACK_PORT} in use by non-GHA process - cleaning up"
kill -9 "$PORT_PID" 2>/dev/null || true
fi
fi
fi
fi
# Verify port is free with retry logic
RETRY_COUNT=0
while lsof -i:${WEBPACK_PORT} > /dev/null 2>&1; do
RETRY_COUNT=$((RETRY_COUNT + 1))
if [ $RETRY_COUNT -gt 10 ]; then
echo "❌ Port ${WEBPACK_PORT} still in use after cleanup!"
lsof -i:${WEBPACK_PORT}
exit 1
fi
echo "⏳ Retrying cleanup... (attempt $RETRY_COUNT/10)"
sleep 2
done
# Claim the port with our run_id
mkdir -p "$PORT_INFO_DIR"
echo "$CURRENT_RUN_ID" > "$PORT_INFO_FILE"
echo "WEBPACK_PORT=$WEBPACK_PORT" >> $GITHUB_ENV
echo "PORT_INFO_FILE=$PORT_INFO_FILE" >> $GITHUB_ENV
echo "βœ… Port ${WEBPACK_PORT} is free and claimed by run_id: $CURRENT_RUN_ID"
echo "πŸš€ Starting webpack dev server on port ${WEBPACK_PORT} ($CLUSTER_NAME)..."
# Start webpack with explicit dashboard host (ensures correct proxy target for all tests)
cd frontend && env ODH_DASHBOARD_HOST=${DASHBOARD_HOST} ODH_PORT=${WEBPACK_PORT} npm run start:dev:ext > /tmp/webpack_${WEBPACK_PORT}.log 2>&1 &
SERVER_PID=$!
echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV
echo "$SERVER_PID" > "$PORT_INFO_DIR/port-${WEBPACK_PORT}.pid"
# Give server time to initialize
sleep 20
# Show filtered webpack status (hide sensitive cluster URLs)
if [ -f /tmp/webpack_${WEBPACK_PORT}.log ]; then
tail -20 /tmp/webpack_${WEBPACK_PORT}.log | \
grep -v "Dashboard host:" | \
grep -v "Proxy created:" | \
grep -v "Logged in as user:" | \
grep -v "Using project:" || true
fi
      # BFF toolchain setup: only runs when changed packages declared a BFF.
      # Tries dnf, then yum, then apt-get so the step works on any runner distro.
      - name: Install make for BFF builds
        if: needs.get-test-tags.outputs.bff_packages != '[]'
        run: |
          if ! command -v make &> /dev/null; then
            echo "πŸ“¦ Installing make..."
            sudo dnf install -y make 2>/dev/null || sudo yum install -y make 2>/dev/null || sudo apt-get install -y make 2>/dev/null
          else
            echo "βœ… make already available: $(make --version | head -1)"
          fi
      - name: Setup Go for BFF builds
        if: needs.get-test-tags.outputs.bff_packages != '[]'
        uses: actions/setup-go@v5
        with:
          go-version: '1.24'
          cache: false
      # Starts one background BFF process per changed package, after validating
      # every field from the (PR-controlled) JSON against strict allowlists.
      - name: Start BFF Services
        if: needs.get-test-tags.outputs.bff_packages != '[]'
        env:
          BFF_PACKAGES: ${{ needs.get-test-tags.outputs.bff_packages }}
        run: |
          echo "πŸš€ Starting BFF services for changed packages..."
          BFF_INFO_DIR="/tmp/gha-bff/${{ github.run_id }}"
          mkdir -p "$BFF_INFO_DIR"
          # Allowlists to prevent command injection from PR code (CWE-94)
          ALLOWED_COMMANDS=("make dev-bff-e2e-mock" "make dev-bff-e2e-cluster" "make -C upstream dev-bff-e2e-mock" "make -C upstream dev-bff-e2e-cluster")
          ALLOWED_DIRS=("automl" "autorag" "eval-hub" "gen-ai" "maas" "mlflow" "model-registry")
          # Parse BFF packages JSON and start each one
          # NOTE: the pipe puts this while-loop in a subshell; that is fine here
          # because an `exit 1` inside still fails the step (the pipeline exit
          # status is the loop's), and no loop variables are read after `done`.
          echo "$BFF_PACKAGES" | jq -c '.[]' | while read -r bff_config; do
            BFF_NAME=$(echo "$bff_config" | jq -r '.name')
            BFF_DIR=$(echo "$bff_config" | jq -r '.dir')
            BFF_PORT=$(echo "$bff_config" | jq -r '.port')
            BFF_HEALTH=$(echo "$bff_config" | jq -r '.healthEndpoint')
            BFF_CMD=$(echo "$bff_config" | jq -r '.startCommand')
            echo ""
            echo "πŸ“¦ Starting BFF for $BFF_NAME on port $BFF_PORT..."
            # Validate BFF_CMD against allowlist (prevent command injection)
            CMD_VALID=false
            for allowed in "${ALLOWED_COMMANDS[@]}"; do
              if [ "$BFF_CMD" = "$allowed" ]; then
                CMD_VALID=true
                break
              fi
            done
            if [ "$CMD_VALID" = "false" ]; then
              echo " ❌ Rejected untrusted startCommand: '$BFF_CMD'"
              echo " Allowed commands: ${ALLOWED_COMMANDS[*]}"
              exit 1
            fi
            # Validate BFF_DIR against allowlist (prevent path traversal)
            DIR_VALID=false
            for allowed in "${ALLOWED_DIRS[@]}"; do
              if [ "$BFF_DIR" = "$allowed" ]; then
                DIR_VALID=true
                break
              fi
            done
            if [ "$DIR_VALID" = "false" ]; then
              echo " ❌ Rejected untrusted package directory: '$BFF_DIR'"
              echo " Allowed directories: ${ALLOWED_DIRS[*]}"
              exit 1
            fi
            # Validate port is numeric and health endpoint is a safe path
            if ! echo "$BFF_PORT" | grep -qE '^[0-9]+$'; then
              echo " ❌ Invalid BFF port: '$BFF_PORT' (must be numeric)"
              exit 1
            fi
            if ! echo "$BFF_HEALTH" | grep -qE '^/[a-zA-Z0-9/_-]+$'; then
              echo " ❌ Invalid health endpoint: '$BFF_HEALTH' (must be a simple path)"
              exit 1
            fi
            # Reuse if already healthy on this port (avoid parallel startup race)
            if curl -sf "http://localhost:$BFF_PORT$BFF_HEALTH" > /dev/null 2>&1; then
              echo " ♻️ BFF $BFF_NAME already healthy on $BFF_PORT, reusing existing process"
              continue
            fi
            # Fail fast if port is occupied but unhealthy
            if lsof -ti:"$BFF_PORT" > /dev/null 2>&1; then
              echo " ❌ Port $BFF_PORT already in use and health check failed"
              lsof -i:"$BFF_PORT" || true
              exit 1
            fi
            # Start BFF in background with configurable port
            cd "packages/$BFF_DIR"
            E2E_BFF_PORT=$BFF_PORT $BFF_CMD > "/tmp/bff_${BFF_DIR}.log" 2>&1 &
            BFF_PID=$!
            cd - > /dev/null
            # Save PID for cleanup
            echo "$BFF_PID" >> "$BFF_INFO_DIR/pids.txt"
            echo "$BFF_PORT" >> "$BFF_INFO_DIR/ports.txt"
            echo " πŸ“ PID: $BFF_PID"
            # Wait for health check (max 60 seconds)
            echo " ⏳ Waiting for health at localhost:$BFF_PORT$BFF_HEALTH..."
            for i in {1..30}; do
              if curl -sf "http://localhost:$BFF_PORT$BFF_HEALTH" > /dev/null 2>&1; then
                echo " βœ… BFF $BFF_NAME is healthy!"
                break
              fi
              if [ $i -eq 30 ]; then
                echo " ❌ BFF $BFF_NAME failed health check after 60 seconds"
                echo " πŸ“‹ BFF logs:"
                tail -50 "/tmp/bff_${BFF_DIR}.log" || true
                exit 1
              fi
              sleep 2
            done
          done
          echo ""
          echo "βœ… All BFF services started successfully"
      # Two-stage readiness gate: wait-on confirms the port answers HTTP, then
      # a content probe confirms the dashboard app actually rendered.
      - name: Wait for Server Ready
        run: |
          echo "⏳ Waiting for localhost:${WEBPACK_PORT} to be ready..."
          npx wait-on http://localhost:${WEBPACK_PORT} --timeout 120000
          # Verify the application loads with dashboard content
          for i in {1..10}; do
            if curl -s -f http://localhost:${WEBPACK_PORT}/ | grep -q "Data Science Projects\|ODH\|Open Data Hub\|Dashboard"; then
              echo "βœ… Server is ready and application is loaded!"
              break
            fi
            if [ $i -lt 10 ]; then
              echo "⏳ Waiting for application to load... (attempt $i/10)"
              sleep 8
            else
              echo "❌ Application failed to load properly after 10 attempts"
              exit 1
            fi
          done
      # Runs Cypress (Chrome) against the local webpack server, filtered to the
      # matrix tag; results land in a per-tag directory for artifact upload.
      - name: Run E2E Tests
        env:
          OC_SERVER_PRIMARY: ${{ secrets.OC_SERVER_PRIMARY }}
          OC_SERVER_SECONDARY: ${{ secrets.OC_SERVER }}
        run: |
          cd frontend
          echo "πŸ§ͺ Running E2E tests for ${{ matrix.tag }}..."
          echo "πŸš€ Running tests against live dashboard on port ${WEBPACK_PORT}"
          echo "πŸ“Œ Tag source: ${{ needs.get-test-tags.outputs.source }}"
          export CY_RESULTS_DIR="${{ github.workspace }}/packages/cypress/results/${{ matrix.tag }}"
          mkdir -p "$CY_RESULTS_DIR"
          # Determine OC_SERVER based on cluster (for oc user switching in tests)
          if [ "$CLUSTER_NAME" = "dash-e2e-int" ]; then
            OC_SERVER="$OC_SERVER_PRIMARY"
          elif [ "$CLUSTER_NAME" = "dash-e2e" ]; then
            OC_SERVER="$OC_SERVER_SECONDARY"
          else
            echo "⚠️ Unknown cluster: $CLUSTER_NAME, defaulting to OC_SERVER_PRIMARY"
            OC_SERVER="$OC_SERVER_PRIMARY"
          fi
          # Set IS_NON_ADMIN_RUN flag for non-admin tests to skip admin-only setup hooks
          # (trailing comma so more env pairs can be appended before the final key)
          EXTRA_CYPRESS_ENV="OC_SERVER=${OC_SERVER},"
          if [[ "${{ matrix.tag }}" == "@NonAdmin" ]]; then
            EXTRA_CYPRESS_ENV="${EXTRA_CYPRESS_ENV}IS_NON_ADMIN_RUN=true,"
            echo "πŸ” Running in non-admin mode - admin setup hooks will be skipped"
          fi
          BASE_URL=http://localhost:${WEBPACK_PORT} npm run cypress:run:chrome -- \
            --env ${EXTRA_CYPRESS_ENV}skipTags="@Bug @Maintain @NonConcurrent",grepTags="${{ matrix.tag }}",grepFilterSpecs=true \
            --config video=true,screenshotsFolder="$CY_RESULTS_DIR/screenshots",videosFolder="$CY_RESULTS_DIR/videos"
      # Always upload results/videos/screenshots, even on failure, so failed
      # runs can be debugged from the artifact.
      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: e2e-results-${{ matrix.tag }}
          path: |
            packages/cypress/results/
            packages/cypress/videos/
            packages/cypress/screenshots/
          retention-days: 7
      # Human-readable summary line in the job log, regardless of outcome.
      - name: Log test completion
        if: always()
        run: |
          echo "🏁 E2E Test completed!"
          echo "Status: ${{ job.status }}"
          echo "Test Tag: ${{ matrix.tag }}"
          echo "Cluster: $CLUSTER_NAME"
          echo "Run ID: ${{ github.run_id }}"
# ---------------------------------------------------------------------------
# Final Status - Update PR with test results
# ---------------------------------------------------------------------------
  set-final-status:
    # Posts a commit status ("Cypress E2E Tests") back to the tested SHA,
    # translating the needs results into a success/failure/error state.
    needs: [select-cluster, e2e-tests]
    if: >-
      always() &&
      (github.event_name == 'workflow_dispatch' ||
       (github.event.workflow_run.event == 'pull_request' &&
        github.event.workflow_run.conclusion == 'success'))
    runs-on: ubuntu-latest
    steps:
      - name: Set final status
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          E2E_RESULT="${{ needs.e2e-tests.result }}"
          CLUSTER_RESULT="${{ needs.select-cluster.result }}"
          CLUSTER="${{ needs.select-cluster.outputs.cluster_name }}"
          echo "πŸ“Š Job results: select-cluster=$CLUSTER_RESULT, e2e-tests=$E2E_RESULT"
          # Handle cluster selection failure first
          if [[ "$CLUSTER_RESULT" == "failure" ]]; then
            STATE="failure"
            DESC="Cluster health check failed - no healthy cluster available"
          elif [[ "$E2E_RESULT" == "success" ]]; then
            STATE="success"
            DESC="All tests passed on $CLUSTER"
          elif [[ "$E2E_RESULT" == "cancelled" ]]; then
            STATE="error"
            DESC="Tests cancelled"
          elif [[ "$E2E_RESULT" == "skipped" && "$CLUSTER_RESULT" == "skipped" ]]; then
            # Both skipped means test.yml failed - don't post status
            echo "Both jobs skipped (test.yml likely failed) - not posting status"
            exit 0
          elif [[ "$E2E_RESULT" == "skipped" ]]; then
            STATE="failure"
            DESC="Tests skipped due to upstream failure"
          else
            STATE="failure"
            DESC="Tests failed on ${CLUSTER:-unknown cluster}"
          fi
          echo "πŸ“ Posting status: state=$STATE, description=$DESC"
          # Commit-statuses API call so the result shows up on the PR checks UI
          gh api repos/${{ github.repository }}/statuses/${{ github.event.workflow_run.head_sha || github.sha }} \
            -f state="$STATE" \
            -f context="Cypress E2E Tests" \
            -f description="$DESC" \
            -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# ---------------------------------------------------------------------------
# Cleanup - Stop all servers started by this workflow run
# ---------------------------------------------------------------------------
  cleanup-server:
    # Runs on the same self-hosted runner pool to tear down background
    # processes (BFFs, webpack, stray Chrome) owned by THIS run_id only.
    needs: [e2e-tests]
    runs-on: self-hosted
    if: ${{ always() && (github.event_name == 'workflow_dispatch' || (github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success')) }}
    steps:
      - name: Stop BFF Services
        run: |
          echo "πŸ›‘ Stopping BFF services for run_id: ${{ github.run_id }}..."
          BFF_INFO_DIR="/tmp/gha-bff/${{ github.run_id }}"
          BFF_KILLED_COUNT=0
          # Check if this workflow started any BFFs
          if [ -d "$BFF_INFO_DIR" ]; then
            # Kill all BFF processes
            # (redirection, not a pipe, keeps the loop in the current shell so
            # BFF_KILLED_COUNT updates survive past `done`)
            if [ -f "$BFF_INFO_DIR/pids.txt" ]; then
              while read -r pid; do
                if ps -p "$pid" > /dev/null 2>&1; then
                  echo " πŸ›‘ Killing BFF process $pid"
                  pkill -P "$pid" 2>/dev/null || true
                  kill "$pid" 2>/dev/null || true
                  BFF_KILLED_COUNT=$((BFF_KILLED_COUNT + 1))
                fi
              done < "$BFF_INFO_DIR/pids.txt"
            fi
            # Clean up BFF ports
            if [ -f "$BFF_INFO_DIR/ports.txt" ]; then
              while read -r port; do
                BFF_PID=$(lsof -ti:${port} 2>/dev/null | head -1)
                if [ -n "$BFF_PID" ]; then
                  echo " πŸ›‘ Killing process on BFF port $port (PID: $BFF_PID)"
                  pkill -P "$BFF_PID" 2>/dev/null || true
                  kill "$BFF_PID" 2>/dev/null || true
                fi
              done < "$BFF_INFO_DIR/ports.txt"
            fi
            # Clean up info files
            rm -rf "$BFF_INFO_DIR"
          fi
          if [ $BFF_KILLED_COUNT -eq 0 ]; then
            echo "βœ… No BFF processes found for run_id: ${{ github.run_id }}"
          else
            echo "βœ… Cleaned up $BFF_KILLED_COUNT BFF process(es) for run_id: ${{ github.run_id }}"
          fi
      - name: Stop Cypress Servers
        run: |
          echo "πŸ›‘ Stopping webpack dev server for run_id: ${{ github.run_id }}..."
          PORT_INFO_DIR="/tmp/gha-ports"
          CURRENT_RUN_ID="${{ github.run_id }}"
          KILLED_COUNT=0
          # Find all port files owned by this run_id
          if [ -d "$PORT_INFO_DIR" ]; then
            for port_file in "$PORT_INFO_DIR"/port-*.run_id; do
              if [ -f "$port_file" ]; then
                PORT_OWNER_RUN_ID=$(cat "$port_file")
                if [ "$PORT_OWNER_RUN_ID" = "$CURRENT_RUN_ID" ]; then
                  # Extract port number from filename
                  PORT=$(basename "$port_file" | sed 's/port-\([0-9]*\)\.run_id/\1/')
                  PID_FILE="$PORT_INFO_DIR/port-${PORT}.pid"
                  # Kill process if PID file exists
                  if [ -f "$PID_FILE" ]; then
                    PID=$(cat "$PID_FILE")
                    if ps -p "$PID" > /dev/null 2>&1; then
                      echo "πŸ›‘ Killing process $PID on port $PORT (run_id: $CURRENT_RUN_ID)"
                      pkill -P "$PID" 2>/dev/null || true
                      kill "$PID" 2>/dev/null || true
                      KILLED_COUNT=$((KILLED_COUNT + 1))
                    fi
                  fi
                  # Also kill any process on this port (double-check)
                  PORT_PID=$(lsof -ti:${PORT} 2>/dev/null | head -1)
                  if [ -n "$PORT_PID" ]; then
                    echo "πŸ›‘ Killing process $PORT_PID on port $PORT"
                    pkill -P "$PORT_PID" 2>/dev/null || true
                    kill "$PORT_PID" 2>/dev/null || true
                  fi
                  # Clean up orphaned Chrome processes
                  ALL_PORT_PIDS=$(lsof -ti:${PORT} 2>/dev/null || true)
                  if [ -n "$ALL_PORT_PIDS" ]; then
                    for port_pid in $ALL_PORT_PIDS; do
                      if ps -p "$port_pid" -o comm= 2>/dev/null | grep -qE "chrome|chromium"; then
                        echo "πŸ›‘ Killing Chrome process $port_pid (using port $PORT)"
                        pkill -P "$port_pid" 2>/dev/null || true
                        kill "$port_pid" 2>/dev/null || true
                      fi
                    done
                  fi
                  # Clean up port info files
                  rm -f "$port_file" "$PID_FILE"
                fi
              fi
            done
          fi
          # Clean up stale port files older than 24 hours
          find "$PORT_INFO_DIR" -name "*.run_id" -mtime +1 -delete 2>/dev/null || true
          find "$PORT_INFO_DIR" -name "*.pid" -mtime +1 -delete 2>/dev/null || true
          if [ $KILLED_COUNT -eq 0 ]; then
            echo "βœ… No processes found for run_id: $CURRENT_RUN_ID"
          else
            echo "βœ… Cleaned up $KILLED_COUNT process(es) for run_id: $CURRENT_RUN_ID"
          fi
- name: Clean Work Directories and Build Artifacts
run: |
echo "🧹 Starting comprehensive cleanup (SAFE for parallel jobs)..."
RUNNER_USER=$(whoami)
HOME_DIR=$(eval echo "~$RUNNER_USER")
CLEANED_SPACE=0
echo "πŸ“Š Disk usage before cleanup:"
df -h / | grep -v Filesystem
# Helper function to check if directory is in use
is_directory_in_use() {
local dir="$1"
# Check if any processes are using this directory
if lsof +D "$dir" 2>/dev/null | grep -qE "node|npm|webpack|chrome|cypress"; then
return 0 # In use
fi
return 1 # Not in use
}
# 1. Clean Go upstream build artifacts (only in old work dirs)
echo ""
echo "πŸ—‘οΈ Cleaning Go upstream builds (in work dirs >7 days, not in use)..."
UPSTREAM_COUNT=0
find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d -mtime +7 2>/dev/null | while read work_dir; do
if [ -d "$work_dir" ] && ! is_directory_in_use "$work_dir"; then
UPSTREAM_DIRS=$(find "$work_dir" -type d -path "*/packages/*/upstream" 2>/dev/null || true)
for dir in $UPSTREAM_DIRS; do
if [ -d "$dir" ]; then
SIZE=$(du -sm "$dir" 2>/dev/null | cut -f1)
rm -rf "$dir" 2>/dev/null || true
CLEANED_SPACE=$((CLEANED_SPACE + SIZE))
UPSTREAM_COUNT=$((UPSTREAM_COUNT + 1))
fi
done
fi
done
echo " βœ… Cleaned $UPSTREAM_COUNT upstream build directories"
# 2. Clean old work directories (>7 days, not in use)
echo ""
echo "πŸ—‘οΈ Cleaning old work directories (>7 days, not in use)..."
OLD_WORK_COUNT=0
SKIPPED_COUNT=0
find "$HOME_DIR"/actions-runner*/_work -maxdepth 1 -name "odh-dashboard" -type d -mtime +7 2>/dev/null | while read work_dir; do
if [ -d "$work_dir" ]; then
if is_directory_in_use "$work_dir"; then
echo " ⏭️ Skipped (in use): $(basename $(dirname $work_dir))"
SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
else
SIZE=$(du -sm "$work_dir" 2>/dev/null | cut -f1)
rm -rf "$work_dir" 2>/dev/null || true
CLEANED_SPACE=$((CLEANED_SPACE + SIZE))
OLD_WORK_COUNT=$((OLD_WORK_COUNT + 1))
echo " βœ… Cleaned: $(basename $(dirname $work_dir)) (~${SIZE}MB)"
fi
fi
done
echo " βœ… Cleaned $OLD_WORK_COUNT old work directories"
if [ "$SKIPPED_COUNT" -gt 0 ]; then
echo " ⏭️ Skipped $SKIPPED_COUNT directories (in use by parallel jobs)"
fi
# 3. Clean old runner diagnostic logs (>7 days)
echo ""
echo "πŸ—‘οΈ Cleaning old runner logs (>7 days)..."
LOG_COUNT=$(find "$HOME_DIR"/actions-runner*/_diag -name "*.log" -mtime +7 -type f 2>/dev/null | wc -l)
find "$HOME_DIR"/actions-runner*/_diag -name "*.log" -mtime +7 -delete 2>/dev/null || true
echo " βœ… Cleaned $LOG_COUNT old log files"
# 4. Clean old Cypress artifacts (>7 days only - safe even if parallel jobs running)
echo ""
echo "πŸ—‘οΈ Cleaning old Cypress artifacts (>7 days)..."
ARTIFACT_COUNT=0
for runner_dir in "$HOME_DIR"/actions-runner*; do
# Screenshots
if [ -d "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/screenshots" ]; then
SCREENSHOTS=$(find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/screenshots" -type f -mtime +7 2>/dev/null | wc -l)
find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/screenshots" -type f -mtime +7 -delete 2>/dev/null || true
ARTIFACT_COUNT=$((ARTIFACT_COUNT + SCREENSHOTS))
fi
# Videos
if [ -d "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/videos" ]; then
VIDEOS=$(find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/videos" -type f -mtime +7 2>/dev/null | wc -l)
find "$runner_dir/_work/odh-dashboard/odh-dashboard/packages/cypress/results/videos" -type f -mtime +7 -delete 2>/dev/null || true
ARTIFACT_COUNT=$((ARTIFACT_COUNT + VIDEOS))
fi
done
echo " βœ… Cleaned $ARTIFACT_COUNT old Cypress artifacts"
# 5. Check disk space after cleanup
echo ""
echo "πŸ“Š Disk usage after cleanup:"
df -h / | grep -v Filesystem
echo ""
echo "πŸ’Ύ Total space cleaned: ~${CLEANED_SPACE}MB"
echo "βœ… Cleanup complete!"