---
# ============================================================================
# OpenShift Docling E2E Test Workflow (EC2 Runner)
# ============================================================================
#
# This workflow tests the Spark operator using Kustomize manifests and
# shell-based E2E tests.
#
# Tests performed:
# 1. Installs Spark operator via Kustomize (config/default/)
# 2. Verifies fsGroup is NOT 185 (OpenShift security requirement)
# 3. Runs the docling-spark-app workload from examples/openshift/k8s/
#
# Infrastructure:
# - Uses self-hosted EC2 runners for larger disk space and resources
# - EC2 instances are started on-demand and terminated after tests
#
# Triggers:
# - Manual (workflow_dispatch)
# - Push to main branch when OpenShift-related files change
#
# ============================================================================
name: OpenShift Docling E2E Test

on:
  # Manual trigger - allows running from GitHub Actions UI or CLI
  workflow_dispatch:
    inputs:
      k8s_version:
        description: "Kubernetes version to test"
        required: false
        default: "v1.30.8"
  # Automatic trigger on push to the main branch when OpenShift-related files change
  push:
    branches:
      - main
    paths:
      - 'examples/openshift/**'
      - 'config/**'
      - '.github/workflows/openshift-docling-e2e.yaml'

env:
  INSTANCE_TYPE: "m5.xlarge"

# We don't need anything other than Bash for our shell
defaults:
  run:
    shell: bash

# Cancel in-progress runs for the same branch
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # ==========================================================================
  # Job 1: Launch EC2 Runner
  # Starts an on-demand EC2 instance and registers it as a self-hosted runner.
  # ==========================================================================
  launch-ec2-runner:
    runs-on: ubuntu-latest
    permissions:
      id-token: write # Required for OIDC (AWS auth)
      contents: read
    outputs:
      # Runner label and instance id are consumed by the downstream jobs
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
        with:
          role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.SPARK_OPERATOR_IAM_ROLE }}"
          aws-region: us-east-2
          role-session-name: odh-spark-operator # For tracking in CloudTrail
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512 # v2.4.3
        with:
          mode: start
          github-token: "${{ secrets.SPARK_OPERATOR_GH_PERSONAL_ACCESS_TOKEN }}"
          ec2-instance-type: "${{ env.INSTANCE_TYPE }}"
          # Try each AZ in order until an instance can be launched
          availability-zones-config: >
            [
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2A_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2B_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2C_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"}
            ]
          # 150 GB root volume - the docling image alone is ~9.5GB
          block-device-mappings: >
            [
              {"DeviceName": "/dev/sda1", "Ebs": {"VolumeSize": 150}}
            ]
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "spark-operator-gh-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number || 'N/A' }}"}
            ]
# ============================================================================
# Job 2: OpenShift E2E Tests (runs on EC2)
# ============================================================================
openshift-e2e:
name: OpenShift E2E Tests
needs:
- launch-ec2-runner
runs-on: ${{ needs.launch-ec2-runner.outputs.label }}
env:
K8S_VERSION: ${{ inputs.k8s_version || 'v1.30.8' }}
# Home and Kubernetes config (EC2 runner may not have HOME set)
HOME: ${{ github.workspace }}/home
KUBECONFIG: ${{ github.workspace }}/home/.kube/config
steps:
- name: Setup Environment
run: |
echo "Running on EC2 ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }}"
echo "Instance type: ${{ env.INSTANCE_TYPE }}"
cat /etc/os-release
df -h
# Create home and kubeconfig directories
mkdir -p $HOME/.kube
echo "HOME=$HOME"
echo "KUBECONFIG=$KUBECONFIG"
# ======================================================================
# Step 1: Checkout the repository
# ======================================================================
- name: Checkout source code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
with:
fetch-depth: 0
# ======================================================================
# Step 2: Install dependencies (docker, make, kubectl, kind)
# The EC2 runner AMI doesn't have these pre-installed
# ======================================================================
- name: Install dependencies
run: |
set -eou pipefail
echo "Installing Docker CE..."
# Install required packages
sudo yum install -y yum-utils
# Add Docker's official repository
sudo yum-config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
# Install Docker CE
sudo yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# Start and enable Docker
sudo systemctl start docker
sudo systemctl enable docker
# Allow docker commands without sudo for this session
sudo chmod 666 /var/run/docker.sock
docker --version
echo "Installing build tools..."
sudo yum install -y make git
echo "Installing kubectl..."
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl version --client
echo "Installing Kind..."
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.31.0/kind-linux-amd64
sudo install -o root -g root -m 0755 kind /usr/local/bin/kind
kind version
echo "All dependencies installed successfully!"
# ======================================================================
# Step 3: Free up disk space
# The docling-spark image is very large (~9.5GB), need space
# ======================================================================
- name: Free up disk space
run: |
echo "=== Disk space BEFORE cleanup ==="
df -h
echo ""
echo "Removing unused Docker resources..."
docker system prune -af --volumes || true
echo ""
echo "=== Disk space AFTER cleanup ==="
df -h
# ======================================================================
# Step 4: Create Kind cluster, load docling image, upload test PDFs
# Uses the standardized make target for full setup
# ======================================================================
- name: Setup Kind cluster with docling image and test data
env:
K8S_VERSION: ${{ inputs.k8s_version || 'v1.30.8' }}
KIND_KUBE_CONFIG: ${{ env.KUBECONFIG }}
DOCLING_IMAGE: quay.io/rishasin/docling-spark@sha256:7e8431fc89dbc6c10aec1f0401aadd9c9cd66b9728fbcb98f6bf40ba3e3b4cdb
run: make -C examples/openshift kind-setup-full
# ======================================================================
# Step 5: Verify cluster is ready
# ======================================================================
- name: Verify cluster
run: |
echo "Verifying Kind cluster is ready..."
kubectl cluster-info
kubectl get nodes
kubectl get pvc -n spark-operator
echo "Cluster is ready!"
# ======================================================================
# Step 6: Install Spark operator via Kustomize
# ======================================================================
- name: Install Spark operator
run: CLEANUP=false make -C examples/openshift operator-install
# ======================================================================
# Step 7: Run Docling Spark E2E test
# ======================================================================
- name: Run Docling Spark E2E test
run: CLEANUP=false make -C examples/openshift test-docling-spark
# ======================================================================
# Step 8: Download and show results
# Multi-output format produces per-PDF files:
# - {name}.md (markdown content)
# - {name}.json (docling JSON format)
# - {name}_metadata.json (processing metadata)
# - {name}_error.json (for failed PDFs)
# - summary.jsonl (combined summary)
# ======================================================================
- name: Download and show results
if: success()
run: |
echo "Downloading results from output PVC..."
mkdir -p ./output
cd examples/openshift
chmod +x k8s/deploy.sh
./k8s/deploy.sh download ../../output/
echo ""
echo "=== Output Directory Contents ==="
ls -la ../../output/
# Count different file types
MD_COUNT=$(find ../../output/ -maxdepth 1 -name "*.md" 2>/dev/null | wc -l | tr -d ' ')
JSON_COUNT=$(find ../../output/ -maxdepth 1 -name "*.json" ! -name "*_error.json" ! -name "*_metadata.json" 2>/dev/null | wc -l | tr -d ' ')
METADATA_COUNT=$(find ../../output/ -maxdepth 1 -name "*_metadata.json" 2>/dev/null | wc -l | tr -d ' ')
ERROR_COUNT=$(find ../../output/ -maxdepth 1 -name "*_error.json" 2>/dev/null | wc -l | tr -d ' ')
echo ""
echo "=== File Summary ==="
echo "Markdown files (.md): $MD_COUNT"
echo "JSON files (.json): $JSON_COUNT"
echo "Metadata files: $METADATA_COUNT"
echo "Error files (*_error.json): $ERROR_COUNT"
# Check summary.jsonl
if [ -s ../../output/summary.jsonl ]; then
LINE_COUNT=$(wc -l < ../../output/summary.jsonl | tr -d ' ')
echo ""
echo "=== Processing Summary (summary.jsonl: $LINE_COUNT documents) ==="
python3 -c "
import sys, json
success_count = 0
fail_count = 0
for line in open('../../output/summary.jsonl'):
d = json.loads(line.strip())
doc_path = d.get('document_path', 'unknown')
# Extract just filename
doc_name = doc_path.split('/')[-1] if '/' in doc_path else doc_path
if d.get('success', False):
print(f' ✅ {doc_name}')
success_count += 1
else:
error_msg = d.get('error_message', 'Unknown error')
# Truncate long error messages
if error_msg and len(error_msg) > 80:
error_msg = error_msg[:77] + '...'
print(f' ❌ {doc_name}: {error_msg}')
fail_count += 1
print(f'\nTotal: {success_count} succeeded, {fail_count} failed')
"
fi
# Show sample markdown if any exist
if [ "$MD_COUNT" -gt 0 ]; then
echo ""
echo "=== Sample Markdown Output ==="
FIRST_MD=$(find ../../output/ -maxdepth 1 -name "*.md" | head -1)
echo "File: $(basename $FIRST_MD)"
echo "Size: $(wc -c < "$FIRST_MD" | tr -d ' ') bytes"
echo "Preview (first 500 chars):"
echo "---"
head -c 500 "$FIRST_MD"
echo ""
echo "---"
fi
# Show error details if any failures
if [ "$ERROR_COUNT" -gt 0 ]; then
echo ""
echo "=== Error Details ==="
for err_file in ../../output/*_error.json; do
if [ -f "$err_file" ]; then
echo "File: $(basename $err_file)"
python3 -c "
import json
with open('$err_file') as f:
d = json.load(f)
print(f' Source: {d.get(\"source_file\", \"unknown\")}')
err = d.get('error_message', 'No error message')
if len(err) > 200:
err = err[:197] + '...'
print(f' Error: {err}')
"
fi
done
fi
# Determine success/failure
echo ""
if [ -s ../../output/summary.jsonl ]; then
echo "✅ Download complete! Found $MD_COUNT markdown, $JSON_COUNT JSON, $ERROR_COUNT error files."
else
echo "❌ ERROR: summary.jsonl is empty or missing"
exit 1
fi
# ======================================================================
# Step 10: Collect logs on failure
# ======================================================================
- name: Collect logs on failure
if: failure()
run: |
echo "=== Pod Status ==="
kubectl get pods -A
echo ""
echo "=== Spark Operator Logs ==="
kubectl logs -n spark-operator -l app.kubernetes.io/name=spark-operator --tail=100 || true
echo ""
echo "=== Spark Operator Namespace Pods ==="
kubectl get pods -n spark-operator -o wide || true
echo ""
echo "=== Driver Pod Details ==="
kubectl describe pod -n spark-operator -l spark-role=driver || true
echo ""
echo "=== Driver Pod Logs ==="
kubectl logs -n spark-operator -l spark-role=driver --tail=100 || true
echo ""
echo "=== SparkApplication Status ==="
kubectl get sparkapplication -A -o yaml || true
echo ""
echo "=== Events in spark-operator namespace ==="
kubectl get events -n spark-operator --sort-by='.lastTimestamp' || true
echo ""
echo "=== All Recent Events ==="
kubectl get events -A --sort-by='.lastTimestamp' | tail -50
# ======================================================================
# Step 11: Cleanup Kind cluster
# ======================================================================
- name: Cleanup Kind cluster
if: always()
run: make -C examples/openshift kind-cleanup || true
# ============================================================================
# Job 3: Stop EC2 Runner
# ============================================================================
stop-ec2-runner:
permissions:
id-token: write # Required for OIDC (AWS auth)
contents: read
needs:
- launch-ec2-runner
- openshift-e2e
runs-on: ubuntu-latest
if: ${{ always() && needs.launch-ec2-runner.outputs.ec2-instance-id != '' }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
with:
role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.SPARK_OPERATOR_IAM_ROLE }}"
aws-region: us-east-2
role-session-name: odh-spark-operator # For tracking in CloudTrail
- name: Stop EC2 runner
uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512 # v2.4.3
with:
mode: stop
github-token: "${{ secrets.SPARK_OPERATOR_GH_PERSONAL_ACCESS_TOKEN }}"
label: ${{ needs.launch-ec2-runner.outputs.label }}
ec2-instance-id: ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }}