Merge pull request #87 from shruthis4/updateParams #121
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# ============================================================================
# OpenShift Docling E2E Test Workflow (EC2 Runner)
# ============================================================================
#
# This workflow tests the Spark operator using Kustomize manifests and
# shell-based E2E tests.
#
# Tests performed:
#   1. Installs Spark operator via Kustomize (config/default/)
#   2. Verifies fsGroup is NOT 185 (OpenShift security requirement)
#   3. Runs the docling-spark-app workload from examples/openshift/k8s/
#
# Infrastructure:
#   - Uses self-hosted EC2 runners for larger disk space and resources
#   - EC2 instances are started on-demand and terminated after tests
#
# Triggers:
#   - Manual (workflow_dispatch)
#   - Push to main branch when OpenShift-related files change
#
# ============================================================================
name: OpenShift Docling E2E Test

on:
  # Manual trigger - allows running from GitHub Actions UI or CLI
  workflow_dispatch:
    inputs:
      k8s_version:
        description: "Kubernetes version to test"
        required: false
        default: "v1.30.8"
  # Automatic trigger on push to the main branch
  push:
    branches:
      - main
    paths:
      - 'examples/openshift/**'
      - 'config/**'
      - '.github/workflows/openshift-docling-e2e.yaml'

env:
  INSTANCE_TYPE: "m5.xlarge"

# We don't need anything other than Bash for our shell
defaults:
  run:
    shell: bash

# Cancel in-progress runs for the same branch
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # ==========================================================================
  # Job 1: Launch EC2 Runner
  # ==========================================================================
  launch-ec2-runner:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # Required for OIDC (AWS auth)
      contents: read
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
        with:
          role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.SPARK_OPERATOR_IAM_ROLE }}"
          aws-region: us-east-2
          role-session-name: odh-spark-operator # For tracking in CloudTrail
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512 # v2.4.3
        with:
          mode: start
          github-token: "${{ secrets.SPARK_OPERATOR_GH_PERSONAL_ACCESS_TOKEN }}"
          ec2-instance-type: "${{ env.INSTANCE_TYPE }}"
          # Same AMI in three availability zones so the runner can fall back
          # to another AZ when capacity is unavailable
          availability-zones-config: >
            [
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2A_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2B_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2C_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"}
            ]
          # 150 GB root volume — the docling-spark image alone is ~9.5 GB
          block-device-mappings: >
            [
              {"DeviceName": "/dev/sda1", "Ebs": {"VolumeSize": 150}}
            ]
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "spark-operator-gh-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number || 'N/A' }}"}
            ]
| # ============================================================================ | |
| # Job 2: OpenShift E2E Tests (runs on EC2) | |
| # ============================================================================ | |
| openshift-e2e: | |
| name: OpenShift E2E Tests | |
| needs: | |
| - launch-ec2-runner | |
| runs-on: ${{ needs.launch-ec2-runner.outputs.label }} | |
| env: | |
| K8S_VERSION: ${{ inputs.k8s_version || 'v1.30.8' }} | |
| # Home and Kubernetes config (EC2 runner may not have HOME set) | |
| HOME: ${{ github.workspace }}/home | |
| KUBECONFIG: ${{ github.workspace }}/home/.kube/config | |
| steps: | |
| - name: Setup Environment | |
| run: | | |
| echo "Running on EC2 ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }}" | |
| echo "Instance type: ${{ env.INSTANCE_TYPE }}" | |
| cat /etc/os-release | |
| df -h | |
| # Create home and kubeconfig directories | |
| mkdir -p $HOME/.kube | |
| echo "HOME=$HOME" | |
| echo "KUBECONFIG=$KUBECONFIG" | |
| # ====================================================================== | |
| # Step 1: Checkout the repository | |
| # ====================================================================== | |
| - name: Checkout source code | |
| uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6 | |
| with: | |
| fetch-depth: 0 | |
| # ====================================================================== | |
| # Step 2: Install dependencies (docker, make, kubectl, kind) | |
| # The EC2 runner AMI doesn't have these pre-installed | |
| # ====================================================================== | |
| - name: Install dependencies | |
| run: | | |
| set -eou pipefail | |
| echo "Installing Docker CE..." | |
| # Install required packages | |
| sudo yum install -y yum-utils | |
| # Add Docker's official repository | |
| sudo yum-config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo | |
| # Install Docker CE | |
| sudo yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin | |
| # Start and enable Docker | |
| sudo systemctl start docker | |
| sudo systemctl enable docker | |
| # Allow docker commands without sudo for this session | |
| sudo chmod 666 /var/run/docker.sock | |
| docker --version | |
| echo "Installing build tools..." | |
| sudo yum install -y make git | |
| echo "Installing kubectl..." | |
| curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" | |
| sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl | |
| kubectl version --client | |
| echo "Installing Kind..." | |
| curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.31.0/kind-linux-amd64 | |
| sudo install -o root -g root -m 0755 kind /usr/local/bin/kind | |
| kind version | |
| echo "All dependencies installed successfully!" | |
| # ====================================================================== | |
| # Step 3: Free up disk space | |
| # The docling-spark image is very large (~9.5GB), need space | |
| # ====================================================================== | |
| - name: Free up disk space | |
| run: | | |
| echo "=== Disk space BEFORE cleanup ===" | |
| df -h | |
| echo "" | |
| echo "Removing unused Docker resources..." | |
| docker system prune -af --volumes || true | |
| echo "" | |
| echo "=== Disk space AFTER cleanup ===" | |
| df -h | |
| # ====================================================================== | |
| # Step 4: Create Kind cluster, load docling image, upload test PDFs | |
| # Uses the standardized make target for full setup | |
| # ====================================================================== | |
| - name: Setup Kind cluster with docling image and test data | |
| env: | |
| K8S_VERSION: ${{ inputs.k8s_version || 'v1.30.8' }} | |
| KIND_KUBE_CONFIG: ${{ env.KUBECONFIG }} | |
| DOCLING_IMAGE: quay.io/rishasin/docling-spark@sha256:7e8431fc89dbc6c10aec1f0401aadd9c9cd66b9728fbcb98f6bf40ba3e3b4cdb | |
| run: make -C examples/openshift kind-setup-full | |
| # ====================================================================== | |
| # Step 5: Verify cluster is ready | |
| # ====================================================================== | |
| - name: Verify cluster | |
| run: | | |
| echo "Verifying Kind cluster is ready..." | |
| kubectl cluster-info | |
| kubectl get nodes | |
| kubectl get pvc -n spark-operator | |
| echo "Cluster is ready!" | |
| # ====================================================================== | |
| # Step 6: Install Spark operator via Kustomize | |
| # ====================================================================== | |
| - name: Install Spark operator | |
| run: CLEANUP=false make -C examples/openshift operator-install | |
| # ====================================================================== | |
| # Step 7: Run Docling Spark E2E test | |
| # ====================================================================== | |
| - name: Run Docling Spark E2E test | |
| run: CLEANUP=false make -C examples/openshift test-docling-spark | |
| # ====================================================================== | |
| # Step 8: Download and show results | |
| # Multi-output format produces per-PDF files: | |
| # - {name}.md (markdown content) | |
| # - {name}.json (docling JSON format) | |
| # - {name}_metadata.json (processing metadata) | |
| # - {name}_error.json (for failed PDFs) | |
| # - summary.jsonl (combined summary) | |
| # ====================================================================== | |
| - name: Download and show results | |
| if: success() | |
| run: | | |
| echo "Downloading results from output PVC..." | |
| mkdir -p ./output | |
| cd examples/openshift | |
| chmod +x k8s/deploy.sh | |
| ./k8s/deploy.sh download ../../output/ | |
| echo "" | |
| echo "=== Output Directory Contents ===" | |
| ls -la ../../output/ | |
| # Count different file types | |
| MD_COUNT=$(find ../../output/ -maxdepth 1 -name "*.md" 2>/dev/null | wc -l | tr -d ' ') | |
| JSON_COUNT=$(find ../../output/ -maxdepth 1 -name "*.json" ! -name "*_error.json" ! -name "*_metadata.json" 2>/dev/null | wc -l | tr -d ' ') | |
| METADATA_COUNT=$(find ../../output/ -maxdepth 1 -name "*_metadata.json" 2>/dev/null | wc -l | tr -d ' ') | |
| ERROR_COUNT=$(find ../../output/ -maxdepth 1 -name "*_error.json" 2>/dev/null | wc -l | tr -d ' ') | |
| echo "" | |
| echo "=== File Summary ===" | |
| echo "Markdown files (.md): $MD_COUNT" | |
| echo "JSON files (.json): $JSON_COUNT" | |
| echo "Metadata files: $METADATA_COUNT" | |
| echo "Error files (*_error.json): $ERROR_COUNT" | |
| # Check summary.jsonl | |
| if [ -s ../../output/summary.jsonl ]; then | |
| LINE_COUNT=$(wc -l < ../../output/summary.jsonl | tr -d ' ') | |
| echo "" | |
| echo "=== Processing Summary (summary.jsonl: $LINE_COUNT documents) ===" | |
| python3 -c " | |
| import sys, json | |
| success_count = 0 | |
| fail_count = 0 | |
| for line in open('../../output/summary.jsonl'): | |
| d = json.loads(line.strip()) | |
| doc_path = d.get('document_path', 'unknown') | |
| # Extract just filename | |
| doc_name = doc_path.split('/')[-1] if '/' in doc_path else doc_path | |
| if d.get('success', False): | |
| print(f' ✅ {doc_name}') | |
| success_count += 1 | |
| else: | |
| error_msg = d.get('error_message', 'Unknown error') | |
| # Truncate long error messages | |
| if error_msg and len(error_msg) > 80: | |
| error_msg = error_msg[:77] + '...' | |
| print(f' ❌ {doc_name}: {error_msg}') | |
| fail_count += 1 | |
| print(f'\nTotal: {success_count} succeeded, {fail_count} failed') | |
| " | |
| fi | |
| # Show sample markdown if any exist | |
| if [ "$MD_COUNT" -gt 0 ]; then | |
| echo "" | |
| echo "=== Sample Markdown Output ===" | |
| FIRST_MD=$(find ../../output/ -maxdepth 1 -name "*.md" | head -1) | |
| echo "File: $(basename $FIRST_MD)" | |
| echo "Size: $(wc -c < "$FIRST_MD" | tr -d ' ') bytes" | |
| echo "Preview (first 500 chars):" | |
| echo "---" | |
| head -c 500 "$FIRST_MD" | |
| echo "" | |
| echo "---" | |
| fi | |
| # Show error details if any failures | |
| if [ "$ERROR_COUNT" -gt 0 ]; then | |
| echo "" | |
| echo "=== Error Details ===" | |
| for err_file in ../../output/*_error.json; do | |
| if [ -f "$err_file" ]; then | |
| echo "File: $(basename $err_file)" | |
| python3 -c " | |
| import json | |
| with open('$err_file') as f: | |
| d = json.load(f) | |
| print(f' Source: {d.get(\"source_file\", \"unknown\")}') | |
| err = d.get('error_message', 'No error message') | |
| if len(err) > 200: | |
| err = err[:197] + '...' | |
| print(f' Error: {err}') | |
| " | |
| fi | |
| done | |
| fi | |
| # Determine success/failure | |
| echo "" | |
| if [ -s ../../output/summary.jsonl ]; then | |
| echo "✅ Download complete! Found $MD_COUNT markdown, $JSON_COUNT JSON, $ERROR_COUNT error files." | |
| else | |
| echo "❌ ERROR: summary.jsonl is empty or missing" | |
| exit 1 | |
| fi | |
| # ====================================================================== | |
| # Step 10: Collect logs on failure | |
| # ====================================================================== | |
| - name: Collect logs on failure | |
| if: failure() | |
| run: | | |
| echo "=== Pod Status ===" | |
| kubectl get pods -A | |
| echo "" | |
| echo "=== Spark Operator Logs ===" | |
| kubectl logs -n spark-operator -l app.kubernetes.io/name=spark-operator --tail=100 || true | |
| echo "" | |
| echo "=== Spark Operator Namespace Pods ===" | |
| kubectl get pods -n spark-operator -o wide || true | |
| echo "" | |
| echo "=== Driver Pod Details ===" | |
| kubectl describe pod -n spark-operator -l spark-role=driver || true | |
| echo "" | |
| echo "=== Driver Pod Logs ===" | |
| kubectl logs -n spark-operator -l spark-role=driver --tail=100 || true | |
| echo "" | |
| echo "=== SparkApplication Status ===" | |
| kubectl get sparkapplication -A -o yaml || true | |
| echo "" | |
| echo "=== Events in spark-operator namespace ===" | |
| kubectl get events -n spark-operator --sort-by='.lastTimestamp' || true | |
| echo "" | |
| echo "=== All Recent Events ===" | |
| kubectl get events -A --sort-by='.lastTimestamp' | tail -50 | |
| # ====================================================================== | |
| # Step 11: Cleanup Kind cluster | |
| # ====================================================================== | |
| - name: Cleanup Kind cluster | |
| if: always() | |
| run: make -C examples/openshift kind-cleanup || true | |
| # ============================================================================ | |
| # Job 3: Stop EC2 Runner | |
| # ============================================================================ | |
| stop-ec2-runner: | |
| permissions: | |
| id-token: write # Required for OIDC (AWS auth) | |
| contents: read | |
| needs: | |
| - launch-ec2-runner | |
| - openshift-e2e | |
| runs-on: ubuntu-latest | |
| if: ${{ always() && needs.launch-ec2-runner.outputs.ec2-instance-id != '' }} | |
| steps: | |
| - name: Configure AWS credentials | |
| uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1 | |
| with: | |
| role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.SPARK_OPERATOR_IAM_ROLE }}" | |
| aws-region: us-east-2 | |
| role-session-name: odh-spark-operator # For tracking in CloudTrail | |
| - name: Stop EC2 runner | |
| uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512 # v2.4.3 | |
| with: | |
| mode: stop | |
| github-token: "${{ secrets.SPARK_OPERATOR_GH_PERSONAL_ACCESS_TOKEN }}" | |
| label: ${{ needs.launch-ec2-runner.outputs.label }} | |
| ec2-instance-id: ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }} |