# OpenShift Docling E2E Test #103
# Workflow file for this run
---
# ============================================================================
# OpenShift Docling E2E Test Workflow (EC2 Runner)
# ============================================================================
#
# This workflow tests the Spark operator using Kustomize manifests and
# shell-based E2E tests.
#
# Tests performed:
#   1. Installs Spark operator via Kustomize (config/default/)
#   2. Verifies fsGroup is NOT 185 (OpenShift security requirement)
#   3. Runs the docling-spark-app workload from examples/openshift/k8s/
#
# Infrastructure:
#   - Uses self-hosted EC2 runners for larger disk space and resources
#   - EC2 instances are started on-demand and terminated after tests
#
# Triggers:
#   - Manual (workflow_dispatch)
#   - Push to main branch when OpenShift-related files change
#
# ============================================================================
name: OpenShift Docling E2E Test

on:
  # Manual trigger - allows running from GitHub Actions UI or CLI
  workflow_dispatch:
    inputs:
      k8s_version:
        description: "Kubernetes version to test"
        required: false
        default: "v1.30.8"
  # Automatic trigger on push to the main branch
  push:
    branches:
      - main
    paths:
      - 'examples/openshift/**'
      - 'config/**'
      - '.github/workflows/openshift-docling-e2e.yaml'

env:
  INSTANCE_TYPE: "m5.xlarge"

# We don't need anything other than Bash for our shell
defaults:
  run:
    shell: bash

# Cancel in-progress runs for the same branch
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # ==========================================================================
  # Job 1: Launch EC2 Runner
  # ==========================================================================
  launch-ec2-runner:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # Required for OIDC (AWS auth)
      contents: read
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
        with:
          role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.SPARK_OPERATOR_IAM_ROLE }}"
          aws-region: us-east-2
          role-session-name: odh-spark-operator  # For tracking in CloudTrail

      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512 # v2.4.3
        with:
          mode: start
          github-token: "${{ secrets.SPARK_OPERATOR_GH_PERSONAL_ACCESS_TOKEN }}"
          ec2-instance-type: "${{ env.INSTANCE_TYPE }}"
          # Try three AZs with the same AMI so capacity issues in one AZ
          # don't fail the launch.
          availability-zones-config: >
            [
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2A_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2B_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "ami-03ae00a3c1d00fd0f", "subnetId": "${{ vars.US_EAST_2C_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"}
            ]
          # 150 GB root volume - the docling image alone is ~9.5GB.
          block-device-mappings: >
            [
              {"DeviceName": "/dev/sda1", "Ebs": {"VolumeSize": 150}}
            ]
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "spark-operator-gh-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number || 'N/A' }}"}
            ]

  # ==========================================================================
  # Job 2: OpenShift E2E Tests (runs on EC2)
  # ==========================================================================
  openshift-e2e:
    name: OpenShift E2E Tests
    needs:
      - launch-ec2-runner
    runs-on: ${{ needs.launch-ec2-runner.outputs.label }}
    env:
      # inputs.k8s_version is empty on push triggers; fall back to default.
      K8S_VERSION: ${{ inputs.k8s_version || 'v1.30.8' }}
      # Go environment
      GOPATH: ${{ github.workspace }}/go
      GOMODCACHE: ${{ github.workspace }}/go/pkg/mod
      GOCACHE: ${{ github.workspace }}/go/cache
      # Home and Kubernetes config (EC2 runner may not have HOME set)
      HOME: ${{ github.workspace }}/home
      KUBECONFIG: ${{ github.workspace }}/home/.kube/config
    steps:
      - name: Setup Environment
        run: |
          echo "Running on EC2 ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }}"
          echo "Instance type: ${{ env.INSTANCE_TYPE }}"
          cat /etc/os-release
          df -h
          # Create home and kubeconfig directories
          mkdir -p $HOME/.kube
          echo "HOME=$HOME"
          echo "KUBECONFIG=$KUBECONFIG"

      # ======================================================================
      # Step 1: Checkout the repository
      # ======================================================================
      - name: Checkout source code
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
        with:
          fetch-depth: 0

      # ======================================================================
      # Step 2: Set up Go
      # ======================================================================
      - name: Set up Go
        uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6
        with:
          go-version-file: go.mod

      - name: Setup Go environment
        run: |
          mkdir -p $GOPATH/bin $GOMODCACHE $GOCACHE
          echo "$GOPATH/bin" >> $GITHUB_PATH
          go version
          go env

      # ======================================================================
      # Step 3: Install dependencies (docker, make, kubectl, helm, kind)
      # The EC2 runner AMI doesn't have these pre-installed
      # ======================================================================
      - name: Install dependencies
        run: |
          set -eou pipefail
          echo "Installing Docker CE..."
          # Install required packages
          sudo yum install -y yum-utils
          # Add Docker's official repository
          sudo yum-config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
          # Install Docker CE
          sudo yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
          # Start and enable Docker
          sudo systemctl start docker
          sudo systemctl enable docker
          # Allow docker commands without sudo for this session
          sudo chmod 666 /var/run/docker.sock
          docker --version
          echo "Installing build tools..."
          sudo yum install -y make git
          echo "Installing kubectl..."
          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
          sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
          kubectl version --client
          echo "Installing Helm..."
          curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
          helm version
          echo "Installing Kind..."
          curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.31.0/kind-linux-amd64
          sudo install -o root -g root -m 0755 kind /usr/local/bin/kind
          kind version
          echo "All dependencies installed successfully!"

      # ======================================================================
      # Step 4: Free up disk space
      # The docling-spark image is very large (~9.5GB), need space
      # ======================================================================
      - name: Free up disk space
        run: |
          echo "=== Disk space BEFORE cleanup ==="
          df -h
          echo ""
          echo "Removing unused Docker resources..."
          docker system prune -af --volumes || true
          echo ""
          echo "=== Disk space AFTER cleanup ==="
          df -h

      # ======================================================================
      # Step 5: Create a Kind cluster
      # This creates a local Kubernetes cluster using Docker containers
      # Uses the job-level K8S_VERSION env var.
      # ======================================================================
      - name: Create Kind cluster
        run: |
          echo "Creating Kind cluster with Kubernetes ${K8S_VERSION}..."
          kind create cluster \
            --name spark-operator \
            --config charts/spark-operator-chart/ci/kind-config.yaml \
            --image kindest/node:${K8S_VERSION} \
            --kubeconfig $KUBECONFIG \
            --wait=1m
          echo "Kubeconfig written to: $KUBECONFIG"
          kubectl cluster-info

      # ======================================================================
      # Step 6: Verify cluster is ready
      # ======================================================================
      - name: Verify cluster
        run: |
          echo "Verifying Kind cluster is ready..."
          kubectl cluster-info
          kubectl get nodes
          echo "Cluster is ready!"

      # ======================================================================
      # Step 7: Pre-pull and load docling-spark image into Kind
      # The image is ~9.5GB, pre-loading avoids timeout during test
      # ======================================================================
      - name: Pre-pull and load docling-spark image
        env:
          DOCLING_IMAGE: quay.io/rishasin/docling-spark@sha256:7e8431fc89dbc6c10aec1f0401aadd9c9cd66b9728fbcb98f6bf40ba3e3b4cdb
        run: |
          echo "Pulling docling-spark image (~9.5GB, this may take a while)..."
          docker pull $DOCLING_IMAGE
          echo ""
          echo "Verifying PDFs exist in image..."
          docker run --rm $DOCLING_IMAGE ls -la /app/assets/
          echo ""
          echo "Loading image into Kind cluster..."
          kind load docker-image $DOCLING_IMAGE --name spark-operator
          echo ""
          echo "✅ Image loaded into Kind cluster successfully!"

      # ======================================================================
      # Step 8: Setup PVCs and upload test PDFs
      # Namespace and RBAC are handled by test-docling-spark.sh
      # ======================================================================
      - name: Setup PVCs and upload test PDFs
        run: |
          echo "Setting up PVCs and uploading test data..."
          echo "Creating namespace..."
          kubectl create namespace spark-operator --dry-run=client -o yaml | kubectl apply -f -
          echo "Creating PVCs (using Kind's default StorageClass)..."
          sed '/storageClassName:/d' examples/openshift/k8s/docling-input-pvc.yaml | kubectl apply -f -
          sed '/storageClassName:/d' examples/openshift/k8s/docling-output-pvc.yaml | kubectl apply -f -
          echo "PVC status:"
          kubectl get pvc -n spark-operator
          echo "Uploading test PDFs from examples/openshift/tests/assets/..."
          chmod +x examples/openshift/k8s/deploy.sh
          cd examples/openshift && ./k8s/deploy.sh upload ./tests/assets/
          echo "PVCs created and test PDFs uploaded successfully!"

      # ======================================================================
      # Step 9: Install Spark operator via Kustomize
      # ======================================================================
      - name: Install Spark operator
        run: |
          echo "Installing Spark operator..."
          chmod +x examples/openshift/tests/test-operator-install.sh
          CLEANUP=false examples/openshift/tests/test-operator-install.sh

      # ======================================================================
      # Step 10: Run Docling Spark E2E test (shell-based)
      # ======================================================================
      - name: Run Docling Spark E2E test
        env:
          CLEANUP: "false"
        run: |
          echo "Running Docling Spark E2E test..."
          chmod +x examples/openshift/tests/test-docling-spark.sh
          CLEANUP=false examples/openshift/tests/test-docling-spark.sh

      # ======================================================================
      # Step 11: Download and show results
      # Multi-output format produces per-PDF files:
      #   - {name}.md (markdown content)
      #   - {name}.json (docling JSON format)
      #   - {name}_metadata.json (processing metadata)
      #   - {name}_error.json (for failed PDFs)
      #   - summary.jsonl (combined summary)
      # ======================================================================
      - name: Download and show results
        if: success()
        run: |
          echo "Downloading results from output PVC..."
          mkdir -p ./output
          cd examples/openshift
          chmod +x k8s/deploy.sh
          ./k8s/deploy.sh download ../../output/
          echo ""
          echo "=== Output Directory Contents ==="
          ls -la ../../output/
          # Count different file types
          MD_COUNT=$(find ../../output/ -maxdepth 1 -name "*.md" 2>/dev/null | wc -l | tr -d ' ')
          JSON_COUNT=$(find ../../output/ -maxdepth 1 -name "*.json" ! -name "*_error.json" ! -name "*_metadata.json" 2>/dev/null | wc -l | tr -d ' ')
          METADATA_COUNT=$(find ../../output/ -maxdepth 1 -name "*_metadata.json" 2>/dev/null | wc -l | tr -d ' ')
          ERROR_COUNT=$(find ../../output/ -maxdepth 1 -name "*_error.json" 2>/dev/null | wc -l | tr -d ' ')
          echo ""
          echo "=== File Summary ==="
          echo "Markdown files (.md): $MD_COUNT"
          echo "JSON files (.json): $JSON_COUNT"
          echo "Metadata files: $METADATA_COUNT"
          echo "Error files (*_error.json): $ERROR_COUNT"
          # Check summary.jsonl
          if [ -s ../../output/summary.jsonl ]; then
            LINE_COUNT=$(wc -l < ../../output/summary.jsonl | tr -d ' ')
            echo ""
            echo "=== Processing Summary (summary.jsonl: $LINE_COUNT documents) ==="
            python3 -c "
          import sys, json
          success_count = 0
          fail_count = 0
          for line in open('../../output/summary.jsonl'):
              d = json.loads(line.strip())
              doc_path = d.get('document_path', 'unknown')
              # Extract just filename
              doc_name = doc_path.split('/')[-1] if '/' in doc_path else doc_path
              if d.get('success', False):
                  print(f' ✅ {doc_name}')
                  success_count += 1
              else:
                  error_msg = d.get('error_message', 'Unknown error')
                  # Truncate long error messages
                  if error_msg and len(error_msg) > 80:
                      error_msg = error_msg[:77] + '...'
                  print(f' ❌ {doc_name}: {error_msg}')
                  fail_count += 1
          print(f'\nTotal: {success_count} succeeded, {fail_count} failed')
          "
          fi
          # Show sample markdown if any exist
          if [ "$MD_COUNT" -gt 0 ]; then
            echo ""
            echo "=== Sample Markdown Output ==="
            FIRST_MD=$(find ../../output/ -maxdepth 1 -name "*.md" | head -1)
            echo "File: $(basename $FIRST_MD)"
            echo "Size: $(wc -c < "$FIRST_MD" | tr -d ' ') bytes"
            echo "Preview (first 500 chars):"
            echo "---"
            head -c 500 "$FIRST_MD"
            echo ""
            echo "---"
          fi
          # Show error details if any failures
          if [ "$ERROR_COUNT" -gt 0 ]; then
            echo ""
            echo "=== Error Details ==="
            for err_file in ../../output/*_error.json; do
              if [ -f "$err_file" ]; then
                echo "File: $(basename $err_file)"
                python3 -c "
          import json
          with open('$err_file') as f:
              d = json.load(f)
          print(f' Source: {d.get(\"source_file\", \"unknown\")}')
          err = d.get('error_message', 'No error message')
          if len(err) > 200:
              err = err[:197] + '...'
          print(f' Error: {err}')
          "
              fi
            done
          fi
          # Determine success/failure
          echo ""
          if [ -s ../../output/summary.jsonl ]; then
            echo "✅ Download complete! Found $MD_COUNT markdown, $JSON_COUNT JSON, $ERROR_COUNT error files."
          else
            echo "❌ ERROR: summary.jsonl is empty or missing"
            exit 1
          fi

      # ======================================================================
      # Step 12: Cleanup namespaces
      # Since we skipped namespace cleanup in tests (SKIP_NAMESPACE_CLEANUP=true),
      # we clean them up here after downloading results
      # ======================================================================
      - name: Cleanup namespaces
        if: always()
        run: |
          echo "Cleaning up namespaces..."
          kubectl delete namespace spark-operator --ignore-not-found --wait=false || true
          echo "✅ Namespace cleanup initiated"

      # ======================================================================
      # Step 13: Collect logs on failure
      # All commands are best-effort (|| true) so one missing resource
      # doesn't stop the rest of the diagnostics from printing.
      # ======================================================================
      - name: Collect logs on failure
        if: failure()
        run: |
          echo "=== Pod Status ==="
          kubectl get pods -A
          echo ""
          echo "=== Spark Operator Logs ==="
          kubectl logs -n spark-operator -l app.kubernetes.io/name=spark-operator --tail=100 || true
          echo ""
          echo "=== Spark Operator Namespace Pods ==="
          kubectl get pods -n spark-operator -o wide || true
          echo ""
          echo "=== Driver Pod Details ==="
          kubectl describe pod -n spark-operator -l spark-role=driver || true
          echo ""
          echo "=== Driver Pod Logs ==="
          kubectl logs -n spark-operator -l spark-role=driver --tail=100 || true
          echo ""
          echo "=== SparkApplication Status ==="
          kubectl get sparkapplication -A -o yaml || true
          echo ""
          echo "=== Events in spark-operator namespace ==="
          kubectl get events -n spark-operator --sort-by='.lastTimestamp' || true
          echo ""
          echo "=== All Recent Events ==="
          kubectl get events -A --sort-by='.lastTimestamp' | tail -50 || true

      # ======================================================================
      # Step 14: Cleanup Kind cluster
      # ======================================================================
      - name: Cleanup Kind cluster
        if: always()
        run: |
          echo "Cleaning up Kind cluster..."
          kind delete cluster --name spark-operator || true

  # ==========================================================================
  # Job 3: Stop EC2 Runner
  # ==========================================================================
  stop-ec2-runner:
    permissions:
      id-token: write  # Required for OIDC (AWS auth)
      contents: read
    needs:
      - launch-ec2-runner
      - openshift-e2e
    runs-on: ubuntu-latest
    # Run even if tests failed, but only if an instance was actually launched.
    if: ${{ always() && needs.launch-ec2-runner.outputs.ec2-instance-id != '' }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708 # v5.1.1
        with:
          role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.SPARK_OPERATOR_IAM_ROLE }}"
          aws-region: us-east-2
          role-session-name: odh-spark-operator  # For tracking in CloudTrail

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512 # v2.4.3
        with:
          mode: stop
          github-token: "${{ secrets.SPARK_OPERATOR_GH_PERSONAL_ACCESS_TOKEN }}"
          label: ${{ needs.launch-ec2-runner.outputs.label }}
          ec2-instance-id: ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }}