# Captured from the GitHub Actions run "Initial commit: spark-history-cli #1".
# Workflow file for this run:

# Display name of this workflow.
name: CI

# Triggers: pushes to main, and pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
  # Runs the core unit tests once per Python version listed in the matrix.
  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so "3.10" is not parsed as the float 3.1.
        python-version: ["3.10", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          # "python -m pip" installs into the interpreter selected above.
          python -m pip install -e .
          python -m pip install pytest
      - name: Run unit tests
        run: |
          python -m pytest spark_history_cli/tests/test_core.py -v --tb=short

  # Starts a local Spark History Server on port 18080, loads generated event
  # logs into it, and runs the whole test directory against it.
  e2e-tests:
    name: E2E Tests (with Spark History Server)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Set up Java 17
        uses: actions/setup-java@v4
        with:
          distribution: temurin
          java-version: "17"
      - name: Download and set up Apache Spark
        run: |
          # The default run shell is "bash -e" without pipefail, so enable
          # strict mode explicitly.
          set -euo pipefail
          SPARK_VERSION="4.0.0"
          SPARK_HADOOP="spark-${SPARK_VERSION}-bin-hadoop3"
          SPARK_PATH="spark/spark-${SPARK_VERSION}/${SPARK_HADOOP}.tgz"
          SPARK_TARBALL="${RUNNER_TEMP}/${SPARK_HADOOP}.tgz"
          echo "Downloading Spark ${SPARK_VERSION}..."
          # Try the CDN first, then fall back to the permanent archive in case
          # the pinned release is no longer served by the CDN.
          # --fail makes HTTP errors (e.g. 404) a curl failure instead of
          # saving an HTML error page as the tarball.
          downloaded=false
          for base in https://dlcdn.apache.org https://archive.apache.org/dist; do
            if curl -fsSL --retry 3 -o "${SPARK_TARBALL}" "${base}/${SPARK_PATH}"; then
              downloaded=true
              break
            fi
            echo "Download from ${base} failed"
          done
          if [ "${downloaded}" != "true" ]; then
            echo "::error::Could not download Spark ${SPARK_VERSION}"
            exit 1
          fi
          tar -xzf "${SPARK_TARBALL}" -C /opt
          rm -f "${SPARK_TARBALL}"
          # Expose SPARK_HOME and the Spark bin/sbin directories to later steps.
          echo "SPARK_HOME=/opt/${SPARK_HADOOP}" >> "$GITHUB_ENV"
          echo "/opt/${SPARK_HADOOP}/bin" >> "$GITHUB_PATH"
          echo "/opt/${SPARK_HADOOP}/sbin" >> "$GITHUB_PATH"
      - name: Generate sample event logs
        run: |
          mkdir -p /tmp/spark-events
          python ci/generate_sample_logs.py
        env:
          SPARK_EVENT_LOG_DIR: /tmp/spark-events
      - name: Start Spark History Server
        run: |
          export SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=/tmp/spark-events"
          "$SPARK_HOME/sbin/start-history-server.sh"
          # Wait for SHS to be ready (up to 30s); fail if it never answers
          echo "Waiting for History Server to start..."
          ready=false
          for i in $(seq 1 30); do
            if curl -sf http://localhost:18080/api/v1/version > /dev/null 2>&1; then
              echo "History Server is ready!"
              curl -s http://localhost:18080/api/v1/version
              ready=true
              break
            fi
            sleep 1
          done
          if [ "$ready" != "true" ]; then
            echo "::error::Spark History Server did not become ready within 30s"
            # Best-effort dump of the daemon logs to help diagnose the failure.
            cat "$SPARK_HOME"/logs/* 2>/dev/null || true
            exit 1
          fi
          # Verify apps are loaded (may take a few seconds for SHS to scan logs)
          echo "Waiting for apps to be indexed..."
          APPS=0
          for i in $(seq 1 30); do
            # Any curl or JSON failure counts as zero applications.
            APPS=$(curl -sf http://localhost:18080/api/v1/applications 2>/dev/null | python3 -c "import sys,json; print(len(json.load(sys.stdin)))" 2>/dev/null || echo "0")
            if [ "$APPS" -gt "0" ]; then
              echo "Found $APPS applications"
              break
            fi
            sleep 1
          done
          # Fail here rather than running the E2E suite against an empty server.
          if [ "$APPS" -le "0" ]; then
            echo "::error::History Server indexed no applications within 30s"
            exit 1
          fi
      - name: Install spark-history-cli
        run: |
          python -m pip install -e .
          python -m pip install pytest
      - name: Run E2E tests
        run: |
          python -m pytest spark_history_cli/tests/ -v -s --tb=short
        env:
          SPARK_HISTORY_SERVER: http://localhost:18080
      - name: Stop History Server
        # Runs even when earlier steps failed; stop errors are ignored.
        if: always()
        run: |
          "$SPARK_HOME/sbin/stop-history-server.sh" || true