# build & test
# (Workflow definition as shown for run "build & test #295"; web-page navigation text removed.)

# Workflow-level configuration: display name, triggers, run de-duplication,
# and environment shared by every job.
name: build & test
on:
  # Pushes to master and release branches, except pushes that only touch
  # docs or Markdown files.
  push:
    branches:
      - master
      - releases/**
    paths-ignore:
      - "docs/**"
      - "**.md"
  # Pull requests targeting any branch.
  pull_request:
    branches:
      - "**"
  # Manual runs from the Actions UI / API.
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * *" # Run at midnight UTC every day
  release:
    types: [published]
# One active run per workflow + pull request (or per ref for non-PR events);
# a newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
# Exposes the DEPOT_PROJECT_ID configuration variable to all jobs as an
# environment variable.
env:
  DEPOT_PROJECT_ID: "${{ vars.DEPOT_PROJECT_ID }}"
jobs:
  # Runs the repository-local ci-optimization action and republishes its
  # outputs so that later jobs can decide which steps to run.
  setup:
    runs-on: ubuntu-latest
    outputs:
      # frontend_change, backend_change and docker_change are forced to true for
      # every event other than pull_request, so non-PR runs build everything.
      frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' || github.event_name != 'pull_request' }}
      ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' }}
      backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' || github.event_name != 'pull_request'}}
      docker_change: ${{ steps.ci-optimize.outputs.docker-change == 'true' || github.event_name != 'pull_request' }}
      frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }}
      ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }}
      kafka_setup_change: ${{ steps.ci-optimize.outputs.kafka-setup-change == 'true' }}
      mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
      postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
      elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
    steps:
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
      - uses: ./.github/actions/ci-optimization
        id: ci-optimize

  # Gradle build + test, split by matrix.command, followed by artifact,
  # metrics and Codecov uploads.
  build:
    strategy:
      fail-fast: false
      # Resulting combinations: (except_metadata_ingestion, UTC),
      # (frontend, UTC) and, via include, (frontend, America/New_York).
      matrix:
        command:
          # metadata-ingestion and airflow-plugin each have dedicated build jobs
          - "except_metadata_ingestion"
          - "frontend"
        timezone: ["UTC"]
        include:
          # We only need the timezone variation for frontend tests.
          - command: "frontend"
            timezone: "America/New_York"
    # Uses the depot-ubuntu-latest-4 runner label when the DEPOT_PROJECT_ID
    # variable is set, otherwise the standard ubuntu-latest runner.
    runs-on: ${{ vars.DEPOT_PROJECT_ID != '' && 'depot-ubuntu-latest-4' || 'ubuntu-latest' }}
    timeout-minutes: 60
    needs: setup
    steps:
      # Best-effort cleanup (every command tolerates failure) of preinstalled
      # tooling and Docker data, then prints the remaining disk space.
      - name: Free up disk space
        run: |
          sudo apt-get remove 'dotnet-*' azure-cli || true
          sudo rm -rf /usr/local/.ghcup || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /usr/share/swift || true
          sudo rm -rf /usr/local/julia* || true
          sudo rm -rf /usr/local/share/powershell || true
          sudo rm -rf /usr/share/miniconda || true
          sudo rm -rf /usr/local/lib/android/ || true
          sudo docker system prune -a -f || true
          df -h
      - uses: szenius/set-timezone@v2.0
        with:
          timezoneLinux: ${{ matrix.timezone }}
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
        with:
          checkout-head-only: false
      - uses: actions/setup-python@v6
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"
      - uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
      - name: Install dependencies
        run: ./metadata-ingestion/scripts/install_deps.sh
      - name: Set up JDK 17
        uses: actions/setup-java@v5
        with:
          distribution: "zulu"
          java-version: 17
      - uses: gradle/actions/setup-gradle@v4
      - name: Disk Space Analysis
        run: |
          echo "=== Disk Usage Overview ==="
          df -h
          echo -e "\n=== Docker Disk Usage ==="
          docker system df -v
      # Runs only on the except_metadata_ingestion leg and only when the setup
      # job reported a backend change.
      - name: Gradle build (and test) for NOT metadata ingestion
        if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
        # datahub-schematron:cli excluded due to dependency on metadata-ingestion
        run: |
          ./gradlew build \
            -x :metadata-ingestion:build \
            -x :metadata-ingestion:check \
            -x :docs-website:build \
            -x :metadata-integration:java:spark-lineage:test \
            -x :metadata-io:test \
            -x :metadata-ingestion-modules:airflow-plugin:build \
            -x :metadata-ingestion-modules:airflow-plugin:check \
            -x :metadata-ingestion-modules:dagster-plugin:build \
            -x :metadata-ingestion-modules:dagster-plugin:check \
            -x :metadata-ingestion-modules:prefect-plugin:build \
            -x :metadata-ingestion-modules:prefect-plugin:check \
            -x :metadata-ingestion-modules:gx-plugin:build \
            -x :metadata-ingestion-modules:gx-plugin:check \
            -x :datahub-frontend:build \
            -x :datahub-web-react:build \
            -x :metadata-integration:java:datahub-schematron:cli:test
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
      - name: Disk Space Analysis
        run: |
          echo "=== Disk Usage Overview ==="
          df -h
          echo -e "\n=== Docker Disk Usage ==="
          docker system df -v
      # Runs only on the frontend legs and only when the setup job reported a
      # frontend change.
      - name: Gradle build (and test) for frontend
        if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }}
        run: |
          ./gradlew :datahub-frontend:build :datahub-web-react:build
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
      # Collects coverage report paths into comma-separated BACKEND_FILES and
      # FRONTEND_FILES environment variables for the Codecov steps below.
      # The module list spells metadata-dao-impl in full (it was previously
      # mistyped as "medata-dao-impl", which never matched that module).
      - name: Gather coverage files
        run: |
          {
            echo "BACKEND_FILES=$(find ./build/coverage-reports/ -type f | grep -E '(metadata-models|entity-registry|datahub-graphql-core|metadata-io|metadata-jobs|metadata-utils|metadata-service|metadata-dao-impl|metadata-operation|li-utils|metadata-integration|metadata-events|metadata-auth|ingestion-scheduler|notifications|datahub-upgrade)' | xargs | tr ' ' ',')"
            echo "FRONTEND_FILES=$(find ./build/coverage-reports/ -type f | grep -E '(datahub-frontend|datahub-web-react).*\.(xml|json)$' | xargs | tr ' ' ',')"
          } >> "$GITHUB_ENV"
      # Must run even after a failed build: the upload step below runs with
      # always(), and without NAME_TZ both frontend timezone legs would try to
      # upload an artifact with the same name.
      - name: Generate tz artifact name
        if: always()
        run: echo "NAME_TZ=$(echo "${{ matrix.timezone }}" | tr '/' '-')" >> "$GITHUB_ENV"
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: Test Results (build) - ${{ matrix.command}}-${{ env.NAME_TZ }}
          path: |
            **/build/reports/tests/test/**
            **/build/test-results/test/**
            **/junit.*.xml
            !**/binary/**
      # Best-effort reporting of failed tests; runs only after a failure and
      # never fails the job itself (continue-on-error).
      - name: Send failed test metrics to PostHog
        if: failure()
        continue-on-error: true
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
          # The branch name can be chosen by a pull request author, so it is
          # handed to the script as an environment variable rather than being
          # expanded into the shell source.
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
        run: |
          if [ -z "$POSTHOG_API_KEY" ]; then
            echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
            exit 0
          fi
          TEMP_DIR=$(mktemp -d)
          mkdir -p "$TEMP_DIR/test-results"
          find . -name "*.xml" -path "*/build/test-results/*" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          find . -name "junit.*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          python3 .github/scripts/send_failed_tests_to_posthog.py \
            --input-dir "$TEMP_DIR/test-results" \
            --posthog-api-key "$POSTHOG_API_KEY" \
            --posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
            --repository "${{ github.repository }}" \
            --workflow-name "${{ github.workflow }}" \
            --branch "$BRANCH_NAME" \
            --run-id "${{ github.run_id }}" \
            --run-attempt "${{ github.run_attempt }}" \
            --command "${{ matrix.command }}" \
            --timezone "${{ matrix.timezone }}"
          rm -rf "$TEMP_DIR"
      - name: Ensure codegen is updated
        uses: ./.github/actions/ensure-codegen-updated
      # Codecov uploads come in pairs: the "on release" variants additionally
      # set override_branch and do not depend on the change-detection outputs.
      - name: Upload backend coverage to Codecov
        if: ${{ (matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' && github.event_name != 'release') }}
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ${{ env.BACKEND_FILES }}
          disable_search: true
          #handle_no_reports_found: true
          fail_ci_if_error: false
          flags: backend
          name: ${{ matrix.command }}
          verbose: true
      - name: Upload backend coverage to Codecov on release
        if: ${{ (matrix.command == 'except_metadata_ingestion' && github.event_name == 'release' ) }}
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ${{ env.BACKEND_FILES }}
          disable_search: true
          #handle_no_reports_found: true
          fail_ci_if_error: false
          flags: backend
          name: ${{ matrix.command }}
          verbose: true
          override_branch: ${{ github.head_ref || github.ref_name }}
      - name: Upload frontend coverage to Codecov
        if: ${{ (matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' && github.event_name != 'release') }}
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ${{ env.FRONTEND_FILES }}
          disable_search: true
          #handle_no_reports_found: true
          fail_ci_if_error: false
          flags: frontend
          name: ${{ matrix.command }}
          verbose: true
      - name: Upload frontend coverage to Codecov on Release
        if: ${{ (matrix.command == 'frontend' && github.event_name == 'release') }}
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ${{ env.FRONTEND_FILES }}
          disable_search: true
          #handle_no_reports_found: true
          fail_ci_if_error: false
          flags: frontend
          name: ${{ matrix.command }}
          verbose: true
          override_branch: ${{ github.head_ref || github.ref_name }}
      # Test-result uploads run after failures too, but not after cancellation.
      - name: Upload test results to Codecov
        if: ${{ !cancelled() && github.event_name != 'release' }}
        uses: codecov/test-results-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
      - name: Upload test results to Codecov on release
        if: ${{ !cancelled() && github.event_name == 'release' }}
        uses: codecov/test-results-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          override_branch: ${{ github.head_ref || github.ref_name }}

  # Uploads the triggering event payload file (github.event_path) as the
  # "Event File" artifact.
  # NOTE(review): presumably consumed by a separate reporting workflow — confirm.
  event-file:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: Event File
          path: ${{ github.event_path }}