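# NOTE(review): the three lines below look like GitHub web-UI text that was
# captured when this workflow was copied from the browser. They are not
# workflow syntax and are kept only as comments so the file parses as YAML;
# confirm and remove.
# Skip to content
# update readme
# update readme #16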

name: 'Data pipeline tests & coverage'

# Triggers: pushes to main/a6 and any pull request, in both cases only when
# the changed files match one of the path filters listed below.
on:
  push:
    branches: [main, a6]
    paths:
      - 'src/data_pipelines/**'
      - 'src/data_extraction/**'
      - '.github/workflows/data_pipeline_coverage.yaml'
  pull_request:
    paths:
      - 'src/data_pipelines/**'
      - 'src/data_extraction/**'
      - '.github/workflows/data_pipeline_coverage.yaml'
jobs:
  # Runs the data_pipelines and data_extraction test suites with coverage,
  # posts the result as a comment on pull requests, and regenerates and commits
  # the coverage badge when running on main.
  test-data-pipelines:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # needed to commit badge
      pull-requests: write  # needed to comment on PRs
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # install project deps if you keep a requirements file at repo root
          if [ -f requirements.txt ]; then
            pip install -r requirements.txt
          fi
          # make sure pytest + pytest-cov and libs used in tests are installed
          pip install pytest pytest-cov boto3 requests pandas kagglehub psycopg2-binary openpyxl xlsxwriter

      # Run BOTH data_pipelines and data_extraction tests with coverage in one go.
      # Piping into tee would otherwise make the step report tee's exit status,
      # so pytest's status is captured with pipefail and exported as an output.
      # The step itself stays green so the comment/badge steps still run; the
      # last step of the job turns a non-zero status into a job failure.
      - name: Run tests with coverage
        id: tests
        run: |
          set -o pipefail
          status=0
          pytest \
            src/data_pipelines \
            src/data_extraction \
            --cov=src/data_pipelines \
            --cov=src/data_extraction \
            --cov-report=xml:coverage-data-pipelines.xml \
            --cov-report=term \
            | tee data-pipelines-pytest-output.txt || status=$?
          echo "exit_code=${status}" >> "$GITHUB_OUTPUT"

      # Extract total line coverage % from coverage-data-pipelines.xml
      # (the root element's line-rate attribute, scaled to a percentage).
      - name: Extract Coverage Percentage
        id: covpct
        run: |
          python - << 'EOF'
          import xml.etree.ElementTree as ET
          import os
          tree = ET.parse('coverage-data-pipelines.xml')
          root = tree.getroot()
          pct = float(root.get('line-rate', 0.0)) * 100.0
          with open(os.environ["GITHUB_OUTPUT"], "a") as f:
              f.write(f"pct={pct:.2f}\n")
          EOF

      # Expose the saved pytest log as the multi-line step output "text".
      - name: Capture pytest output
        id: covtext
        run: |
          # Random delimiter: a fixed marker such as EOF would end the value
          # early if the same text appeared on a line of the log.
          delimiter="$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
          {
            echo "text<<${delimiter}"
            # Keep only the tail of the log (where the coverage table is) so
            # the PR comment stays below GitHub's 65,536-character limit.
            tail -c 60000 data-pipelines-pytest-output.txt
            # Guarantee the delimiter starts on its own line.
            echo
            echo "${delimiter}"
          } >> "$GITHUB_OUTPUT"

      # The blank lines in the body are deliberate: Markdown needs them to
      # separate the heading, the table, the <details> HTML block and the
      # fenced code block so that each one renders as intended.
      - name: Comment on PR
        if: github.event_name == 'pull_request'
        uses: peter-evans/create-or-update-comment@v4
        with:
          issue-number: ${{ github.event.pull_request.number }}
          body: |
            **Data Pipeline Tests & Coverage**

            | Metric | Value |
            |--------|-------|
            | Total Line Coverage | **${{ steps.covpct.outputs.pct }}%** |

            <details>
            <summary>Pytest Output</summary>

            ```
            ${{ steps.covtext.outputs.text }}
            ```

            </details>

      # Badge colour: red below 80 %, orange from 80 %, green from 90 %.
      - name: Generate Badge
        if: github.ref == 'refs/heads/main'
        env:
          # Passed through the environment instead of being interpolated
          # into the script text.
          PCT: ${{ steps.covpct.outputs.pct }}
        run: |
          pct="${PCT}"
          # awk performs the floating-point comparison, so the step does not
          # depend on bc being installed on the runner.
          color="$(awk -v p="${pct}" 'BEGIN {
            if ((p + 0) >= 90) print "green"
            else if ((p + 0) >= 80) print "orange"
            else print "red"
          }')"
          mkdir -p .github/badges
          cat > .github/badges/data-pipelines-coverage.svg <<EOF
          <svg xmlns="http://www.w3.org/2000/svg" width="220" height="20">
          <rect width="160" height="20" fill="#555"/>
          <rect x="160" width="60" height="20" fill="$color"/>
          <text x="80" y="14" fill="#fff" font-family="DejaVu Sans" font-size="11" text-anchor="middle">Data Pipelines Coverage</text>
          <text x="190" y="14" fill="#fff" font-family="DejaVu Sans" font-size="11" text-anchor="middle">${pct}%</text>
          </svg>
          EOF

      - name: Commit badge
        if: github.ref == 'refs/heads/main'
        run: |
          git config user.name "github-actions"
          git config user.email "github-actions@github.com"
          git add .github/badges/data-pipelines-coverage.svg
          # Nothing staged means the badge is unchanged. Checking explicitly
          # (rather than ignoring any commit failure) keeps real commit or
          # push errors visible.
          if git diff --cached --quiet; then
            echo "Coverage badge unchanged; nothing to commit."
            exit 0
          fi
          git commit -m "Update data pipelines coverage badge"
          git push

      # Deferred failure gate for the test step above.
      - name: Fail job if tests failed
        if: steps.tests.outputs.exit_code != '0'
        env:
          PYTEST_EXIT_CODE: ${{ steps.tests.outputs.exit_code }}
        run: |
          echo "::error::pytest exited with status ${PYTEST_EXIT_CODE}"
          exit 1