Skip to content

new feat: add slot tagger #203

new feat: add slot tagger

new feat: add slot tagger #203

Workflow file for this run

# Sync workflow: runs nightly, on pushes and pull requests targeting main,
# and on manual dispatch (optionally forcing a full re-fetch/re-render).
name: Sync

on:
  schedule:
    - cron: "0 1 * * *" # 1am UTC daily
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:
    inputs:
      force:
        description: "Force fetch and re-render all notebooks"
        required: false
        default: false
        type: boolean

permissions:
  contents: read
  pull-requests: write # For PR comments

# One live run per ref; a newer run cancels the one in progress.
# The group is keyed on github.ref (refs/heads/<branch> for branch runs,
# refs/pull/<number>/merge for pull requests) instead of the head branch
# name, so a pull request whose head branch happens to be called "main"
# (e.g. from a fork) can never share a group with, and cancel, a run on the
# real main branch.
concurrency:
  group: "sync-${{ github.ref }}"
  cancel-in-progress: true

env:
  CACHE_VERSION: v2 # Increment to invalidate all caches
  # The manual-dispatch "force" input; falls back to 'false' when the input
  # is absent (every trigger other than workflow_dispatch).
  FORCE_RENDER: ${{ github.event.inputs.force || 'false' }}
jobs:
# The workflow's `sync` job; all steps below belong to it.
sync:
runs-on: ubuntu-latest
outputs:
# NOTE(review): no step with id `check` is visible in this section of the
# file. Confirm one exists; otherwise this output always evaluates to an
# empty string.
has_changes: ${{ steps.check.outputs.has_changes }}
steps:
# Check out the repository so later steps can read its files.
- name: Checkout
uses: actions/checkout@v4
# ============================================
# Setup Tools
# ============================================

# Python side: uv (with its cache enabled), Python 3.13, project dependencies.
- name: Install uv
  uses: astral-sh/setup-uv@v4
  with:
    enable-cache: true

- name: Set up Python
  run: uv python install 3.13

- name: Install Python dependencies
  run: uv sync

# Task runner used by the fetch/render/build steps.
- name: Install just
  uses: extractions/setup-just@v2

# Node side: pnpm 10 and Node.js 22, with the pnpm store cached against the
# site's lockfile.
- name: Setup pnpm
  uses: pnpm/action-setup@v4
  with:
    version: 10

- name: Setup Node.js
  uses: actions/setup-node@v4
  with:
    node-version: "22"
    cache: pnpm
    cache-dependency-path: site/pnpm-lock.yaml

- name: Install Node dependencies
  working-directory: site
  run: pnpm install
# ============================================
# Restore Caches
# ============================================
- name: Get cache keys
  id: cache-keys
  run: |
    # digest DIR GLOB: print the first 16 hex characters of a SHA-256 taken
    # over the sorted per-file SHA-256 lines of every regular file under DIR
    # whose name matches GLOB.
    digest() {
      find "$1" -name "$2" -type f -exec sha256sum {} \; | sort | sha256sum | cut -c1-16
    }

    {
      # Query sources key the data cache
      echo "query_hash=$(digest queries '*.py')"
      # Notebook sources key the render cache
      echo "notebook_hash=$(digest notebooks '*.ipynb')"
      # Current UTC date partitions both caches by day
      echo "date=$(date -u +%Y-%m-%d)"
    } >> "$GITHUB_OUTPUT"
# Data cache: exact key is (cache version, query hash, date). The restore
# keys widen the match to the same query hash on any date, then to any data
# cache of the current cache version.
- name: Restore data cache
  id: data-cache
  uses: actions/cache/restore@v4
  with:
    path: notebooks/data
    key: >-
      ${{ env.CACHE_VERSION }}-data-${{ steps.cache-keys.outputs.query_hash }}-${{ steps.cache-keys.outputs.date }}
    restore-keys: |
      ${{ env.CACHE_VERSION }}-data-${{ steps.cache-keys.outputs.query_hash }}-
      ${{ env.CACHE_VERSION }}-data-

# Rendered cache: same scheme, keyed on the notebook hash instead.
- name: Restore rendered cache
  id: rendered-cache
  uses: actions/cache/restore@v4
  with:
    path: site/rendered
    key: >-
      ${{ env.CACHE_VERSION }}-rendered-${{ steps.cache-keys.outputs.notebook_hash }}-${{ steps.cache-keys.outputs.date }}
    restore-keys: |
      ${{ env.CACHE_VERSION }}-rendered-${{ steps.cache-keys.outputs.notebook_hash }}-
      ${{ env.CACHE_VERSION }}-rendered-
- name: Initialize directories
  run: |
    data_manifest=notebooks/data/manifest.json
    rendered_manifest=site/rendered/manifest.json

    mkdir -p notebooks/data site/rendered

    # Seed each manifest with an empty skeleton unless the file already exists
    [ -f "$data_manifest" ] ||
      echo '{"schema_version": "2.0", "dates": [], "latest": null, "query_hashes": {}, "date_queries": {}}' > "$data_manifest"
    [ -f "$rendered_manifest" ] ||
      echo '{"latest_date": null, "dates": {}}' > "$rendered_manifest"
# ============================================
# Fetch Data
# ============================================
# Runs the `fetch` recipe with both sets of ClickHouse connection settings
# exposed as environment variables. FORCE_RENDER is read from the
# workflow-level environment.
- name: Fetch data
  run: just fetch all "$FORCE_RENDER"
  env:
    CLICKHOUSE_HOST: ${{ secrets.CLICKHOUSE_HOST }}
    CLICKHOUSE_PORT: ${{ secrets.CLICKHOUSE_PORT }}
    CLICKHOUSE_USER: ${{ secrets.CLICKHOUSE_USER }}
    CLICKHOUSE_PASSWORD: ${{ secrets.CLICKHOUSE_PASSWORD }}
    CONTRIBUTOOR_CLICKHOUSE_HOST: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_HOST }}
    CONTRIBUTOOR_CLICKHOUSE_PORT: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_PORT }}
    CONTRIBUTOOR_CLICKHOUSE_USER: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_USER }}
    CONTRIBUTOOR_CLICKHOUSE_PASSWORD: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_PASSWORD }}
# ============================================
# Render Notebooks
# ============================================
# Runs the `render` recipe; FORCE_RENDER comes from the workflow-level
# environment.
- name: Render notebooks
  run: just render all "$FORCE_RENDER"
# ============================================
# Build Site
# ============================================
- name: Build site
  run: just build
  env:
    # Production: observatory.ethp2p.dev, PRs: observatory-staging.ethp2p.dev/pr-{number}/
    # Pull requests get the staging domain and a /pr-<number>/ base path;
    # every other event gets the production domain at the root path.
    ASTRO_SITE: >-
      ${{ github.event_name == 'pull_request'
      && secrets.R2_STAGING_DOMAIN
      || secrets.R2_PROD_DOMAIN }}
    ASTRO_BASE: >-
      ${{ github.event_name == 'pull_request'
      && format('/pr-{0}/', github.event.pull_request.number)
      || '/' }}

- name: Copy data to dist
  run: just copy-data
# ============================================
# Save Caches
# ============================================
# Both caches are written only for runs on main or scheduled runs, under the
# same exact keys the restore steps look up first.
- name: Save data cache
  if: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/main' }}
  uses: actions/cache/save@v4
  with:
    path: notebooks/data
    key: >-
      ${{ env.CACHE_VERSION }}-data-${{ steps.cache-keys.outputs.query_hash }}-${{ steps.cache-keys.outputs.date }}

- name: Save rendered cache
  if: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/main' }}
  uses: actions/cache/save@v4
  with:
    path: site/rendered
    key: >-
      ${{ env.CACHE_VERSION }}-rendered-${{ steps.cache-keys.outputs.notebook_hash }}-${{ steps.cache-keys.outputs.date }}
# ============================================
# Upload Artifacts (for traceability)
# ============================================
# Date-stamped copies of the data and rendered directories, kept for two
# days, uploaded only for runs on main or scheduled runs.
- name: Upload data artifact
  if: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/main' }}
  uses: actions/upload-artifact@v4
  with:
    name: data-${{ steps.cache-keys.outputs.date }}
    path: notebooks/data
    retention-days: 2

- name: Upload rendered artifact
  if: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/main' }}
  uses: actions/upload-artifact@v4
  with:
    name: rendered-${{ steps.cache-keys.outputs.date }}
    path: site/rendered
    retention-days: 2
# ============================================
# Deploy to R2 (Content-Addressed Storage)
# ============================================
# Emits two step outputs:
#   name - "pr-<number>" for pull requests, "main" otherwise
#   url  - staging domain plus /pr-<number>/ for pull requests,
#          production domain otherwise
- name: Determine manifest name
  id: manifest
  env:
    EVENT_NAME: ${{ github.event_name }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    STAGING_DOMAIN: ${{ secrets.R2_STAGING_DOMAIN }}
    PROD_DOMAIN: ${{ secrets.R2_PROD_DOMAIN }}
  run: |
    case "$EVENT_NAME" in
      pull_request)
        manifest_name="pr-${PR_NUMBER}"
        site_url="${STAGING_DOMAIN}/pr-${PR_NUMBER}/"
        ;;
      *)
        manifest_name="main"
        site_url="${PROD_DOMAIN}"
        ;;
    esac

    {
      echo "name=${manifest_name}"
      echo "url=${site_url}"
    } >> "$GITHUB_OUTPUT"
# Runs scripts/r2.py in upload mode on site/dist, passing the manifest name
# chosen by the `manifest` step. R2 credentials are supplied via environment.
- name: Upload to R2 (CAS)
  env:
    R2_BUCKET_NAME: ${{ secrets.R2_BUCKET_NAME }}
    R2_ENDPOINT: ${{ secrets.R2_ENDPOINT }}
    R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
    R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
  run: >-
    uv run python scripts/r2.py upload
    --dist site/dist
    --manifest "${{ steps.manifest.outputs.name }}"
# ============================================
# Deploy Worker (if changed)
# ============================================
# Sets the `worker` output to 'true' when files under worker/ changed.
- name: Check if worker changed
  id: worker-check
  uses: dorny/paths-filter@v3
  with:
    filters: |
      worker:
        - 'worker/**'

# Deploys only from main, and only when the worker changed or the run was
# started manually.
- name: Deploy worker
  if: >-
    github.ref == 'refs/heads/main'
    && (steps.worker-check.outputs.worker == 'true'
    || github.event_name == 'workflow_dispatch')
  uses: cloudflare/wrangler-action@v3
  with:
    apiToken: ${{ secrets.WORKERS_API_TOKEN }}
    workingDirectory: worker
# Posts the preview URL on the pull request, or refreshes the existing
# preview comment. The comment is recognised by a hidden HTML marker so that
# repeated runs update one comment instead of adding new ones.
- name: Comment preview URL on PR
  if: github.event_name == 'pull_request'
  uses: actions/github-script@v7
  env:
    # Passed through the environment rather than spliced into the script
    # source, so characters in the URL can never alter the JavaScript.
    PREVIEW_URL: ${{ steps.manifest.outputs.url }}
  with:
    script: |
      const marker = '<!-- r2-preview -->';
      const prNumber = context.issue.number;
      const previewUrl = process.env.PREVIEW_URL;
      const { owner, repo } = context.repo;

      // Find existing preview comment. listComments returns one page at a
      // time (30 items by default), so walk every page; otherwise a marker
      // comment beyond the first page is missed and a duplicate is posted
      // on every run.
      const comments = await github.paginate(github.rest.issues.listComments, {
        owner,
        repo,
        issue_number: prNumber,
        per_page: 100,
      });
      // user and body may be null in API responses; treat those as non-matching.
      const botComment = comments.find(
        (c) => c.user?.type === 'Bot' && c.body?.includes(marker)
      );

      // Built with join() so the Markdown never picks up YAML indentation.
      const body = [
        marker,
        '## Preview Deployment',
        ':rocket: Preview is ready!',
        `**URL:** ${previewUrl}`,
        `_Updated: ${new Date().toISOString()}_`,
      ].join('\n');

      if (botComment) {
        await github.rest.issues.updateComment({
          owner,
          repo,
          comment_id: botComment.id,
          body,
        });
      } else {
        await github.rest.issues.createComment({
          owner,
          repo,
          issue_number: prNumber,
          body,
        });
      }