# new feat: add slot tagger #203
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity and triggers.
name: Sync

on:
  # Nightly run at 01:00 UTC.
  schedule:
    - cron: '0 1 * * *'

  # Runs for commits pushed to main and for pull requests targeting main.
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

  # Manual run; `force` is forwarded to the fetch and render steps through
  # the FORCE_RENDER environment variable.
  workflow_dispatch:
    inputs:
      force:
        description: 'Force fetch and re-render all notebooks'
        type: boolean
        required: false
        default: false
# Token scopes for every job: read-only repository contents, plus write
# access to pull requests so the preview comment can be posted.
permissions:
  contents: read
  pull-requests: write

# One active run per branch: PR runs are grouped by their head branch, all
# other runs by the ref name. A newer run cancels the one in progress.
# NOTE(review): scheduled, push and manual runs on main all land in the same
# group (sync-main), so a new run can cancel one that is mid-deploy — confirm
# this is intended.
concurrency:
  group: 'sync-${{ github.head_ref || github.ref_name }}'
  cancel-in-progress: true

env:
  # Prefix of every cache key; increment to invalidate all caches.
  CACHE_VERSION: v2
  # The manual `force` input, or the string 'false' for every other trigger.
  FORCE_RENDER: ${{ github.event.inputs.force || 'false' }}
jobs:
  # Single job: fetch data, render notebooks, build the site, upload it to R2,
  # optionally deploy the worker, and comment the preview URL on pull requests.
  sync:
    runs-on: ubuntu-latest
    outputs:
      # NOTE(review): no step with id `check` appears among the steps shown
      # here, so this output would evaluate to an empty string — confirm the
      # step exists further down, otherwise add it or drop this output.
      has_changes: ${{ steps.check.outputs.has_changes }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # ============================================
      # Setup Tools
      # ============================================
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true

      - name: Set up Python
        run: uv python install 3.13

      - name: Install Python dependencies
        run: uv sync

      - name: Install just
        uses: extractions/setup-just@v2

      - name: Setup pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 10

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"
          cache: "pnpm"
          cache-dependency-path: site/pnpm-lock.yaml

      - name: Install Node dependencies
        run: cd site && pnpm install

      # ============================================
      # Restore Caches
      # ============================================
      # Cache keys are <CACHE_VERSION>-<kind>-<source hash>-<UTC date>.
      - name: Get cache keys
        id: cache-keys
        run: |
          # Hash of query source files for data cache.
          # `-exec ... +` batches files into few sha256sum calls; the output
          # lines (and therefore the resulting key) match the per-file form.
          QUERY_HASH=$(find queries -name "*.py" -type f -exec sha256sum {} + | sort | sha256sum | cut -c1-16)
          echo "query_hash=$QUERY_HASH" >> "$GITHUB_OUTPUT"

          # Hash of notebook source files for render cache
          NOTEBOOK_HASH=$(find notebooks -name "*.ipynb" -type f -exec sha256sum {} + | sort | sha256sum | cut -c1-16)
          echo "notebook_hash=$NOTEBOOK_HASH" >> "$GITHUB_OUTPUT"

          # Today's date for cache partitioning
          echo "date=$(date -u +%Y-%m-%d)" >> "$GITHUB_OUTPUT"

      # restore-keys fall back first to the same source hash on any date,
      # then to any cache of the current CACHE_VERSION.
      - name: Restore data cache
        uses: actions/cache/restore@v4
        id: data-cache
        with:
          path: notebooks/data
          key: ${{ env.CACHE_VERSION }}-data-${{ steps.cache-keys.outputs.query_hash }}-${{ steps.cache-keys.outputs.date }}
          restore-keys: |
            ${{ env.CACHE_VERSION }}-data-${{ steps.cache-keys.outputs.query_hash }}-
            ${{ env.CACHE_VERSION }}-data-

      - name: Restore rendered cache
        uses: actions/cache/restore@v4
        id: rendered-cache
        with:
          path: site/rendered
          key: ${{ env.CACHE_VERSION }}-rendered-${{ steps.cache-keys.outputs.notebook_hash }}-${{ steps.cache-keys.outputs.date }}
          restore-keys: |
            ${{ env.CACHE_VERSION }}-rendered-${{ steps.cache-keys.outputs.notebook_hash }}-
            ${{ env.CACHE_VERSION }}-rendered-

      - name: Initialize directories
        run: |
          mkdir -p notebooks/data site/rendered
          # Create empty manifest if none exists
          if [ ! -f "notebooks/data/manifest.json" ]; then
            echo '{"schema_version": "2.0", "dates": [], "latest": null, "query_hashes": {}, "date_queries": {}}' > notebooks/data/manifest.json
          fi
          if [ ! -f "site/rendered/manifest.json" ]; then
            echo '{"latest_date": null, "dates": {}}' > site/rendered/manifest.json
          fi

      # ============================================
      # Fetch Data
      # ============================================
      # FORCE_RENDER is the workflow-level env value; it is read as a shell
      # variable rather than spliced into the command text.
      - name: Fetch data
        env:
          CLICKHOUSE_HOST: ${{ secrets.CLICKHOUSE_HOST }}
          CLICKHOUSE_PORT: ${{ secrets.CLICKHOUSE_PORT }}
          CLICKHOUSE_USER: ${{ secrets.CLICKHOUSE_USER }}
          CLICKHOUSE_PASSWORD: ${{ secrets.CLICKHOUSE_PASSWORD }}
          CONTRIBUTOOR_CLICKHOUSE_HOST: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_HOST }}
          CONTRIBUTOOR_CLICKHOUSE_PORT: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_PORT }}
          CONTRIBUTOOR_CLICKHOUSE_USER: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_USER }}
          CONTRIBUTOOR_CLICKHOUSE_PASSWORD: ${{ secrets.CONTRIBUTOOR_CLICKHOUSE_PASSWORD }}
        run: just fetch all "$FORCE_RENDER"

      # ============================================
      # Render Notebooks
      # ============================================
      - name: Render notebooks
        run: just render all "$FORCE_RENDER"

      # ============================================
      # Build Site
      # ============================================
      - name: Build site
        env:
          # Production: observatory.ethp2p.dev, PRs: observatory-staging.ethp2p.dev/pr-{number}/
          ASTRO_SITE: ${{ github.event_name == 'pull_request' && secrets.R2_STAGING_DOMAIN || secrets.R2_PROD_DOMAIN }}
          ASTRO_BASE: ${{ github.event_name == 'pull_request' && format('/pr-{0}/', github.event.pull_request.number) || '/' }}
        run: just build

      - name: Copy data to dist
        run: just copy-data

      # ============================================
      # Save Caches
      # ============================================
      # Caches are written only by runs on main or scheduled runs.
      - name: Save data cache
        if: github.ref == 'refs/heads/main' || github.event_name == 'schedule'
        uses: actions/cache/save@v4
        with:
          path: notebooks/data
          key: ${{ env.CACHE_VERSION }}-data-${{ steps.cache-keys.outputs.query_hash }}-${{ steps.cache-keys.outputs.date }}

      - name: Save rendered cache
        if: github.ref == 'refs/heads/main' || github.event_name == 'schedule'
        uses: actions/cache/save@v4
        with:
          path: site/rendered
          key: ${{ env.CACHE_VERSION }}-rendered-${{ steps.cache-keys.outputs.notebook_hash }}-${{ steps.cache-keys.outputs.date }}

      # ============================================
      # Upload Artifacts (for traceability)
      # ============================================
      - name: Upload data artifact
        if: github.ref == 'refs/heads/main' || github.event_name == 'schedule'
        uses: actions/upload-artifact@v4
        with:
          name: data-${{ steps.cache-keys.outputs.date }}
          path: notebooks/data
          retention-days: 2

      - name: Upload rendered artifact
        if: github.ref == 'refs/heads/main' || github.event_name == 'schedule'
        uses: actions/upload-artifact@v4
        with:
          name: rendered-${{ steps.cache-keys.outputs.date }}
          path: site/rendered
          retention-days: 2

      # ============================================
      # Deploy to R2 (Content-Addressed Storage)
      # ============================================
      # Pull requests publish under the manifest name pr-<number>; everything
      # else publishes as "main". Context values reach the script through env
      # so they are never interpolated into the shell source.
      - name: Determine manifest name
        id: manifest
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          R2_STAGING_DOMAIN: ${{ secrets.R2_STAGING_DOMAIN }}
          R2_PROD_DOMAIN: ${{ secrets.R2_PROD_DOMAIN }}
        run: |
          if [ "$EVENT_NAME" = "pull_request" ]; then
            echo "name=pr-${PR_NUMBER}" >> "$GITHUB_OUTPUT"
            echo "url=${R2_STAGING_DOMAIN}/pr-${PR_NUMBER}/" >> "$GITHUB_OUTPUT"
          else
            echo "name=main" >> "$GITHUB_OUTPUT"
            echo "url=${R2_PROD_DOMAIN}" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload to R2 (CAS)
        env:
          R2_BUCKET_NAME: ${{ secrets.R2_BUCKET_NAME }}
          R2_ENDPOINT: ${{ secrets.R2_ENDPOINT }}
          R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
          R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
          MANIFEST_NAME: ${{ steps.manifest.outputs.name }}
        run: |
          uv run python scripts/r2.py upload \
            --dist site/dist \
            --manifest "$MANIFEST_NAME"

      # ============================================
      # Deploy Worker (if changed)
      # ============================================
      - name: Check if worker changed
        id: worker-check
        uses: dorny/paths-filter@v3
        with:
          filters: |
            worker:
              - 'worker/**'

      # Deploys only from main, and only when files under worker/ changed or
      # the run was started manually.
      - name: Deploy worker
        if: (steps.worker-check.outputs.worker == 'true' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/main'
        uses: cloudflare/wrangler-action@v3
        with:
          apiToken: ${{ secrets.WORKERS_API_TOKEN }}
          workingDirectory: worker

      # Creates the preview comment on the PR, or updates the existing one
      # (identified by a hidden HTML marker) so reruns do not pile up comments.
      - name: Comment preview URL on PR
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        env:
          # Passed through the environment instead of being spliced into the
          # JavaScript source, so a quote in the value cannot break the script.
          PREVIEW_URL: ${{ steps.manifest.outputs.url }}
        with:
          script: |
            const prNumber = context.issue.number;
            const previewUrl = process.env.PREVIEW_URL;
            const marker = '<!-- r2-preview -->';

            // Walk every page of comments: a single listComments call only
            // returns the first page, so on a busy PR the earlier preview
            // comment would be missed and a duplicate would be posted.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              per_page: 100
            });

            // user/body can be absent on some comments; skip those safely.
            const botComment = comments.find(c =>
              c.user?.type === 'Bot' && c.body?.includes(marker)
            );

            const body = [
              marker,
              '## Preview Deployment',
              ':rocket: Preview is ready!',
              `**URL:** ${previewUrl}`,
              `_Updated: ${new Date().toISOString()}_`
            ].join('\n');

            if (botComment) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body
              });
            }