# maintain-chemrxiv #34
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: triggers, token permissions, and run serialization for the
# ChemRxiv maintenance workflow.
name: maintain-chemrxiv

on:
  schedule:
    # 01:30 UTC every day, i.e. 09:30 Beijing time.
    - cron: "30 1 * * *"
  # Manual trigger; both inputs are optional strings.
  workflow_dispatch:
    inputs:
      fetch_days:
        description: "回溯抓取天数(默认 400;镜像源)"
        required: false
        default: "400"
      force_full_window:
        description: "是否忽略 seen 状态并全量回补当前窗口(true/false)"
        required: false
        default: "false"

# The workflow token is limited to reading repository contents.
permissions:
  contents: read

# All runs share one concurrency group; a newly queued run does not cancel the
# run that is already in progress.
concurrency:
  group: daily-paper-reader-maintain-chemrxiv
  cancel-in-progress: false
jobs:
  maintain_chemrxiv:
    # Run only in the two named repositories; the job is skipped anywhere else.
    if: >-
      github.repository == 'ziwenhahaha/daily-paper-reader' ||
      github.repository == '5-xj/daily-paper-reader'
    runs-on: ubuntu-latest
    timeout-minutes: 240
    env:
      # Supabase connection settings. The service key comes from repository
      # secrets and is empty when the secret is not configured.
      SUPABASE_URL: https://lyucdwgefyfbmaiopjbk.supabase.co
      SUPABASE_SCHEMA: public
      SUPABASE_SERVICE_KEY: ${{ secrets.SUPABASE_SERVICE_KEY }}
      SUPABASE_BACKEND_KEY: chemrxiv
      SUPABASE_PAPERS_TABLE: chemrxiv_papers
      SUPABASE_RETENTION_DAYS: "45"
      # ChemRxiv backend switches and table/RPC names.
      # NOTE(review): presumably read by src/maintain/chemrxiv.py - confirm.
      DPR_ENABLE_CHEMRXIV_BACKEND: "1"
      DPR_CHEMRXIV_ENABLED: "1"
      DPR_CHEMRXIV_PAPERS_TABLE: chemrxiv_papers
      DPR_CHEMRXIV_VECTOR_RPC_EXACT: match_chemrxiv_papers_exact
      DPR_CHEMRXIV_BM25_RPC: match_chemrxiv_papers_bm25
      # Unbuffered Python output so log lines appear as they are written.
      PYTHONUNBUFFERED: "1"
    steps:
| - name: Checkout | |
| uses: actions/checkout@v5 | |
| - name: Setup Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.11" | |
| - name: Cache pip + torch | |
| uses: actions/cache@v5 | |
| with: | |
| path: | | |
| ~/.cache/pip | |
| ~/.cache/torch | |
| key: ${{ runner.os }}-dpr-chemrxiv-hf-v1-${{ hashFiles('requirements.txt') }} | |
| - name: Install deps (skip sqlite3) | |
| run: | | |
| python - <<'PY' | |
| import re | |
| lines = open("requirements.txt", "r", encoding="utf-8").read().splitlines() | |
| lines = [l for l in lines if l.strip() and not re.match(r"^sqlite3\\b", l)] | |
| open("/tmp/req.txt", "w", encoding="utf-8").write("\n".join(lines)) | |
| PY | |
| python -m pip install --upgrade pip | |
| python -m pip install uv | |
| uv pip install --system -r /tmp/req.txt | |
| - name: Run ChemRxiv Maintain Pipeline | |
| run: | | |
| set -euo pipefail | |
| set -x | |
| if [ -z "${SUPABASE_SERVICE_KEY}" ]; then | |
| echo "[WARN] 未配置 SUPABASE_SERVICE_KEY,已跳过 ChemRxiv 维护同步。" | |
| exit 0 | |
| fi | |
| FETCH_DAYS="${{ github.event.inputs.fetch_days }}" | |
| FORCE_FULL_WINDOW="${{ github.event.inputs.force_full_window }}" | |
| if [ -z "$FETCH_DAYS" ]; then | |
| FETCH_DAYS="400" | |
| fi | |
| ARGS=(--fetch-days "$FETCH_DAYS") | |
| if [ "${FORCE_FULL_WINDOW:-false}" = "true" ]; then | |
| ARGS+=(--force-full-window) | |
| fi | |
| python src/maintain/chemrxiv.py "${ARGS[@]}" |