# maintain-chemrxiv
# maintain-chemrxiv #43

# Daily maintenance workflow for the ChemRxiv backend. It runs on a schedule
# and can also be started manually with overrides for the fetch window.
name: maintain-chemrxiv

on:
  schedule:
    # GitHub evaluates cron in UTC: 01:30 UTC is 09:30 Beijing time (UTC+8).
    - cron: "30 1 * * *"
  workflow_dispatch:
    inputs:
      # Both inputs are optional strings; the defaults are quoted so they are
      # not retyped as an integer / boolean by the YAML parser.
      fetch_days:
        description: "回溯抓取天数(默认 400;镜像源)"
        required: false
        default: "400"
      force_full_window:
        description: "是否忽略 seen 状态并全量回补当前窗口(true/false)"
        required: false
        default: "false"

# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read

# All runs share one concurrency group. With cancel-in-progress disabled, a new
# run waits for the in-progress run instead of cancelling it.
concurrency:
  group: daily-paper-reader-maintain-chemrxiv
  cancel-in-progress: false
jobs:
  maintain_chemrxiv:
    # Run only in the two listed repositories; in any other repository the job
    # is skipped.
    if: >-
      github.repository == 'ziwenhahaha/daily-paper-reader' ||
      github.repository == '5-xj/daily-paper-reader'
    runs-on: ubuntu-latest
    timeout-minutes: 240
    # Job-wide environment read by the maintain script. Numeric / flag values
    # are quoted so they reach the process as strings.
    env:
      SUPABASE_URL: https://lyucdwgefyfbmaiopjbk.supabase.co
      SUPABASE_SCHEMA: public
      SUPABASE_SERVICE_KEY: ${{ secrets.SUPABASE_SERVICE_KEY }}
      SUPABASE_BACKEND_KEY: chemrxiv
      SUPABASE_PAPERS_TABLE: chemrxiv_papers
      SUPABASE_RETENTION_DAYS: "45"
      DPR_ENABLE_CHEMRXIV_BACKEND: "1"
      DPR_CHEMRXIV_ENABLED: "1"
      DPR_CHEMRXIV_PAPERS_TABLE: chemrxiv_papers
      DPR_CHEMRXIV_VECTOR_RPC_EXACT: match_chemrxiv_papers_exact
      DPR_CHEMRXIV_BM25_RPC: match_chemrxiv_papers_bm25
      PYTHONUNBUFFERED: "1"
    steps:
      - name: Checkout
        uses: actions/checkout@v5

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          # Quoted so the version is not parsed as a float.
          python-version: "3.11"

      - name: Cache pip + torch
        uses: actions/cache@v5
        with:
          path: |
            ~/.cache/pip
            ~/.cache/torch
          # The key changes whenever requirements.txt changes.
          key: ${{ runner.os }}-dpr-chemrxiv-hf-v1-${{ hashFiles('requirements.txt') }}

      - name: Install deps (skip sqlite3)
        run: |
          # Write a filtered copy of requirements.txt (blank lines and any
          # sqlite3 requirement removed) and install from that copy.
          python - <<'PY'
          import re

          with open("requirements.txt", "r", encoding="utf-8") as src:
              lines = src.read().splitlines()
          # In a raw string, a single backslash gives the word-boundary \b.
          # The earlier doubled backslash matched a literal "\b" text, so the
          # sqlite3 line was never filtered out.
          kept = [
              line
              for line in lines
              if line.strip() and not re.match(r"sqlite3\b", line.strip())
          ]
          with open("/tmp/req.txt", "w", encoding="utf-8") as dst:
              dst.write("\n".join(kept) + "\n")
          PY
          python -m pip install --upgrade pip
          python -m pip install uv
          uv pip install --system -r /tmp/req.txt

      - name: Run ChemRxiv Maintain Pipeline
        # Dispatch inputs are handed over as environment variables rather than
        # being expanded into the script text, so their content is never
        # interpreted as shell code.
        env:
          FETCH_DAYS_INPUT: ${{ github.event.inputs.fetch_days }}
          FORCE_FULL_WINDOW_INPUT: ${{ github.event.inputs.force_full_window }}
        run: |
          set -euo pipefail
          # Check the secret before enabling tracing so that the trace never
          # expands the key.
          if [ -z "${SUPABASE_SERVICE_KEY:-}" ]; then
            echo "[WARN] 未配置 SUPABASE_SERVICE_KEY,已跳过 ChemRxiv 维护同步。"
            exit 0
          fi
          set -x
          # Inputs are empty on scheduled runs; fall back to the defaults.
          FETCH_DAYS="${FETCH_DAYS_INPUT:-400}"
          ARGS=(--fetch-days "$FETCH_DAYS")
          if [ "${FORCE_FULL_WINDOW_INPUT:-false}" = "true" ]; then
            ARGS+=(--force-full-window)
          fi
          python src/maintain/chemrxiv.py "${ARGS[@]}"