# maintain-medrxiv
# maintain-medrxiv #44

# Workflow header: display name, triggers, token permissions and
# concurrency policy for the "maintain-medrxiv" workflow.
name: maintain-medrxiv

on:
  schedule:
    # Cron schedules are evaluated in UTC; 01:15 UTC is 09:15 Beijing time.
    - cron: "15 1 * * *"
  workflow_dispatch:
    inputs:
      # Number of days to look back when fetching (default 30).
      fetch_days:
        description: "回溯抓取天数(默认 30)"
        required: false
        default: "30"
      # Whether to ignore the "seen" state and fully backfill the current
      # window ("true" / "false").
      force_full_window:
        description: "是否忽略 seen 状态并全量回补当前窗口(true/false)"
        required: false
        default: "false"

# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read

# Every run joins the same concurrency group; a newer run never cancels the
# one that is already in progress.
concurrency:
  group: daily-paper-reader-maintain-medrxiv
  cancel-in-progress: false
jobs:
  # Single job: install the Python dependencies, then run the medRxiv
  # maintenance script against the Supabase backend configured in `env`.
  maintain_medrxiv:
    # Only run in the two named repositories; the job is skipped elsewhere.
    if: >-
      github.repository == 'ziwenhahaha/daily-paper-reader' ||
      github.repository == '5-xj/daily-paper-reader'
    runs-on: ubuntu-latest
    timeout-minutes: 240
    env:
      SUPABASE_URL: https://lyucdwgefyfbmaiopjbk.supabase.co
      SUPABASE_SCHEMA: public
      # Resolves to an empty string when the secret is not configured; the
      # run step checks for that and exits early.
      SUPABASE_SERVICE_KEY: ${{ secrets.SUPABASE_SERVICE_KEY }}
      SUPABASE_BACKEND_KEY: medrxiv
      SUPABASE_PAPERS_TABLE: medrxiv_papers
      # Numeric-looking values are quoted so they stay strings.
      SUPABASE_RETENTION_DAYS: "45"
      DPR_ENABLE_MEDRXIV_BACKEND: "1"
      DPR_MEDRXIV_ENABLED: "1"
      DPR_MEDRXIV_PAPERS_TABLE: medrxiv_papers
      DPR_MEDRXIV_VECTOR_RPC_EXACT: match_medrxiv_papers_exact
      DPR_MEDRXIV_BM25_RPC: match_medrxiv_papers_bm25
      PYTHONUNBUFFERED: "1"
    steps:
      - name: Checkout
        uses: actions/checkout@v5

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      # The cache key changes whenever requirements.txt changes.
      # NOTE(review): dependencies are installed with uv below, and uv keeps
      # its own cache directory (~/.cache/uv by default), which is not part
      # of the cached paths — confirm whether it should be added.
      - name: Cache pip + torch
        uses: actions/cache@v5
        with:
          path: |
            ~/.cache/pip
            ~/.cache/torch
          key: ${{ runner.os }}-dpr-medrxiv-hf-v1-${{ hashFiles('requirements.txt') }}

      - name: Install deps (skip sqlite3)
        run: |
          python - <<'PY'
          import re

          # Write a filtered copy of requirements.txt to /tmp/req.txt:
          # blank lines and requirement lines starting with the name
          # "sqlite3" are dropped (presumably because sqlite3 ships with
          # the Python standard library and cannot be pip-installed).
          # The pattern uses a single backslash: inside a raw string,
          # "\\b" would demand a literal backslash and never match.
          with open("requirements.txt", "r", encoding="utf-8") as src:
              kept = [
                  line
                  for line in src.read().splitlines()
                  if line.strip() and not re.match(r"^sqlite3\b", line)
              ]
          with open("/tmp/req.txt", "w", encoding="utf-8") as dst:
              dst.write("\n".join(kept))
          PY
          python -m pip install --upgrade pip
          python -m pip install uv
          uv pip install --system -r /tmp/req.txt

      - name: Run medRxiv Maintain Pipeline
        env:
          # Manual-dispatch inputs are handed over as environment variables
          # rather than interpolated into the script text, so their content
          # can never be executed as shell code. Both are empty on
          # scheduled runs.
          DISPATCH_FETCH_DAYS: ${{ github.event.inputs.fetch_days }}
          DISPATCH_FORCE_FULL_WINDOW: ${{ github.event.inputs.force_full_window }}
        run: |
          set -euo pipefail
          # Without the service key there is nothing to sync: warn and
          # finish successfully instead of failing the run.
          if [ -z "${SUPABASE_SERVICE_KEY}" ]; then
            echo "[WARN] 未配置 SUPABASE_SERVICE_KEY,已跳过 medRxiv 维护同步。"
            exit 0
          fi
          # Tracing starts only after the check above, so the key value is
          # never expanded into the xtrace output.
          set -x
          # Fall back to a 30-day window when no input was supplied.
          FETCH_DAYS="${DISPATCH_FETCH_DAYS:-30}"
          ARGS=(--fetch-days "$FETCH_DAYS")
          # Only the exact string "true" enables the full-window backfill.
          if [ "${DISPATCH_FORCE_FULL_WINDOW:-false}" = "true" ]; then
            ARGS+=(--force-full-window)
          fi
          python src/maintain/medrxiv.py "${ARGS[@]}"