Skip to content

crawl2

crawl2 #75

Workflow file for this run

# Workflow "crawl2": can be started manually from the Actions tab and also
# runs automatically once a day.
name: crawl2
on:
  # Manual trigger (no inputs defined).
  workflow_dispatch:
  schedule:
    # Daily at 02:00; GitHub evaluates cron schedules in UTC.
    - cron: '0 2 * * *'
jobs:
  # Single job: sets up R, runs the crawl scripts from the repository and
  # commits whatever they changed back to the checked-out branch.
  crawl:
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access to
    # repository contents (the default token may be read-only).
    permissions:
      contents: write
    env:
      # Root directory renv uses for its caches; picked up by setup-renv.
      RENV_PATHS_ROOT: ~/.local/share/renv
    steps:
      # The repository is checked out into ./main_folder; every later step
      # that touches project files sets its working directory accordingly.
      - name: Check out repository
        uses: actions/checkout@v4
        with:
          path: main_folder
          ref: main

      - name: Install system dependencies
        # libtiff-dev is the canonical package name; the former libtiff5-dev
        # is only a transitional alias and is not guaranteed to exist on
        # newer ubuntu-latest images.
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            libcurl4-openssl-dev \
            libssl-dev \
            libxml2-dev \
            libharfbuzz-dev \
            libfribidi-dev \
            libfreetype6-dev \
            libpng-dev \
            libtiff-dev \
            libjpeg-dev

      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: 'release'

      - name: Setup renv
        uses: r-lib/actions/setup-renv@v2
        with:
          cache-version: 1
          working-directory: main_folder

      - name: Install packages
        working-directory: ./main_folder
        # The script body below is R code, executed via the custom shell.
        shell: Rscript {0}
        run: |
          install.packages(c("httr", "jsonlite", "rvest", "dplyr", "purrr"))

      - name: Create and populate credentials file with secrets
        working-directory: ./main_folder
        shell: bash
        # Secrets are handed over as environment variables instead of being
        # interpolated into the script text.
        env:
          MAPPED_OPENAI_APIKEY: ${{ secrets.OPENAI_APIKEY }}
          MAPPED_CROSSREF_EMAIL: ${{ secrets.CROSSREF_EMAIL }}
          MAPPED_GEMINI_APIKEY: ${{ secrets.GEMINI_APIKEY }}
        # Appends one `name="value"` assignment per secret to credentials.R.
        run: |
          echo 'openai_apikey="'"$MAPPED_OPENAI_APIKEY"'"' >> credentials.R
          echo 'crossref_email="'"$MAPPED_CROSSREF_EMAIL"'"' >> credentials.R
          echo 'gemini_apikey="'"$MAPPED_GEMINI_APIKEY"'"' >> credentials.R

      - name: Get data and produce JSON
        working-directory: ./main_folder
        # crawl.R is invoked once per argument, followed by the two
        # stand-alone scripts; any failing script aborts the step.
        run: |
          Rscript crawl.R "sociology"
          Rscript crawl.R "communication"
          Rscript crawl.R "politics"
          Rscript crawl.R "po"
          Rscript crawl.R "psych"
          Rscript crawl.R "multidisciplinary"
          Rscript preprints.R
          Rscript must_read.R

      - name: Commit results main branch
        working-directory: ./main_folder
        run: |
          # NOTE(review): the original address was lost to e-mail obfuscation
          # in the copied page; this is the conventional github-actions bot
          # noreply address. Replace it if a specific address is required.
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "GitHub Actions"
          # Stage everything except the credentials file written earlier in
          # this job, so the secrets can never end up in a pushed commit.
          git add -- . ':(exclude)credentials.R'
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the run on days without new data; skip commit/push instead.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Data updated"
            git push origin
          fi