Skip to content

Fix CRAN link check condition and clean up trailing commas in package… #92

Fix CRAN link check condition and clean up trailing commas in package…

Fix CRAN link check condition and clean up trailing commas in package… #92

# Start with the list of packages in package_list.csv, fetch external data about each one (e.g. last commit date), and generate HTML pages with the package information. This workflow runs on a weekly schedule, on pushes to main, and can also be triggered manually from the Actions tab.
# Olly Butters
# 15/5/26
name: Package list

# Triggers: a cron schedule, pushes to the main branch, and manual runs
# started from the Actions tab (workflow_dispatch).
on:
  schedule:
    # Minute 30, hour 5, every Sunday.
    # NOTE(review): confirm that the `timezone` key is honoured by the
    # schedule trigger; if it is not, the cron expression is read as UTC.
    - cron: "30 5 * * 0"
      timezone: "Europe/London"
  push:
    branches:
      - "main"
  workflow_dispatch:
jobs:
  # Build job: reads package_list.csv, collects GitHub and CRAN metadata for
  # every package into output.csv, renders the HTML pages and uploads the
  # gh-pages directory as a Pages artifact.
  build:
    runs-on: ubuntu-slim
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Parse the package list, get the external data, and write to a new CSV file
      - name: Build package data
        # No `shell:` is set, so GitHub runs this script with `bash -e`: any
        # command that fails outside an `if`/`||` aborts the whole step.
        # Lookups that are allowed to fail are therefore guarded explicitly.
        run: |
          echo "$GITHUB_WORKSPACE"
          ls "$GITHUB_WORKSPACE"

          # Make a cache directory for some info from the GitHub clones.
          mkdir -p cache

          # Make sure functions.txt exists even when no package ships a
          # NAMESPACE file; it is copied and printed at the end of this step.
          touch functions.txt

          # Create new output file
          echo "Name,Description,CRAN link,CRAN version,CRAN license,GitHub link,Last commit,GitHub version,GitHub license,Github owner,Status" > output.csv

          # Read the data in from package_list.csv (header row skipped).
          # The name and gh_link fields are required.
          # `|| [ -n "${name}" ]` keeps the last row when the file has no
          # trailing newline; `tr -d '\r'` copes with Windows line endings.
          while IFS="," read -r name description gh_link cran_link status || [ -n "${name}" ]
          do
            # Skip completely blank rows.
            if [ -z "${name}" ] && [ -z "${gh_link}" ]; then
              continue
            fi

            echo "-----------------------------------"
            echo "name: $name"
            echo "desc: $description"
            echo "github_link: $gh_link"
            echo "cran_link: $cran_link"
            echo "status: $status"
            echo ""

            # Whitespace-free copies of the links for use in commands. The
            # original field values are still what gets written to output.csv.
            gh_url="${gh_link//[[:space:]]/}"
            cran_url="${cran_link//[[:space:]]/}"

            ##### GitHub info
            echo -e "\nGitHub repo information"

            # clone the GitHub repo and process it
            if [ -z "${gh_url}" ]; then
              echo "No GitHub link provided, skipping GitHub info for this package."
              gh_last_commit=""
              gh_last_release=""
              gh_license=""
              gh_functions=""
              gh_owner=""
              gh_code_of_conduct=""
            else
              # Extract the repo name from the GitHub link, tolerating a
              # trailing slash or a ".git" suffix.
              gh_repo_name=$(basename "${gh_url%/}" .git)
              echo "$gh_repo_name"

              echo "Cloning GitHub repo: $gh_link"
              # Clone into an explicit directory so the name always matches
              # the one used for cd / cache / cleanup below.
              git clone "${gh_url}" "${gh_repo_name}"
              cd "${gh_repo_name}"

              # Last commit date of repo
              gh_last_commit=$(git log -1 --format=%cs)
              echo "last commit: $gh_last_commit"

              # Last release: describe the most recently tagged commit, if any.
              latest_tag_commit=$(git rev-list --tags --max-count=1 || true)
              if [ -n "${latest_tag_commit}" ]; then
                gh_last_release=$(git describe --tags "${latest_tag_commit}")
              else
                gh_last_release=""
              fi
              echo "last GitHub release: $gh_last_release"

              # If there is a NAMESPACE file then parse it to get the function
              # list (the export(...) entries) and save the output
              gh_functions=""
              if [ -f "NAMESPACE" ]; then
                echo "NAMESPACE file found"
                gh_functions=$(sed -n "s/^export(\(.*\))$/\1/p" NAMESPACE | tr '\n' ',')
                echo "GitHub functions: $gh_functions"
                if [ -n "${gh_functions}" ]; then
                  echo "${name}: ${gh_functions}" >> ../functions.txt
                else
                  echo "$name" >> ../functions.txt
                fi
              fi

              # Use the GitHub API to pull in info about the repository.
              # gh is run inside the clone with no repository argument.
              echo -e "\nGitHub API information"
              gh repo view --json codeOfConduct,description,homepageUrl,licenseInfo,owner,parent,updatedAt > gh_repo_info.json
              echo "API response:"
              cat gh_repo_info.json

              # Get the license as it appears in GitHub. jq is used without -r,
              # so string values keep their JSON double quotes.
              gh_license=$(jq '.licenseInfo.nickname' gh_repo_info.json)
              # Fall back to the full name when there is no nickname. jq prints
              # the bare word null for a missing value, so test for that too.
              if [ -z "${gh_license}" ] || [ "${gh_license}" = '""' ] || [ "${gh_license}" = 'null' ]; then
                gh_license=$(jq '.licenseInfo.name' gh_repo_info.json)
              fi
              # No license information at all: record an empty field rather
              # than the word "null".
              if [ "${gh_license}" = 'null' ]; then
                gh_license=""
              fi

              # If the license is "Other", check for a LICENSE.md file and see if it contains actual license
              if [ "${gh_license}" = '"Other"' ]; then
                if [ -f "LICENSE.md" ]; then
                  echo "License file found: LICENSE.md"
                  if grep -q "MIT License" LICENSE.md; then
                    gh_license="MIT License"
                  fi
                fi
              fi
              echo "GitHub license: $gh_license"

              gh_owner=$(jq '.owner.login' gh_repo_info.json)
              echo "GitHub owner: $gh_owner"

              gh_code_of_conduct=$(jq '.codeOfConduct.name' gh_repo_info.json)
              echo "GitHub code of conduct: $gh_code_of_conduct"

              # Keep some files for processing later (each copy is best-effort;
              # not every repo has every file)
              cd ..
              mkdir -p "cache/${gh_repo_name}"
              cp "${gh_repo_name}/DESCRIPTION" "cache/${gh_repo_name}/" 2>/dev/null || true
              cp "${gh_repo_name}/NAMESPACE" "cache/${gh_repo_name}/" 2>/dev/null || true
              cp "${gh_repo_name}/README.md" "cache/${gh_repo_name}/" 2>/dev/null || true
              cp "${gh_repo_name}"/LICENSE* "cache/${gh_repo_name}/" 2>/dev/null || true

              # tidy up (":?" refuses to run rm with an empty name)
              rm -rf -- "${gh_repo_name:?}"
            fi

            ##### CRAN info
            echo "CRAN information"
            # An empty or whitespace-only field means there is no CRAN link.
            if [[ -z "${cran_url}" ]]; then
              cran_version=""
              cran_license=""
            else
              # Get CRAN package name from cran_link (the text after the last "=")
              CRAN_package_name="${cran_url##*=}"
              echo "CRAN package name: $CRAN_package_name"
              cran_description_file_link="https://cran.r-project.org/web/packages/${CRAN_package_name}/DESCRIPTION"

              # might not be on the CRAN: a failed download must neither abort
              # the step nor leave an error page behind to be parsed as a
              # DESCRIPTION file.
              if ! wget -O DESCRIPTION "${cran_description_file_link}"; then
                rm -f DESCRIPTION
              fi

              if [ -f DESCRIPTION ]; then
                # `|| true`: grep exits 1 when the field is absent, which
                # would otherwise stop the script under `bash -e`.
                cran_version=$(grep -oP '^Version:\s*\K([a-zA-Z0-9\. _-]+)\s*$' DESCRIPTION || true)
                cran_license=$(grep -oP '^License:\s*\K([a-zA-Z0-9\. _\-\(\)><=\+]+)\s*$' DESCRIPTION || true)
                # tidy up
                rm DESCRIPTION
              else
                cran_version=""
                cran_license=""
              fi
            fi
            echo "CRAN version: $cran_version"
            echo "CRAN license: $cran_license"

            # write to file
            echo "$name,$description,$cran_link,$cran_version,$cran_license,$gh_link,$gh_last_commit,$gh_last_release,$gh_license,$gh_owner,$status" >> output.csv
            echo "-----------------------------------"
          done < <(tail -n +2 package_list.csv | tr -d '\r')

          # copy files to a new directory for deployment
          mkdir -p gh-pages
          cp output.csv cache/
          cp functions.txt cache/

          # print the output files to the console for debugging
          cat output.csv
          cat functions.txt
        env:
          # Token read by the gh CLI for the `gh repo view` calls above.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Parse the output CSV file and generate an HTML page
      - name: build html page
        run: |
          python3 build_web_pages/build_packages_summary_page.py
          python3 build_web_pages/build_packages_long_page.py
          mv output/* gh-pages/
          mv cache/* gh-pages/

      - name: Upload static files as artifact
        id: deployment
        uses: actions/upload-pages-artifact@v3
        with:
          path: gh-pages

  # Deployment job: runs after build and deploys to the github-pages
  # environment using actions/deploy-pages.
  deploy:
    environment:
      name: github-pages
      # Refers to the `deployment` step of this job (defined below).
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-slim
    permissions:
      pages: write
      id-token: write
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4