Fix CRAN link check condition and clean up trailing commas in package… #92

Workflow file for this run

.github/workflows/build_packages_info.yml at 5074ab6

	# Start with the list of packages in package_list.csv, get external data about
	# them (e.g. last commit date), and generate HTML pages with the package
	# information. The workflow runs on a weekly schedule, on every push to main,
	# and can also be triggered manually from the Actions tab.
	# Olly Butters
	# 15/5/26
	name: Package list

	# Controls when the workflow will run
	on:
	  # 05:30 every Sunday (cron day-of-week 0)
	  schedule:
	    - cron: '30 5 * * 0'
	      timezone: "Europe/London"
	  # Any push to the main branch
	  push:
	    branches: [ "main" ]
	  # Manual run from the Actions tab
	  workflow_dispatch:

	jobs:
	  # Build job: collects metadata for every package in package_list.csv,
	  # renders the HTML pages and uploads them as a Pages artifact.
	  build:
	    runs-on: ubuntu-slim

	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      # Parse the package list, get the external data, and write to a new CSV file
	      - name: Build package data
	        run: |
	          echo $GITHUB_WORKSPACE
	          ls $GITHUB_WORKSPACE

	          # Make a cache directory for some info from the GitHub clones.
	          mkdir cache

	          # Create new output file
	          echo "Name,Description,CRAN link,CRAN version,CRAN license,GitHub link,Last commit,GitHub version,GitHub license,Github owner,Status" > output.csv

	          # Read the data in from package_list.csv. The name and gh_link fields are required.
	          # The header row is skipped by the `tail -n +2` feeding the loop.
	          while IFS="," read -r name description gh_link cran_link status
	          do
	            echo "-----------------------------------"
	            echo "name: $name"
	            echo "desc: $description"
	            echo "github_link: $gh_link"
	            echo "cran_link: $cran_link"
	            echo "status: $status"
	            echo ""

	            ##### GitHub info
	            echo -e "\nGitHub repo information"

	            # Extract the repo name from the GitHub link (last "/"-separated field)
	            gh_repo_name=$(echo $gh_link | awk -F/ '{print $NF}')
	            echo $gh_repo_name

	            # clone the GitHub repo and process it
	            if [ -z "${gh_link}" ]; then
	              echo "No GitHub link provided, skipping GitHub info for this package."
	              gh_last_commit=""
	              gh_last_release=""
	              gh_license=""
	              gh_functions=""
	              gh_owner=""
	              gh_code_of_conduct=""
	            else
	              echo "Cloning GitHub repo: $gh_link"
	              git clone $gh_link
	              cd $gh_repo_name

	              # Last commit date of repo
	              gh_last_commit=$(git log -1 --format=%cs)
	              echo "last commit: $gh_last_commit"

	              # Last release: describe the most recent tagged commit, if any tag exists
	              if [ $(git rev-list --tags --max-count=1) ]; then
	                gh_last_release=$(git describe --tags $(git rev-list --tags --max-count=1))
	              else
	                gh_last_release=""
	              fi

	              echo "last GitHub release: $gh_last_release"

	              # If there is a NAMESPACE file then parse it to get the function list and save the output
	              if [ -f "NAMESPACE" ]; then
	                echo "NAMESPACE file found"
	                # Capture the argument of each export(...) line and join them with commas
	                gh_functions=$(sed -n "s/^export(\(.*\))$/\1/p" NAMESPACE | tr '\n' ',')
	                echo "GitHub functions: $gh_functions"
	                if [ -n "${gh_functions}" ]; then
	                  echo "${name}: ${gh_functions}" >> ../functions.txt
	                else
	                  echo $name >> ../functions.txt
	                fi
	              fi

	              # Use the GitHub API to pull in info about the repository.
	              # This runs from inside the clone; the gh CLI reads GH_TOKEN from this step's env.
	              # -e so the leading \n is printed as a newline, as for the banner above.
	              echo -e "\nGitHub API information"
	              gh repo view --json codeOfConduct,description,homepageUrl,licenseInfo,owner,parent,updatedAt > gh_repo_info.json
	              echo "API response:"
	              cat gh_repo_info.json

	              # Get the license as it appears in GitHub.
	              # jq prints JSON strings with their double quotes, so an empty nickname is '""'.
	              gh_license=$(cat gh_repo_info.json | jq '.licenseInfo.nickname')
	              if [ -z "${gh_license}" ] || [ "${gh_license}" = '""' ]; then
	                gh_license=$(cat gh_repo_info.json | jq '.licenseInfo.name')
	              fi

	              # If the license is "Other", check for a LICENSE.md file and see if it names an actual license
	              if [ "${gh_license}" = '"Other"' ]; then
	                if [ -f "LICENSE.md" ]; then
	                  echo "License file found: LICENSE.md"
	                  if grep -q "MIT License" LICENSE.md; then
	                    gh_license="MIT License"
	                  fi
	                fi
	              fi

	              echo "GitHub license: $gh_license"

	              gh_owner=$(cat gh_repo_info.json | jq '.owner.login')
	              echo "GitHub owner: $gh_owner"

	              gh_code_of_conduct=$(cat gh_repo_info.json | jq '.codeOfConduct.name')
	              echo "GitHub code of conduct: $gh_code_of_conduct"

	              # Keep some files for processing later; files missing from the repo are skipped silently
	              cd ..
	              mkdir cache/$gh_repo_name
	              cp $gh_repo_name/DESCRIPTION cache/$gh_repo_name/ 2>/dev/null || true
	              cp $gh_repo_name/NAMESPACE cache/$gh_repo_name/ 2>/dev/null || true
	              cp $gh_repo_name/README.md cache/$gh_repo_name/ 2>/dev/null || true
	              cp $gh_repo_name/LICENSE* cache/$gh_repo_name/ 2>/dev/null || true

	              # tidy up
	              rm -rf $gh_repo_name
	            fi


	            ##### CRAN info
	            echo "CRAN information"
	            # Treat an empty or single-space cran_link field as "not on CRAN"
	            if [[ -z "${cran_link}" || "${cran_link}" == " " ]]; then
	              cran_version=""
	              cran_license=""
	            else

	              # Get CRAN package name from cran_link (text after the last "=")
	              CRAN_package_name=$(echo $cran_link | awk -F= '{print $NF}')
	              echo "CRAN package name: $CRAN_package_name"

	              cran_description_file_link="https://cran.r-project.org/web/packages/${CRAN_package_name}/DESCRIPTION"
	              wget --content-on-error $cran_description_file_link

	              # might not be on the CRAN
	              if [ -f DESCRIPTION ]; then
	                cran_version=$(grep -oP '^Version:\s\K([a-zA-Z0-9\. _-]+)\s$' DESCRIPTION)
	                cran_license=$(grep -oP '^License:\s\K([a-zA-Z0-9\. _\-><=\+]+)\s$' DESCRIPTION)

	                # tidy up
	                rm DESCRIPTION
	              else
	                cran_version=""
	                cran_license=""
	              fi
	            fi

	            echo "CRAN version: $cran_version"
	            echo "CRAN license: $cran_license"

	            # write to file (column order matches the header row written above)
	            echo "$name,$description,$cran_link,$cran_version,$cran_license,$gh_link,$gh_last_commit,$gh_last_release,$gh_license,$gh_owner,$status" >> output.csv

	            echo "-----------------------------------"

	          done < <(tail -n +2 package_list.csv)

	          # Create the deployment directory and stage the generated files in cache/;
	          # the next step moves cache/* into gh-pages/
	          mkdir gh-pages
	          cp output.csv cache/
	          cp functions.txt cache/

	          # print the output files to the console for debugging
	          cat output.csv
	          cat functions.txt

	        env:
	          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

	      # Parse the output CSV file and generate an HTML page
	      - name: build html page
	        run: |
	          python3 build_web_pages/build_packages_summary_page.py
	          python3 build_web_pages/build_packages_long_page.py
	          mv output/* gh-pages/
	          mv cache/* gh-pages/

	      - name: Upload static files as artifact
	        id: deployment
	        uses: actions/upload-pages-artifact@v3
	        with:
	          path: gh-pages

	  # Deployment job: runs after build and publishes to GitHub Pages
	  deploy:
	    environment:
	      name: github-pages
	      url: ${{ steps.deployment.outputs.page_url }}
	    runs-on: ubuntu-slim
	    permissions:
	      pages: write
	      id-token: write
	    needs: build
	    steps:
	      - name: Deploy to GitHub Pages
	        id: deployment
	        uses: actions/deploy-pages@v4

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix CRAN link check condition and clean up trailing commas in package… #92

Workflow file

Fix CRAN link check condition and clean up trailing commas in package… #92

Uh oh!

Workflow file for this run