Skip to content

Crawl OMSCS Catalog #22

Crawl OMSCS Catalog

Crawl OMSCS Catalog #22

Workflow file for this run

# Workflow that refreshes the OMSCS catalog data on a weekly schedule or on
# demand.
name: Crawl OMSCS Catalog

on:
  # Run weekly on Mondays at 9am UTC
  schedule:
    - cron: '0 9 * * 1'
  # Allow manual trigger
  workflow_dispatch:
jobs:
  # Runs the catalog crawler and, when anything under static/ changed, pushes
  # a timestamped branch and opens a pull request against main.
  crawl:
    runs-on: ubuntu-latest
    # The job pushes a branch and opens a PR using the workflow token.
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install tsx
        run: npm install -g tsx

      # Snapshot the committed catalog before the crawler overwrites it so the
      # next steps can diff against it. When no catalog exists yet, an empty
      # object is used, which makes every crawled course count as new.
      - name: Save current catalog state
        run: |
          if [ -f static/courses.json ]; then
            cp static/courses.json /tmp/courses-before.json
          else
            echo '{}' > /tmp/courses-before.json
          fi

      - name: Run catalog crawler
        run: npx tsx catalog/crawler.ts

      # Compares the top-level keys of the catalog before and after the crawl.
      # Writes one markdown bullet per new key to /tmp/new-courses.txt and
      # sets the has_new_courses step output (not read by any step visible in
      # this job; the PR step re-reads the file instead).
      - name: Detect new courses
        id: detect-changes
        run: |
          # Only stdout of the node script is captured; its warnings go to
          # stderr and therefore show up in the step log, not in the PR body.
          NEW_COURSES=$(node -e "
            const fs = require('fs');

            // Best-effort load: a missing or invalid file is treated as an
            // empty catalog, but the reason is reported instead of dropped.
            const readCatalog = (file) => {
              try {
                return JSON.parse(fs.readFileSync(file, 'utf8'));
              } catch (e) {
                console.error('warning: could not load ' + file + ': ' + e.message);
                return {};
              }
            };

            const before = readCatalog('/tmp/courses-before.json');
            const after = readCatalog('static/courses.json');

            // NOTE(review): assumes courses.json is an object keyed by course
            // id whose values carry a name field - confirm against the crawler.
            const beforeIds = new Set(Object.keys(before));
            const newCourses = Object.keys(after)
              .filter(id => !beforeIds.has(id))
              .map(id => '- **' + id + '** ' + after[id].name);

            if (newCourses.length > 0) {
              console.log(newCourses.join('\n'));
            }
          ")

          # printf instead of echo: the list is crawled text and must be
          # written verbatim.
          printf '%s\n' "$NEW_COURSES" > /tmp/new-courses.txt

          if [ -n "$NEW_COURSES" ]; then
            echo "has_new_courses=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_new_courses=false" >> "$GITHUB_OUTPUT"
          fi

      # Stages static/, and only when the staged diff is non-empty commits it
      # on a new timestamped branch, pushes it, and opens a PR whose body
      # lists any newly detected courses.
      - name: Create PR if changed
        env:
          # Token used by the gh CLI to create the pull request.
          GH_TOKEN: ${{ github.token }}
        run: |
          git config user.name 'Christian Tran'
          git config user.email 'ctran4347@gmail.com'
          git add static/

          if git diff --staged --quiet; then
            echo "No changes to commit"
          else
            BRANCH_NAME="catalog-update-$(date -u +%Y%m%d-%H%M%S)"
            TIMESTAMP=$(date -u +%Y-%m-%dT%H:%M:%SZ)

            git checkout -b "$BRANCH_NAME"
            git commit -m "Update OMSCS catalog $TIMESTAMP"
            git push -u origin "$BRANCH_NAME"

            NEW_COURSES=$(cat /tmp/new-courses.txt)

            # Single body template: the "New Courses" section appears only
            # when courses were added; otherwise an extra bullet records that
            # the update is metadata-only.
            {
              echo "Automated update of OMSCS catalog data."
              echo ""
              if [ -n "$NEW_COURSES" ]; then
                echo "## New Courses"
                printf '%s\n' "$NEW_COURSES"
                echo ""
              fi
              echo "## Changes"
              echo "- Updated course and specialization data from omscs.gatech.edu"
              if [ -z "$NEW_COURSES" ]; then
                echo "- No new courses detected (metadata updates only)"
              fi
              echo ""
              echo "## Auto-generated"
              echo "This PR was automatically created by the OMSCS Catalog crawler GitHub Action."
            } > /tmp/pr-body.md

            gh pr create \
              --title "Update OMSCS catalog $TIMESTAMP" \
              --body-file /tmp/pr-body.md \
              --base main \
              --head "$BRANCH_NAME"
          fi