From f2af70a79a1b7ea292622db0dbfe6a8f2ce757f0 Mon Sep 17 00:00:00 2001 From: Laura Weber Date: Wed, 4 Feb 2026 02:25:44 -0800 Subject: [PATCH] Major data update script and workflow refactoring. Customizable data updates via text files listing scripts. Renamed MASTER script to UPDATE, reflecting the new versatility. Moved most workflow logic into yml files directly. Divided data script runs for different credentials. Set workflow concurrency groups. Removed unused file-by-year breakdowns. --- .github/scripts/cache.sh | 23 ------ .github/scripts/{update.sh => validate.sh} | 32 +------- .github/workflows/cache.yml | 41 +++++++--- .github/workflows/update.yml | 80 ++++++++++++++---- .gitignore | 1 - ...MASTER_UPDATE.txt => LAST_FULL_UPDATE.txt} | 0 _visualize/README.md | 12 ++- _visualize/scripts/CACHE.sh | 50 ------------ _visualize/scripts/MASTER.sh | 81 ------------------- _visualize/scripts/README.md | 2 +- _visualize/scripts/UPDATE.sh | 62 ++++++++++++++ _visualize/scripts/UPDATE_CACHE.txt | 2 + _visualize/scripts/UPDATE_CORE.txt | 19 +++++ _visualize/scripts/UPDATE_FULL.txt | 22 +++++ _visualize/scripts/UPDATE_MEMBERS.txt | 3 + _visualize/scripts/build_yearlist.py | 34 -------- visualize/github-data/YEARS.json | 1 - 17 files changed, 212 insertions(+), 253 deletions(-) delete mode 100755 .github/scripts/cache.sh rename .github/scripts/{update.sh => validate.sh} (64%) mode change 100755 => 100644 rename _visualize/{LAST_MASTER_UPDATE.txt => LAST_FULL_UPDATE.txt} (100%) delete mode 100755 _visualize/scripts/CACHE.sh delete mode 100755 _visualize/scripts/MASTER.sh create mode 100755 _visualize/scripts/UPDATE.sh create mode 100644 _visualize/scripts/UPDATE_CACHE.txt create mode 100644 _visualize/scripts/UPDATE_CORE.txt create mode 100644 _visualize/scripts/UPDATE_FULL.txt create mode 100644 _visualize/scripts/UPDATE_MEMBERS.txt delete mode 100644 _visualize/scripts/build_yearlist.py delete mode 100644 visualize/github-data/YEARS.json diff --git 
a/.github/scripts/cache.sh b/.github/scripts/cache.sh deleted file mode 100755 index 371a3cb5b..000000000 --- a/.github/scripts/cache.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh -l - -set -eu - -### VARIABLES ### - -# From action env: -# REPO_DIR - -ACT_SCRIPT_PATH=_visualize/scripts - -### SETUP ### - -# Store absolute path -cd $REPO_DIR -REPO_ROOT=$(pwd) - -### RUN CACHE SCRIPT ### - -cd $REPO_ROOT/$ACT_SCRIPT_PATH -./CACHE.sh - -exit 0 diff --git a/.github/scripts/update.sh b/.github/scripts/validate.sh old mode 100755 new mode 100644 similarity index 64% rename from .github/scripts/update.sh rename to .github/scripts/validate.sh index 7639407a8..dfd69ff98 --- a/.github/scripts/update.sh +++ b/.github/scripts/validate.sh @@ -6,30 +6,15 @@ set -eu # From action env: # REPO_DIR +# TAG -ACT_LOG_PATH=_visualize/LAST_MASTER_UPDATE.txt +ACT_LOG_PATH=_visualize/LAST_${TAG}_UPDATE.txt ACT_INPUT_PATH=_visualize ACT_DATA_PATH=visualize/github-data -ACT_SCRIPT_PATH=_visualize/scripts - -### SETUP ### - -# Store absolute path -cd $REPO_DIR -REPO_ROOT=$(pwd) - -# Store previous END timestamp -OLD_END=$(cat $ACT_LOG_PATH | grep END | cut -f 2) -OLD_END=$(date --date="$OLD_END" "+%s") - -### RUN MASTER SCRIPT ### - -cd $REPO_ROOT/$ACT_SCRIPT_PATH -./MASTER.sh ### VALIDATE UPDATE ### -cd $REPO_ROOT +cd $REPO_DIR # Timestamp log changed cat $ACT_LOG_PATH @@ -50,17 +35,6 @@ if [ $(cat $ACT_LOG_PATH | grep -c FAILED) -ne "0" ] || [ $(cat $ACT_LOG_PATH | echo "Timestamp log valid" fi -# New START is later than previous END -NEW_START=$(cat $ACT_LOG_PATH | grep START | cut -f 2) -NEW_START=$(date --date="$NEW_START" "+%s") -if [ "$OLD_END" -gt "$NEW_START" ] - then - echo "UPDATE FAILED - New START is earlier than previous END" - exit 1 - else - echo "START timestamp valid" -fi - # All changes are to valid files only git diff --name-only HEAD CHANGE_COUNT=$(git diff --name-only HEAD | grep -c -E ".+") diff --git a/.github/workflows/cache.yml b/.github/workflows/cache.yml index 
89b01a204..cf848e2e6 100644 --- a/.github/workflows/cache.yml +++ b/.github/workflows/cache.yml @@ -3,12 +3,18 @@ name: Routine Data Cache Request on: workflow_dispatch: schedule: - - cron: "45 8 * * *" + - cron: '45 8 * * *' + +concurrency: + group: data-cache defaults: run: shell: bash +env: + TAG: CACHE + jobs: runDataUpdate: name: Run Cache Request @@ -20,35 +26,44 @@ jobs: - name: Store timestamp run: | echo "TIMESTAMP=$(date -u +"%F-%H")" >> "$GITHUB_ENV" + - name: Checkout uses: actions/checkout@v6 with: path: ${{ env.REPO_DIR }} token: ${{ secrets.GITHUB_TOKEN }} + - name: Setup python uses: actions/setup-python@v6 with: - python-version: "3.11" - cache: "pip" - cache-dependency-path: "${{ env.REPO_DIR }}/_visualize/scripts/requirements.txt" + python-version: '3.11' + cache: 'pip' + cache-dependency-path: '${{ env.REPO_DIR }}/_visualize/scripts/requirements.txt' + - name: Install dependencies - run: pip install -r ${{ env.REPO_DIR }}/_visualize/scripts/requirements.txt - - name: Run cache script - run: ./${{ env.REPO_DIR }}/.github/scripts/cache.sh + run: pip install -r $REPO_DIR/_visualize/scripts/requirements.txt + + - name: Run data collection script + run: | + set -eu + cd $REPO_DIR/_visualize/scripts + ./UPDATE.sh $TAG env: GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Show health stats if: ${{ always() }} run: | - cat ${{ env.REPO_DIR }}/_visualize/LAST_CACHE_REQUEST.txt || true - echo "Warning Count: $(grep -c 'Warning' ${{ env.REPO_DIR }}/_visualize/LAST_CACHE_REQUEST.log)" - echo "From Timeouts: $(grep -c 'but failed' ${{ env.REPO_DIR }}/_visualize/LAST_CACHE_REQUEST.log)" - echo "Limit Reached: $(grep -c 'rate limit exceeded' ${{ env.REPO_DIR }}/_visualize/LAST_CACHE_REQUEST.log)" + cat $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.txt || true + echo "Warning Count: $(grep -c 'Warning' $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.log)" + echo "From Timeouts: $(grep -c 'but failed' $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.log)" + echo "Limit Reached: 
$(grep -c 'rate limit exceeded' $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.log)" + - name: Save log files if: ${{ always() }} uses: actions/upload-artifact@v6 with: name: logfiles_${{ env.TIMESTAMP }}_cache path: | - ${{ env.REPO_DIR }}/_visualize/LAST_CACHE_REQUEST.txt - ${{ env.REPO_DIR }}/_visualize/LAST_CACHE_REQUEST.log + ${{ env.REPO_DIR }}/_visualize/LAST_${{ env.TAG }}_UPDATE.txt + ${{ env.REPO_DIR }}/_visualize/LAST_${{ env.TAG }}_UPDATE.log diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 1e2574a5c..d6b860e56 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -3,7 +3,11 @@ name: Routine Data Update on: workflow_dispatch: schedule: - - cron: "45 10 * * *" + - cron: '45 10 * * *' + +concurrency: + group: data-updates + cancel-in-progress: true defaults: run: @@ -20,35 +24,72 @@ jobs: - name: Store timestamp run: | echo "TIMESTAMP=$(date -u +"%F-%H")" >> "$GITHUB_ENV" + - name: Checkout uses: actions/checkout@v6 with: path: ${{ env.REPO_DIR }} token: ${{ secrets.GITHUB_TOKEN }} persist-credentials: false + - name: Setup python uses: actions/setup-python@v6 with: - python-version: "3.11" - cache: "pip" - cache-dependency-path: "${{ env.REPO_DIR }}/_visualize/scripts/requirements.txt" + python-version: '3.11' + cache: 'pip' + cache-dependency-path: '${{ env.REPO_DIR }}/_visualize/scripts/requirements.txt' + - name: Install dependencies - run: pip install -r ${{ env.REPO_DIR }}/_visualize/scripts/requirements.txt - - name: Run update script + run: pip install -r $REPO_DIR/_visualize/scripts/requirements.txt + + - name: Create GitHub App Installation Token1 + uses: actions/create-github-app-token@v2 + id: app-token1 + with: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.PRIVATE_KEY }} + + - name: Run data collection script with App Installation Token + run: | + set -eu + cd $REPO_DIR/_visualize/scripts + ./UPDATE.sh $TAG + env: + GITHUB_API_TOKEN: ${{ steps.app-token1.outputs.token }} + TAG: MEMBERS 
+ + - name: Validate members data updates + run: bash "$REPO_DIR/.github/scripts/validate.sh" + env: + TAG: MEMBERS + + - name: Run data collection script with Action Token + run: | + set -eu + cd $REPO_DIR/_visualize/scripts + ./UPDATE.sh $TAG env: GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: ./${{ env.REPO_DIR }}/.github/scripts/update.sh + TAG: CORE + + - name: Validate core data updates + run: bash "$REPO_DIR/.github/scripts/validate.sh" + env: + TAG: CORE + - name: Create GitHub App Installation Token uses: actions/create-github-app-token@v2 id: app-token with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.PRIVATE_KEY }} + - name: Get GitHub App User ID id: get-user-id env: GH_TOKEN: ${{ steps.app-token.outputs.token }} run: echo "user-id=$(gh api "/users/${{ steps.app-token.outputs.app-slug }}[bot]" --jq .id)" >> "$GITHUB_OUTPUT" + - name: Configure git env: GH_TOKEN: ${{ steps.app-token.outputs.token }} @@ -56,30 +97,37 @@ jobs: gh auth setup-git git config --global user.name '${{ steps.app-token.outputs.app-slug }}[bot]' git config --global user.email '${{ steps.get-user-id.outputs.user-id }}+${{ steps.app-token.outputs.app-slug }}[bot]@users.noreply.github.com' + - name: Commit updated data env: GH_TOKEN: ${{ steps.app-token.outputs.token }} run: | - pushd ${{ env.REPO_DIR }} + set -eu + cd $REPO_DIR git stash git pull --ff-only git stash pop git add -A . 
- git commit -m "${{ env.TIMESTAMP }} Data Update by ${{ steps.app-token.outputs.app-slug }}" + git commit -m "$TIMESTAMP Data Update by ${{ steps.app-token.outputs.app-slug }}" git push - popd + - name: Show health stats if: ${{ always() }} run: | - cat ${{ env.REPO_DIR }}/_visualize/LAST_MASTER_UPDATE.txt || true - echo "Warning Count: $(grep -c 'Warning' ${{ env.REPO_DIR }}/_visualize/LAST_MASTER_UPDATE.log)" - echo "From Timeouts: $(grep -c 'but failed' ${{ env.REPO_DIR }}/_visualize/LAST_MASTER_UPDATE.log)" - echo "Limit Reached: $(grep -c 'rate limit exceeded' ${{ env.REPO_DIR }}/_visualize/LAST_MASTER_UPDATE.log)" + for TAG in MEMBERS CORE; do + cat $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.txt || true + echo "Warning Count: $(grep -c 'Warning' $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.log)" + echo "From Timeouts: $(grep -c 'but failed' $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.log)" + echo "Limit Reached: $(grep -c 'rate limit exceeded' $REPO_DIR/_visualize/LAST_${TAG}_UPDATE.log)" + done + - name: Save log files if: ${{ always() }} uses: actions/upload-artifact@v6 with: name: logfiles_${{ env.TIMESTAMP }}_update path: | - ${{ env.REPO_DIR }}/_visualize/LAST_MASTER_UPDATE.txt - ${{ env.REPO_DIR }}/_visualize/LAST_MASTER_UPDATE.log + ${{ env.REPO_DIR }}/_visualize/LAST_MEMBERS_UPDATE.txt + ${{ env.REPO_DIR }}/_visualize/LAST_MEMBERS_UPDATE.log + ${{ env.REPO_DIR }}/_visualize/LAST_CORE_UPDATE.txt + ${{ env.REPO_DIR }}/_visualize/LAST_CORE_UPDATE.log diff --git a/.gitignore b/.gitignore index fed18566a..0348c2908 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,6 @@ _site Gemfile.lock *.pyc _visualize/*.log -_visualize/LAST_CACHE_REQUEST.txt .DS_Store .vscode/ .bundle diff --git a/_visualize/LAST_MASTER_UPDATE.txt b/_visualize/LAST_FULL_UPDATE.txt similarity index 100% rename from _visualize/LAST_MASTER_UPDATE.txt rename to _visualize/LAST_FULL_UPDATE.txt diff --git a/_visualize/README.md b/_visualize/README.md index 893125338..68e2495e3 100644 --- 
a/_visualize/README.md +++ b/_visualize/README.md @@ -2,7 +2,7 @@ ```bash cd _visualize/scripts/ -./MASTER.sh +./UPDATE.sh ``` _(Additional script functionality detailed in the [`./scripts` section below][jump2 scripts].)_ @@ -29,7 +29,11 @@ New files are created for each type of data structure. For most files, data is overwritten each time the scripts are run. Other scripts may collect cumulative data with a daily timestamp. If one of these scripts is run multiple times in a single day, the entry for that day will be overwritten. -Running [`MASTER.sh`][mastersh] will run all of the necessary scripts in the appropriate order to fetch the latest data. It will also update [`LAST_MASTER_UPDATE.txt`][lastmasterup] to record when this complete data update was last run. +Running [`UPDATE.sh`][updatesh] will run all of the necessary scripts in the appropriate order to fetch the latest data. It will also update `LAST_FULL_UPDATE.txt` to record when this complete data update was last run. + +You can also run the script with an argument, `UPDATE.sh <TAG>`, to select a custom set of scripts defined in `UPDATE_<TAG>.txt`. +(See [`UPDATE_FULL.txt`][updatefull] for the default set of scripts.) +In that case, timestamps will be recorded in `LAST_<TAG>_UPDATE.txt` instead. The scripts are only for gathering new data. You do not need them to run in order to view the webpage visualizations. @@ -39,8 +43,8 @@ The scripts are only for gathering new data. 
You do not need them to run in orde [queries dir]: queries [scripts dir]: scripts [requires]: scripts/requirements.txt -[mastersh]: scripts/MASTER.sh -[lastmasterup]: LAST_MASTER_UPDATE.txt +[updatesh]: scripts/UPDATE.sh +[updatefull]: scripts/UPDATE_FULL.txt [gitgraphql]: https://developer.github.com/v4/ [oauth]: https://github.com/settings/developers [personaltoken]: https://github.com/settings/tokens diff --git a/_visualize/scripts/CACHE.sh b/_visualize/scripts/CACHE.sh deleted file mode 100755 index 54429ccca..000000000 --- a/_visualize/scripts/CACHE.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# Run this script to trigger GitHub's data caching -# https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#best-practices-for-caching - -exec &> ../LAST_CACHE_REQUEST.log - -export GITHUB_DATA=../../visualize/github-data -DATELOG=../LAST_CACHE_REQUEST.txt - -# On exit -function finish { - # Log end time - echo -e "END\t$(date -u)" >> $DATELOG -} -trap finish EXIT - -# Stop and Log for failed scripts -function errorCheck() { - if [ $ret -ne 0 ]; then - echo "FAILED - $1" - echo -e "FAILED\t$1" >> $DATELOG - exit 1 - fi -} - -# Basic script run procedure -function runScript() { - echo "Run - $1" - python -u $1 - ret=$? 
- errorCheck "$1" -} - - -# Check Python requirements -runScript python_check.py - - -echo "RUNNING CACHE REQUEST SCRIPT" - -# Log start time -echo -e "$(date -u '+%F-%H')" > $DATELOG -echo -e "START\t$(date -u)" >> $DATELOG - - -# --- CHACHEABLE QUERIES --- -runScript cache_repos_activitycommits.py - - -echo "CACHE REQUEST COMPLETE" diff --git a/_visualize/scripts/MASTER.sh b/_visualize/scripts/MASTER.sh deleted file mode 100755 index 6827e7884..000000000 --- a/_visualize/scripts/MASTER.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash -# Run this script to refresh all data - -exec &> ../LAST_MASTER_UPDATE.log - -export GITHUB_DATA=../../visualize/github-data -DATELOG=../LAST_MASTER_UPDATE.txt - -# On exit -function finish { - # Log end time - echo -e "END\t$(date -u)" >> $DATELOG -} -trap finish EXIT - -# Stop and Log for failed scripts -function errorCheck() { - if [ $ret -ne 0 ]; then - echo "FAILED - $1" - echo -e "FAILED\t$1" >> $DATELOG - exit 1 - fi -} - -# Basic script run procedure -function runScript() { - echo "Run - $1" - python -u $1 - ret=$? 
- errorCheck "$1" -} - - -# Check Python requirements -runScript python_check.py - - -echo "RUNNING MASTER UPDATE SCRIPT" - -# Log start time -echo -e "$(date -u '+%F-%H')" > $DATELOG -echo -e "START\t$(date -u)" >> $DATELOG - - -# RUN THIS FIRST -runScript cleanup_inputs.py - - -# --- BASIC DATA --- -# Required before any other repo scripts (output used as repo list) -runScript get_repos_info.py -# Required before any other member scripts (output used as member list) -runScript get_internal_members.py - - -# --- EXTERNAL V INTERNAL --- -runScript get_members_extrepos.py -runScript get_repos_users.py - - -# --- ADDITIONAL REPO DETAILS --- -runScript get_repos_languages.py -runScript get_repos_topics.py -runScript get_repos_activitycommits.py -runScript get_repos_dependencies.py -runScript get_dependency_info.py - - -# --- HISTORY FOR ALL TIME --- -runScript get_repos_starhistory.py -runScript get_repos_releases.py -runScript get_repos_creationhistory.py - - -# RUN THIS LAST -runScript build_yearlist.py # Used in case of long term cumulative data - -runScript gather_repo_metadata.py # Generate simplified metadata file - - -echo "MASTER UPDATE COMPLETE" diff --git a/_visualize/scripts/README.md b/_visualize/scripts/README.md index 70a4681e4..8bfe6cf3a 100644 --- a/_visualize/scripts/README.md +++ b/_visualize/scripts/README.md @@ -15,5 +15,5 @@ source venv/bin/activate pip install -r requirements.txt # Run the collection script -./MASTER.sh +./UPDATE.sh ``` diff --git a/_visualize/scripts/UPDATE.sh b/_visualize/scripts/UPDATE.sh new file mode 100755 index 000000000..34a6ab97b --- /dev/null +++ b/_visualize/scripts/UPDATE.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Run this script to refresh data. +# Script selection can be customized through a text file UPDATE_<TAG>.txt +# Providing the argument <TAG> to this script will select UPDATE_<TAG>.txt +# The input list from UPDATE_FULL.txt is selected by default. 
+ +TAG="${1:-FULL}" +if [ ! -f "UPDATE_${TAG}.txt" ]; then + echo "UPDATE FAILED - script list UPDATE_${TAG}.txt not found" >&2 + exit 1 +fi + +exec &> "../LAST_${TAG}_UPDATE.log" + +export GITHUB_DATA=../../visualize/github-data +DATELOG="../LAST_${TAG}_UPDATE.txt" + +# On exit +function finish { + # Log end time + echo -e "END\t$(date -u)" >> "$DATELOG" +} +trap finish EXIT + +# Stop and Log for failed scripts +function errorCheck() { + if [ "$ret" -ne 0 ]; then + echo "FAILED - $1" + echo -e "FAILED\t$1" >> "$DATELOG" + exit 1 + fi +} + +# Basic script run procedure +function runScript() { + echo "Run - $1" + python -u "$1" + ret=$? + errorCheck "$1" +} + + +# Check Python requirements +runScript python_check.py + +echo "RUNNING ${TAG} UPDATE SCRIPT" + +# Log start time +echo -e "$(date -u '+%F-%H')" > "$DATELOG" +echo -e "START\t$(date -u)" >> "$DATELOG" + +# RUN THIS FIRST +runScript cleanup_inputs.py + +# DATA COLLECTION (comment lines and blank lines in the list are skipped) +readarray -t script_array < <(grep -v -e '^#' -e '^[[:space:]]*$' "UPDATE_${TAG}.txt") +echo "Data scripts queued: (${script_array[*]})" +for datascript in "${script_array[@]}"; do + runScript "${datascript}.py" +done + +echo "${TAG} UPDATE COMPLETE" diff --git a/_visualize/scripts/UPDATE_CACHE.txt b/_visualize/scripts/UPDATE_CACHE.txt new file mode 100644 index 000000000..e01af22a0 --- /dev/null +++ b/_visualize/scripts/UPDATE_CACHE.txt @@ -0,0 +1,2 @@ +# --- CACHEABLE QUERIES --- +cache_repos_activitycommits diff --git a/_visualize/scripts/UPDATE_CORE.txt b/_visualize/scripts/UPDATE_CORE.txt new file mode 100644 index 000000000..d84c73af9 --- /dev/null +++ b/_visualize/scripts/UPDATE_CORE.txt @@ -0,0 +1,19 @@ +# --- BASIC DATA --- +get_repos_info +# Required before any other repo scripts (output used as repo list) +# --- EXTERNAL V INTERNAL --- +get_members_extrepos +get_repos_users +# --- ADDITIONAL REPO DETAILS --- +get_repos_languages +get_repos_topics +get_repos_activitycommits 
+get_repos_dependencies +get_dependency_info +# --- HISTORY FOR ALL TIME --- +get_repos_starhistory +get_repos_releases +get_repos_creationhistory +# --- ADDITIONAL DATA PROCESSING --- 
+gather_repo_metadata +# Generates simplified metadata file diff --git a/_visualize/scripts/UPDATE_FULL.txt b/_visualize/scripts/UPDATE_FULL.txt new file mode 100644 index 000000000..eb35ebb11 --- /dev/null +++ b/_visualize/scripts/UPDATE_FULL.txt @@ -0,0 +1,22 @@ +# DEFAULT +# --- BASIC DATA --- +get_repos_info +# Required before any other repo scripts (output used as repo list) +get_internal_members +# Required before any other member scripts (output used as member list) +# --- EXTERNAL V INTERNAL --- +get_members_extrepos +get_repos_users +# --- ADDITIONAL REPO DETAILS --- +get_repos_languages +get_repos_topics +get_repos_activitycommits +get_repos_dependencies +get_dependency_info +# --- HISTORY FOR ALL TIME --- +get_repos_starhistory +get_repos_releases +get_repos_creationhistory +# --- ADDITIONAL DATA PROCESSING --- +gather_repo_metadata +# Generates simplified metadata file diff --git a/_visualize/scripts/UPDATE_MEMBERS.txt b/_visualize/scripts/UPDATE_MEMBERS.txt new file mode 100644 index 000000000..76db534a2 --- /dev/null +++ b/_visualize/scripts/UPDATE_MEMBERS.txt @@ -0,0 +1,3 @@ +# --- BASIC DATA --- +get_internal_members +# Required before any other member scripts (output used as member list) diff --git a/_visualize/scripts/build_yearlist.py b/_visualize/scripts/build_yearlist.py deleted file mode 100644 index 422c4957a..000000000 --- a/_visualize/scripts/build_yearlist.py +++ /dev/null @@ -1,34 +0,0 @@ -from scraper.github import queryManager as qm -from os import environ as env -import os.path - -ghDataDir = env.get("GITHUB_DATA", "../github-data") -yearDict = {} - -# Gather all file name data -print("Checking GitHub data file names with year stamps...") -if not os.path.exists(ghDataDir): - raise FileNotFoundError("Directory path '%s' does not exist." 
% (ghDataDir)) -for file in os.listdir(ghDataDir): - if file.endswith(".json"): - nameSplit = file.split(".") - # Must have format "somePrefix.0000.json" - if not nameSplit[0] == "YEARS" and nameSplit[1].isdigit(): - prefix = nameSplit[0] - yearX = int(nameSplit[1]) - if prefix not in yearDict: - yearDict[prefix] = [] - yearDict[prefix].append(yearX) - -print("Sorting year data...") -# Remove duplicate years (though shouldn't be possible) and sort list -for prefix in yearDict.keys(): - yearList = yearDict[prefix] - yearList = list(set(yearList)) - yearList.sort() - yearDict[prefix] = yearList - -yearData = qm.DataManager("%s/YEARS.json" % ghDataDir, False) -yearData.fileSave(newline="\n") - -print("Done!\n") diff --git a/visualize/github-data/YEARS.json b/visualize/github-data/YEARS.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/visualize/github-data/YEARS.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file