# process #1317
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow "process": runs after "fetch-marketdata" completes, or on demand
# for an explicit date range.
name: process

on:
  # Automatic trigger: fires when the "fetch-marketdata" workflow completes.
  # The job-level `if` decides whether the run's conclusion is acceptable.
  workflow_run:
    workflows:
      - "fetch-marketdata"
    types:
      - completed

  # Manual trigger: the caller supplies an inclusive date range.
  workflow_dispatch:
    inputs:
      start_date:
        description: 'Start date, YYYY/MM/DD'
        required: true
        type: string
        default: '2024/12/09'
      end_date:
        description: 'End date, YYYY/MM/DD'
        required: true
        type: string
        default: '2025/10/08'

# Scope granted to the workflow's GITHUB_TOKEN.
permissions:
  contents: write
jobs:
  # Normalizes the raw per-exchange JSON found under marketdata/<date>/raw/ in
  # this repository, writes the result into a checkout of the remote data
  # repository, runs the aggregate action, then commits and pushes the remote
  # checkout.
  process:
    # Run for manual dispatches, or when the triggering workflow succeeded.
    if: github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    env:
      CI_COMMIT_AUTHOR: 'github-actions[bot]'
      CI_COMMIT_AUTHOR_EMAIL: '41898282+github-actions[bot]@users.noreply.github.com'
      REMOTE_REPO: data-us
    steps:
      # Automatic runs cover yesterday..today in US/Eastern time.
      - name: Set up environment variables
        if: github.event_name == 'workflow_run'
        run: |
          echo "START_DATE=$(TZ='US/Eastern' date --date yesterday +'%Y/%m/%d')" >> "$GITHUB_ENV"
          echo "END_DATE=$(TZ='US/Eastern' date +'%Y/%m/%d')" >> "$GITHUB_ENV"

      # Manual runs take the range from the dispatch inputs. The inputs are
      # handed to the script through environment variables instead of being
      # expanded into the script text, so their content cannot be executed as
      # shell code.
      - name: Set up environment variables (manual run)
        if: github.event_name == 'workflow_dispatch'
        env:
          INPUT_START_DATE: ${{ inputs.start_date }}
          INPUT_END_DATE: ${{ inputs.end_date }}
        run: |
          set -euo pipefail
          # Re-format through date(1): an unparsable value fails the step, and
          # only a clean YYYY/MM/DD string is ever written to GITHUB_ENV.
          start_date=$(date -d "$INPUT_START_DATE" +'%Y/%m/%d')
          end_date=$(date -d "$INPUT_END_DATE" +'%Y/%m/%d')
          {
            echo "START_DATE=${start_date}"
            echo "END_DATE=${end_date}"
          } >> "$GITHUB_ENV"

      # NOTE(review): env.DATA_DIR is not defined anywhere in this file, so the
      # sparse-checkout lists below appear to evaluate to an empty value.
      # Confirm the intended directory and define DATA_DIR, or drop the
      # sparse-checkout inputs.
      - name: Sparse checkout current repo
        if: github.event_name == 'workflow_run'
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          filter: 'blob:none'
          sparse-checkout: |
            ${{ env.DATA_DIR }}
          sparse-checkout-cone-mode: true

      - name: Checkout current repo on manual trigger
        if: github.event_name == 'workflow_dispatch'
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          filter: 'blob:none'

      # Single step for both triggers: the previous "workflow_run" and
      # "workflow_dispatch" variants of this step had identical inputs, and the
      # job only runs for those two events.
      - name: Checkout remote repo
        uses: actions/checkout@v4
        with:
          repository: finmap-org/${{ env.REMOTE_REPO }}
          token: ${{ secrets.REMOTE_REPO_TOKEN }}
          path: ${{ env.REMOTE_REPO }}
          fetch-depth: 1
          filter: 'blob:none'
          sparse-checkout: |
            ${{ env.DATA_DIR }}
          sparse-checkout-cone-mode: true

      - name: Normalize raw data
        run: |
          set -euo pipefail
          # With nullglob, a glob that matches nothing expands to an empty list,
          # so loops over it are skipped instead of seeing the literal pattern.
          shopt -s nullglob

          # process DATA_DIR
          #   Converts every DATA_DIR/raw/<exchange>.json (except us-etf) into
          #   REMOTE_REPO/DATA_DIR/<exchange>.json in columns/data layout, then
          #   merges the per-exchange files of that directory into us-all.json.
          process() {
            local data_dir=$1
            local out_dir="${REMOTE_REPO}/${data_dir}"
            local file exchange candidate
            local -a inputs=()

            mkdir -p "$out_dir"

            for file in "$data_dir"/raw/*.json; do
              exchange=$(basename "$file" .json)
              if [ "$exchange" = "us-etf" ]; then
                continue
              fi
              jq --compact-output --arg exchange "$exchange" '
                # Strip the dollar sign and convert the remainder to a number.
                def usd: gsub("\\$"; "") | tonumber;
                {
                  securities: {
                    columns: [
                      "exchange", "country", "type", "sector", "industry",
                      "currencyId", "ticker", "nameEng", "nameEngShort",
                      "nameOriginal", "nameOriginalShort", "priceOpen",
                      "priceLastSale", "priceChangePct", "volume", "value",
                      "numTrades", "marketCap", "listedFrom", "listedTill",
                      "wikiPageIdEng", "wikiPageIdOriginal", "nestedItemsCount"
                    ],
                    data: [
                      .[] | [
                        $exchange,
                        .country // "",
                        "",
                        (if .sector == "" then "Miscellaneous" else .sector end),
                        (if .industry == "" then "Miscellaneous" else .industry end),
                        "USD",
                        .symbol,
                        .name,
                        .name,
                        "",
                        "",
                        (if .lastsale == "" then 0
                         elif .netchange == "" then (.lastsale | usd)
                         else ((.lastsale | usd) - (.netchange | usd))
                         end),
                        (if .lastsale == "" then 0 else (.lastsale | usd) end),
                        (if .pctchange == "" then 0 else (.pctchange | gsub("%"; "") | tonumber) end),
                        (try (.volume | tonumber) catch 0),
                        0,
                        0,
                        (try (.marketCap | tonumber) catch 0),
                        .ipoyear // "",
                        "",
                        "",
                        "",
                        0
                      ]
                    ]
                  }
                }' "$file" > "${out_dir}/${exchange}.json"
            done

            # Merge inputs: every JSON file in the output directory except the
            # merge result itself, which the redirection below truncates.
            for candidate in "$out_dir"/*.json; do
              if [ "$(basename "$candidate")" != "us-all.json" ]; then
                inputs+=("$candidate")
              fi
            done

            # Without file arguments jq would read stdin and write an
            # us-all.json whose columns and data are null, so skip instead.
            if [ "${#inputs[@]}" -eq 0 ]; then
              echo "No per-exchange files in ${out_dir}; skipping us-all.json"
              return 0
            fi

            jq -s '{
              securities: {
                columns: .[0].securities.columns,
                data: (map(.securities.data) | add)
              }
            }' "${inputs[@]}" > "${out_dir}/us-all.json"
          }

          # reap PID DATE
          #   Waits for one background job and records a failure for its date.
          reap() {
            if ! wait "$1"; then
              echo "::error::Processing failed for $2"
              failed=1
            fi
          }

          # Convert the range bounds to seconds since the epoch for comparison.
          start_seconds=$(date -d "$START_DATE" +%s)
          end_seconds=$(date -d "$END_DATE" +%s)
          max_jobs=10
          failed=0
          pids=()
          dates=()

          current_seconds=$start_seconds
          while [ "$current_seconds" -le "$end_seconds" ]; do
            current_date=$(date -d "@$current_seconds" +'%Y/%m/%d')
            # Advance by one day (86400 seconds) before any `continue`.
            current_seconds=$((current_seconds + 86400))

            # Skip Saturday (6) and Sunday (7).
            if [ "$(date -d "$current_date" +%u)" -ge 6 ]; then
              continue
            fi

            process "marketdata/${current_date}" &
            pids+=("$!")
            dates+=("$current_date")

            # Keep at most max_jobs jobs in flight: once the limit is reached,
            # wait for the oldest one before starting another.
            if [ "${#pids[@]}" -ge "$max_jobs" ]; then
              reap "${pids[0]}" "${dates[0]}"
              pids=("${pids[@]:1}")
              dates=("${dates[@]:1}")
            fi
          done

          # Wait for the remaining jobs. A bare `wait` would discard their exit
          # codes, so each one is waited for by PID.
          for i in "${!pids[@]}"; do
            reap "${pids[$i]}" "${dates[$i]}"
          done

          if [ "$failed" -ne 0 ]; then
            exit 1
          fi

      - name: Aggregate data
        continue-on-error: false
        uses: finmap-org/actions/aggregate-data@main
        with:
          repo: ${{ env.REMOTE_REPO }}
          start_date: ${{ env.START_DATE }}
          end_date: ${{ env.END_DATE }}

      # Disabled step, kept for reference:
      # - name: Update histogram
      #   continue-on-error: true
      #   uses: finmap-org/actions/update-histogram-data@ref
      #   with:
      #     repo: ${{ env.REMOTE_REPO }}
      #     start_date: ${{ env.START_DATE }}
      #     end_date: ${{ env.END_DATE }}

      # Commits whatever changed inside the remote checkout and pushes it.
      - name: Commit and push
        run: |
          cd "$REMOTE_REPO"
          git config --global user.name "$CI_COMMIT_AUTHOR"
          git config --global user.email "$CI_COMMIT_AUTHOR_EMAIL"
          git add --all
          # Only create a commit when the staged tree differs from HEAD.
          git diff-index --quiet HEAD || git commit -m "Update: $(TZ='US/Eastern' date)"
          git push