# Workflow file for the "process" workflow (captured from run #1317).

name: process

# Runs whenever the "fetch-marketdata" workflow completes, and can also be
# started by hand with an explicit date range.
on:
  workflow_run:
    workflows:
      - fetch-marketdata
    types:
      - completed
  workflow_dispatch:
    inputs:
      # Both bounds are inclusive and use the YYYY/MM/DD form that the
      # marketdata/<date> directory layout is built from.
      start_date:
        description: 'Start date, YYYY/MM/DD'
        required: true
        type: string
        default: '2024/12/09'
      end_date:
        description: 'End date, YYYY/MM/DD'
        required: true
        type: string
        default: '2025/10/08'

# Write access to repository contents is requested for the job's token.
permissions:
  contents: write
jobs:
  # Normalizes the raw per-exchange JSON found under marketdata/<date>/raw/
  # into the column/row layout stored in the remote data repository, runs the
  # aggregate-data action over the same date range, then commits and pushes
  # the remote repository.
  process:
    # Only continue after a successful upstream run; manual runs always go.
    if: github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    env:
      CI_COMMIT_AUTHOR: 'github-actions[bot]'
      CI_COMMIT_AUTHOR_EMAIL: '41898282+github-actions[bot]@users.noreply.github.com'
      REMOTE_REPO: data-us
    steps:
      # Automatic runs cover "yesterday" through "today" in New York time.
      # America/New_York is the canonical IANA name for the zone formerly
      # spelled US/Eastern; the legacy alias is not guaranteed to be
      # installed on newer images.
      - name: Set up environment variables
        if: github.event_name == 'workflow_run'
        run: |
          {
            echo "START_DATE=$(TZ='America/New_York' date --date yesterday +'%Y/%m/%d')"
            echo "END_DATE=$(TZ='America/New_York' date +'%Y/%m/%d')"
          } >> "$GITHUB_ENV"

      # Manual runs take the range from the dispatch inputs. The inputs are
      # handed to the script through env (never interpolated into the script
      # text) and must match YYYY/MM/DD before they reach $GITHUB_ENV.
      - name: Set up environment variables (manual run)
        if: github.event_name == 'workflow_dispatch'
        env:
          INPUT_START_DATE: ${{ inputs.start_date }}
          INPUT_END_DATE: ${{ inputs.end_date }}
        run: |
          set -euo pipefail
          date_re='^[0-9]{4}/[0-9]{2}/[0-9]{2}$'
          for value in "$INPUT_START_DATE" "$INPUT_END_DATE"; do
            if [[ ! "$value" =~ $date_re ]]; then
              echo "::error::Invalid date '${value}', expected YYYY/MM/DD"
              exit 1
            fi
          done
          {
            echo "START_DATE=${INPUT_START_DATE}"
            echo "END_DATE=${INPUT_END_DATE}"
          } >> "$GITHUB_ENV"

      # NOTE(review): DATA_DIR is not defined anywhere in this file, so the
      # sparse-checkout pattern below is presumably empty at runtime. Confirm
      # what the checkout then fetches before relying on it being sparse.
      - name: Sparse checkout current repo
        if: github.event_name == 'workflow_run'
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          filter: blob:none
          sparse-checkout: |
            ${{ env.DATA_DIR }}
          sparse-checkout-cone-mode: true

      - name: Checkout current repo on manual trigger
        if: github.event_name == 'workflow_dispatch'
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          filter: blob:none

      # A single step replaces the former pair of identical remote checkouts
      # that differed only in their mutually exclusive `if` conditions.
      # NOTE(review): same DATA_DIR caveat as the current-repo checkout.
      - name: Checkout remote repo
        uses: actions/checkout@v4
        with:
          repository: finmap-org/${{ env.REMOTE_REPO }}
          token: ${{ secrets.REMOTE_REPO_TOKEN }}
          path: ${{ env.REMOTE_REPO }}
          fetch-depth: 1
          filter: blob:none
          sparse-checkout: |
            ${{ env.DATA_DIR }}
          sparse-checkout-cone-mode: true

      - name: Normalize raw data
        run: |
          set -euo pipefail
          # With nullglob a pattern that matches nothing expands to an empty
          # list, so a day without raw files simply skips the loop instead of
          # processing the literal pattern string.
          shopt -s nullglob

          # process DATA_DIR
          #   Converts every DATA_DIR/raw/<exchange>.json (except us-etf) into
          #   REMOTE_REPO/DATA_DIR/<exchange>.json and then concatenates the
          #   files written by this call into REMOTE_REPO/DATA_DIR/us-all.json.
          #   Returns 0 without writing anything when no raw file was converted.
          process() {
            local data_dir=$1
            local out_dir="${REMOTE_REPO}/${data_dir}"
            local -a outputs=()
            local file exchange target

            for file in "$data_dir"/raw/*.json; do
              exchange=$(basename "$file" .json)
              if [ "$exchange" = "us-etf" ]; then
                continue
              fi
              mkdir -p "$out_dir"
              target="${out_dir}/${exchange}.json"
              jq --compact-output \
                --arg exchange "$exchange" '
                # Strip the dollar sign from a price string and parse it.
                def money: gsub("\\$"; "") | tonumber;
                {
                  securities: {
                    columns: [
                      "exchange", "country", "type", "sector", "industry",
                      "currencyId", "ticker", "nameEng", "nameEngShort",
                      "nameOriginal", "nameOriginalShort", "priceOpen",
                      "priceLastSale", "priceChangePct", "volume", "value",
                      "numTrades", "marketCap", "listedFrom", "listedTill",
                      "wikiPageIdEng", "wikiPageIdOriginal", "nestedItemsCount"
                    ],
                    data: [
                      .[] | [
                        $exchange,
                        .country // "",
                        "",
                        (if .sector == "" then "Miscellaneous" else .sector end),
                        (if .industry == "" then "Miscellaneous" else .industry end),
                        "USD",
                        .symbol,
                        .name,
                        .name,
                        "",
                        "",
                        # priceOpen is derived as last sale minus net change.
                        (if .lastsale == "" then 0
                         elif .netchange == "" then (.lastsale | money)
                         else ((.lastsale | money) - (.netchange | money))
                         end),
                        (if .lastsale == "" then 0 else (.lastsale | money) end),
                        (if .pctchange == "" then 0 else (.pctchange | gsub("%"; "") | tonumber) end),
                        (try (.volume | tonumber) catch 0),
                        0,
                        0,
                        (try (.marketCap | tonumber) catch 0),
                        .ipoyear // "",
                        "",
                        "",
                        "",
                        0
                      ]
                    ]
                  }
                }' "$file" > "$target"
              outputs+=("$target")
            done

            # Without inputs jq would read empty stdin and emit a us-all.json
            # whose columns and data are null, so skip such days entirely.
            if [ "${#outputs[@]}" -eq 0 ]; then
              echo "No raw data under ${data_dir}/raw; skipping."
              return 0
            fi

            # Aggregate only the files written above, so a us-all.json left
            # over from an earlier run is never fed back in as an input.
            jq -s '{
              securities: {
                columns: .[0].securities.columns,
                data: (map(.securities.data) | add)
              }
            }' "${outputs[@]}" > "${out_dir}/us-all.json"
          }

          # Dates are handled in UTC so that stepping by 86400 seconds always
          # lands on the next calendar day regardless of the runner timezone.
          start_seconds=$(date -u -d "$START_DATE" +%s)
          end_seconds=$(date -u -d "$END_DATE" +%s)
          max_jobs=10

          pids=()
          dirs=()
          reaped=0
          failed=0

          # Wait for the oldest unreaped job and record whether it failed.
          reap_oldest() {
            if ! wait "${pids[$reaped]}"; then
              echo "::error::Normalization failed for ${dirs[$reaped]}"
              failed=1
            fi
            reaped=$((reaped + 1))
          }

          current_seconds=$start_seconds
          while [ "$current_seconds" -le "$end_seconds" ]; do
            current_date=$(date -u -d "@$current_seconds" +'%Y/%m/%d')
            day_of_week=$(date -u -d "@$current_seconds" +%u)
            current_seconds=$((current_seconds + 86400))

            # Skip weekends (6 = Saturday, 7 = Sunday).
            if [ "$day_of_week" -ge 6 ]; then
              continue
            fi

            data_dir="marketdata/${current_date}"
            process "$data_dir" &
            pids+=("$!")
            dirs+=("$data_dir")

            # Keep at most max_jobs days in flight.
            if (( ${#pids[@]} - reaped >= max_jobs )); then
              reap_oldest
            fi
          done

          # Collect every remaining job; a bare `wait` would discard their
          # exit statuses and let the step pass after a failed conversion.
          while (( reaped < ${#pids[@]} )); do
            reap_oldest
          done

          exit "$failed"

      - name: Aggregate data
        continue-on-error: false
        uses: finmap-org/actions/aggregate-data@main
        with:
          repo: ${{ env.REMOTE_REPO }}
          start_date: ${{ env.START_DATE }}
          end_date: ${{ env.END_DATE }}

      # Disabled step, kept for reference:
      # - name: Update histogram
      #   continue-on-error: true
      #   uses: finmap-org/actions/update-histogram-data@ref
      #   with:
      #     repo: ${{ env.REMOTE_REPO }}
      #     start_date: ${{ env.START_DATE }}
      #     end_date: ${{ env.END_DATE }}

      - name: Commit and push
        run: |
          cd "$REMOTE_REPO"
          git config --global user.name "$CI_COMMIT_AUTHOR"
          git config --global user.email "$CI_COMMIT_AUTHOR_EMAIL"
          git add --all
          # Commit only when the staged tree differs from HEAD.
          git diff-index --quiet HEAD || git commit -m "Update: $(TZ='America/New_York' date)"
          git push