Skip to content

Commit e3f7ae7

Browse files
committed
fix(ci): tighten trigger regex to only extract Start:/End: 4-digit years
The previous extraction, `grep -oE '[0-9]+' | head -1`, pulled ANY digit run out of the upstream commit message. Multi-line commit bodies with embedded numbers (error codes, PR refs, line-of-code references) produced nonsense such as `START_YEAR=169` from a body that mentions `position 169`. The processor then ran `bash daily_wbb_R_processor.sh -s 169 -e 169` and either processed an empty "season 169" or errored on it. Anchor the extraction to the canonical `Start: YYYY` / `End: YYYY` tokens that the umbrella workflow's commit message uses (`WBB Raw Update (Start: 2026 End: 2026)`). The PCRE pattern `Start:\s*\K[0-9]{4}` captures only the four digits that follow the token. When no Start/End match is found (e.g., for direct pushes whose commits don't carry the canonical token), the intent is to fall through to the empty-input fallback that calls `wehoop::most_recent_wbb_season()`. Move the commit message into a step-level `env: COMMIT_MESSAGE` variable rather than inline `${{ ... }}` so the script reads it as an ordinary shell variable (`"$COMMIT_MESSAGE"`) instead of having the raw text substituted into the script source.
1 parent da7d8c7 commit e3f7ae7

1 file changed

Lines changed: 88 additions & 74 deletions

File tree

.github/workflows/daily_wbb.yml

Lines changed: 88 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,88 @@
1-
name: Update WBB Data
2-
3-
on:
4-
schedule:
5-
- cron: '0 7 18-31 10 *'
6-
- cron: '0 7 * 11-12 *'
7-
- cron: '0 7 * 1-3 *'
8-
- cron: '0 7 1-12 4 *'
9-
repository_dispatch:
10-
types: [daily_wbb_data]
11-
workflow_dispatch:
12-
inputs:
13-
start_year:
14-
required: false
15-
type: string
16-
end_year:
17-
required: false
18-
type: string
19-
20-
jobs:
21-
update_wbb_data:
22-
runs-on: ${{ matrix.config.os }}
23-
name: Update WBB Data ${{ inputs.start_year }}-${{ inputs.end_year }}
24-
strategy:
25-
fail-fast: false
26-
matrix:
27-
config:
28-
- {os: "windows-latest", r: "release"}
29-
30-
env:
31-
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
32-
R_KEEP_PKG_SOURCE: yes
33-
START_YEAR: ${{ inputs.start_year }}
34-
END_YEAR: ${{ inputs.end_year }}
35-
36-
steps:
37-
38-
- name: Checkout repo
39-
uses: actions/checkout@v4
40-
- name: Setting up R
41-
uses: r-lib/actions/setup-r@v2
42-
with:
43-
r-version: ${{ matrix.config.r }}
44-
http-user-agent: ${{ matrix.config.http-user-agent }}
45-
- uses: r-lib/actions/setup-pandoc@v2
46-
- name: Setting up R dependencies
47-
uses: r-lib/actions/setup-r-dependencies@v2
48-
with:
49-
cache-version: 1
50-
extra-packages: |
51-
sportsdataverse/wehoop
52-
sportsdataverse/sportsdataverse-data
53-
ropensci/piggyback
54-
- name: Check wehoop_wbb_data_trigger for inputs
55-
if: ${{ github.event.client_payload.event_name == 'daily_wbb_data'}}
56-
shell: bash
57-
run: |
58-
echo START_YEAR=$(echo "${{ github.event.client_payload.commit_message}}" | grep -o -E '[0-9]+' | head -1) >> $GITHUB_ENV
59-
echo END_YEAR=$(echo "${{ github.event.client_payload.commit_message}}" | grep -o -E '[0-9]+' | tail -1) >> $GITHUB_ENV
60-
- name: Check for empty inputs
61-
if: ${{ env.START_YEAR == ''}}
62-
shell: bash
63-
run: |
64-
echo START_YEAR=$(Rscript -e "cat(wehoop::most_recent_wbb_season())") >> $GITHUB_ENV
65-
echo END_YEAR=$(Rscript -e "cat(wehoop::most_recent_wbb_season())") >> $GITHUB_ENV
66-
- name: Update WBB Data ${{ env.START_YEAR }}-${{ env.END_YEAR }}
67-
shell: bash
68-
env:
69-
GITHUB_PAT: ${{ secrets.SDV_GH_TOKEN }}
70-
SPORTSDATAVERSE.UPLOAD.QUIET: FALSE
71-
SPORTSDATAVERSE.UPLOAD.MAX_TIMES: 20
72-
run: |
73-
echo $(pwd)
74-
bash scripts/daily_wbb_R_processor.sh -s ${{ env.START_YEAR }} -e ${{ env.END_YEAR }}
1+
# Workflow that refreshes the WBB data.
name: Update WBB Data

on:
  # Daily at 07:00 (GitHub evaluates schedules in UTC), limited to the
  # window from October 18 through April 12.
  schedule:
    - cron: '0 7 18-31 10 *'   # October 18-31
    - cron: '0 7 * 11-12 *'    # all of November and December
    - cron: '0 7 * 1-3 *'      # all of January through March
    - cron: '0 7 1-12 4 *'     # April 1-12

  # Triggered by another workflow through the repository_dispatch API.
  repository_dispatch:
    types:
      - daily_wbb_data

  # Manual run; both years are optional free-form strings.
  workflow_dispatch:
    inputs:
      start_year:
        required: false
        type: string
      end_year:
        required: false
        type: string
19+
20+
jobs:
  # Single job: set up R, decide which season range to process, then run
  # the processor script for that range.
  update_wbb_data:
    runs-on: ${{ matrix.config.os }}
    name: Update WBB Data ${{ inputs.start_year }}-${{ inputs.end_year }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: "windows-latest", r: "release"}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes
      # Taken from the manual-run inputs; the steps below may set these
      # through $GITHUB_ENV when they are empty.
      START_YEAR: ${{ inputs.start_year }}
      END_YEAR: ${{ inputs.end_year }}

    steps:

      - name: Checkout repo
        uses: actions/checkout@v4
      - name: Setting up R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          # NOTE(review): the matrix entry above defines no
          # `http-user-agent` key, so this presumably resolves to empty.
          http-user-agent: ${{ matrix.config.http-user-agent }}
      - uses: r-lib/actions/setup-pandoc@v2
      - name: Setting up R dependencies
        uses: r-lib/actions/setup-r-dependencies@v2
        with:
          cache-version: 1
          extra-packages: |
            sportsdataverse/wehoop
            sportsdataverse/sportsdataverse-data
            ropensci/piggyback
      - name: Check wehoop_wbb_data_trigger for inputs
        if: ${{ github.event.client_payload.event_name == 'daily_wbb_data'}}
        shell: bash
        env:
          # Passed through the environment so the message text is never
          # spliced into the script source.
          COMMIT_MESSAGE: ${{ github.event.client_payload.commit_message }}
        run: |
          # The upstream scrape workflow sends commit messages such as
          # `WBB Raw Update (Start: 2026 End: 2026)`. Take the four digits
          # that follow each token: `\K` drops the token itself from the
          # match and `head -1` keeps only the first hit. Matching on the
          # token (rather than on any digit run) keeps PR refs, error
          # codes and similar numbers in the message body from being
          # mistaken for a season.
          #
          # `shell: bash` runs this script with `-e -o pipefail`, and grep
          # exits 1 when nothing matches. Without `|| true` that status
          # would abort the step; with it the variable is left empty so
          # the "Check for empty inputs" step can supply a default.
          start=$(printf '%s' "$COMMIT_MESSAGE" | grep -oP 'Start:\s*\K[0-9]{4}' | head -1) || true
          end=$(printf '%s' "$COMMIT_MESSAGE" | grep -oP 'End:\s*\K[0-9]{4}' | head -1) || true
          echo "START_YEAR=$start" >> "$GITHUB_ENV"
          echo "END_YEAR=$end" >> "$GITHUB_ENV"
      - name: Check for empty inputs
        if: ${{ env.START_YEAR == ''}}
        shell: bash
        run: |
          # No start year from the inputs or the trigger message: use the
          # most recent season reported by wehoop for both ends of the
          # range. Rscript is called once; if it fails, the step stops
          # here instead of exporting empty years.
          season=$(Rscript -e "cat(wehoop::most_recent_wbb_season())")
          echo "START_YEAR=$season" >> "$GITHUB_ENV"
          echo "END_YEAR=$season" >> "$GITHUB_ENV"
      - name: Update WBB Data ${{ env.START_YEAR }}-${{ env.END_YEAR }}
        shell: bash
        env:
          GITHUB_PAT: ${{ secrets.SDV_GH_TOKEN }}
          SPORTSDATAVERSE.UPLOAD.QUIET: FALSE
          SPORTSDATAVERSE.UPLOAD.MAX_TIMES: 20
        run: |
          echo $(pwd)
          # Read the years from the environment instead of interpolating
          # workflow expressions into the script text.
          bash scripts/daily_wbb_R_processor.sh -s "$START_YEAR" -e "$END_YEAR"

0 commit comments

Comments
 (0)