Skip to content

Commit 3207b3b

Browse files
authored
Make the changelog workflow's PR lookup robust (#131)
Run 24579805689 silently missed the changelog + skill-version update because the API's commits/{sha}/pulls endpoint is indexer-backed and lagged past the 3 x 10s retry budget. Fast-path the lookup by parsing (#N) from the squash-merge commit subject and calling pulls/N directly — a primary-record endpoint that isn't subject to the indexer race. The PR is only accepted if its merge_commit_sha matches the commit we were asked about, so a commit whose subject happens to end with (#N) can't be misattributed. The old commits/{sha}/pulls lookup remains as a fallback with a wider budget (6 x 20s). The lookup logic moved from inline bash to a Python script under .ci-scripts/ (matching changelog.py / update_skill_versions.py). The script also formats the changelog entry and emits it as a step output, replacing the bash string assembly downstream. Checkout was added as the first workflow step — previously conditional and after the lookup, which would have broken the new script.
1 parent 1aa4ecf commit 3207b3b

2 files changed

Lines changed: 239 additions & 44 deletions

File tree

.ci-scripts/find_merged_pr.py

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
#!/usr/bin/env python3
2+
"""Find the merged PR associated with a commit on main.
3+
4+
Stdlib-only; safe to invoke with the system python3 before an
5+
`actions/setup-python` step has run.
6+
7+
Uses two strategies, in order:
8+
9+
1. Fast path: parse the PR number from the squash-merge commit subject
10+
(which ends with "(#N)") and look up the PR directly. Avoids the
11+
commits/{sha}/pulls indexer, which lags behind the push event. The
12+
looked-up PR is only accepted if its merge_commit_sha matches the
13+
commit we were asked about — otherwise a commit whose subject
14+
happens to end with "(#N)" (a cherry-pick, a manually-edited
15+
subject) could be misattributed.
16+
17+
2. Fallback: retry commits/{sha}/pulls to cover indexer lag for
18+
commits whose subject doesn't carry a PR ref (e.g. a direct push
19+
to main).
20+
21+
Exit code is 0 in all normal cases, including "no PR found" and any
22+
unexpected error. Diagnostics are written to stderr.
23+
24+
Writes GitHub Actions step-output lines when a PR is found:
25+
26+
number=<N>
27+
entry=<formatted changelog entry line>
28+
has_changelog=<"changelog - ..." label name, or empty>
29+
breaking=<"changelog - breaking", or empty>
30+
31+
When no PR is found, no output keys are emitted — downstream steps
32+
should gate on `steps.<id>.outputs.has_changelog != ''`.
33+
34+
Output is written to `$GITHUB_OUTPUT` when that env var is set
35+
(normal GitHub Actions behavior), otherwise to stdout.
36+
37+
Usage:
38+
39+
python3 .ci-scripts/find_merged_pr.py lookup --sha "$COMMIT_SHA" --repo "$REPO"
40+
"""
41+
42+
from __future__ import annotations
43+
44+
import argparse
45+
import contextlib
46+
import json
47+
import os
48+
import re
49+
import subprocess
50+
import sys
51+
import time
52+
import uuid
53+
54+
# Retry budget for the commits/{sha}/pulls fallback: 6 attempts separated by
# 20s pauses, i.e. roughly 100s of waiting in total. The incident that
# motivated this script (workflow run 24579805689) exhausted a budget of
# 3 x 10s = 30s.
FALLBACK_DELAY_S = 20
FALLBACK_ATTEMPTS = 6

# Upper bound, in seconds, on a single `gh api` subprocess call.
GH_API_TIMEOUT_S = 30

# Collapses any run of whitespace (newlines included).
WHITESPACE_RE = re.compile(r"\s+")

# Trailing "(#N)" reference at the very end of a commit subject.
PR_REF_RE = re.compile(r"\(#(\d+)\)$")
61+
62+
63+
def _gh_api(path: str) -> object:
    """Call `gh api <path>` and return the decoded JSON response.

    Every failure is reported on stderr and collapsed to ``None``; this
    function does not raise for the failure modes listed below.

    Args:
        path: API path handed to ``gh api``.

    Returns:
        The parsed JSON value, or ``None`` when the ``gh`` CLI is missing,
        the call times out, ``gh`` exits non-zero, or its stdout is not
        valid JSON.
    """
    try:
        result = subprocess.run(
            ["gh", "api", path],
            capture_output=True,
            text=True,
            timeout=GH_API_TIMEOUT_S,
        )
    except FileNotFoundError:
        print("gh CLI not found on PATH", file=sys.stderr)
        return None
    except subprocess.TimeoutExpired:
        print(f"gh api {path} timed out after {GH_API_TIMEOUT_S}s", file=sys.stderr)
        return None

    if result.returncode != 0:
        if result.stderr:
            print(result.stderr.rstrip(), file=sys.stderr)
        else:
            # Never fail silently: a bare exit status is still a diagnostic.
            print(
                f"gh api {path} failed with exit code {result.returncode}",
                file=sys.stderr,
            )
        return None

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        # Report the parse failure like every other failure path does.
        print(f"gh api {path} returned invalid JSON: {exc}", file=sys.stderr)
        return None
86+
87+
88+
def _pr_number_from_subject(repo: str, sha: str) -> int | None:
    """Parse the squash-merge PR reference from the commit subject.

    Fetches ``repos/{repo}/commits/{sha}`` and looks for a trailing
    "(#N)" on the first line of the commit message.

    Args:
        repo: Repository in ``owner/name`` form.
        sha: Commit SHA to inspect.

    Returns:
        The PR number ``N``, or ``None`` when the commit cannot be fetched,
        the response has an unexpected shape, or the subject does not end
        with "(#N)".
    """
    payload = _gh_api(f"repos/{repo}/commits/{sha}")
    if not isinstance(payload, dict):
        return None

    # Tolerate a missing/null "commit" or "message" instead of raising, so an
    # odd response falls through to the fallback lookup rather than aborting.
    commit = payload.get("commit")
    message = commit.get("message") if isinstance(commit, dict) else None
    if not isinstance(message, str) or not message:
        return None

    # Only the subject (first line) is considered; trailing whitespace is
    # dropped so the pattern's end anchor lines up with the ")".
    subject = message.splitlines()[0].rstrip()
    match = PR_REF_RE.search(subject)
    return int(match.group(1)) if match else None
99+
100+
101+
def _fetch_pr(repo: str, number: int, sha: str) -> dict | None:
    """Return PR `number` only when it is merged and its merge commit is `sha`.

    Args:
        repo: Repository in ``owner/name`` form.
        number: Pull request number to fetch.
        sha: Commit SHA the PR must have been merged as.

    Returns:
        The PR payload, or ``None`` when the lookup fails, the PR is not
        merged, or its ``merge_commit_sha`` differs from `sha`.
    """
    candidate = _gh_api(f"repos/{repo}/pulls/{number}")
    accepted = (
        isinstance(candidate, dict)
        and candidate.get("merged")
        and candidate.get("merge_commit_sha") == sha
    )
    return candidate if accepted else None
111+
112+
113+
def _fetch_pr_for_sha(repo: str, sha: str) -> dict | None:
    """Retry commits/{sha}/pulls to cover indexer lag.

    Polls up to ``FALLBACK_ATTEMPTS`` times, sleeping ``FALLBACK_DELAY_S``
    seconds between attempts (no sleep after the last one).

    Args:
        repo: Repository in ``owner/name`` form.
        sha: Commit SHA to resolve.

    Returns:
        A PR payload from the first non-empty response, or ``None`` when
        every attempt came back empty or failed. If the response lists
        several PRs, the one whose ``merge_commit_sha`` equals `sha` is
        preferred; otherwise the first entry is returned.
    """
    for attempt in range(1, FALLBACK_ATTEMPTS + 1):
        payload = _gh_api(f"repos/{repo}/commits/{sha}/pulls")
        if isinstance(payload, list):
            # Ignore malformed entries so callers always receive a dict.
            candidates = [item for item in payload if isinstance(item, dict)]
            if candidates:
                # Same acceptance criterion as the fast path: prefer the PR
                # that was merged as exactly this commit.
                return next(
                    (pr for pr in candidates if pr.get("merge_commit_sha") == sha),
                    candidates[0],
                )
        if attempt < FALLBACK_ATTEMPTS:
            print(
                f"Attempt {attempt}/{FALLBACK_ATTEMPTS}: no PR found, "
                f"retrying in {FALLBACK_DELAY_S}s...",
                file=sys.stderr,
            )
            time.sleep(FALLBACK_DELAY_S)
    return None
127+
128+
129+
def _changelog_label(pr: dict) -> str:
    """Return the first label on `pr` whose name starts with "changelog - ".

    Args:
        pr: PR payload; its "labels" entry may be missing or ``None``.

    Returns:
        The matching label name, or an empty string when there is none.
        Entries that are not dicts, or whose "name" is missing or not a
        string, are skipped.
    """
    for label in pr.get("labels") or []:
        name = label.get("name") if isinstance(label, dict) else None
        if isinstance(name, str) and name.startswith("changelog - "):
            return name
    return ""
135+
136+
137+
def _format_entry(pr: dict) -> str:
    """Render the single-line changelog entry for `pr`.

    Reads the "title", "number" and "html_url" keys of the PR payload.
    """
    # Squash every whitespace run (stray newlines included) down to one space
    # so the entry occupies a single line in CHANGELOG.md and GITHUB_OUTPUT.
    one_line_title = WHITESPACE_RE.sub(" ", pr["title"]).strip()
    link = f"[PR #{pr['number']}]({pr['html_url']})"
    return f"{one_line_title} ({link})"
143+
144+
145+
def _emit(key: str, value, *, stream) -> None:
    """Write one step-output entry for `key` to `stream`.

    `value` is converted with ``str()``. Single-line values are written as
    ``key=value``; values containing a newline are written in heredoc form
    (``key<<DELIM`` / value / ``DELIM``) with a freshly generated delimiter.
    """
    rendered = str(value)
    if "\n" not in rendered:
        print(f"{key}={rendered}", file=stream)
        return

    # Random delimiter per call, so the value itself is unlikely to contain it.
    marker = f"EOF_{uuid.uuid4().hex}"
    for line in (f"{key}<<{marker}", rendered, marker):
        print(line, file=stream)
155+
156+
157+
@contextlib.contextmanager
def _output_stream():
    """Yield the stream that step outputs should be written to.

    When the ``GITHUB_OUTPUT`` environment variable is set and non-empty,
    the file it names is opened in append mode (UTF-8) and closed on exit;
    otherwise ``sys.stdout`` is yielded and left open.
    """
    target = os.environ.get("GITHUB_OUTPUT")
    if not target:
        yield sys.stdout
        return
    with open(target, "a", encoding="utf-8") as handle:
        yield handle
166+
167+
168+
def cmd_lookup(sha: str, repo: str) -> dict[str, str]:
    """Resolve the merged PR for `sha` and build the step-output mapping.

    Tries the commit-subject fast path first, then the commits/{sha}/pulls
    fallback. Progress messages go to stderr.

    Args:
        sha: Commit SHA on main.
        repo: Repository in ``owner/name`` form.

    Returns:
        An empty dict when no PR could be found; otherwise a dict with the
        keys "number", "entry", "has_changelog" and "breaking".
    """
    found: dict | None = None

    # Fast path: PR number taken from the "(#N)" suffix of the commit subject.
    parsed = _pr_number_from_subject(repo, sha)
    if parsed is not None:
        print(f"Parsed PR #{parsed} from commit subject", file=sys.stderr)
        found = _fetch_pr(repo, parsed, sha)
        if found is None:
            print(
                f"PR #{parsed} did not merge {sha[:7]} — falling back",
                file=sys.stderr,
            )

    # Fallback: ask the API which PR is associated with the commit.
    if found is None:
        print("Falling back to commit-to-PR lookup", file=sys.stderr)
        found = _fetch_pr_for_sha(repo, sha)
        if found is None:
            print("No PR found for commit — skipping", file=sys.stderr)
            return {}

    label = _changelog_label(found)
    outputs = {"number": str(found["number"]), "entry": _format_entry(found)}
    outputs["has_changelog"] = label
    # "breaking" repeats the label only for the breaking-change label.
    outputs["breaking"] = label if label == "changelog - breaking" else ""
    return outputs
199+
200+
201+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behavior.

    Returns:
        1 when no sub-command was given (help is printed), otherwise 0 —
        including when the lookup found nothing or raised.
    """
    parser = argparse.ArgumentParser(
        description="Find the merged PR for a commit on main"
    )
    sub = parser.add_subparsers(dest="command")

    lookup = sub.add_parser("lookup", help="Look up the merged PR for a commit")
    lookup.add_argument("--sha", required=True, help="Commit SHA on main")
    lookup.add_argument("--repo", required=True, help="owner/name")

    args = parser.parse_args(argv)
    if args.command is None:
        parser.print_help()
        return 1

    try:
        outputs = cmd_lookup(args.sha, args.repo)
    except Exception as exc:
        # Deliberate best-effort boundary: a failed lookup emits no outputs
        # and still exits 0. Include the exception type, since str() of e.g.
        # a KeyError is just the quoted key.
        print(f"Unexpected error: {type(exc).__name__}: {exc}", file=sys.stderr)
        outputs = {}

    with _output_stream() as stream:
        for key, value in outputs.items():
            _emit(key, value, stream=stream)
    return 0
226+
227+
228+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())

.github/workflows/pr-merge-changelog.yml

Lines changed: 10 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -14,70 +14,36 @@ jobs:
1414
update:
1515
runs-on: ubuntu-latest
1616
steps:
17+
- name: Check out main
18+
uses: actions/checkout@v4
19+
1720
- name: Find merged PR
1821
id: pr
1922
env:
2023
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2124
COMMIT_SHA: ${{ github.sha }}
2225
REPO: ${{ github.repository }}
2326
run: |
24-
# Retry loop for GitHub API eventual consistency.
25-
for attempt in 1 2 3; do
26-
PR_JSON=$(gh api "repos/${REPO}/commits/${COMMIT_SHA}/pulls" \
27-
--jq '.[0] // empty' 2>/dev/null || echo "")
28-
if [ -n "$PR_JSON" ]; then
29-
break
30-
fi
31-
echo "Attempt ${attempt}: no PR found, retrying in 10s..."
32-
sleep 10
33-
done
34-
35-
if [ -z "$PR_JSON" ]; then
36-
echo "No PR found for commit — skipping"
37-
echo "skip=true" >> "$GITHUB_OUTPUT"
38-
exit 0
39-
fi
40-
41-
NUMBER=$(echo "$PR_JSON" | jq -r '.number')
42-
TITLE=$(echo "$PR_JSON" | jq -r '.title')
43-
URL=$(echo "$PR_JSON" | jq -r '.html_url')
44-
45-
HAS_CHANGELOG=$(echo "$PR_JSON" | jq -r \
46-
'[.labels[].name] | map(select(startswith("changelog - "))) | first // empty')
47-
BREAKING=$(echo "$PR_JSON" | jq -r \
48-
'[.labels[].name] | map(select(. == "changelog - breaking")) | first // empty')
49-
50-
echo "number=$NUMBER" >> "$GITHUB_OUTPUT"
51-
echo "title=$TITLE" >> "$GITHUB_OUTPUT"
52-
echo "url=$URL" >> "$GITHUB_OUTPUT"
53-
echo "has_changelog=$HAS_CHANGELOG" >> "$GITHUB_OUTPUT"
54-
echo "breaking=$BREAKING" >> "$GITHUB_OUTPUT"
55-
echo "skip=false" >> "$GITHUB_OUTPUT"
56-
57-
- name: Check out main
58-
if: steps.pr.outputs.skip != 'true' && steps.pr.outputs.has_changelog != ''
59-
uses: actions/checkout@v4
27+
python3 .ci-scripts/find_merged_pr.py lookup \
28+
--sha "$COMMIT_SHA" --repo "$REPO"
6029
6130
- name: Set up Python
62-
if: steps.pr.outputs.skip != 'true' && steps.pr.outputs.has_changelog != ''
31+
if: steps.pr.outputs.has_changelog != ''
6332
uses: actions/setup-python@v5
6433
with:
6534
python-version: "3.x"
6635

6736
- name: Add changelog entry
6837
if: steps.pr.outputs.has_changelog != ''
6938
env:
70-
TITLE: ${{ steps.pr.outputs.title }}
71-
NUMBER: ${{ steps.pr.outputs.number }}
72-
URL: ${{ steps.pr.outputs.url }}
39+
ENTRY: ${{ steps.pr.outputs.entry }}
7340
BREAKING: ${{ steps.pr.outputs.breaking }}
7441
run: |
75-
ENTRY="${TITLE} ([PR #${NUMBER}](${URL}))"
76-
ARGS=""
7742
if [ -n "$BREAKING" ]; then
78-
ARGS="--breaking"
43+
python3 .ci-scripts/changelog.py add-entry --breaking "$ENTRY"
44+
else
45+
python3 .ci-scripts/changelog.py add-entry "$ENTRY"
7946
fi
80-
python3 .ci-scripts/changelog.py add-entry $ARGS "$ENTRY"
8147
8248
- name: Update skill versions
8349
if: steps.pr.outputs.has_changelog != ''

0 commit comments

Comments
 (0)