Skip to content

Commit 2dd7aef

Browse files
committed
deterministic
1 parent d02b8db commit 2dd7aef

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

tools/python/compile_contributors.py

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ def check_preflight():
8080
# Constants
8181
PR_CACHE = {} # Cache for PR details to speed up multiple rounds referencing same PRs
8282
NAME_TO_LOGIN = {} # Map full names to GitHub logins for consolidation
83+
VERIFIED_LOGINS = set() # Track IDs known to be valid GitHub logins (vs free-form names)
8384

8485
# Bots to exclude from contributor lists
8586
BOT_NAMES = {
@@ -160,6 +161,7 @@ def extract_authors_from_pr(details):
160161
if details.get("author"):
161162
pr_login = details["author"]["login"]
162163
authors.add(pr_login)
164+
VERIFIED_LOGINS.add(pr_login.lower())
163165

164166
# Add authors from all commits in the PR
165167
if "commits" in details:
@@ -169,6 +171,7 @@ def extract_authors_from_pr(details):
169171
name = author_info.get("name")
170172
if login:
171173
authors.add(login)
174+
VERIFIED_LOGINS.add(login.lower())
172175
if name:
173176
NAME_TO_LOGIN[name] = login
174177
elif name:
@@ -285,9 +288,7 @@ def get_prs_from_log(log_output, prs_base=None, log_file=None, scan_depth=100):
285288
for op_num in set(all_extracted_nums):
286289
if op_num == current_pr_int:
287290
continue
288-
# Only accept reasonably recent PRs to avoid noise
289-
if abs(op_num - current_pr_int) < 5000:
290-
valid_pr_ints.append(op_num)
291+
valid_pr_ints.append(op_num)
291292

292293
# Sorting results numerically (100 > 99)
293294
original_pr_ints = sorted(valid_pr_ints)
@@ -311,12 +312,22 @@ def get_prs_from_log(log_output, prs_base=None, log_file=None, scan_depth=100):
311312
"cherry_pick_pr": pr_num_str,
312313
}
313314
else:
314-
# If we can't resolve this number as a PR, do not fabricate an entry.
315-
# It may be an issue reference or an inaccessible/deleted PR.
315+
# If we can't resolve this number as a PR (e.g., issue reference or inaccessible/deleted PR),
316+
# do not invent new authors, but still attribute it to the known meta-PR to avoid losing credit.
316317
log_event(
317-
f" - Warning: Unable to resolve PR #{op_num_str} via GitHub CLI; skipping.",
318+
f" - Warning: Unable to resolve PR #{op_num_str} via GitHub CLI; attributing via meta-PR #{pr_num_str}.",
318319
log_file,
319320
)
321+
if op_num_str not in all_prs:
322+
fallback_title = (
323+
f"Unresolved sub-PR #{op_num_str} (attributed via meta-PR #{pr_num_str})"
324+
)
325+
all_prs[op_num_str] = {
326+
"title": fallback_title,
327+
"authors": list(extract_authors_from_pr(details)),
328+
"cherry_pick_commit": commit_id,
329+
"cherry_pick_pr": pr_num_str,
330+
}
320331
else:
321332
log_event(" - No sub-PRs found, treating meta-PR as a normal PR.", log_file)
322333
all_prs[pr_num_str] = {
@@ -359,7 +370,7 @@ def main():
359370
parser.add_argument("--base", default="origin/rel-1.23.2", help="Base branch/commit to compare from")
360371
parser.add_argument("--target", default="origin/rel-1.24.1", help="Target branch/commit to compare to")
361372
parser.add_argument("--dir", default="contributors", help="Output directory for reports and logs")
362-
parser.add_argument("--scan-depth", type=int, default=100, help="Depth to scan base/meta-PRs for deduplication")
373+
parser.add_argument("--scan-depth", type=int, default=200, help="Depth to scan base/meta-PRs for deduplication")
363374
args = parser.parse_args()
364375

365376
# Early validation
@@ -449,15 +460,15 @@ def main():
449460
if not is_bot(final_author) and not is_invalid(final_author):
450461
consolidated_contributors[author_lower] = consolidated_contributors.get(author_lower, 0) + count
451462

452-
# Sort human contributors by count descending for summary
453-
sorted_contributors = sorted(consolidated_contributors.items(), key=lambda x: x[1], reverse=True)
463+
# Sort human contributors by count descending, then alphabetically by identity for determinism
464+
sorted_contributors = sorted(consolidated_contributors.items(), key=lambda x: (-x[1], x[0]))
454465

455466
log_event("\n--- Summary ---", log_file)
456-
# Prefix only identified github logins (no spaces) and format as markdown links
467+
# Prefix only identified github logins and format as markdown links
457468
output_users = []
458469
for login_lower, _login in sorted_contributors:
459470
u = display_names[login_lower]
460-
if " " not in u:
471+
if login_lower in VERIFIED_LOGINS:
461472
output_users.append(f"[@{u}](https://github.com/{u})")
462473
else:
463474
output_users.append(u)

0 commit comments

Comments
 (0)