@@ -80,6 +80,7 @@ def check_preflight():
8080# Constants
8181PR_CACHE = {} # Cache for PR details to speed up multiple rounds referencing same PRs
8282NAME_TO_LOGIN = {} # Map full names to GitHub logins for consolidation
83+ VERIFIED_LOGINS = set () # Lowercased IDs known to be valid GitHub logins (vs free-form names)
8384
8485# Bots to exclude from contributor lists
8586BOT_NAMES = {
@@ -160,6 +161,7 @@ def extract_authors_from_pr(details):
160161 if details .get ("author" ):
161162 pr_login = details ["author" ]["login" ]
162163 authors .add (pr_login )
164+ VERIFIED_LOGINS .add (pr_login .lower ())
163165
164166 # Add authors from all commits in the PR
165167 if "commits" in details :
@@ -169,6 +171,7 @@ def extract_authors_from_pr(details):
169171 name = author_info .get ("name" )
170172 if login :
171173 authors .add (login )
174+ VERIFIED_LOGINS .add (login .lower ())
172175 if name :
173176 NAME_TO_LOGIN [name ] = login
174177 elif name :
@@ -285,9 +288,7 @@ def get_prs_from_log(log_output, prs_base=None, log_file=None, scan_depth=100):
285288 for op_num in set (all_extracted_nums ):
286289 if op_num == current_pr_int :
287290 continue
288- # Only accept reasonably recent PRs to avoid noise
289- if abs (op_num - current_pr_int ) < 5000 :
290- valid_pr_ints .append (op_num )
291+ valid_pr_ints .append (op_num )
291292
292293 # Sorting results numerically (100 > 99)
293294 original_pr_ints = sorted (valid_pr_ints )
@@ -311,12 +312,22 @@ def get_prs_from_log(log_output, prs_base=None, log_file=None, scan_depth=100):
311312 "cherry_pick_pr" : pr_num_str ,
312313 }
313314 else :
314- # If we can't resolve this number as a PR, do not fabricate an entry.
315- # It may be an issue reference or an inaccessible/deleted PR .
315+ # If we can't resolve this number as a PR (e.g., issue reference or inaccessible/deleted PR),
316+ # do not invent new authors, but still attribute it to the known meta-PR to avoid losing credit .
316317 log_event (
317- f" - Warning: Unable to resolve PR #{ op_num_str } via GitHub CLI; skipping ." ,
318+ f" - Warning: Unable to resolve PR #{ op_num_str } via GitHub CLI; attributing via meta-PR # { pr_num_str } ." ,
318319 log_file ,
319320 )
321+ if op_num_str not in all_prs :
322+ fallback_title = (
323+ f"Unresolved sub-PR #{ op_num_str } (attributed via meta-PR #{ pr_num_str } )"
324+ )
325+ all_prs [op_num_str ] = {
326+ "title" : fallback_title ,
327+ "authors" : list (extract_authors_from_pr (details )),
328+ "cherry_pick_commit" : commit_id ,
329+ "cherry_pick_pr" : pr_num_str ,
330+ }
320331 else :
321332 log_event (" - No sub-PRs found, treating meta-PR as a normal PR." , log_file )
322333 all_prs [pr_num_str ] = {
@@ -359,7 +370,7 @@ def main():
359370 parser .add_argument ("--base" , default = "origin/rel-1.23.2" , help = "Base branch/commit to compare from" )
360371 parser .add_argument ("--target" , default = "origin/rel-1.24.1" , help = "Target branch/commit to compare to" )
361372 parser .add_argument ("--dir" , default = "contributors" , help = "Output directory for reports and logs" )
362- parser .add_argument ("--scan-depth" , type = int , default = 100 , help = "Depth to scan base/meta-PRs for deduplication" )
373+ parser .add_argument ("--scan-depth" , type = int , default = 200 , help = "Depth to scan base/meta-PRs for deduplication" )
363374 args = parser .parse_args ()
364375
365376 # Early validation
@@ -449,15 +460,15 @@ def main():
449460 if not is_bot (final_author ) and not is_invalid (final_author ):
450461 consolidated_contributors [author_lower ] = consolidated_contributors .get (author_lower , 0 ) + count
451462
452- # Sort human contributors by count descending for summary
453- sorted_contributors = sorted (consolidated_contributors .items (), key = lambda x : x [1 ], reverse = True )
463+ # Sort human contributors by count descending, then alphabetically by identity for determinism
464+ sorted_contributors = sorted (consolidated_contributors .items (), key = lambda x : ( - x [1 ], x [ 0 ]) )
454465
455466 log_event ("\n --- Summary ---" , log_file )
456- # Prefix only identified github logins (no spaces) and format as markdown links
467+ # Prefix only identified github logins and format as markdown links
457468 output_users = []
458469 for login_lower , _login in sorted_contributors :
459470 u = display_names [login_lower ]
460- if " " not in u :
471+ if login_lower in VERIFIED_LOGINS :
461472 output_users .append (f"[@{ u } ](https://github.com/{ u } )" )
462473 else :
463474 output_users .append (u )
0 commit comments