1+ #!/usr/bin/env python
2+ """
3+ Script to list contributors since a specified tag and identify first-time contributors.
4+
5+ This script queries the git history to find all contributors since a given tag.
6+ It then checks if any of these contributors have made prior contributions
7+ (based on their email address) before that tag.
8+ """
9+
10+ import subprocess
11+ import sys
12+ import argparse
13+ from typing import List , Set , Tuple , Dict
14+
def get_git_output(args: List[str]) -> str:
    """Run a git command and return its standard output as text.

    Standard error is captured separately from standard output, so anything
    git writes to stderr during an otherwise successful run (warnings, hints)
    never ends up in the returned text, which callers split into records
    line by line.

    Args:
        args: The full command line as a list of strings, e.g.
            ``['git', 'log', '--format=%aE']``.

    Returns:
        The command's standard output decoded as UTF-8, with leading and
        trailing whitespace stripped. Undecodable bytes are replaced instead
        of raising.

    Raises:
        SystemExit: With status 1 if the executable cannot be started or the
            command exits with a non-zero status. A diagnostic, including
            whatever the command printed, is written to stderr first.
    """
    try:
        result = subprocess.run(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except OSError as e:
        # Typically the executable is not installed or not on PATH.
        print(f"Error: could not run {' '.join(args)}: {e}", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"Error running git command: {' '.join(args)}", file=sys.stderr)
        # Show both streams; either one may carry the explanation.
        for stream in (e.stdout, e.stderr):
            if stream:
                print(
                    stream.decode('utf-8', errors='replace').rstrip(),
                    file=sys.stderr,
                )
        sys.exit(1)

    # errors='replace' avoids crashing on commit metadata that is not UTF-8.
    return result.stdout.decode('utf-8', errors='replace').strip()
36+
def get_contributors_since(tag: str) -> Set[Tuple[str, str]]:
    """Collect the distinct authors of all commits in ``tag..HEAD``.

    Args:
        tag: The git tag (or other revision) to compare against, e.g. '4.7.0'.

    Returns:
        A set of ``(name, email)`` tuples, one per distinct author identity
        found in the range. Both fields are whitespace-stripped; either may
        be an empty string if the commit recorded none. The set is empty when
        there are no commits after ``tag``.
    """
    # %aN / %aE are the author name and email as git reports them (mailmap
    # applied, per the git pretty-format documentation). %x00 places a NUL
    # byte between the fields: unlike a printable delimiter such as '|', a
    # NUL cannot occur inside a name or an email, so the split below is
    # unambiguous.
    cmd = ['git', 'log', f'{tag}..HEAD', '--format=%aN%x00%aE']
    output = get_git_output(cmd)

    contributors: Set[Tuple[str, str]] = set()
    for line in output.split('\n'):
        name, separator, email = line.partition('\x00')
        if not separator:
            # Blank line (e.g. empty output) or not a "name NUL email" record.
            continue
        contributors.add((name.strip(), email.strip()))
    return contributors
61+
def get_prior_emails(tag: str) -> Set[str]:
    """Return the author emails of every commit reachable from ``tag``.

    A progress notice is printed to stdout before the history is read.

    Args:
        tag: The git tag whose history is inspected.

    Returns:
        The distinct author email addresses found in that history, each
        stripped of surrounding whitespace and lowercased so that membership
        checks are case-insensitive. Blank lines are skipped.
    """
    print("Gathering prior contributors (this may take a moment)...")
    raw_log = get_git_output(['git', 'log', tag, '--format=%aE'])

    # One email per output line; drop empties, normalise case.
    stripped_lines = (entry.strip() for entry in raw_log.split('\n'))
    return {address.lower() for address in stripped_lines if address}
83+
def main() -> None:
    """Parse the command line and print the contributor report.

    Takes one positional argument, the tag to start from. Prints every
    contributor with commits in ``<tag>..HEAD``, sorted by name, and marks
    those for whom none of their author emails appear in the history
    reachable from the tag.

    Raises:
        SystemExit: With status 1 (after a message on stderr) if the given
            tag does not resolve to a commit. argparse may also exit on
            invalid command-line usage.
    """
    parser = argparse.ArgumentParser(
        description="List contributors since a specified tag and identify first-time contributors."
    )
    parser.add_argument("tag", help="The git tag to start from (e.g., 4.7.0)")
    args = parser.parse_args()

    # Verify the tag resolves to a commit. Without --verify, rev-parse also
    # succeeds for arguments that are merely paths in the working tree, which
    # would let a non-revision through and fail later inside `git log`.
    verify_cmd = [
        'git', 'rev-parse', '--verify', '--quiet', f'{args.tag}^{{commit}}',
    ]
    try:
        subprocess.check_call(
            verify_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
    except subprocess.CalledProcessError:
        print(f"Error: Tag '{args.tag}' not found.", file=sys.stderr)
        sys.exit(1)

    recent_contributors = get_contributors_since(args.tag)
    if not recent_contributors:
        print(f"No contributors found since {args.tag}.")
        return

    prior_emails = get_prior_emails(args.tag)

    # Map display name -> "is a first-time contributor". One person may have
    # committed under several emails; they count as new only if *every* email
    # seen for that display name is absent from the prior history.
    contributor_status: Dict[str, bool] = {}
    for name, email in recent_contributors:
        display_name = name or email
        is_new_email = email.lower() not in prior_emails
        contributor_status[display_name] = (
            contributor_status.get(display_name, True) and is_new_email
        )

    # Case-insensitive ordering; the exact name breaks ties so the output does
    # not depend on set/dict iteration order.
    display_list = sorted(
        contributor_status.items(),
        key=lambda item: (item[0].lower(), item[0]),
    )

    # Width of the name column used to align the first-contribution notes.
    max_length = max((len(name) for name, _ in display_list), default=0)

    print(f"\nContributors since {args.tag}:")
    print("-" * 40)

    for display_name, is_new in display_list:
        if is_new:
            print(f"{display_name:<{max_length}} Made their first contribution")
        else:
            print(display_name)
144+
# Run the report only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or git calls.
if __name__ == "__main__":
    main()
0 commit comments