@@ -22,10 +22,14 @@ class ExtractUpstreamRepositoryInput(BaseModel):
class UpstreamRepository(BaseModel):
    """Represents an upstream git repository and commit information."""

    # --- Fields filled in for every kind of upstream URL -------------------
    repo_url: str = Field(
        description="Git clone URL of the upstream repository",
    )
    # For compare URLs the tool stores the newest commit of the range here,
    # or the raw target ref when the commit list could not be fetched.
    commit_hash: str = Field(
        description="Commit hash to cherry-pick (for single commits) or target ref (for compare URLs) ",
    )
    original_url: str = Field(
        description="Original upstream fix URL",
    )

    # --- Pull request / merge request URLs ---------------------------------
    pr_number: str | None = Field(
        default=None,
        description="Pull request or merge request number if this is a PR/MR URL, None otherwise",
    )
    is_pr: bool = Field(
        default=False,
        description="True if this is a pull request or merge request URL",
    )

    # --- Compare URLs (base...target) --------------------------------------
    # All of these keep their defaults for commit and PR/MR URLs.
    is_compare: bool = Field(
        default=False,
        description="True if this is a compare/diff URL between two refs",
    )
    base_ref: str | None = Field(
        default=None,
        description="Base reference for compare URLs (e.g., v3.7.0)",
    )
    target_ref: str | None = Field(
        default=None,
        description="Target reference for compare URLs (e.g., v3.7.1)",
    )
    # None (rather than an empty list) when no commits were retrieved.
    compare_commits: list[str] | None = Field(
        default=None,
        description="List of commit hashes in the compare range (ordered oldest to newest)",
    )
2933
3034
3135class ExtractUpstreamRepositoryOutput (JSONToolOutput [UpstreamRepository ]):
@@ -35,15 +39,17 @@ class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
3539class ExtractUpstreamRepositoryTool (Tool [ExtractUpstreamRepositoryInput , ToolRunOptions , ExtractUpstreamRepositoryOutput ]):
3640 name = "extract_upstream_repository"
3741 description = """
38- Extract upstream repository URL and commit hash from a commit or pull request URL.
42+ Extract upstream repository URL and commit information from a commit, pull request, or compare URL.
3943
4044 Supports common formats:
4145 - GitHub/GitLab commit: https://domain.com/owner/repo/commit/hash or /-/commit/hash
4246 - GitHub/GitLab PR: https://domain.com/owner/repo/pull/123 or /merge_requests/123
47+ - GitHub/GitLab compare: https://domain.com/owner/repo/compare/ref1...ref2 or /-/compare/ref1...ref2
4348 - Query param formats: ?id=hash or ?h=hash (for cgit/gitweb)
4449
4550 For pull requests, fetches the head commit SHA from the PR.
46- Returns the git clone URL and commit hash needed for cherry-picking.
51+ For compare URLs, fetches all commits in the range and returns them ordered oldest to newest.
52+ Returns the git clone URL and commit information needed for cherry-picking.
4753 """
4854 input_schema = ExtractUpstreamRepositoryInput
4955
@@ -59,7 +65,7 @@ async def _run(
5965 try :
6066 parsed = urlparse (tool_input .upstream_fix_url )
6167
62- # Check if this is a pull request URL and extract owner/repo/PR number in one match
68+ # Check if this is a pull request URL and extract owner/repo/PR number in one match.
6369 pr_match = re .search (r'/([\w\-\.]+)/([\w\-\.]+)/pull/(\d+)(?:\.patch)?' , parsed .path )
6470 mr_match = re .search (r'/([\w\-\.]+)/([\w\-\.]+)/-/merge_requests/(\d+)(?:\.patch)?' , parsed .path )
6571
@@ -117,49 +123,132 @@ async def _run(
117123 )
118124 )
119125
120- else :
121- # Handle regular commit URLs
122- commit_hash = None
123- repo_path = None
124-
125- # Pattern 1: /commit/hash or /-/commit/hash in the path (capture repo path and commit hash together)
126- commit_match = re .search (r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)' , parsed .path )
127- if commit_match :
128- repo_path = commit_match .group (1 ).strip ('/' )
129- commit_hash = commit_match .group (2 )
130-
131- # Pattern 2: query parameters (?id=hash or &h=hash for cgit/gitweb, ?p=repo for repo path)
132- if not commit_hash and parsed .query :
133- query_match = re .search (r'(?:id|h)=([a-f0-9]{7,40})' , parsed .query )
134- if query_match :
135- commit_hash = query_match .group (1 )
136- # Extract repo from ?p= parameter
137- repo_query_match = re .search (r'[?&]p=([^;&]+)' , parsed .query )
138- if repo_query_match :
139- repo_path = repo_query_match .group (1 )
140-
141- if not commit_hash :
142- raise ToolError (f"Could not extract commit hash from URL: { tool_input .upstream_fix_url } " )
143-
144- if not repo_path :
145- raise ToolError (f"Could not extract repository path from URL: { tool_input .upstream_fix_url } " )
126+ # Try to match compare URL
127+ compare_match = re .search (r'/([\w\-\.]+)/([\w\-\.]+)/(?:-/)?compare/(.+?)(\.{2,3})([^\s\?#]+)' , parsed .path )
128+ if compare_match :
129+ # Handle GitHub/GitLab Compare URLs
130+ owner = compare_match .group (1 )
131+ repo = compare_match .group (2 )
132+ base_ref = compare_match .group (3 )
133+ # Group 4 is the separator (.. or ...) - not used, we always use ... for APIs
134+ target_ref = compare_match .group (5 ).rstrip ('.patch' ) # Remove .patch if present
135+
136+ # Construct repository URL
137+ repo_url = f"https://{ parsed .netloc } /{ owner } /{ repo } .git"
138+
139+ # Determine if this is GitHub or GitLab based on the URL pattern
140+ is_github = '/-/' not in parsed .path
141+
142+ # Fetch compare information to get the list of commits
143+ if is_github :
144+ # GitHub API - note: GitHub uses ... for compare
145+ api_url = f"https://api.github.com/repos/{ owner } /{ repo } /compare/{ base_ref } ...{ target_ref } "
146+ else :
147+ # GitLab API
148+ api_url = f"https://{ parsed .netloc } /api/v4/projects/{ owner } %2F{ repo } /repository/compare?from={ base_ref } &to={ target_ref } "
149+
150+ headers = {
151+ 'Accept' : 'application/json' ,
152+ 'User-Agent' : 'RHEL-Backport-Agent'
153+ }
154+
155+ try :
156+ async with aiohttp .ClientSession () as session :
157+ async with session .get (api_url , headers = headers , timeout = aiohttp .ClientTimeout (total = 15 )) as response :
158+ response .raise_for_status ()
159+ data = await response .json ()
160+
161+ # Extract commits from API response
162+ if is_github :
163+ # GitHub: commits are in 'commits' array (oldest first)
164+ commits = [commit ['sha' ] for commit in data .get ('commits' , [])]
165+ else :
166+ # GitLab: commits are in 'commits' array (newest first)
167+ commits = [commit ['id' ] for commit in data .get ('commits' , [])]
168+ # Reverse to get oldest first
169+ commits = list (reversed (commits ))
170+
171+ # Use the last commit (newest) as the default commit_hash
172+ commit_hash = commits [- 1 ] if commits else target_ref
173+
174+ except (aiohttp .ClientError , KeyError ) as e :
175+ # If API fails, fall back to using target_ref as commit_hash
176+ # This allows the tool to still work even if API is unavailable
177+ commit_hash = target_ref
178+ commits = []
179+
180+ # Return with compare information
181+ return ExtractUpstreamRepositoryOutput (
182+ result = UpstreamRepository (
183+ repo_url = repo_url ,
184+ commit_hash = commit_hash ,
185+ original_url = tool_input .upstream_fix_url ,
186+ pr_number = None ,
187+ is_pr = False ,
188+ is_compare = True ,
189+ base_ref = base_ref ,
190+ target_ref = target_ref ,
191+ compare_commits = commits if commits else None
192+ )
193+ )
146194
195+ # Try to match regular commit URL
196+ commit_match = re .search (r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)' , parsed .path )
197+ if commit_match :
198+ repo_path = commit_match .group (1 ).strip ('/' )
199+ commit_hash = commit_match .group (2 )
200+
147201 # Construct clone URL
148202 scheme = parsed .scheme or 'https'
149203 repo_url = f"{ scheme } ://{ parsed .netloc } /{ repo_path } "
150204 if not repo_url .endswith ('.git' ):
151205 repo_url += '.git'
152206
153- # Return for non-PR commits
154- return ExtractUpstreamRepositoryOutput (
155- result = UpstreamRepository (
156- repo_url = repo_url ,
157- commit_hash = commit_hash ,
158- original_url = tool_input .upstream_fix_url ,
159- pr_number = None ,
160- is_pr = False
207+ # Return for regular commit URLs
208+ return ExtractUpstreamRepositoryOutput (
209+ result = UpstreamRepository (
210+ repo_url = repo_url ,
211+ commit_hash = commit_hash ,
212+ original_url = tool_input .upstream_fix_url ,
213+ pr_number = None ,
214+ is_pr = False
215+ )
161216 )
162- )
217+
218+ # Try to match query parameter format (fourth priority - cgit/gitweb)
219+ if parsed .query :
220+ query_match = re .search (r'(?:id|h)=([a-f0-9]{7,40})' , parsed .query )
221+ if query_match :
222+ commit_hash = query_match .group (1 )
223+
224+ # Extract repo from ?p= parameter
225+ repo_path = None
226+ repo_query_match = re .search (r'[?&]p=([^;&]+)' , parsed .query )
227+ if repo_query_match :
228+ repo_path = repo_query_match .group (1 )
229+
230+ if not repo_path :
231+ raise ToolError (f"Could not extract repository path from URL: { tool_input .upstream_fix_url } " )
232+
233+ # Construct clone URL
234+ scheme = parsed .scheme or 'https'
235+ repo_url = f"{ scheme } ://{ parsed .netloc } /{ repo_path } "
236+ if not repo_url .endswith ('.git' ):
237+ repo_url += '.git'
238+
239+ # Return for query parameter URLs
240+ return ExtractUpstreamRepositoryOutput (
241+ result = UpstreamRepository (
242+ repo_url = repo_url ,
243+ commit_hash = commit_hash ,
244+ original_url = tool_input .upstream_fix_url ,
245+ pr_number = None ,
246+ is_pr = False
247+ )
248+ )
249+
250+ # If we got here, we couldn't match any pattern
251+ raise ToolError (f"Could not extract commit hash from URL: { tool_input .upstream_fix_url } " )
163252
164253 except ToolError :
165254 raise
0 commit comments