 
 import re
 from pathlib import Path
-from urllib.parse import urlparse
+from urllib.parse import urlparse, quote
 
 import aiohttp
 from pydantic import BaseModel, Field
@@ -22,10 +22,14 @@ class ExtractUpstreamRepositoryInput(BaseModel):
 class UpstreamRepository(BaseModel):
     """Represents an upstream git repository and commit information."""
     repo_url: str = Field(description="Git clone URL of the upstream repository")
-    commit_hash: str = Field(description="Commit hash to cherry-pick")
+    commit_hash: str = Field(description="Commit hash to cherry-pick (for single commits) or target ref (for compare URLs)")
     original_url: str = Field(description="Original upstream fix URL")
     pr_number: str | None = Field(default=None, description="Pull request or merge request number if this is a PR/MR URL, None otherwise")
     is_pr: bool = Field(default=False, description="True if this is a pull request or merge request URL")
+    is_compare: bool = Field(default=False, description="True if this is a compare/diff URL between two refs")
+    base_ref: str | None = Field(default=None, description="Base reference for compare URLs (e.g., v3.7.0)")
+    target_ref: str | None = Field(default=None, description="Target reference for compare URLs (e.g., v3.7.1)")
+    compare_commits: list[str] | None = Field(default=None, description="List of commit hashes in the compare range (ordered oldest to newest)")
 
 
 class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
@@ -35,15 +39,17 @@ class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
 class ExtractUpstreamRepositoryTool(Tool[ExtractUpstreamRepositoryInput, ToolRunOptions, ExtractUpstreamRepositoryOutput]):
     name = "extract_upstream_repository"
     description = """
-    Extract upstream repository URL and commit hash from a commit or pull request URL.
+    Extract upstream repository URL and commit information from a commit, pull request, or compare URL.
 
     Supports common formats:
     - GitHub/GitLab commit: https://domain.com/owner/repo/commit/hash or /-/commit/hash
     - GitHub/GitLab PR: https://domain.com/owner/repo/pull/123 or /merge_requests/123
+    - GitHub/GitLab compare: https://domain.com/owner/repo/compare/ref1...ref2 or /-/compare/ref1...ref2
     - Query param formats: ?id=hash or ?h=hash (for cgit/gitweb)
 
     For pull requests, fetches the head commit SHA from the PR.
-    Returns the git clone URL and commit hash needed for cherry-picking.
+    For compare URLs, fetches all commits in the range and returns them ordered oldest to newest.
+    Returns the git clone URL and commit information needed for cherry-picking.
     """
     input_schema = ExtractUpstreamRepositoryInput
 
@@ -59,7 +65,7 @@ async def _run(
         try:
             parsed = urlparse(tool_input.upstream_fix_url)
 
-            # Check if this is a pull request URL and extract owner/repo/PR number in one match
+            # Check if this is a pull request URL and extract owner/repo/PR number in one match.
             pr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/pull/(\d+)(?:\.patch)?', parsed.path)
             mr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/-/merge_requests/(\d+)(?:\.patch)?', parsed.path)
 
@@ -117,50 +123,103 @@ async def _run(
                     )
                 )
 
-            else:
+            # Try to match compare URL
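+            # The regex captures owner, repo, the base ref, the ".." or "..." separator, and the target ref; "(?:-/)?" also matches GitLab-style /-/compare/ paths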
+            compare_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/(?:-/)?compare/(.+?)(\.{2,3})([^\s\?#]+)', parsed.path)
+            if compare_match:
+                # Handle GitHub/GitLab compare URLs
+                owner = compare_match.group(1)
+                repo = compare_match.group(2)
+                base_ref = compare_match.group(3)
+                # Group 4 is the separator (".." or "..."); it is not used further: the GitHub API call below always uses "..." and the GitLab call passes from/to params
+                target_ref = compare_match.group(5).removesuffix('.patch')  # Drop a trailing .patch suffix (rstrip('.patch') would strip individual characters and mangle refs)
+                # Construct repository URL
+                repo_url = f"https://{parsed.netloc}/{owner}/{repo}.git"
+                # Fetch compare information to get the list of commits
+                headers = {
+                    'Accept': 'application/json',
+                    'User-Agent': 'RHEL-Backport-Agent'
+                }
+                commits = []
+                commit_hash = target_ref
+                try:
+                    async with aiohttp.ClientSession() as session:
+                        # Determine if this is GitHub or GitLab based on the URL pattern
+                        if '/-/' not in parsed.path:
+                            # GitHub API - URL-encode refs to handle special characters like / in branch names
+                            api_url = f"https://api.github.com/repos/{owner}/{repo}/compare/{quote(base_ref, safe='')}...{quote(target_ref, safe='')}"
+                            async with session.get(api_url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
+                                response.raise_for_status()
+                                data = await response.json()
+                                # GitHub: commits are in 'commits' array (oldest first)
+                                commits = [commit['sha'] for commit in data.get('commits', [])]
+                        else:
+                            # GitLab API - use params dict for automatic URL encoding
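+                            # The URL-encoded "owner%2Frepo" path is accepted by the GitLab API in place of a numeric project ID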
+                            api_url = f"https://{parsed.netloc}/api/v4/projects/{owner}%2F{repo}/repository/compare"
+                            params = {'from': base_ref, 'to': target_ref}
+                            async with session.get(api_url, params=params, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
+                                response.raise_for_status()
+                                data = await response.json()
+                                # GitLab: commits are in 'commits' array (newest first)
+                                commits = [commit['id'] for commit in data.get('commits', [])]
+                                # Reverse to get oldest first
+                                commits = list(reversed(commits))
+                    # Use the last commit (newest) as the commit_hash
+                    commit_hash = commits[-1] if commits else target_ref
+                except (aiohttp.ClientError, KeyError) as e:
+                    # If API fails, fall back to using target_ref as commit_hash
+                    # This allows the tool to still work even if API is unavailable
+                    commit_hash = target_ref
+                    commits = []
+                # Return with compare information
+                return ExtractUpstreamRepositoryOutput(
+                    result=UpstreamRepository(
+                        repo_url=repo_url,
+                        commit_hash=commit_hash,
+                        original_url=tool_input.upstream_fix_url,
+                        pr_number=None,
+                        is_pr=False,
+                        is_compare=True,
+                        base_ref=base_ref,
+                        target_ref=target_ref,
+                        compare_commits=commits if commits else None
+                    )
+                )
+            # Try to match regular commit URL or query parameter format
+            repo_path = None
+            commit_hash = None
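+            # Capture the repo path plus a 7-40 character hex hash from /commit/<hash>, /-/commit/<hash>, or /commits/<hash>, with an optional .patch suffix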
+            commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
+            if commit_match:
                 # Handle regular commit URLs
-                commit_hash = None
-                repo_path = None
-
-                # Pattern 1: /commit/hash or /-/commit/hash in the path (capture repo path and commit hash together)
-                commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
-                if commit_match:
-                    repo_path = commit_match.group(1).strip('/')
-                    commit_hash = commit_match.group(2)
-
-                # Pattern 2: query parameters (?id=hash or &h=hash for cgit/gitweb, ?p=repo for repo path)
-                if not commit_hash and parsed.query:
-                    query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
-                    if query_match:
-                        commit_hash = query_match.group(1)
-                        # Extract repo from ?p= parameter
-                        repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
-                        if repo_query_match:
-                            repo_path = repo_query_match.group(1)
-
-                if not commit_hash:
-                    raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
-
+                repo_path = commit_match.group(1).strip('/')
+                commit_hash = commit_match.group(2)
+            elif parsed.query:
+                # Handle query parameter format (cgit/gitweb)
+                query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
+                if query_match:
+                    commit_hash = query_match.group(1)
+                    repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
+                    if repo_query_match:
+                        repo_path = repo_query_match.group(1)
+            if commit_hash:
                 if not repo_path:
                     raise ToolError(f"Could not extract repository path from URL: {tool_input.upstream_fix_url}")
-
                 # Construct clone URL
                 scheme = parsed.scheme or 'https'
                 repo_url = f"{scheme}://{parsed.netloc}/{repo_path}"
                 if not repo_url.endswith('.git'):
                     repo_url += '.git'
-
-            # Return for non-PR commits
-            return ExtractUpstreamRepositoryOutput(
-                result=UpstreamRepository(
-                    repo_url=repo_url,
-                    commit_hash=commit_hash,
-                    original_url=tool_input.upstream_fix_url,
-                    pr_number=None,
-                    is_pr=False
+                # Return for non-PR/non-compare URLs
+                return ExtractUpstreamRepositoryOutput(
+                    result=UpstreamRepository(
+                        repo_url=repo_url,
+                        commit_hash=commit_hash,
+                        original_url=tool_input.upstream_fix_url,
+                        pr_number=None,
+                        is_pr=False
+                    )
                 )
-            )
-
+            # If we got here, we couldn't match any pattern
+            raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
         except ToolError:
             raise
         except Exception as e: