Skip to content

Commit cdd4654

Browse files
committed
Support compare URLs in extract_upstream_repository
Fetch commit lists from compare URLs for cherry-pick workflow.
1 parent b296d5b commit cdd4654

File tree

1 file changed

+128
-39
lines changed

1 file changed

+128
-39
lines changed

agents/tools/upstream_tools.py

Lines changed: 128 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,14 @@ class ExtractUpstreamRepositoryInput(BaseModel):
2222
class UpstreamRepository(BaseModel):
2323
"""Represents an upstream git repository and commit information."""
2424
repo_url: str = Field(description="Git clone URL of the upstream repository")
25-
commit_hash: str = Field(description="Commit hash to cherry-pick")
25+
commit_hash: str = Field(description="Commit hash to cherry-pick (for single commits) or target ref (for compare URLs)")
2626
original_url: str = Field(description="Original upstream fix URL")
2727
pr_number: str | None = Field(default=None, description="Pull request or merge request number if this is a PR/MR URL, None otherwise")
2828
is_pr: bool = Field(default=False, description="True if this is a pull request or merge request URL")
29+
is_compare: bool = Field(default=False, description="True if this is a compare/diff URL between two refs")
30+
base_ref: str | None = Field(default=None, description="Base reference for compare URLs (e.g., v3.7.0)")
31+
target_ref: str | None = Field(default=None, description="Target reference for compare URLs (e.g., v3.7.1)")
32+
compare_commits: list[str] | None = Field(default=None, description="List of commit hashes in the compare range (ordered oldest to newest)")
2933

3034

3135
class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
@@ -35,15 +39,17 @@ class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
3539
class ExtractUpstreamRepositoryTool(Tool[ExtractUpstreamRepositoryInput, ToolRunOptions, ExtractUpstreamRepositoryOutput]):
3640
name = "extract_upstream_repository"
3741
description = """
38-
Extract upstream repository URL and commit hash from a commit or pull request URL.
42+
Extract upstream repository URL and commit information from a commit, pull request, or compare URL.
3943
4044
Supports common formats:
4145
- GitHub/GitLab commit: https://domain.com/owner/repo/commit/hash or /-/commit/hash
4246
- GitHub/GitLab PR: https://domain.com/owner/repo/pull/123 or /merge_requests/123
47+
- GitHub/GitLab compare: https://domain.com/owner/repo/compare/ref1...ref2 or /-/compare/ref1...ref2
4348
- Query param formats: ?id=hash or ?h=hash (for cgit/gitweb)
4449
4550
For pull requests, fetches the head commit SHA from the PR.
46-
Returns the git clone URL and commit hash needed for cherry-picking.
51+
For compare URLs, fetches all commits in the range and returns them ordered oldest to newest.
52+
Returns the git clone URL and commit information needed for cherry-picking.
4753
"""
4854
input_schema = ExtractUpstreamRepositoryInput
4955

@@ -59,7 +65,7 @@ async def _run(
5965
try:
6066
parsed = urlparse(tool_input.upstream_fix_url)
6167

62-
# Check if this is a pull request URL and extract owner/repo/PR number in one match
68+
# Check if this is a pull request URL and extract owner/repo/PR number in one match.
6369
pr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/pull/(\d+)(?:\.patch)?', parsed.path)
6470
mr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/-/merge_requests/(\d+)(?:\.patch)?', parsed.path)
6571

@@ -117,49 +123,132 @@ async def _run(
117123
)
118124
)
119125

120-
else:
121-
# Handle regular commit URLs
122-
commit_hash = None
123-
repo_path = None
124-
125-
# Pattern 1: /commit/hash or /-/commit/hash in the path (capture repo path and commit hash together)
126-
commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
127-
if commit_match:
128-
repo_path = commit_match.group(1).strip('/')
129-
commit_hash = commit_match.group(2)
130-
131-
# Pattern 2: query parameters (?id=hash or &h=hash for cgit/gitweb, ?p=repo for repo path)
132-
if not commit_hash and parsed.query:
133-
query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
134-
if query_match:
135-
commit_hash = query_match.group(1)
136-
# Extract repo from ?p= parameter
137-
repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
138-
if repo_query_match:
139-
repo_path = repo_query_match.group(1)
140-
141-
if not commit_hash:
142-
raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
143-
144-
if not repo_path:
145-
raise ToolError(f"Could not extract repository path from URL: {tool_input.upstream_fix_url}")
126+
# Try to match compare URL
127+
compare_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/(?:-/)?compare/(.+?)(\.{2,3})([^\s\?#]+)', parsed.path)
128+
if compare_match:
129+
# Handle GitHub/GitLab Compare URLs
130+
owner = compare_match.group(1)
131+
repo = compare_match.group(2)
132+
base_ref = compare_match.group(3)
133+
# Group 4 is the separator (.. or ...) - not used, we always use ... for APIs
134+
target_ref = compare_match.group(5).rstrip('.patch') # Remove .patch if present
135+
136+
# Construct repository URL
137+
repo_url = f"https://{parsed.netloc}/{owner}/{repo}.git"
138+
139+
# Determine if this is GitHub or GitLab based on the URL pattern
140+
is_github = '/-/' not in parsed.path
141+
142+
# Fetch compare information to get the list of commits
143+
if is_github:
144+
# GitHub API - note: GitHub uses ... for compare
145+
api_url = f"https://api.github.com/repos/{owner}/{repo}/compare/{base_ref}...{target_ref}"
146+
else:
147+
# GitLab API
148+
api_url = f"https://{parsed.netloc}/api/v4/projects/{owner}%2F{repo}/repository/compare?from={base_ref}&to={target_ref}"
149+
150+
headers = {
151+
'Accept': 'application/json',
152+
'User-Agent': 'RHEL-Backport-Agent'
153+
}
154+
155+
try:
156+
async with aiohttp.ClientSession() as session:
157+
async with session.get(api_url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
158+
response.raise_for_status()
159+
data = await response.json()
160+
161+
# Extract commits from API response
162+
if is_github:
163+
# GitHub: commits are in 'commits' array (oldest first)
164+
commits = [commit['sha'] for commit in data.get('commits', [])]
165+
else:
166+
# GitLab: commits are in 'commits' array (newest first)
167+
commits = [commit['id'] for commit in data.get('commits', [])]
168+
# Reverse to get oldest first
169+
commits = list(reversed(commits))
170+
171+
# Use the last commit (newest) as the default commit_hash
172+
commit_hash = commits[-1] if commits else target_ref
173+
174+
except (aiohttp.ClientError, KeyError) as e:
175+
# If API fails, fall back to using target_ref as commit_hash
176+
# This allows the tool to still work even if API is unavailable
177+
commit_hash = target_ref
178+
commits = []
179+
180+
# Return with compare information
181+
return ExtractUpstreamRepositoryOutput(
182+
result=UpstreamRepository(
183+
repo_url=repo_url,
184+
commit_hash=commit_hash,
185+
original_url=tool_input.upstream_fix_url,
186+
pr_number=None,
187+
is_pr=False,
188+
is_compare=True,
189+
base_ref=base_ref,
190+
target_ref=target_ref,
191+
compare_commits=commits if commits else None
192+
)
193+
)
146194

195+
# Try to match regular commit URL
196+
commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
197+
if commit_match:
198+
repo_path = commit_match.group(1).strip('/')
199+
commit_hash = commit_match.group(2)
200+
147201
# Construct clone URL
148202
scheme = parsed.scheme or 'https'
149203
repo_url = f"{scheme}://{parsed.netloc}/{repo_path}"
150204
if not repo_url.endswith('.git'):
151205
repo_url += '.git'
152206

153-
# Return for non-PR commits
154-
return ExtractUpstreamRepositoryOutput(
155-
result=UpstreamRepository(
156-
repo_url=repo_url,
157-
commit_hash=commit_hash,
158-
original_url=tool_input.upstream_fix_url,
159-
pr_number=None,
160-
is_pr=False
207+
# Return for regular commit URLs
208+
return ExtractUpstreamRepositoryOutput(
209+
result=UpstreamRepository(
210+
repo_url=repo_url,
211+
commit_hash=commit_hash,
212+
original_url=tool_input.upstream_fix_url,
213+
pr_number=None,
214+
is_pr=False
215+
)
161216
)
162-
)
217+
218+
# Try to match query parameter format (fourth priority - cgit/gitweb)
219+
if parsed.query:
220+
query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
221+
if query_match:
222+
commit_hash = query_match.group(1)
223+
224+
# Extract repo from ?p= parameter
225+
repo_path = None
226+
repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
227+
if repo_query_match:
228+
repo_path = repo_query_match.group(1)
229+
230+
if not repo_path:
231+
raise ToolError(f"Could not extract repository path from URL: {tool_input.upstream_fix_url}")
232+
233+
# Construct clone URL
234+
scheme = parsed.scheme or 'https'
235+
repo_url = f"{scheme}://{parsed.netloc}/{repo_path}"
236+
if not repo_url.endswith('.git'):
237+
repo_url += '.git'
238+
239+
# Return for query parameter URLs
240+
return ExtractUpstreamRepositoryOutput(
241+
result=UpstreamRepository(
242+
repo_url=repo_url,
243+
commit_hash=commit_hash,
244+
original_url=tool_input.upstream_fix_url,
245+
pr_number=None,
246+
is_pr=False
247+
)
248+
)
249+
250+
# If we got here, we couldn't match any pattern
251+
raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
163252

164253
except ToolError:
165254
raise

0 commit comments

Comments
 (0)