Skip to content

Commit 86ef5d6

Browse files
committed
Support compare URLs in extract_upstream_repository
Fetch commit lists from compare URLs for cherry-pick workflow.
1 parent e53874d commit 86ef5d6

File tree

1 file changed

+100
-40
lines changed

1 file changed

+100
-40
lines changed

agents/tools/upstream_tools.py

Lines changed: 100 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import re
44
from pathlib import Path
5-
from urllib.parse import urlparse
5+
from urllib.parse import urlparse, quote
66

77
import aiohttp
88
from pydantic import BaseModel, Field
@@ -22,10 +22,14 @@ class ExtractUpstreamRepositoryInput(BaseModel):
2222
class UpstreamRepository(BaseModel):
2323
"""Represents an upstream git repository and commit information."""
2424
repo_url: str = Field(description="Git clone URL of the upstream repository")
25-
commit_hash: str = Field(description="Commit hash to cherry-pick")
25+
commit_hash: str = Field(description="Commit hash to cherry-pick (for single commits) or target ref (for compare URLs)")
2626
original_url: str = Field(description="Original upstream fix URL")
2727
pr_number: str | None = Field(default=None, description="Pull request or merge request number if this is a PR/MR URL, None otherwise")
2828
is_pr: bool = Field(default=False, description="True if this is a pull request or merge request URL")
29+
is_compare: bool = Field(default=False, description="True if this is a compare/diff URL between two refs")
30+
base_ref: str | None = Field(default=None, description="Base reference for compare URLs (e.g., v3.7.0)")
31+
target_ref: str | None = Field(default=None, description="Target reference for compare URLs (e.g., v3.7.1)")
32+
compare_commits: list[str] | None = Field(default=None, description="List of commit hashes in the compare range (ordered oldest to newest)")
2933

3034

3135
class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
@@ -35,15 +39,17 @@ class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
3539
class ExtractUpstreamRepositoryTool(Tool[ExtractUpstreamRepositoryInput, ToolRunOptions, ExtractUpstreamRepositoryOutput]):
3640
name = "extract_upstream_repository"
3741
description = """
38-
Extract upstream repository URL and commit hash from a commit or pull request URL.
42+
Extract upstream repository URL and commit information from a commit, pull request, or compare URL.
3943
4044
Supports common formats:
4145
- GitHub/GitLab commit: https://domain.com/owner/repo/commit/hash or /-/commit/hash
4246
- GitHub/GitLab PR: https://domain.com/owner/repo/pull/123 or /merge_requests/123
47+
- GitHub/GitLab compare: https://domain.com/owner/repo/compare/ref1...ref2 or /-/compare/ref1...ref2
4348
- Query param formats: ?id=hash or ?h=hash (for cgit/gitweb)
4449
4550
For pull requests, fetches the head commit SHA from the PR.
46-
Returns the git clone URL and commit hash needed for cherry-picking.
51+
For compare URLs, fetches all commits in the range and returns them ordered oldest to newest.
52+
Returns the git clone URL and commit information needed for cherry-picking.
4753
"""
4854
input_schema = ExtractUpstreamRepositoryInput
4955

@@ -59,7 +65,7 @@ async def _run(
5965
try:
6066
parsed = urlparse(tool_input.upstream_fix_url)
6167

62-
# Check if this is a pull request URL and extract owner/repo/PR number in one match
68+
# Check if this is a pull request URL and extract owner/repo/PR number in one match.
6369
pr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/pull/(\d+)(?:\.patch)?', parsed.path)
6470
mr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/-/merge_requests/(\d+)(?:\.patch)?', parsed.path)
6571

@@ -117,50 +123,104 @@ async def _run(
117123
)
118124
)
119125

120-
else:
126+
# Try to match compare URL
127+
compare_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/(?:-/)?compare/(.+?)(\.{2,3})([^\s\?#]+)', parsed.path)
128+
if compare_match:
129+
# Handle GitHub/GitLab Compare URLs
130+
owner = compare_match.group(1)
131+
repo = compare_match.group(2)
132+
base_ref = compare_match.group(3)
133+
# Group 4 is the separator (.. or ...) - not used, we always use ... for APIs
134+
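target_ref = compare_match.group(5).rstrip('.patch') # NOTE(review): intended to remove a trailing ".patch", but str.rstrip() strips any trailing characters from the set {'.', 'p', 'a', 't', 'c', 'h'} (e.g. "v1-patch" -> "v1-"); removesuffix('.patch') is likely what was meant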
135+
# Construct repository URL
136+
repo_url = f"https://{parsed.netloc}/{owner}/{repo}.git"
137+
# Determine if this is GitHub or GitLab based on the URL pattern
138+
is_github = '/-/' not in parsed.path
139+
# Fetch compare information to get the list of commits
140+
headers = {
141+
'Accept': 'application/json',
142+
'User-Agent': 'RHEL-Backport-Agent'
143+
}
144+
commits = []
145+
commit_hash = target_ref
146+
try:
147+
async with aiohttp.ClientSession() as session:
148+
if is_github:
149+
# GitHub API - URL-encode refs to handle special characters like / in branch names
150+
api_url = f"https://api.github.com/repos/{owner}/{repo}/compare/{quote(base_ref, safe='')}...{quote(target_ref, safe='')}"
151+
async with session.get(api_url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
152+
response.raise_for_status()
153+
data = await response.json()
154+
# GitHub: commits are in 'commits' array (oldest first)
155+
commits = [commit['sha'] for commit in data.get('commits', [])]
156+
else:
157+
# GitLab API - use params dict for automatic URL encoding
158+
api_url = f"https://{parsed.netloc}/api/v4/projects/{owner}%2F{repo}/repository/compare"
159+
params = {'from': base_ref, 'to': target_ref}
160+
async with session.get(api_url, params=params, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
161+
response.raise_for_status()
162+
data = await response.json()
163+
# GitLab: commits are in 'commits' array (newest first)
164+
commits = [commit['id'] for commit in data.get('commits', [])]
165+
# Reverse to get oldest first
166+
commits = list(reversed(commits))
167+
# Use the last commit (newest) as the commit_hash
168+
commit_hash = commits[-1] if commits else target_ref
169+
except (aiohttp.ClientError, KeyError) as e:
170+
# If API fails, fall back to using target_ref as commit_hash
171+
# This allows the tool to still work even if API is unavailable
172+
commit_hash = target_ref
173+
commits = []
174+
# Return with compare information
175+
return ExtractUpstreamRepositoryOutput(
176+
result=UpstreamRepository(
177+
repo_url=repo_url,
178+
commit_hash=commit_hash,
179+
original_url=tool_input.upstream_fix_url,
180+
pr_number=None,
181+
is_pr=False,
182+
is_compare=True,
183+
base_ref=base_ref,
184+
target_ref=target_ref,
185+
compare_commits=commits if commits else None
186+
)
187+
)
188+
# Try to match regular commit URL or query parameter format
189+
repo_path = None
190+
commit_hash = None
191+
commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
192+
if commit_match:
121193
# Handle regular commit URLs
122-
commit_hash = None
123-
repo_path = None
124-
125-
# Pattern 1: /commit/hash or /-/commit/hash in the path (capture repo path and commit hash together)
126-
commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
127-
if commit_match:
128-
repo_path = commit_match.group(1).strip('/')
129-
commit_hash = commit_match.group(2)
130-
131-
# Pattern 2: query parameters (?id=hash or &h=hash for cgit/gitweb, ?p=repo for repo path)
132-
if not commit_hash and parsed.query:
133-
query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
134-
if query_match:
135-
commit_hash = query_match.group(1)
136-
# Extract repo from ?p= parameter
137-
repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
138-
if repo_query_match:
139-
repo_path = repo_query_match.group(1)
140-
141-
if not commit_hash:
142-
raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
143-
194+
repo_path = commit_match.group(1).strip('/')
195+
commit_hash = commit_match.group(2)
196+
elif parsed.query:
197+
# Handle query parameter format (cgit/gitweb)
198+
query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
199+
if query_match:
200+
commit_hash = query_match.group(1)
201+
repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
202+
if repo_query_match:
203+
repo_path = repo_query_match.group(1)
204+
if commit_hash:
144205
if not repo_path:
145206
raise ToolError(f"Could not extract repository path from URL: {tool_input.upstream_fix_url}")
146-
147207
# Construct clone URL
148208
scheme = parsed.scheme or 'https'
149209
repo_url = f"{scheme}://{parsed.netloc}/{repo_path}"
150210
if not repo_url.endswith('.git'):
151211
repo_url += '.git'
152-
153-
# Return for non-PR commits
154-
return ExtractUpstreamRepositoryOutput(
155-
result=UpstreamRepository(
156-
repo_url=repo_url,
157-
commit_hash=commit_hash,
158-
original_url=tool_input.upstream_fix_url,
159-
pr_number=None,
160-
is_pr=False
212+
# Return for non-PR/non-compare URLs
213+
return ExtractUpstreamRepositoryOutput(
214+
result=UpstreamRepository(
215+
repo_url=repo_url,
216+
commit_hash=commit_hash,
217+
original_url=tool_input.upstream_fix_url,
218+
pr_number=None,
219+
is_pr=False
220+
)
161221
)
162-
)
163-
222+
# If we got here, we couldn't match any pattern
223+
raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
164224
except ToolError:
165225
raise
166226
except Exception as e:

0 commit comments

Comments
 (0)