Commit 04c6412

Support compare URLs in extract_upstream_repository
Fetch commit lists from compare URLs for the cherry-pick workflow.
1 parent e53874d commit 04c6412
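
As a quick illustration of what this commit enables (not part of the diff below): the new compare pattern is expected to pull the owner, repository, and both refs out of a compare link. A minimal sketch using the regex introduced here; the URL and names are hypothetical:

    import re
    from urllib.parse import urlparse

    # Hypothetical compare URL; the regex is the one added in this commit.
    url = "https://github.com/example-org/example-repo/compare/v3.7.0...v3.7.1"
    parsed = urlparse(url)
    match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/(?:-/)?compare/(.+?)(\.{2,3})([^\s\?#]+)', parsed.path)
    if match:
        owner, repo, base_ref, _separator, target_ref = match.groups()
        # Expected: example-org example-repo v3.7.0 v3.7.1
        print(owner, repo, base_ref, target_ref)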

File tree: 1 file changed, +99 -40 lines

agents/tools/upstream_tools.py

Lines changed: 99 additions & 40 deletions
@@ -2,7 +2,7 @@
 
 import re
 from pathlib import Path
-from urllib.parse import urlparse
+from urllib.parse import urlparse, quote
 
 import aiohttp
 from pydantic import BaseModel, Field
@@ -22,10 +22,14 @@ class ExtractUpstreamRepositoryInput(BaseModel):
 class UpstreamRepository(BaseModel):
     """Represents an upstream git repository and commit information."""
     repo_url: str = Field(description="Git clone URL of the upstream repository")
-    commit_hash: str = Field(description="Commit hash to cherry-pick")
+    commit_hash: str = Field(description="Commit hash to cherry-pick (for single commits) or target ref (for compare URLs)")
     original_url: str = Field(description="Original upstream fix URL")
     pr_number: str | None = Field(default=None, description="Pull request or merge request number if this is a PR/MR URL, None otherwise")
     is_pr: bool = Field(default=False, description="True if this is a pull request or merge request URL")
+    is_compare: bool = Field(default=False, description="True if this is a compare/diff URL between two refs")
+    base_ref: str | None = Field(default=None, description="Base reference for compare URLs (e.g., v3.7.0)")
+    target_ref: str | None = Field(default=None, description="Target reference for compare URLs (e.g., v3.7.1)")
+    compare_commits: list[str] | None = Field(default=None, description="List of commit hashes in the compare range (ordered oldest to newest)")
 
 
 class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
@@ -35,15 +39,17 @@ class ExtractUpstreamRepositoryOutput(JSONToolOutput[UpstreamRepository]):
 class ExtractUpstreamRepositoryTool(Tool[ExtractUpstreamRepositoryInput, ToolRunOptions, ExtractUpstreamRepositoryOutput]):
     name = "extract_upstream_repository"
     description = """
-    Extract upstream repository URL and commit hash from a commit or pull request URL.
+    Extract upstream repository URL and commit information from a commit, pull request, or compare URL.
 
     Supports common formats:
     - GitHub/GitLab commit: https://domain.com/owner/repo/commit/hash or /-/commit/hash
     - GitHub/GitLab PR: https://domain.com/owner/repo/pull/123 or /merge_requests/123
+    - GitHub/GitLab compare: https://domain.com/owner/repo/compare/ref1...ref2 or /-/compare/ref1...ref2
     - Query param formats: ?id=hash or ?h=hash (for cgit/gitweb)
 
     For pull requests, fetches the head commit SHA from the PR.
-    Returns the git clone URL and commit hash needed for cherry-picking.
+    For compare URLs, fetches all commits in the range and returns them ordered oldest to newest.
+    Returns the git clone URL and commit information needed for cherry-picking.
     """
     input_schema = ExtractUpstreamRepositoryInput
 
@@ -59,7 +65,7 @@ async def _run(
         try:
             parsed = urlparse(tool_input.upstream_fix_url)
 
-            # Check if this is a pull request URL and extract owner/repo/PR number in one match
+            # Check if this is a pull request URL and extract owner/repo/PR number in one match.
             pr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/pull/(\d+)(?:\.patch)?', parsed.path)
             mr_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/-/merge_requests/(\d+)(?:\.patch)?', parsed.path)
 
@@ -117,50 +123,103 @@ async def _run(
                     )
                 )
 
-            else:
+            # Try to match compare URL
+            compare_match = re.search(r'/([\w\-\.]+)/([\w\-\.]+)/(?:-/)?compare/(.+?)(\.{2,3})([^\s\?#]+)', parsed.path)
+            if compare_match:
+                # Handle GitHub/GitLab compare URLs
+                owner = compare_match.group(1)
+                repo = compare_match.group(2)
+                base_ref = compare_match.group(3)
+                # Group 4 is the separator (.. or ...) - not used, we always use ... for APIs
+                target_ref = compare_match.group(5).removesuffix('.patch')  # Remove the .patch suffix if present
+                # Construct repository URL
+                repo_url = f"https://{parsed.netloc}/{owner}/{repo}.git"
+                # Fetch compare information to get the list of commits
+                headers = {
+                    'Accept': 'application/json',
+                    'User-Agent': 'RHEL-Backport-Agent'
+                }
+                commits = []
+                commit_hash = target_ref
+                try:
+                    async with aiohttp.ClientSession() as session:
+                        # Determine if this is GitHub or GitLab based on the URL pattern
+                        if '/-/' not in parsed.path:
+                            # GitHub API - URL-encode refs to handle special characters like / in branch names
+                            api_url = f"https://api.github.com/repos/{owner}/{repo}/compare/{quote(base_ref, safe='')}...{quote(target_ref, safe='')}"
+                            async with session.get(api_url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
+                                response.raise_for_status()
+                                data = await response.json()
+                                # GitHub: commits are in 'commits' array (oldest first)
+                                commits = [commit['sha'] for commit in data.get('commits', [])]
+                        else:
+                            # GitLab API - use params dict for automatic URL encoding
+                            api_url = f"https://{parsed.netloc}/api/v4/projects/{owner}%2F{repo}/repository/compare"
+                            params = {'from': base_ref, 'to': target_ref}
+                            async with session.get(api_url, params=params, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
+                                response.raise_for_status()
+                                data = await response.json()
+                                # GitLab: commits are in 'commits' array (newest first)
+                                commits = [commit['id'] for commit in data.get('commits', [])]
+                                # Reverse to get oldest first
+                                commits = list(reversed(commits))
+                    # Use the last commit (newest) as the commit_hash
+                    commit_hash = commits[-1] if commits else target_ref
+                except (aiohttp.ClientError, KeyError) as e:
+                    # If API fails, fall back to using target_ref as commit_hash
+                    # This allows the tool to still work even if API is unavailable
+                    commit_hash = target_ref
+                    commits = []
+                # Return with compare information
+                return ExtractUpstreamRepositoryOutput(
+                    result=UpstreamRepository(
+                        repo_url=repo_url,
+                        commit_hash=commit_hash,
+                        original_url=tool_input.upstream_fix_url,
+                        pr_number=None,
+                        is_pr=False,
+                        is_compare=True,
+                        base_ref=base_ref,
+                        target_ref=target_ref,
+                        compare_commits=commits if commits else None
+                    )
+                )
+            # Try to match regular commit URL or query parameter format
+            repo_path = None
+            commit_hash = None
+            commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
+            if commit_match:
                 # Handle regular commit URLs
-                commit_hash = None
-                repo_path = None
-
-                # Pattern 1: /commit/hash or /-/commit/hash in the path (capture repo path and commit hash together)
-                commit_match = re.search(r'^(.*?)(?:/(?:-/)?commit(?:s)?/([a-f0-9]{7,40})(?:\.patch)?)', parsed.path)
-                if commit_match:
-                    repo_path = commit_match.group(1).strip('/')
-                    commit_hash = commit_match.group(2)
-
-                # Pattern 2: query parameters (?id=hash or &h=hash for cgit/gitweb, ?p=repo for repo path)
-                if not commit_hash and parsed.query:
-                    query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
-                    if query_match:
-                        commit_hash = query_match.group(1)
-                        # Extract repo from ?p= parameter
-                        repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
-                        if repo_query_match:
-                            repo_path = repo_query_match.group(1)
-
-                if not commit_hash:
-                    raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
-
+                repo_path = commit_match.group(1).strip('/')
+                commit_hash = commit_match.group(2)
+            elif parsed.query:
+                # Handle query parameter format (cgit/gitweb)
+                query_match = re.search(r'(?:id|h)=([a-f0-9]{7,40})', parsed.query)
+                if query_match:
+                    commit_hash = query_match.group(1)
+                    repo_query_match = re.search(r'[?&]p=([^;&]+)', parsed.query)
+                    if repo_query_match:
+                        repo_path = repo_query_match.group(1)
+            if commit_hash:
                 if not repo_path:
                     raise ToolError(f"Could not extract repository path from URL: {tool_input.upstream_fix_url}")
-
                 # Construct clone URL
                 scheme = parsed.scheme or 'https'
                 repo_url = f"{scheme}://{parsed.netloc}/{repo_path}"
                 if not repo_url.endswith('.git'):
                     repo_url += '.git'
-
-                # Return for non-PR commits
-                return ExtractUpstreamRepositoryOutput(
-                    result=UpstreamRepository(
-                        repo_url=repo_url,
-                        commit_hash=commit_hash,
-                        original_url=tool_input.upstream_fix_url,
-                        pr_number=None,
-                        is_pr=False
+                # Return for non-PR/non-compare URLs
+                return ExtractUpstreamRepositoryOutput(
+                    result=UpstreamRepository(
+                        repo_url=repo_url,
+                        commit_hash=commit_hash,
+                        original_url=tool_input.upstream_fix_url,
+                        pr_number=None,
+                        is_pr=False
+                    )
                 )
-                )
-
+            # If we got here, we couldn't match any pattern
+            raise ToolError(f"Could not extract commit hash from URL: {tool_input.upstream_fix_url}")
         except ToolError:
             raise
         except Exception as e:
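
For reference, a standalone sketch of the compare lookup the new code performs against GitHub, distilled from the hunk above. The owner, repository, and refs are hypothetical placeholders, and error handling is left out:

    import asyncio
    import aiohttp
    from urllib.parse import quote

    async def list_compare_commits(owner: str, repo: str, base_ref: str, target_ref: str) -> list[str]:
        # Same endpoint, headers, and timeout as the compare branch added in this commit.
        api_url = (
            f"https://api.github.com/repos/{owner}/{repo}/compare/"
            f"{quote(base_ref, safe='')}...{quote(target_ref, safe='')}"
        )
        headers = {'Accept': 'application/json', 'User-Agent': 'RHEL-Backport-Agent'}
        async with aiohttp.ClientSession() as session:
            async with session.get(api_url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as response:
                response.raise_for_status()
                data = await response.json()
        # GitHub lists the commits oldest first, which is already cherry-pick order.
        return [commit['sha'] for commit in data.get('commits', [])]

    # Hypothetical usage:
    # commits = asyncio.run(list_compare_commits("example-org", "example-repo", "v3.7.0", "v3.7.1"))

The GitLab path in the diff differs only in the endpoint (/api/v4/projects/<owner>%2F<repo>/repository/compare with from/to params) and in reversing the returned list, since GitLab reports the commits newest first.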
