Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 218 additions & 11 deletions pr_agent/git_providers/azuredevops_provider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import datetime as _dt
import os
from typing import Optional, Tuple
from urllib.parse import urlparse
Expand All @@ -8,12 +9,12 @@

from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import (PRDescriptionHeader, clip_tokens,
from ..algo.utils import (PRDescriptionHeader, PRReviewHeader, clip_tokens,
find_line_number_of_relevant_line_in_file,
load_large_diff)
from ..config_loader import get_settings
from ..log import get_logger
from .git_provider import GitProvider
from .git_provider import GitProvider, IncrementalPR

AZURE_DEVOPS_AVAILABLE = True
ADO_APP_CLIENT_DEFAULT_ID = "499b84ac-1321-427f-aa17-267ca6975798/.default"
Expand All @@ -32,6 +33,32 @@
AZURE_DEVOPS_AVAILABLE = False


def _to_naive_utc(dt):
    """Normalize a datetime to naive UTC.

    ``None`` and naive datetimes pass through unchanged; timezone-aware
    values are converted to UTC and stripped of their tzinfo so they can
    be compared against other naive-UTC timestamps.
    """
    if dt is None:
        return None
    tzinfo = getattr(dt, "tzinfo", None)
    if tzinfo is None:
        return dt
    return dt.astimezone(_dt.timezone.utc).replace(tzinfo=None)


class _AzureCommitInner:
    """Inner commit payload mimicking PyGithub's ``Commit.commit`` shape
    (``.message`` and ``.author.date``)."""

    def __init__(self, raw):
        # Azure DevOps exposes the commit message on the `comment` attribute.
        self.message = getattr(raw, "comment", "") or ""
        raw_author = getattr(raw, "author", None)
        # Normalize the author date to naive UTC; may be None when absent.
        date = _to_naive_utc(getattr(raw_author, "date", None)) if raw_author else None
        self.author = type("_AzureAuthor", (), {"date": date})()


class _AzureCommitAdapter:
    """Mimics PyGithub `Commit` shape (.sha, .commit.author.date, .commit.message, .parents)."""

    def __init__(self, raw):
        # Mirror the Azure commit id under both GitHub-style and Azure-style names.
        self.sha = raw.commit_id
        self.commit_id = raw.commit_id
        self.commit = _AzureCommitInner(raw)
        # Missing `parents` degrades to an empty list (treated as non-merge).
        self.parents = list(getattr(raw, "parents", None) or [])


class AzureDevopsProvider(GitProvider):

def __init__(
Expand All @@ -51,6 +78,9 @@ def __init__(
self.pr = None
self.temp_comments = []
self.incremental = incremental
self.unreviewed_files_map = {}
self.pr_commits = None
self.previous_review = None
if pr_url:
self.set_pr(pr_url)

Expand Down Expand Up @@ -158,6 +188,135 @@ def set_pr(self, pr_url: str):
self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)
self.pr = self._get_pr()

def get_incremental_commits(self, incremental=None):
if incremental is None:
incremental = IncrementalPR(False)
self.incremental = incremental
if self.incremental.is_incremental:
self.unreviewed_files_map = {}
self._get_incremental_commits()

def _get_incremental_commits(self):
if not self.pr_commits:
raw = list(self.azure_devops_client.get_pull_request_commits(
project=self.workspace_slug,
repository_id=self.repo_slug,
pull_request_id=self.pr_num,
))
# Azure returns newest-first; oldest-first matches GitHub iteration order.
raw.reverse()
self.pr_commits = [_AzureCommitAdapter(c) for c in raw]

self.previous_review = self.get_previous_review(full=True, incremental=True)
if not self.previous_review:
get_logger().info("No previous review found, will review the entire PR")
self.incremental.is_incremental = False
return

self.incremental.commits_range = self._get_commit_range()
if self.incremental.commits_range is None:
return
candidate_paths = []
had_errors = False
non_merge_seen = False
for commit in self.incremental.commits_range:
if len(commit.parents) > 1:
get_logger().info(f"Skipping merge commit {commit.sha}")
continue
non_merge_seen = True
try:
changes_obj = self.azure_devops_client.get_changes(
project=self.workspace_slug,
repository_id=self.repo_slug,
commit_id=commit.commit_id,
)
except Exception as e:
had_errors = True
get_logger().warning(f"Failed to fetch changes for {commit.commit_id}: {e}")
continue
for change in (getattr(changes_obj, "changes", None) or []):
try:
item = change["item"]
except (KeyError, TypeError):
additional = getattr(change, "additional_properties", None) or {}
item = additional.get("item") or {}
if not isinstance(item, dict) or item.get("gitObjectType") == "tree":
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
continue
path = item.get("path")
if path:
candidate_paths.append(path)

if candidate_paths:
deduped = list(dict.fromkeys(candidate_paths))
filtered = filter_ignored(deduped, "azure")
for path in filtered:
if is_valid_file(path):
self.unreviewed_files_map[path] = path
elif had_errors and self.incremental.commits_range:
get_logger().warning(
"Failed to fetch changes for incremental commits; falling back to full review."
)
self.incremental.is_incremental = False
elif self.incremental.commits_range and not non_merge_seen:
get_logger().info(
"Incremental range only contains merge commits; falling back to full review."
)
self.incremental.is_incremental = False

def _get_commit_range(self):
last_review_time = _to_naive_utc(getattr(self.previous_review, "created_at", None))
if last_review_time is None or not self.pr_commits:
get_logger().info(
"Cannot compute incremental commit range "
"(missing previous review timestamp or PR commits); falling back to full review."
)
self.incremental.is_incremental = False
return None
first_new_commit_index = None
saw_reliable_date = False
for index in range(len(self.pr_commits) - 1, -1, -1):
cdate = self.pr_commits[index].commit.author.date
if cdate is None:
continue
saw_reliable_date = True
if cdate > last_review_time:
self.incremental.first_new_commit = self.pr_commits[index]
first_new_commit_index = index
else:
self.incremental.last_seen_commit = self.pr_commits[index]
break
if not saw_reliable_date:
get_logger().info(
"All PR commit author dates are missing; cannot compute incremental range. "
"Falling back to full review."
)
self.incremental.is_incremental = False
return None
return self.pr_commits[first_new_commit_index:] if first_new_commit_index is not None else []
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.

def get_previous_review(self, *, full: bool, incremental: bool):
if not (full or incremental):
raise ValueError("At least one of full or incremental must be True")
prefixes = []
if full:
prefixes.append(PRReviewHeader.REGULAR.value)
if incremental:
prefixes.append(PRReviewHeader.INCREMENTAL.value)
matches = []
for comment in self.get_issue_comments():
body = getattr(comment, "body", None)
if body and any(body.startswith(p) for p in prefixes):
matches.append(comment)
if not matches:
return None
latest = max(
matches,
key=lambda c: _to_naive_utc(getattr(c, "published_date", None)) or _dt.datetime.min,
)
latest.html_url = self.get_comment_url(latest)
latest.created_at = _to_naive_utc(getattr(latest, "published_date", None))
return latest

def get_repo_settings(self):
try:
contents = self.azure_devops_client.get_item_content(
Expand All @@ -175,6 +334,13 @@ def get_repo_settings(self):
return ""

def get_files(self):
if (isinstance(getattr(self, "incremental", None), IncrementalPR)
and self.incremental.is_incremental
and self.unreviewed_files_map):
return list(self.unreviewed_files_map.keys())
return self._get_files_full()

def _get_files_full(self):
files = []
for i in self.azure_devops_client.get_pull_request_commits(
project=self.workspace_slug,
Expand All @@ -194,9 +360,17 @@ def get_files(self):
def get_diff_files(self) -> list[FilePatchInfo]:
try:

if self.diff_files:
if self.diff_files is not None:
return self.diff_files

Comment on lines +363 to 365
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Stale diff cache reused 🐞 Bug ☼ Reliability

AzureDevopsProvider.get_diff_files() now returns cached self.diff_files whenever it’s not
None, but neither set_pr() nor get_incremental_commits() resets
diff_files/pr_commits/previous_review; if the provider instance is reused, incremental and
full reviews can return stale diffs and wrong review scope. This can cause incremental reviews to
unintentionally include the full PR diff (or vice versa), defeating the feature and potentially
reviewing incorrect content.
Agent Prompt
### Issue description
`AzureDevopsProvider` caches diff/commit-derived state (`diff_files`, `pr_commits`, `previous_review`, and `unreviewed_files_map`) but does not reset it when (a) a new PR is set via `set_pr()` or (b) incremental mode is enabled via `get_incremental_commits()`. In reused provider instances, `get_diff_files()` can return stale cached results and ignore the current incremental/full mode.

### Issue Context
The codebase has a context-based provider reuse mechanism; even if reuse is not always active, the provider should be safe under reuse to avoid returning wrong diffs.

### Fix Focus Areas
- pr_agent/git_providers/azuredevops_provider.py[186-198]
- pr_agent/git_providers/azuredevops_provider.py[191-210]
- pr_agent/git_providers/azuredevops_provider.py[360-365]

### Suggested fix
- In `set_pr()`: clear PR-specific caches, e.g. `self.diff_files = None`, `self.pr_commits = None`, `self.previous_review = None`, and `self.unreviewed_files_map = {}`.
- In `get_incremental_commits()`: also clear `self.diff_files` (and potentially `self.pr_commits` if you need a fresh commit list) before computing incremental state, so incremental diff filtering/rebuilding is based on current data.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

if self.pr.last_merge_commit is None or self.pr.last_merge_target_commit is None:
get_logger().info(
f"PR {self.pr_num} has no last_merge_commit/last_merge_target_commit; "
f"cannot compute diff files."
)
self.diff_files = []
return []

base_sha = self.pr.last_merge_target_commit
head_sha = self.pr.last_merge_commit

Expand Down Expand Up @@ -254,7 +428,7 @@ def get_diff_files(self) -> list[FilePatchInfo]:
# diffs = list(set(diffs))

diffs_original = diffs
diffs = filter_ignored(diffs_original, 'azure')
diffs = filter_ignored(diffs_original, "azure")
if diffs_original != diffs:
try:
get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
Expand All @@ -263,6 +437,15 @@ def get_diff_files(self) -> list[FilePatchInfo]:
except Exception:
pass

incremental_active = (
isinstance(getattr(self, "incremental", None), IncrementalPR)
and self.incremental.is_incremental
and bool(self.unreviewed_files_map)
and self.incremental.last_seen_commit_sha
)
if incremental_active:
diffs = [f for f in diffs if f in self.unreviewed_files_map]

invalid_files_names = []
for file in diffs:
if not is_valid_file(file):
Expand Down Expand Up @@ -301,29 +484,53 @@ def get_diff_files(self) -> list[FilePatchInfo]:
elif "rename" in diff_types[file]: # diff_type can be `rename` | `edit, rename`
edit_type = EDIT_TYPE.RENAMED

version = GitVersionDescriptor(
version=base_sha.commit_id, version_type="commit"
)
if edit_type == EDIT_TYPE.ADDED or edit_type == EDIT_TYPE.RENAMED:
original_file_content_str = ""
elif incremental_active:
inc_version = GitVersionDescriptor(
version=self.incremental.last_seen_commit_sha, version_type="commit"
)
try:
inc_original = self.azure_devops_client.get_item(
repository_id=self.repo_slug,
path=file,
project=self.workspace_slug,
version_descriptor=inc_version,
download=False,
include_content=True,
)
original_file_content_str = inc_original.content or ""
except Exception as error:
get_logger().warning(
f"Failed to retrieve original of {file} at {self.incremental.last_seen_commit_sha}: {error}"
)
original_file_content_str = ""
else:
base_version = GitVersionDescriptor(
version=base_sha.commit_id, version_type="commit"
)
try:
original_file_content_str = self.azure_devops_client.get_item(
base_original = self.azure_devops_client.get_item(
repository_id=self.repo_slug,
path=file,
project=self.workspace_slug,
version_descriptor=version,
version_descriptor=base_version,
download=False,
include_content=True,
)
original_file_content_str = original_file_content_str.content
original_file_content_str = base_original.content
except Exception as error:
get_logger().error(f"Failed to retrieve original file content of {file} at version {version}", error=error)
get_logger().error(
f"Failed to retrieve original file content of {file} at version {base_version}",
error=error,
)
original_file_content_str = ""

patch = load_large_diff(
file, new_file_content_str, original_file_content_str, show_warning=False
).rstrip()
if incremental_active:
self.unreviewed_files_map[file] = patch

# count number of lines added and removed
patch_lines = patch.splitlines(keepends=True)
Expand Down
6 changes: 3 additions & 3 deletions pr_agent/git_providers/gitea_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __init__(self, url: Optional[str] = None):
self.diff_files = []
self.incremental = IncrementalPR(False)
self.comments_list = []
self.unreviewed_files_set = dict()
self.unreviewed_files_map = dict()

if "pulls" in url:
self.pr_url = url
Expand Down Expand Up @@ -475,9 +475,9 @@ def get_diff_files(self) -> List[FilePatchInfo]:
# Get file content from this pr
head_file = self.file_contents.get(filename,"")

if self.incremental.is_incremental and self.unreviewed_files_set:
if self.incremental.is_incremental and self.unreviewed_files_map:
base_file = self._get_file_content_from_latest_commit(filename)
self.unreviewed_files_set[filename] = patch
self.unreviewed_files_map[filename] = patch
else:
if avoid_load:
base_file = ""
Expand Down
12 changes: 6 additions & 6 deletions pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _get_issue_handle(self, issue_url) -> Optional[Issue]:
def get_incremental_commits(self, incremental=IncrementalPR(False)):
self.incremental = incremental
if self.incremental.is_incremental:
self.unreviewed_files_set = dict()
self.unreviewed_files_map = dict()
self._get_incremental_commits()

def is_supported(self, capability: str) -> bool:
Expand Down Expand Up @@ -162,7 +162,7 @@ def _get_incremental_commits(self):
if commit.commit.message.startswith(f"Merge branch '{self._get_repo().default_branch}'"):
get_logger().info(f"Skipping merge commit {commit.commit.message}")
continue
self.unreviewed_files_set.update({file.filename: file for file in commit.files})
self.unreviewed_files_map.update({file.filename: file for file in commit.files})
else:
get_logger().info("No previous review found, will review the entire PR")
self.incremental.is_incremental = False
Expand Down Expand Up @@ -194,8 +194,8 @@ def get_previous_review(self, *, full: bool, incremental: bool):
return self.comments[index]

def get_files(self):
if self.incremental.is_incremental and self.unreviewed_files_set:
return self.unreviewed_files_set.values()
if self.incremental.is_incremental and self.unreviewed_files_map:
return self.unreviewed_files_map.values()
try:
git_files = context.get("git_files", None)
if git_files:
Expand Down Expand Up @@ -296,10 +296,10 @@ def get_diff_files(self) -> list[FilePatchInfo]:
else:
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub

if self.incremental.is_incremental and self.unreviewed_files_set:
if self.incremental.is_incremental and self.unreviewed_files_map:
original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha)
patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
self.unreviewed_files_set[file.filename] = patch
self.unreviewed_files_map[file.filename] = patch
else:
if avoid_load:
original_file_content_str = ""
Expand Down
Loading
Loading