Skip to content

Improvement: Enhance ask_line tool by adding PR review comment threads as context #1687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions pr_agent/git_providers/git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,14 +285,8 @@ def get_issue_comments(self):

def get_comment_url(self, comment) -> str:
return ""

def get_review_comment_by_id(self, comment_id: int):
pass

def get_review_id_by_comment_id(self, comment_id: int):
pass

def get_review_thread_comments(self, comment_id: int):

def get_review_thread_comments(self, comment_id: int) -> list[dict]:
pass

#### labels operations ####
Expand Down
124 changes: 19 additions & 105 deletions pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,130 +427,44 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool =
self._publish_inline_comments_fallback_with_verification(comments)
except Exception as e:
get_logger().error(f"Failed to publish inline code comments fallback, error: {e}")
raise e

def get_review_comment_by_id(self, comment_id: int):
"""
Retrieves a review comment by its ID.

Args:
comment_id: Review comment ID

Returns:
Review comment object or None (if not found)
"""
try:
# Using PyGitHub library
# There's no direct way to get PR comment by ID, so we fetch all comments and filter
all_comments = list(self.pr.get_comments())
for comment in all_comments:
if comment.id == comment_id:
return comment
return None
except Exception as e:
get_logger().warning(f"Failed to get review comment {comment_id}, error: {e}")
return None

def get_review_id_by_comment_id(self, comment_id: int):
"""
Finds the review ID that a comment belongs to based on its comment ID.

Args:
comment_id: Review comment ID

Returns:
Review ID or None (if not found)
"""
try:
comment = self.get_review_comment_by_id(comment_id)
if comment:
return getattr(comment, 'pull_request_review_id', None)
return None
except Exception as e:
get_logger().warning(f"Failed to get review ID for comment {comment_id}, error: {e}")
return None
raise e

def get_review_thread_comments(self, comment_id: int):
def get_review_thread_comments(self, comment_id: int) -> list[dict]:
"""
Retrieves all comments in the thread that a specific comment belongs to.
Retrieves all comments in the same line as the given comment.

Args:
comment_id: Review comment ID

Returns:
List of comments in the same thread
List of comments on the same line
"""
try:
# Get comment information
comment = self.get_review_comment_by_id(comment_id)
# Get the original comment to find its location
comment = self.pr.get_comment(comment_id)
if not comment:
return []

# get all comments
all_comments = list(self.pr.get_comments())

# Filter comments in the same thread
thread_comments = []
in_reply_to_map = {}

# First build the in_reply_to relationship map
for c in all_comments:
in_reply_to_id = getattr(c, 'in_reply_to_id', None)
if in_reply_to_id:
in_reply_to_map[c.id] = in_reply_to_id

# Recursively find all ancestor comments (collect comment's ancestors)
def find_ancestors(cid):
ancestors = []
current = cid
while current in in_reply_to_map:
parent_id = in_reply_to_map[current]
ancestors.append(parent_id)
current = parent_id
return ancestors

# Recursively find all descendant comments (collect all replies to the comment)
def find_descendants(cid):
descendants = []
for c in all_comments:
if getattr(c, 'in_reply_to_id', None) == cid:
descendants.append(c.id)
descendants.extend(find_descendants(c.id))
return descendants

# Find all descendants of a specific ancestor (including sibling comments)
def find_all_descendants_of_ancestor(ancestor_id):
all_descendants = []
for c in all_comments:
if getattr(c, 'in_reply_to_id', None) == ancestor_id:
all_descendants.append(c.id)
all_descendants.extend(find_descendants(c.id))
return all_descendants

# Collect both ancestor and descendant IDs of the comment
ancestors = find_ancestors(comment_id)
descendants = find_descendants(comment_id)
# Extract file path and line number
file_path = comment.path
line_number = comment.raw_data["line"] if "line" in comment.raw_data else comment.raw_data.get("original_line")

# Create thread ID set (self, ancestors, descendants)
thread_ids = set([comment_id] + ancestors + descendants)

# For each ancestor, include all conversation branches (sibling comments with the same ancestor)
for ancestor_id in ancestors:
sibling_ids = find_all_descendants_of_ancestor(ancestor_id)
thread_ids.update(sibling_ids)
# Get all comments
all_comments = list(self.pr.get_comments())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, you're collecting all PR comments. You can expect to find the given comment_id within this list, making the comment = self.pr.get_comment(comment_id) code line redundant and reducing the number of API calls by half, from 2 to 1.


# Filter to only get comments belonging to the thread
for c in all_comments:
if c.id in thread_ids:
thread_comments.append(c)
# Filter comments on the same line of the same file
thread_comments = [
c for c in all_comments
if c.path == file_path and (c.raw_data.get("line") == line_number or c.raw_data.get("original_line") == line_number)
]
Copy link
Collaborator

@ofir-frd ofir-frd Apr 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is too general; let's work with comment IDs instead of non-unique attributes.
Hint: look for in_reply_to_id


# Sort chronologically (by creation date)
# Sort chronologically
thread_comments.sort(key=lambda c: c.created_at)

return thread_comments

except Exception as e:
get_logger().warning(f"Failed to get review thread comments for comment {comment_id}, error: {e}")
get_logger().warning(f"Failed to get review comments for comment {comment_id}, error: {e}")
return []

def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]):
Expand Down
3 changes: 1 addition & 2 deletions pr_agent/settings/pr_line_questions_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ Previous discussion on this code:
======
{{ conversation_history|trim }}
======
Consider both the previous review comments from authors and reviewers, as well as any previous questions and answers about this code. The "Previous Question" and "Previous AI Answer" show earlier interactions about the same code. Use this context to provide a more informed and consistent answer.
Use this prior discussion context to provide a consistent and informed answer.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make this more informative by describing the conversation structure and how the model should handle it.

{%- endif %}


A question about the selected lines:
======
{{ question|trim }}
Expand Down
95 changes: 32 additions & 63 deletions pr_agent/tools/pr_line_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage

from pr_agent.git_providers.github_provider import GithubProvider
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please place this import in order with the others, next to the from pr_agent.git_providers... imports.


class PR_LineQuestions:
def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
Expand All @@ -43,9 +43,6 @@ def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] =
get_settings().pr_line_questions_prompt.user)
self.patches_diff = None
self.prediction = None

# get settings for use conversation history
self.use_conversation_history = get_settings().pr_questions.use_conversation_history

def parse_args(self, args):
if args and len(args) > 0:
Expand All @@ -61,7 +58,8 @@ async def run(self):
# self.git_provider.publish_comment("Preparing answer...", is_temporary=True)

# set conversation history if enabled
if self.use_conversation_history:
# currently only supports GitHub provider
if get_settings().pr_questions.use_conversation_history and isinstance(self.git_provider, GithubProvider):
self._load_conversation_history()

self.patch_with_lines = ""
Expand Down Expand Up @@ -103,70 +101,41 @@ async def run(self):

def _load_conversation_history(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function currently introduces unnecessary side effects, making the code harder to read and follow. Let's build the conversation string inside the function and return it, so the run function can handle its output.

Suggested change
def _load_conversation_history(self):
def _load_conversation_history(self) -> str:

"""generate conversation history from the code review thread"""
# set conversation history to empty string
self.vars["conversation_history"] = ""

comment_id = get_settings().get('comment_id', '')
file_path = get_settings().get('file_name', '')
line_number = get_settings().get('line_end', '')

# early return if any required parameter is missing
if not all([comment_id, file_path, line_number]):
return
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lets log this


try:
comment_id = get_settings().get('comment_id', '')
file_path = get_settings().get('file_name', '')
line_number = get_settings().get('line_end', '')
# retrieve thread comments
thread_comments = self.git_provider.get_review_thread_comments(comment_id)

# return if no comment id or file path and line number
if not (comment_id or (file_path and line_number)):
return

# initialize conversation history
# generate conversation history
conversation_history = []
for comment in thread_comments:
body = getattr(comment, 'body', '')

# skip empty comments, current comment(will be added as a question at prompt)
if not body or not body.strip() or comment_id == comment.id:
continue

user = comment.user
author = user.login if hasattr(user, 'login') else 'Unknown'
conversation_history.append(f"{author}: {body}")

if hasattr(self.git_provider, 'get_review_thread_comments') and comment_id:
try:
# get review thread comments
thread_comments = self.git_provider.get_review_thread_comments(comment_id)

# current question id (this question is excluded from the context)
current_question_id = comment_id

# generate conversation history from the comments
for comment in thread_comments:
# skip empty comments
body = getattr(comment, 'body', '')
if not body or not body.strip():
continue

# except for current question
# except for current question
if current_question_id and comment.id == current_question_id:

# remove the AI command (/ask etc) from the beginning of the comment (optional)
clean_body = body
if clean_body.startswith('/'):
clean_body = clean_body.split('\n', 1)[-1] if '\n' in clean_body else ''

if not clean_body.strip():
continue

# author info
user = comment.user
author = user.login if hasattr(user, 'login') else 'Unknown'

# confirm if the author is the current user (AI vs user)
is_ai = 'bot' in author.lower() or '[bot]' in author.lower()
role = 'AI' if is_ai else 'User'

# append to the conversation history
conversation_history.append(f"{role} ({author}): {clean_body}")

# transform the conversation history to a string
if conversation_history:
self.vars["conversation_history"] = "\n\n".join(conversation_history)
get_logger().info(f"Loaded {len(conversation_history)} comments from the code review thread")
else:
self.vars["conversation_history"] = ""

except Exception as e:
get_logger().warning(f"Failed to get review thread comments: {e}")
self.vars["conversation_history"] = ""
# transform and save conversation history
if conversation_history:
self.vars["conversation_history"] = "\n\n".join(conversation_history)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please improve the sense of order in this string, for example, count the comments.

get_logger().info(f"Loaded {len(conversation_history)} comments from the code review thread")

except Exception as e:
get_logger().error(f"Error loading conversation history: {e}")
self.vars["conversation_history"] = ""
get_logger().error(f"Error processing conversation history, error: {e}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's return an empty string in case the code crashes.


async def _get_prediction(self, model: str):
variables = copy.deepcopy(self.vars)
Expand Down
Loading