Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(autofix): Update Q&A prompt #2187

Merged
merged 2 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/seer/automation/autofix/components/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@ class ConfidenceRequest(BaseComponentRequest):


class ConfidenceOutput(BaseComponentOutput):
comment: str | None = None
question: str | None = None
output_confidence_score: float
proceed_confidence_score: float


class ConfidencePrompts:
@staticmethod
def format_system_msg() -> str:
return "You were an principle engineer responsible for debugging and fixing an issue in a codebase. You have memory of the previous conversation and analysis. But now you are reflecting on the analysis so far. Your goal is to verbalize any uncertainties that affected your answer, your confidence in your final answer, and your confidence in proceeding to the next step. You will decide whether to leave a brief comment on the document for your team to respond to, but only if significant concerns remain. You will also score your confidence."
return "You were a principal engineer responsible for debugging and fixing an issue in a codebase. You have memory of the previous conversation and analysis. But now you are reflecting on the analysis so far. Your goal is to verbalize any uncertainties that affected your answer, your confidence in your final answer, and your confidence in proceeding to the next step. You will decide whether to leave a brief question on the document for your team to respond to, but only if significant concerns remain. You will also score your confidence."

@staticmethod
def format_default_msg(step_goal_description: str, next_step_goal_description: str) -> str:
return textwrap.dedent(
"""\
Think through the uncertainties and open questions, if any, that appeared during your analysis. Is there a missing piece of the puzzle? Anywhere you had to make an assumption or speculate? Any opportunities for a better answer? Anywhere you need more context or an opinion from the team? Be hypercritical. If there are uncertainties or open questions your team should be aware of when reading your final answer, leave a brief (under 50 words) and specific comment/question on the document. If there is nothing worth surfacing, return None/null for the comment.
Think through the uncertainties and open questions, if any, that appeared during your analysis. Is there a missing piece of the puzzle? Anywhere you had to make an assumption or speculate? Any opportunities for a better answer? Anywhere you need more context or an opinion from the team? Be hypercritical. If there are uncertainties or open questions your team should be aware of when reading your final answer, leave a brief (under 50 words) and specific question on the document. If there is nothing worth surfacing, return None/null for the question.

Then score your confidence in the correctness of your final {step_goal_description} with a float between 0 and 1. The more uncertainties there are, the lower your confidence should be.
Then based on your findings so far, score your confidence in successfully completing the next step, {next_step_goal_description}, with a float between 0 and 1. The more uncertain you are about your correctness, or if it seems hard to do the next step based on what you know, the lower your confidence should be.
Expand Down
6 changes: 4 additions & 2 deletions src/seer/automation/autofix/steps/coding_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,12 @@ def _invoke(self, app_config: AppConfig = injected):
cur.steps[-1].proceed_confidence_score = (
confidence_output.proceed_confidence_score
)
if confidence_output.comment:
if confidence_output.question:
cur.steps[-1].agent_comment_thread = CommentThread(
id=str(uuid.uuid4()),
messages=[Message(role="assistant", content=confidence_output.comment)],
messages=[
Message(role="assistant", content=confidence_output.question)
],
)

pr_to_comment_on = state.request.options.comment_on_pr_with_url
Expand Down
6 changes: 4 additions & 2 deletions src/seer/automation/autofix/steps/root_cause_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,12 @@ def _invoke(self, app_config: AppConfig = injected):
cur.steps[-1].proceed_confidence_score = (
confidence_output.proceed_confidence_score
)
if confidence_output.comment:
if confidence_output.question:
cur.steps[-1].agent_comment_thread = CommentThread(
id=str(uuid.uuid4()),
messages=[Message(role="assistant", content=confidence_output.comment)],
messages=[
Message(role="assistant", content=confidence_output.question)
],
)

self.context.event_manager.add_log(
Expand Down
6 changes: 4 additions & 2 deletions src/seer/automation/autofix/steps/solution_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ def _invoke(self):
cur.steps[-1].proceed_confidence_score = (
confidence_output.proceed_confidence_score
)
if confidence_output.comment:
if confidence_output.question:
cur.steps[-1].agent_comment_thread = CommentThread(
id=str(uuid.uuid4()),
messages=[Message(role="assistant", content=confidence_output.comment)],
messages=[
Message(role="assistant", content=confidence_output.question)
],
)
10 changes: 5 additions & 5 deletions tests/automation/autofix/components/test_confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_confidence_successful_response(self, component):
mock_llm_client = MagicMock()
mock_llm_client.generate_structured.return_value = LlmGenerateStructuredResponse(
parsed=ConfidenceOutput(
comment="Need more information about the error handling.",
question="Need more information about the error handling.",
output_confidence_score=0.75,
proceed_confidence_score=0.85,
),
Expand All @@ -56,7 +56,7 @@ def test_confidence_successful_response(self, component):
output = component.invoke(request)

assert output is not None
assert output.comment == "Need more information about the error handling."
assert output.question == "Need more information about the error handling."
assert output.output_confidence_score == 0.75
assert output.proceed_confidence_score == 0.85

Expand All @@ -73,7 +73,7 @@ def test_confidence_null_comment(self, component):
mock_llm_client = MagicMock()
mock_llm_client.generate_structured.return_value = LlmGenerateStructuredResponse(
parsed=ConfidenceOutput(
comment=None,
question=None,
output_confidence_score=0.95,
proceed_confidence_score=0.90,
),
Expand All @@ -97,7 +97,7 @@ def test_confidence_null_comment(self, component):
output = component.invoke(request)

assert output is not None
assert output.comment is None
assert output.question is None
assert output.output_confidence_score == 0.95
assert output.proceed_confidence_score == 0.90

Expand Down Expand Up @@ -127,4 +127,4 @@ def test_confidence_none_response(self, component):
assert output is not None
assert output.output_confidence_score == 0.5
assert output.proceed_confidence_score == 0.5
assert output.comment is None
assert output.question is None
10 changes: 6 additions & 4 deletions tests/automation/autofix/steps/test_root_cause_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def test_confidence_evaluation(self, mock_confidence_component, mock_root_cause_

# Mock the confidence component output
mock_confidence_output = ConfidenceOutput(
comment="This is a test comment",
question="This is a test question",
output_confidence_score=0.85,
proceed_confidence_score=0.75,
)
Expand Down Expand Up @@ -205,7 +205,7 @@ def test_confidence_evaluation(self, mock_confidence_component, mock_root_cause_
assert context_manager.steps[-1].agent_comment_thread is not None
assert (
context_manager.steps[-1].agent_comment_thread.messages[0].content
== "This is a test comment"
== "This is a test question"
)

@patch("seer.automation.autofix.steps.root_cause_step.RootCauseAnalysisComponent")
Expand Down Expand Up @@ -256,9 +256,11 @@ def test_confidence_evaluation_no_comment(
mock_root_cause_output = next(generate(RootCauseAnalysisOutput))
mock_root_cause_component.return_value.invoke.return_value = mock_root_cause_output

# Mock the confidence component output with None comment
# Mock the confidence component output with None question
mock_confidence_output = ConfidenceOutput(
comment=None, output_confidence_score=0.95, proceed_confidence_score=0.90 # No comment
question=None,
output_confidence_score=0.95,
proceed_confidence_score=0.90, # No question
)
mock_confidence_component.return_value.invoke.return_value = mock_confidence_output

Expand Down