Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(autofix): Update Q&A prompt #2187

Merged
merged 2 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/seer/automation/autofix/components/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@ class ConfidenceRequest(BaseComponentRequest):


class ConfidenceOutput(BaseComponentOutput):
comment: str | None = None
question: str | None = None
output_confidence_score: float
proceed_confidence_score: float


class ConfidencePrompts:
@staticmethod
def format_system_msg() -> str:
return "You were an principle engineer responsible for debugging and fixing an issue in a codebase. You have memory of the previous conversation and analysis. But now you are reflecting on the analysis so far. Your goal is to verbalize any uncertainties that affected your answer, your confidence in your final answer, and your confidence in proceeding to the next step. You will decide whether to leave a brief comment on the document for your team to respond to, but only if significant concerns remain. You will also score your confidence."
return "You were a principal engineer responsible for debugging and fixing an issue in a codebase. You have memory of the previous conversation and analysis. But now you are reflecting on the analysis so far. Your goal is to verbalize any uncertainties that affected your answer, your confidence in your final answer, and your confidence in proceeding to the next step. You will decide whether to leave a brief question on the document for your team to respond to, but only if significant concerns remain. You will also score your confidence."

@staticmethod
def format_default_msg(step_goal_description: str, next_step_goal_description: str) -> str:
return textwrap.dedent(
"""\
Think through the uncertainties and open questions, if any, that appeared during your analysis. Is there a missing piece of the puzzle? Anywhere you had to make an assumption or speculate? Any opportunities for a better answer? Anywhere you need more context or an opinion from the team? Be hypercritical. If there are uncertainties or open questions your team should be aware of when reading your final answer, leave a brief (under 50 words) and specific comment/question on the document. If there is nothing worth surfacing, return None/null for the comment.
Think through the uncertainties and open questions, if any, that appeared during your analysis. Is there a missing piece of the puzzle? Anywhere you had to make an assumption or speculate? Any opportunities for a better answer? Anywhere you need more context or an opinion from the team? Be hypercritical. If there are uncertainties or open questions your team should be aware of when reading your final answer, leave a brief (under 50 words) and specific question on the document. If there is nothing worth surfacing, return None/null for the question.

Then score your confidence in the correctness of your final {step_goal_description} with a float between 0 and 1. The more uncertainties there are, the lower your confidence should be.
Then based on your findings so far, score your confidence in successfully completing the next step, {next_step_goal_description}, with a float between 0 and 1. The more uncertain you are about your correctness, or if it seems hard to do the next step based on what you know, the lower your confidence should be.
Expand Down
6 changes: 4 additions & 2 deletions src/seer/automation/autofix/steps/coding_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,12 @@ def _invoke(self, app_config: AppConfig = injected):
cur.steps[-1].proceed_confidence_score = (
confidence_output.proceed_confidence_score
)
if confidence_output.comment:
if confidence_output.question:
cur.steps[-1].agent_comment_thread = CommentThread(
id=str(uuid.uuid4()),
messages=[Message(role="assistant", content=confidence_output.comment)],
messages=[
Message(role="assistant", content=confidence_output.question)
],
)

pr_to_comment_on = state.request.options.comment_on_pr_with_url
Expand Down
6 changes: 4 additions & 2 deletions src/seer/automation/autofix/steps/root_cause_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,12 @@ def _invoke(self, app_config: AppConfig = injected):
cur.steps[-1].proceed_confidence_score = (
confidence_output.proceed_confidence_score
)
if confidence_output.comment:
if confidence_output.question:
cur.steps[-1].agent_comment_thread = CommentThread(
id=str(uuid.uuid4()),
messages=[Message(role="assistant", content=confidence_output.comment)],
messages=[
Message(role="assistant", content=confidence_output.question)
],
)

self.context.event_manager.add_log(
Expand Down
6 changes: 4 additions & 2 deletions src/seer/automation/autofix/steps/solution_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ def _invoke(self):
cur.steps[-1].proceed_confidence_score = (
confidence_output.proceed_confidence_score
)
if confidence_output.comment:
if confidence_output.question:
cur.steps[-1].agent_comment_thread = CommentThread(
id=str(uuid.uuid4()),
messages=[Message(role="assistant", content=confidence_output.comment)],
messages=[
Message(role="assistant", content=confidence_output.question)
],
)
10 changes: 5 additions & 5 deletions tests/automation/autofix/components/test_confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_confidence_successful_response(self, component):
mock_llm_client = MagicMock()
mock_llm_client.generate_structured.return_value = LlmGenerateStructuredResponse(
parsed=ConfidenceOutput(
comment="Need more information about the error handling.",
question="Need more information about the error handling.",
output_confidence_score=0.75,
proceed_confidence_score=0.85,
),
Expand All @@ -56,7 +56,7 @@ def test_confidence_successful_response(self, component):
output = component.invoke(request)

assert output is not None
assert output.comment == "Need more information about the error handling."
assert output.question == "Need more information about the error handling."
assert output.output_confidence_score == 0.75
assert output.proceed_confidence_score == 0.85

Expand All @@ -73,7 +73,7 @@ def test_confidence_null_comment(self, component):
mock_llm_client = MagicMock()
mock_llm_client.generate_structured.return_value = LlmGenerateStructuredResponse(
parsed=ConfidenceOutput(
comment=None,
question=None,
output_confidence_score=0.95,
proceed_confidence_score=0.90,
),
Expand All @@ -97,7 +97,7 @@ def test_confidence_null_comment(self, component):
output = component.invoke(request)

assert output is not None
assert output.comment is None
assert output.question is None
assert output.output_confidence_score == 0.95
assert output.proceed_confidence_score == 0.90

Expand Down Expand Up @@ -127,4 +127,4 @@ def test_confidence_none_response(self, component):
assert output is not None
assert output.output_confidence_score == 0.5
assert output.proceed_confidence_score == 0.5
assert output.comment is None
assert output.question is None
10 changes: 6 additions & 4 deletions tests/automation/autofix/steps/test_root_cause_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def test_confidence_evaluation(self, mock_confidence_component, mock_root_cause_

# Mock the confidence component output
mock_confidence_output = ConfidenceOutput(
comment="This is a test comment",
question="This is a test question",
output_confidence_score=0.85,
proceed_confidence_score=0.75,
)
Expand Down Expand Up @@ -205,7 +205,7 @@ def test_confidence_evaluation(self, mock_confidence_component, mock_root_cause_
assert context_manager.steps[-1].agent_comment_thread is not None
assert (
context_manager.steps[-1].agent_comment_thread.messages[0].content
== "This is a test comment"
== "This is a test question"
)

@patch("seer.automation.autofix.steps.root_cause_step.RootCauseAnalysisComponent")
Expand Down Expand Up @@ -256,9 +256,11 @@ def test_confidence_evaluation_no_comment(
mock_root_cause_output = next(generate(RootCauseAnalysisOutput))
mock_root_cause_component.return_value.invoke.return_value = mock_root_cause_output

# Mock the confidence component output with None comment
# Mock the confidence component output with None question
mock_confidence_output = ConfidenceOutput(
comment=None, output_confidence_score=0.95, proceed_confidence_score=0.90 # No comment
question=None,
output_confidence_score=0.95,
proceed_confidence_score=0.90, # No question
)
mock_confidence_component.return_value.invoke.return_value = mock_confidence_output

Expand Down