WIP: refactoring replay code into its own set of files

Domiii · Domiii · commit f7e3d8c3ced7 · 2025-01-21T20:37:18.000+08:00
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -36,12 +36,14 @@
 from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.observation import Observation
 from openhands.events.observation.replay import (
-    ReplayPhaseUpdateObservation,
-    ReplayToolCmdOutputObservation,
+    ReplayObservation,
 )
-from openhands.events.replay import replay_enhance_action
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
+from openhands.replay.replay_commands import replay_enhance_action
+from openhands.replay.replay_state_machine import (
+    get_replay_observation_message,
+)
 from openhands.runtime.plugins import (
     AgentSkillsRequirement,
     JupyterRequirement,
@@ -253,38 +255,8 @@ def get_observation_message(
                 )
             text += f'\n[Command finished with exit code {obs.exit_code}]'
             message = Message(role='user', content=[TextContent(text=text)])
-        elif isinstance(obs, ReplayToolCmdOutputObservation):
-            # if it doesn't have tool call metadata, it was triggered by a user action
-            if obs.tool_call_metadata is None:
-                text = truncate_content(
-                    f'\nObserved result of replay command executed by user:\n{obs.content}',
-                    max_message_chars,
-                )
-            else:
-                text = obs.content
-            message = Message(role='user', content=[TextContent(text=text)])
-        elif isinstance(obs, ReplayPhaseUpdateObservation):
-            # NOTE: The phase change itself is handled in AgentController.
-            new_phase = obs.new_phase
-            if new_phase == ReplayDebuggingPhase.Edit:
-                # Tell the agent to stop analyzing and start editing:
-                text = """
-You have concluded the analysis.
-
-IMPORTANT: NOW review, then implement the hypothesized changes using tools. The code is available in the workspace. Start by answering these questions:
-  1. What is the goal of the investigation according to the initial prompt and initial analysis? IMPORTANT. PAY ATTENTION TO THIS. THIS IS THE ENTRY POINT OF EVERYTHING.
-  2. Given (1), is the hypothesis's `problem` description correct? Does it match the goal of the investigation?
-  3. Do the `editSuggestions` actually address the issue?
-  4. Rephrase the hypothesis so that it is consistent and correct.
-
-IMPORTANT: Don't stop. Keep working.
-IMPORTANT: Don't stop. Keep working.
-"""
-                message = Message(role='user', content=[TextContent(text=text)])
-            else:
-                raise NotImplementedError(
-                    f'Unhandled ReplayPhaseUpdateAction: {new_phase}'
-                )
+        elif isinstance(obs, ReplayObservation):
+            message = get_replay_observation_message(obs, max_message_chars)
         elif isinstance(obs, IPythonRunCellObservation):
             text = obs.content
             # replace base64 images with a placeholder
@@ -388,7 +360,7 @@ def step(self, state: State) -> Action:
             return AgentFinishAction()
 
         if self.config.codeact_enable_replay:
-            # Replay enhancement.
+            # Check for whether we should enhance the prompt.
             enhance_action = replay_enhance_action(state, self.config.is_workspace_repo)
             if enhance_action:
                 logger.info('[REPLAY] Enhancing prompt for Replay recording...')
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
@@ -49,11 +49,11 @@
 )
 from openhands.events.observation.replay import (
     ReplayInternalCmdOutputObservation,
-    ReplayPhaseUpdateObservation,
+    ReplayObservation,
 )
-from openhands.events.replay import handle_replay_internal_observation
 from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
+from openhands.replay.replay_state_machine import on_replay_observation
 from openhands.utils.shutdown_listener import should_continue
 
 # note: RESUME is only available on web GUI
@@ -297,28 +297,8 @@ async def _handle_observation(self, observation: Observation) -> None:
 
         if self._pending_action and self._pending_action.id == observation.cause:
             self._pending_action = None
-            if isinstance(observation, ReplayInternalCmdOutputObservation):
-                # NOTE: Currently, the only internal command is the initial-analysis command.
-                analysis_tool_metadata = handle_replay_internal_observation(
-                    self.state, observation
-                )
-                if analysis_tool_metadata:
-                    # Start analysis phase
-                    self.state.replay_recording_id = analysis_tool_metadata[
-                        'recordingId'
-                    ]
-                    self.state.replay_phase = ReplayDebuggingPhase.Analysis
-                    self.agent.replay_phase_changed(ReplayDebuggingPhase.Analysis)
-            elif isinstance(observation, ReplayPhaseUpdateObservation):
-                new_phase = observation.new_phase
-                if self.state.replay_phase == new_phase:
-                    self.log(
-                        'warning',
-                        f'Unexpected ReplayPhaseUpdateAction. Already in phase. Observation:\n {repr(observation)}',
-                    )
-                else:
-                    self.state.replay_phase = new_phase
-                    self.agent.replay_phase_changed(new_phase)
+            if isinstance(observation, ReplayObservation):
+                on_replay_observation(observation, self.state, self.agent)
 
             if self.state.agent_state == AgentState.USER_CONFIRMED:
                 await self.set_agent_state_to(AgentState.RUNNING)
diff --git a/openhands/events/observation/replay.py b/openhands/events/observation/replay.py
@@ -1,3 +1,4 @@
+from abc import ABC
 from dataclasses import dataclass
 
 from openhands.core.schema import ObservationType
@@ -6,7 +7,12 @@
 
 
 @dataclass
-class ReplayCmdOutputObservationBase(Observation):
+class ReplayObservation(Observation, ABC):
+    pass
+
+
+@dataclass
+class ReplayCmdOutputObservationBase(ReplayObservation, ABC):
     """This data class represents the output of a replay command."""
 
     command_id: int
@@ -38,7 +44,7 @@ class ReplayToolCmdOutputObservation(ReplayCmdOutputObservationBase):
 
 
 @dataclass
-class ReplayPhaseUpdateObservation(Observation):
+class ReplayPhaseUpdateObservation(ReplayObservation):
     new_phase: ReplayDebuggingPhase
     observation: str = ObservationType.REPLAY_UPDATE_PHASE
 
diff --git a/openhands/replay/replay_commands.py b/openhands/replay/replay_commands.py
@@ -1,13 +1,15 @@
 import json
 import re
-from typing import Any, TypedDict, cast
+from typing import Any, cast
 
 from openhands.controller.state.state import State
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action.action import Action
 from openhands.events.action.message import MessageAction
 from openhands.events.action.replay import ReplayInternalCmdRunAction
 from openhands.events.observation.replay import ReplayInternalCmdOutputObservation
+from openhands.replay.replay_prompts import replay_prompt_phase_analysis
+from openhands.replay.replay_types import AnalysisToolMetadata, AnnotateResult
 
 
 def scan_recording_id(issue: str) -> str | None:
@@ -24,7 +26,7 @@ def scan_recording_id(issue: str) -> str | None:
 
 
 # Produce the command string for the `annotate-execution-points` command.
-def command_annotate_execution_points(
+def start_initial_analysis(
     thought: str, is_workspace_repo: bool
 ) -> ReplayInternalCmdRunAction:
     command_input: dict[str, Any] = dict()
@@ -57,30 +59,12 @@ def replay_enhance_action(state: State, is_workspace_repo: bool) -> Action | Non
                 )
                 state.extra_data['replay_enhance_prompt_id'] = latest_user_message.id
                 logger.info('[REPLAY] stored latest_user_message id in state')
-                return command_annotate_execution_points(
+                return start_initial_analysis(
                     latest_user_message.content, is_workspace_repo
                 )
     return None
 
 
-class AnnotatedLocation(TypedDict, total=False):
-    filePath: str
-    line: int
-
-
-class AnalysisToolMetadata(TypedDict, total=False):
-    recordingId: str
-
-
-class AnnotateResult(TypedDict, total=False):
-    point: str
-    commentText: str | None
-    annotatedRepo: str | None
-    annotatedLocations: list[AnnotatedLocation] | None
-    pointLocation: str | None
-    metadata: AnalysisToolMetadata | None
-
-
 def safe_parse_json(text: str) -> dict[str, Any] | None:
     try:
         return json.loads(text)
@@ -97,15 +81,7 @@ def split_metadata(result):
     return metadata, data
 
 
-def enhance_prompt(user_message: MessageAction, prefix: str, suffix: str | None = None):
-    if prefix != '':
-        user_message.content = f'{prefix}\n\n{user_message.content}'
-    if suffix is not None:
-        user_message.content = f'{user_message.content}\n\n{suffix}'
-    logger.info(f'[REPLAY] Enhanced user prompt:\n{user_message.content}')
-
-
-def handle_replay_internal_observation(
+def handle_replay_internal_command_observation(
     state: State, observation: ReplayInternalCmdOutputObservation
 ) -> AnalysisToolMetadata | None:
     """
@@ -126,63 +102,20 @@ def handle_replay_internal_observation(
         assert user_message
         state.extra_data['replay_enhance_observed'] = True
 
+        # Deserialize stringified result.
         result: AnnotateResult = cast(
             AnnotateResult, safe_parse_json(observation.content)
         )
 
-        # Determine what initial-analysis did:
+        # Get metadata and enhance prompt.
         if result and 'metadata' in result:
-            # New workflow: initial-analysis provided the metadata to allow tool use.
-            metadata, data = split_metadata(result)
-            prefix = ''
-            suffix = """
-# Instructions
-0. Take a look at below `Initial Analysis`, based on a recorded trace of the bug. Pay special attention to `IMPORTANT_NOTES`.
-1. State the main problem statement. It MUST address `IMPORTANT_NOTES`. It must make sure that the application won't crash. It must fix the issue.
-2. Propose a plan to fix or investigate with multiple options in order of priority.
-3. Then use the `inspect-*` tools to investigate.
-4. Once found, `submit-hypothesis`.
-
-
-# Initial Analysis
-""" + json.dumps(data, indent=2)
-            enhance_prompt(
-                user_message,
-                prefix,
-                suffix,
-            )
+            # initial-analysis provides metadata needed for tool use.
+            metadata, command_result = split_metadata(result)
+            replay_prompt_phase_analysis(command_result, user_message)
             return metadata
-        elif result and result.get('annotatedRepo'):
-            # Old workflow: initial-analysis left hints in form of source code annotations.
-            annotated_repo_path = result.get('annotatedRepo', '')
-            comment_text = result.get('commentText', '')
-            react_component_name = result.get('reactComponentName', '')
-            console_error = result.get('consoleError', '')
-            # start_location = result.get('startLocation', '')
-            start_name = result.get('startName', '')
-
-            # TODO: Move this to a prompt template file.
-            if comment_text:
-                if react_component_name:
-                    prefix = f'There is a change needed to the {react_component_name} component.\n'
-                else:
-                    prefix = f'There is a change needed in {annotated_repo_path}:\n'
-                prefix += f'{comment_text}\n\n'
-            elif console_error:
-                prefix = f'There is a change needed in {annotated_repo_path} to fix a console error that has appeared unexpectedly:\n'
-                prefix += f'{console_error}\n\n'
-
-            prefix += '<IMPORTANT>\n'
-            prefix += 'Information about a reproduction of the problem is available in source comments.\n'
-            prefix += 'You must search for these comments and use them to get a better understanding of the problem.\n'
-            prefix += f'The first reproduction comment to search for is named {start_name}. Start your investigation there.\n'
-            prefix += '</IMPORTANT>\n'
-
-            enhance_prompt(user_message, prefix)
-            return None
         else:
             logger.warning(
-                f'[REPLAY] Replay observation cannot be interpreted. Observed content: {str(observation.content)}'
+                f'[REPLAY] Replay command result cannot be interpreted. Observed content: {str(observation.content)}'
             )
 
     return None
diff --git a/openhands/replay/replay_prompts.py b/openhands/replay/replay_prompts.py
@@ -0,0 +1,77 @@
+import json
+
+from openhands.core.logger import openhands_logger as logger
+from openhands.events.action.message import MessageAction
+from openhands.replay.replay_types import AnnotateResult
+
+
+def enhance_prompt(user_message: MessageAction, prefix: str, suffix: str):
+    """Wrap the message content in place; an empty prefix or suffix adds nothing."""
+    head = f'{prefix}\n\n' if prefix != '' else ''
+    tail = f'\n\n{suffix}' if suffix != '' else ''
+    user_message.content = f'{head}{user_message.content}{tail}'
+    logger.info(f'[REPLAY] Enhanced user prompt:\n{user_message.content}')
+
+
+def replay_prompt_phase_analysis(command_result: dict, user_message: MessageAction):
+    """Append analysis-phase instructions and the JSON-dumped result to the message."""
+    analysis_suffix = """
+# Instructions
+0. Take a look at below `Initial Analysis`, based on a recorded trace of the bug. Pay special attention to `IMPORTANT_NOTES`.
+1. State the main problem statement. It MUST address `IMPORTANT_NOTES`. It must make sure that the application won't crash. It must fix the issue.
+2. Propose a plan to fix or investigate with multiple options in order of priority.
+3. Then use the `inspect-*` tools to investigate.
+4. Once found, `submit-hypothesis`.
+
+
+# Initial Analysis
+""" + json.dumps(command_result, indent=2)
+    return enhance_prompt(user_message, '', analysis_suffix)
+
+
+def replay_prompt_phase_analysis_legacy(
+    command_result: AnnotateResult, user_message: MessageAction
+):
+    """Old workflow: prefix the prompt with hints that initial-analysis left as source annotations."""
+    annotated_repo_path = command_result.get('annotatedRepo', '')
+    comment_text = command_result.get('commentText', '')
+    react_component_name = command_result.get('reactComponentName', '')
+    console_error = command_result.get('consoleError', '')
+    # start_location = result.get('startLocation', '')
+    start_name = command_result.get('startName', '')
+
+    # TODO: Move this to a prompt template file.
+    # Start empty so `prefix +=` below works when there is neither a comment nor a console error.
+    prefix = ''
+    if comment_text:
+        if react_component_name:
+            prefix = (
+                f'There is a change needed to the {react_component_name} component.\n'
+            )
+        else:
+            prefix = f'There is a change needed in {annotated_repo_path}:\n'
+        prefix += f'{comment_text}\n\n'
+    elif console_error:
+        prefix = f'There is a change needed in {annotated_repo_path} to fix a console error that has appeared unexpectedly:\n'
+        prefix += f'{console_error}\n\n'
+
+    prefix += '<IMPORTANT>\n'
+    prefix += 'Information about a reproduction of the problem is available in source comments.\n'
+    prefix += 'You must search for these comments and use them to get a better understanding of the problem.\n'
+    prefix += f'The first reproduction comment to search for is named {start_name}. Start your investigation there.\n'
+    prefix += '</IMPORTANT>\n'
+
+    return enhance_prompt(user_message, prefix, '')
+
+
+def replay_prompt_phase_edit() -> str:
+    """Return the prompt text that tells the agent to stop analyzing and start editing."""
+    return """
+You have concluded the analysis.
+
+IMPORTANT: NOW review, then implement the hypothesized changes using tools. The code is available in the workspace. Start by answering these questions:
+  1. What is the goal of the investigation according to the initial prompt and initial analysis? IMPORTANT. PAY ATTENTION TO THIS. THIS IS THE ENTRY POINT OF EVERYTHING.
+  2. Given (1), is the hypothesis's `problem` description correct? Does it match the goal of the investigation?
+  3. Do the `editSuggestions` actually address the issue?
+  4. Rephrase the hypothesis so that it is consistent and correct.
+"""
diff --git a/openhands/replay/replay_state_machine.py b/openhands/replay/replay_state_machine.py
diff --git a/openhands/replay/replay_types.py b/openhands/replay/replay_types.py