Skip to content

Commit

Permalink
WIP: refactoring replay code into its own set of files
Browse files Browse the repository at this point in the history
  • Loading branch information
Domiii committed Jan 21, 2025
1 parent bb77b47 commit f7e3d8c
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 141 deletions.
44 changes: 8 additions & 36 deletions openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,14 @@
from openhands.events.observation.error import ErrorObservation
from openhands.events.observation.observation import Observation
from openhands.events.observation.replay import (
ReplayPhaseUpdateObservation,
ReplayToolCmdOutputObservation,
ReplayObservation,
)
from openhands.events.replay import replay_enhance_action
from openhands.events.serialization.event import truncate_content
from openhands.llm.llm import LLM
from openhands.replay.replay_commands import replay_enhance_action
from openhands.replay.replay_state_machine import (
get_replay_observation_message,
)
from openhands.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
Expand Down Expand Up @@ -253,38 +255,8 @@ def get_observation_message(
)
text += f'\n[Command finished with exit code {obs.exit_code}]'
message = Message(role='user', content=[TextContent(text=text)])
elif isinstance(obs, ReplayToolCmdOutputObservation):
# if it doesn't have tool call metadata, it was triggered by a user action
if obs.tool_call_metadata is None:
text = truncate_content(
f'\nObserved result of replay command executed by user:\n{obs.content}',
max_message_chars,
)
else:
text = obs.content
message = Message(role='user', content=[TextContent(text=text)])
elif isinstance(obs, ReplayPhaseUpdateObservation):
# NOTE: The phase change itself is handled in AgentController.
new_phase = obs.new_phase
if new_phase == ReplayDebuggingPhase.Edit:
# Tell the agent to stop analyzing and start editing:
text = """
You have concluded the analysis.
IMPORTANT: NOW review, then implement the hypothesized changes using tools. The code is available in the workspace. Start by answering these questions:
1. What is the goal of the investigation according to the initial prompt and initial analysis? IMPORTANT. PAY ATTENTION TO THIS. THIS IS THE ENTRY POINT OF EVERYTHING.
2. Given (1), is the hypothesis's `problem` description correct? Does it match the goal of the investigation?
3. Do the `editSuggestions` actually address the issue?
4. Rephrase the hypothesis so that it is consistent and correct.
IMPORTANT: Don't stop. Keep working.
IMPORTANT: Don't stop. Keep working.
"""
message = Message(role='user', content=[TextContent(text=text)])
else:
raise NotImplementedError(
f'Unhandled ReplayPhaseUpdateAction: {new_phase}'
)
elif isinstance(obs, ReplayObservation):
message = get_replay_observation_message(obs, max_message_chars)
elif isinstance(obs, IPythonRunCellObservation):
text = obs.content
# replace base64 images with a placeholder
Expand Down Expand Up @@ -388,7 +360,7 @@ def step(self, state: State) -> Action:
return AgentFinishAction()

if self.config.codeact_enable_replay:
# Replay enhancement.
# Check whether we should enhance the prompt.
enhance_action = replay_enhance_action(state, self.config.is_workspace_repo)
if enhance_action:
logger.info('[REPLAY] Enhancing prompt for Replay recording...')
Expand Down
28 changes: 4 additions & 24 deletions openhands/controller/agent_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@
)
from openhands.events.observation.replay import (
ReplayInternalCmdOutputObservation,
ReplayPhaseUpdateObservation,
ReplayObservation,
)
from openhands.events.replay import handle_replay_internal_observation
from openhands.events.serialization.event import truncate_content
from openhands.llm.llm import LLM
from openhands.replay.replay_state_machine import on_replay_observation
from openhands.utils.shutdown_listener import should_continue

# note: RESUME is only available on web GUI
Expand Down Expand Up @@ -297,28 +297,8 @@ async def _handle_observation(self, observation: Observation) -> None:

if self._pending_action and self._pending_action.id == observation.cause:
self._pending_action = None
if isinstance(observation, ReplayInternalCmdOutputObservation):
# NOTE: Currently, the only internal command is the initial-analysis command.
analysis_tool_metadata = handle_replay_internal_observation(
self.state, observation
)
if analysis_tool_metadata:
# Start analysis phase
self.state.replay_recording_id = analysis_tool_metadata[
'recordingId'
]
self.state.replay_phase = ReplayDebuggingPhase.Analysis
self.agent.replay_phase_changed(ReplayDebuggingPhase.Analysis)
elif isinstance(observation, ReplayPhaseUpdateObservation):
new_phase = observation.new_phase
if self.state.replay_phase == new_phase:
self.log(
'warning',
f'Unexpected ReplayPhaseUpdateAction. Already in phase. Observation:\n {repr(observation)}',
)
else:
self.state.replay_phase = new_phase
self.agent.replay_phase_changed(new_phase)
if isinstance(observation, ReplayObservation):
on_replay_observation(observation, self.state, self.agent)

if self.state.agent_state == AgentState.USER_CONFIRMED:
await self.set_agent_state_to(AgentState.RUNNING)
Expand Down
10 changes: 8 additions & 2 deletions openhands/events/observation/replay.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from abc import ABC
from dataclasses import dataclass

from openhands.core.schema import ObservationType
Expand All @@ -6,7 +7,12 @@


@dataclass
class ReplayCmdOutputObservationBase(Observation):
class ReplayObservation(Observation, ABC):
pass


@dataclass
class ReplayCmdOutputObservationBase(ReplayObservation, ABC):
"""This data class represents the output of a replay command."""

command_id: int
Expand Down Expand Up @@ -38,7 +44,7 @@ class ReplayToolCmdOutputObservation(ReplayCmdOutputObservationBase):


@dataclass
class ReplayPhaseUpdateObservation(Observation):
class ReplayPhaseUpdateObservation(ReplayObservation):
new_phase: ReplayDebuggingPhase
observation: str = ObservationType.REPLAY_UPDATE_PHASE

Expand Down
91 changes: 12 additions & 79 deletions openhands/events/replay.py → openhands/replay/replay_commands.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import json
import re
from typing import Any, TypedDict, cast
from typing import Any, cast

from openhands.controller.state.state import State
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.action import Action
from openhands.events.action.message import MessageAction
from openhands.events.action.replay import ReplayInternalCmdRunAction
from openhands.events.observation.replay import ReplayInternalCmdOutputObservation
from openhands.replay.replay_prompts import replay_prompt_phase_analysis
from openhands.replay.replay_types import AnalysisToolMetadata, AnnotateResult


def scan_recording_id(issue: str) -> str | None:
Expand All @@ -24,7 +26,7 @@ def scan_recording_id(issue: str) -> str | None:


# Produce the command string for the `annotate-execution-points` command.
def command_annotate_execution_points(
def start_initial_analysis(
thought: str, is_workspace_repo: bool
) -> ReplayInternalCmdRunAction:
command_input: dict[str, Any] = dict()
Expand Down Expand Up @@ -57,30 +59,12 @@ def replay_enhance_action(state: State, is_workspace_repo: bool) -> Action | Non
)
state.extra_data['replay_enhance_prompt_id'] = latest_user_message.id
logger.info('[REPLAY] stored latest_user_message id in state')
return command_annotate_execution_points(
return start_initial_analysis(
latest_user_message.content, is_workspace_repo
)
return None


class AnnotatedLocation(TypedDict, total=False):
filePath: str
line: int


class AnalysisToolMetadata(TypedDict, total=False):
recordingId: str


class AnnotateResult(TypedDict, total=False):
point: str
commentText: str | None
annotatedRepo: str | None
annotatedLocations: list[AnnotatedLocation] | None
pointLocation: str | None
metadata: AnalysisToolMetadata | None


def safe_parse_json(text: str) -> dict[str, Any] | None:
try:
return json.loads(text)
Expand All @@ -97,15 +81,7 @@ def split_metadata(result):
return metadata, data


def enhance_prompt(user_message: MessageAction, prefix: str, suffix: str | None = None):
if prefix != '':
user_message.content = f'{prefix}\n\n{user_message.content}'
if suffix is not None:
user_message.content = f'{user_message.content}\n\n{suffix}'
logger.info(f'[REPLAY] Enhanced user prompt:\n{user_message.content}')


def handle_replay_internal_observation(
def handle_replay_internal_command_observation(
state: State, observation: ReplayInternalCmdOutputObservation
) -> AnalysisToolMetadata | None:
"""
Expand All @@ -126,63 +102,20 @@ def handle_replay_internal_observation(
assert user_message
state.extra_data['replay_enhance_observed'] = True

# Deserialize stringified result.
result: AnnotateResult = cast(
AnnotateResult, safe_parse_json(observation.content)
)

# Determine what initial-analysis did:
# Get metadata and enhance prompt.
if result and 'metadata' in result:
# New workflow: initial-analysis provided the metadata to allow tool use.
metadata, data = split_metadata(result)
prefix = ''
suffix = """
# Instructions
0. Take a look at below `Initial Analysis`, based on a recorded trace of the bug. Pay special attention to `IMPORTANT_NOTES`.
1. State the main problem statement. It MUST address `IMPORTANT_NOTES`. It must make sure that the application won't crash. It must fix the issue.
2. Propose a plan to fix or investigate with multiple options in order of priority.
3. Then use the `inspect-*` tools to investigate.
4. Once found, `submit-hypothesis`.
# Initial Analysis
""" + json.dumps(data, indent=2)
enhance_prompt(
user_message,
prefix,
suffix,
)
# initial-analysis provides metadata needed for tool use.
metadata, command_result = split_metadata(result)
replay_prompt_phase_analysis(command_result, user_message)
return metadata
elif result and result.get('annotatedRepo'):
# Old workflow: initial-analysis left hints in form of source code annotations.
annotated_repo_path = result.get('annotatedRepo', '')
comment_text = result.get('commentText', '')
react_component_name = result.get('reactComponentName', '')
console_error = result.get('consoleError', '')
# start_location = result.get('startLocation', '')
start_name = result.get('startName', '')

# TODO: Move this to a prompt template file.
if comment_text:
if react_component_name:
prefix = f'There is a change needed to the {react_component_name} component.\n'
else:
prefix = f'There is a change needed in {annotated_repo_path}:\n'
prefix += f'{comment_text}\n\n'
elif console_error:
prefix = f'There is a change needed in {annotated_repo_path} to fix a console error that has appeared unexpectedly:\n'
prefix += f'{console_error}\n\n'

prefix += '<IMPORTANT>\n'
prefix += 'Information about a reproduction of the problem is available in source comments.\n'
prefix += 'You must search for these comments and use them to get a better understanding of the problem.\n'
prefix += f'The first reproduction comment to search for is named {start_name}. Start your investigation there.\n'
prefix += '</IMPORTANT>\n'

enhance_prompt(user_message, prefix)
return None
else:
logger.warning(
f'[REPLAY] Replay observation cannot be interpreted. Observed content: {str(observation.content)}'
f'[REPLAY] Replay command result cannot be interpreted. Observed content: {str(observation.content)}'
)

return None
77 changes: 77 additions & 0 deletions openhands/replay/replay_prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json

from openhands.core.logger import openhands_logger as logger
from openhands.events.action.message import MessageAction
from openhands.replay.replay_types import AnnotateResult


def enhance_prompt(user_message: MessageAction, prefix: str, suffix: str):
    """Wrap the content of ``user_message`` with an optional prefix and suffix.

    The message is modified in place. A non-empty ``prefix`` is placed before
    the current content and a non-empty ``suffix`` after it; each is separated
    from the content by a blank line. The final content is logged.

    Args:
        user_message: Message whose ``content`` attribute is rewritten.
        prefix: Text to prepend; skipped when it is the empty string.
        suffix: Text to append; skipped when it is the empty string.
    """
    gap = '\n\n'

    if prefix != '':
        original = user_message.content
        user_message.content = f'{prefix}{gap}{original}'

    if suffix != '':
        current = user_message.content
        user_message.content = f'{current}{gap}{suffix}'

    enhanced = user_message.content
    logger.info(f'[REPLAY] Enhanced user prompt:\n{enhanced}')


def replay_prompt_phase_analysis(command_result: dict, user_message: MessageAction):
    """Append the analysis-phase instructions to the user's prompt.

    The suffix consists of a fixed instruction list followed by
    ``command_result`` serialized as indented JSON. Nothing is prepended.

    Args:
        command_result: Data to embed under the `Initial Analysis` heading;
            it is passed to ``json.dumps`` as-is.
        user_message: Message that ``enhance_prompt`` modifies in place.

    Returns:
        Whatever ``enhance_prompt`` returns.
    """
    instructions = """
# Instructions
0. Take a look at below `Initial Analysis`, based on a recorded trace of the bug. Pay special attention to `IMPORTANT_NOTES`.
1. State the main problem statement. It MUST address `IMPORTANT_NOTES`. It must make sure that the application won't crash. It must fix the issue.
2. Propose a plan to fix or investigate with multiple options in order of priority.
3. Then use the `inspect-*` tools to investigate.
4. Once found, `submit-hypothesis`.
# Initial Analysis
"""
    analysis_json = json.dumps(command_result, indent=2)
    # No prefix for this phase: everything goes after the user's own text.
    return enhance_prompt(user_message, '', instructions + analysis_json)


def replay_prompt_phase_analysis_legacy(
    command_result: AnnotateResult, user_message: MessageAction
):
    """Enhance the user prompt for the old, annotation-based workflow.

    Old workflow: initial-analysis left hints in the form of source code
    annotations. This builds a prefix from ``command_result`` and prepends it
    to ``user_message`` through ``enhance_prompt``; no suffix is added.

    Args:
        command_result: Parsed initial-analysis result. The keys read here are
            ``annotatedRepo``, ``commentText``, ``reactComponentName``,
            ``consoleError`` and ``startName``; each defaults to ``''`` when
            the key is missing.
        user_message: Message that ``enhance_prompt`` modifies in place.

    Returns:
        Whatever ``enhance_prompt`` returns.
    """
    annotated_repo_path = command_result.get('annotatedRepo', '')
    comment_text = command_result.get('commentText', '')
    react_component_name = command_result.get('reactComponentName', '')
    console_error = command_result.get('consoleError', '')
    start_name = command_result.get('startName', '')

    # Start from an empty prefix: when the result carries neither a comment
    # nor a console error, the `+=` lines below would otherwise raise
    # UnboundLocalError instead of emitting the <IMPORTANT> block on its own.
    prefix = ''

    # TODO: Move this to a prompt template file.
    if comment_text:
        if react_component_name:
            prefix = (
                f'There is a change needed to the {react_component_name} component.\n'
            )
        else:
            prefix = f'There is a change needed in {annotated_repo_path}:\n'
        prefix += f'{comment_text}\n\n'
    elif console_error:
        prefix = f'There is a change needed in {annotated_repo_path} to fix a console error that has appeared unexpectedly:\n'
        prefix += f'{console_error}\n\n'

    prefix += '<IMPORTANT>\n'
    prefix += 'Information about a reproduction of the problem is available in source comments.\n'
    prefix += 'You must search for these comments and use them to get a better understanding of the problem.\n'
    prefix += f'The first reproduction comment to search for is named {start_name}. Start your investigation there.\n'
    prefix += '</IMPORTANT>\n'

    suffix = ''

    return enhance_prompt(user_message, prefix, suffix)


def replay_prompt_phase_edit():
    """Return the prompt that tells the agent to stop analyzing and start editing.

    The text begins and ends with a newline and contains one instruction per
    line: a notice that the analysis is concluded, followed by four review
    questions about the hypothesis.
    """
    prompt_lines = (
        '',  # leading newline
        'You have concluded the analysis.',
        'IMPORTANT: NOW review, then implement the hypothesized changes using tools. The code is available in the workspace. Start by answering these questions:',
        '1. What is the goal of the investigation according to the initial prompt and initial analysis? IMPORTANT. PAY ATTENTION TO THIS. THIS IS THE ENTRY POINT OF EVERYTHING.',
        "2. Given (1), is the hypothesis's `problem` description correct? Does it match the goal of the investigation?",
        '3. Do the `editSuggestions` actually address the issue?',
        '4. Rephrase the hypothesis so that it is consistent and correct.',
        '',  # trailing newline
    )
    return '\n'.join(prompt_lines)
Loading

0 comments on commit f7e3d8c

Please sign in to comment.