From 8d6649c7e71e6e1ba049395724f6ce2e8aab2aaa Mon Sep 17 00:00:00 2001 From: "D. Seifert" Date: Tue, 21 Jan 2025 21:48:30 +0800 Subject: [PATCH] WIP: tool refactoring --- .../agenthub/codeact_agent/codeact_agent.py | 4 +- .../codeact_agent/function_calling.py | 217 +--------------- openhands/events/action/replay.py | 9 +- ...commands.py => replay_initial_analysis.py} | 25 +- openhands/replay/replay_prompts.py | 24 +- openhands/replay/replay_state_machine.py | 10 +- openhands/replay/replay_tools.py | 236 ++++++++++++++++++ openhands/replay/replay_types.py | 9 - 8 files changed, 285 insertions(+), 249 deletions(-) rename openhands/replay/{replay_commands.py => replay_initial_analysis.py} (82%) create mode 100644 openhands/replay/replay_tools.py diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 4e04f87443f1..7453c9df2d41 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -40,7 +40,7 @@ ) from openhands.events.serialization.event import truncate_content from openhands.llm.llm import LLM -from openhands.replay.replay_commands import replay_enhance_action +from openhands.replay.replay_initial_analysis import replay_enhance_action from openhands.replay.replay_state_machine import ( get_replay_observation_message, ) @@ -327,7 +327,7 @@ def replay_phase_changed(self, phase: ReplayDebuggingPhase) -> None: codeact_enable_jupyter=self.config.codeact_enable_jupyter, codeact_enable_llm_editor=self.config.codeact_enable_llm_editor, codeact_enable_replay=self.config.codeact_enable_replay, - codeact_replay_phase=phase, + replay_phase=phase, ) logger.debug( f'[REPLAY] CodeActAgent.replay_phase_changed({phase}).' 
diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py index 2fbd34f71282..16a7d1af848d 100644 --- a/openhands/agenthub/codeact_agent/function_calling.py +++ b/openhands/agenthub/codeact_agent/function_calling.py @@ -26,171 +26,13 @@ IPythonRunCellAction, MessageAction, ) -from openhands.events.action.replay import ( - ReplayPhaseUpdateAction, - ReplayToolCmdRunAction, -) from openhands.events.tool import ToolCallMetadata - -# --------------------------------------------------------- -# Tool: inspect-data -# --------------------------------------------------------- -_REPLAY_INSPECT_DATA_DESCRIPTION = """ -Explains value, data flow and origin information for `expression` at `point`. -IMPORTANT: Prefer using inspect-data over inspect-point. -""" - -ReplayInspectDataTool = ChatCompletionToolParam( - type='function', - function=ChatCompletionToolParamFunctionChunk( - name='inspect-data', - description=_REPLAY_INSPECT_DATA_DESCRIPTION.strip(), - parameters={ - 'type': 'object', - 'properties': { - 'expression': { - 'type': 'string', - 'description': 'A valid JS expression. IMPORTANT: First pick the best expression. If the expression is an object: Prefer "array[0]" over "array" and "o.x" over "o" to get closer to the origin and creation site of important data points. Prefer nested object over primitive expressions.', - }, - 'point': { - 'type': 'string', - 'description': 'The point at which to inspect the runtime. 
The first point comes from the `thisPoint` in the Initial analysis.', - }, - 'explanation': { - 'type': 'string', - 'description': 'Give a concise explanation as to why you take this investigative step.', - }, - 'explanation_source': { - 'type': 'string', - 'description': 'Explain which data you saw in the previous analysis results that informs this step.', - }, - }, - 'required': ['expression', 'point', 'explanation', 'explanation_source'], - }, - ), -) - -# --------------------------------------------------------- -# Tool: inspect-point -# --------------------------------------------------------- -_REPLAY_INSPECT_POINT_DESCRIPTION = """ -Explains dynamic control flow and data flow dependencies of the code at `point`. -Use this tool instead of `inspect-data` only when you don't have a specific data point to investigate. -""" - -ReplayInspectPointTool = ChatCompletionToolParam( - type='function', - function=ChatCompletionToolParamFunctionChunk( - name='inspect-point', - description=_REPLAY_INSPECT_POINT_DESCRIPTION.strip(), - parameters={ - 'type': 'object', - 'properties': { - 'point': {'type': 'string'}, - }, - 'required': ['point'], - }, - ), -) - -# --------------------------------------------------------- -# Tool: SubmitHypothesis -# TODO: Divide this into multiple steps - -# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps. -# 2. The second submission, after analysis has already concluded, must be as complete as possible. -# --------------------------------------------------------- -# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """ -# Your investigation has yielded a complete thin slice from symptom to root cause, -# enough proof to let the `CodeEdit` agent take over to fix the bug. -# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug, -# based on evidence you have gathered. 
-# """ - -# ReplaySubmitHypothesisTool = ChatCompletionToolParam( -# type='function', -# function=ChatCompletionToolParamFunctionChunk( -# name='submit-hypothesis', -# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(), -# parameters={ -# 'type': 'object', -# 'properties': { -# 'rootCauseHypothesis': {'type': 'string'}, -# 'thinSlice': { -# 'type': 'array', -# 'items': { -# 'type': 'object', -# 'properties': { -# 'point': {'type': 'string'}, -# 'code': {'type': 'string'}, -# 'role': {'type': 'string'}, -# }, -# 'required': ['point', 'code', 'role'], -# }, -# }, -# 'modifications': { -# 'type': 'array', -# 'items': { -# 'type': 'object', -# 'properties': { -# 'kind': { -# 'type': 'string', -# 'enum': ['add', 'remove', 'modify'], -# }, -# 'newCode': {'type': 'string'}, -# 'oldCode': {'type': 'string'}, -# 'location': {'type': 'string'}, -# 'point': {'type': 'string'}, -# # NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation. -# # Instead of requiring a line number, the final fix will be more involved, as explained in the issue. -# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176 -# # 'line': {'type': 'number'}, -# 'briefExplanation': {'type': 'string'}, -# 'verificationProof': {'type': 'string'}, -# }, -# 'required': [ -# 'kind', -# 'location', -# 'briefExplanation', -# # 'line', -# 'verificationProof', -# ], -# }, -# }, -# }, -# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'], -# }, -# ), -# ) -_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """ -# Use this tool to conclude your analysis and move on to code editing. 
-# """ - -ReplaySubmitHypothesisTool = ChatCompletionToolParam( - type='function', - function=ChatCompletionToolParamFunctionChunk( - name='submit-hypothesis', - description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(), - parameters={ - 'type': 'object', - 'properties': { - 'problem': { - 'type': 'string', - 'description': 'One-sentence explanation of the core problem that this will solve.', - }, - 'rootCauseHypothesis': {'type': 'string'}, - 'editSuggestions': { - 'type': 'string', - 'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.', - }, - }, - 'required': ['rootCauseHypothesis'], - }, - ), +from openhands.replay.replay_tools import ( + get_replay_tools, + handle_replay_tool_call, + is_replay_tool, ) -REPLAY_TOOLS = ['inspect-data', 'inspect-point', 'submit-hypothesis'] - - # --------------------------------------------------------- # OH default tools. # --------------------------------------------------------- @@ -631,36 +473,8 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]: ) from e if tool_call.function.name == 'execute_bash': action = CmdRunAction(**arguments) - elif tool_call.function.name in REPLAY_TOOLS: - logger.info( - f'[REPLAY] TOOL_CALL {tool_call.function.name} - arguments: {json.dumps(arguments, indent=2)}' - ) - if tool_call.function.name == 'inspect-data': - # Remove explanation props. - arguments = { - k: v for k, v in arguments.items() if 'explanation' not in k - } - action = ReplayToolCmdRunAction( - command_name='inspect-data', - command_args=arguments - | {'recordingId': state.replay_recording_id}, - ) - elif tool_call.function.name == 'inspect-point': - # if arguments['expression'] == 'wiredRules': # hackfix for 10608 experiment - # raise FunctionCallValidationError(f'wiredRules is irrelevant to the problem. 
Try something else.') - action = ReplayToolCmdRunAction( - command_name='inspect-point', - command_args=arguments - | {'recordingId': state.replay_recording_id}, - ) - elif tool_call.function.name == 'submit-hypothesis': - action = ReplayPhaseUpdateAction( - new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments) - ) - else: - raise ValueError( - f'Unknown Replay tool. Make sure to add them all to REPLAY_TOOLS: {tool_call.function.name}' - ) + elif is_replay_tool(tool_call.function.name): + action = handle_replay_tool_call(tool_call, arguments, state) elif tool_call.function.name == 'execute_ipython_cell': action = IPythonRunCellAction(**arguments) elif tool_call.function.name == 'delegate_to_browsing_agent': @@ -727,31 +541,18 @@ def get_tools( codeact_enable_llm_editor: bool = False, codeact_enable_jupyter: bool = False, codeact_enable_replay: bool = False, - codeact_replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal, + replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal, ) -> list[ChatCompletionToolParam]: default_tools = get_default_tools( codeact_enable_browsing, codeact_enable_llm_editor, codeact_enable_jupyter, ) - if not codeact_enable_replay or codeact_replay_phase == ReplayDebuggingPhase.Normal: + if not codeact_enable_replay or replay_phase == ReplayDebuggingPhase.Normal: # Use the default tools when not in a Replay-specific phase. return default_tools if codeact_enable_replay: - analysis_tools = [ - ReplayInspectDataTool, - ReplayInspectPointTool, - ] - if codeact_replay_phase == ReplayDebuggingPhase.Analysis: - # Analysis tools only. This phase is concluded upon submit-hypothesis. - tools = analysis_tools + [ReplaySubmitHypothesisTool] - elif codeact_replay_phase == ReplayDebuggingPhase.Edit: - # Combine default and analysis tools.
- tools = default_tools + analysis_tools - else: - raise ValueError( - f'Unhandled ReplayDebuggingPhase in get_tools: {codeact_replay_phase}' - ) + tools = get_replay_tools(replay_phase, default_tools) return tools diff --git a/openhands/events/action/replay.py b/openhands/events/action/replay.py index 0d0c2d0983cc..885b8f8eab1f 100644 --- a/openhands/events/action/replay.py +++ b/openhands/events/action/replay.py @@ -11,9 +11,14 @@ ) +@dataclass +class ReplayAction(Action): + pass + + # NOTE: We need the same class twice because a lot of the agent logic is based on isinstance checks. @dataclass -class ReplayCmdRunActionBase(Action): +class ReplayCmdRunActionBase(ReplayAction): # Name of the command in @replayapi/cli. command_name: str @@ -62,7 +67,7 @@ class ReplayToolCmdRunAction(ReplayCmdRunActionBase): @dataclass -class ReplayPhaseUpdateAction(Action): +class ReplayPhaseUpdateAction(ReplayAction): new_phase: ReplayDebuggingPhase thought: str = '' diff --git a/openhands/replay/replay_commands.py b/openhands/replay/replay_initial_analysis.py similarity index 82% rename from openhands/replay/replay_commands.py rename to openhands/replay/replay_initial_analysis.py index a5c037a69ff1..fe6fa1c6fa40 100644 --- a/openhands/replay/replay_commands.py +++ b/openhands/replay/replay_initial_analysis.py @@ -1,6 +1,6 @@ import json import re -from typing import Any, cast +from typing import Any, Tuple, cast from openhands.controller.state.state import State from openhands.core.logger import openhands_logger as logger @@ -9,7 +9,7 @@ from openhands.events.action.replay import ReplayInternalCmdRunAction from openhands.events.observation.replay import ReplayInternalCmdOutputObservation from openhands.replay.replay_prompts import replay_prompt_phase_analysis -from openhands.replay.replay_types import AnalysisToolMetadata, AnnotateResult +from openhands.replay.replay_types import AnalysisToolMetadata def scan_recording_id(issue: str) -> str | None: @@ -72,20 +72,23 @@ def 
safe_parse_json(text: str) -> dict[str, Any] | None: return None -def split_metadata(result): +def split_metadata(result: dict) -> Tuple[AnalysisToolMetadata, dict]: if 'metadata' not in result: return {}, result - metadata = result['metadata'] + metadata = cast(AnalysisToolMetadata, result['metadata']) data = dict(result) del data['metadata'] return metadata, data -def handle_replay_internal_command_observation( +def on_replay_internal_command_observation( state: State, observation: ReplayInternalCmdOutputObservation ) -> AnalysisToolMetadata | None: """ - Enhance the user prompt with the results of the replay analysis. + Handle result for an internally sent command (not agent tool use or user action). + + NOTE: Currently, the only internal command is the initial-analysis command. + Enhance the user prompt with the results of the initial analysis. Returns the metadata needed for the agent to switch to analysis tools. """ enhance_action_id = state.extra_data.get('replay_enhance_prompt_id') @@ -103,19 +106,19 @@ def handle_replay_internal_command_observation( state.extra_data['replay_enhance_observed'] = True # Deserialize stringified result. - result: AnnotateResult = cast( - AnnotateResult, safe_parse_json(observation.content) - ) + result = safe_parse_json(observation.content) # Get metadata and enhance prompt. if result and 'metadata' in result: # initial-analysis provides metadata needed for tool use. metadata, command_result = split_metadata(result) - replay_prompt_phase_analysis(command_result, user_message) + user_message.content = replay_prompt_phase_analysis( + command_result, user_message.content + ) return metadata else: logger.warning( - f'[REPLAY] Replay command result cannot be interpreted. Observed content: {str(observation.content)}' + f'[REPLAY] Replay command result missing metadata. 
Observed content: {str(observation.content)}' ) return None diff --git a/openhands/replay/replay_prompts.py b/openhands/replay/replay_prompts.py index c022249971b4..860c3ed3b155 100644 --- a/openhands/replay/replay_prompts.py +++ b/openhands/replay/replay_prompts.py @@ -1,19 +1,19 @@ import json from openhands.core.logger import openhands_logger as logger -from openhands.events.action.message import MessageAction -from openhands.replay.replay_types import AnnotateResult +from openhands.events.observation.replay import ReplayPhaseUpdateObservation -def enhance_prompt(user_message: MessageAction, prefix: str, suffix: str): +def enhance_prompt(prompt: str, prefix: str, suffix: str): if prefix != '': - user_message.content = f'{prefix}\n\n{user_message.content}' + prompt = f'{prefix}\n\n{prompt}' if suffix != '': - user_message.content = f'{user_message.content}\n\n{suffix}' - logger.info(f'[REPLAY] Enhanced user prompt:\n{user_message.content}') + prompt = f'{prompt}\n\n{suffix}' + logger.info(f'[REPLAY] Enhanced prompt:\n{prompt}') + return prompt -def replay_prompt_phase_analysis(command_result: dict, user_message: MessageAction): +def replay_prompt_phase_analysis(command_result: dict, prompt: str) -> str: prefix = '' suffix = """ # Instructions @@ -26,12 +26,10 @@ def replay_prompt_phase_analysis(command_result: dict, user_message: MessageActi # Initial Analysis """ + json.dumps(command_result, indent=2) - return enhance_prompt(user_message, prefix, suffix) + return enhance_prompt(prompt, prefix, suffix) -def replay_prompt_phase_analysis_legacy( - command_result: AnnotateResult, user_message: MessageAction -): +def replay_prompt_phase_analysis_legacy(command_result: dict, prompt: str) -> str: # Old workflow: initial-analysis left hints in form of source code annotations. 
annotated_repo_path = command_result.get('annotatedRepo', '') comment_text = command_result.get('commentText', '') @@ -61,10 +59,10 @@ def replay_prompt_phase_analysis_legacy( suffix = '' - return enhance_prompt(user_message, prefix, suffix) + return enhance_prompt(prompt, prefix, suffix) -def replay_prompt_phase_edit(): +def replay_prompt_phase_edit(obs: ReplayPhaseUpdateObservation) -> str: # Tell the agent to stop analyzing and start editing: return """ You have concluded the analysis. diff --git a/openhands/replay/replay_state_machine.py b/openhands/replay/replay_state_machine.py index 68f45b93be25..e8a9fab4b643 100644 --- a/openhands/replay/replay_state_machine.py +++ b/openhands/replay/replay_state_machine.py @@ -10,7 +10,9 @@ ReplayToolCmdOutputObservation, ) from openhands.events.serialization.event import truncate_content -from openhands.replay.replay_commands import handle_replay_internal_command_observation +from openhands.replay.replay_initial_analysis import ( + on_replay_internal_command_observation, +) from openhands.replay.replay_prompts import replay_prompt_phase_edit @@ -18,7 +20,7 @@ def on_replay_observation(obs: ReplayObservation, state: State, agent: Agent) -> """Handle the observation.""" if isinstance(obs, ReplayInternalCmdOutputObservation): # NOTE: Currently, the only internal command is the initial-analysis command. 
- analysis_tool_metadata = handle_replay_internal_command_observation(state, obs) + analysis_tool_metadata = on_replay_internal_command_observation(state, obs) if analysis_tool_metadata: # Start analysis phase state.replay_recording_id = analysis_tool_metadata['recordingId'] @@ -52,10 +54,10 @@ def get_replay_observation_message( elif isinstance(obs, ReplayPhaseUpdateObservation): new_phase = obs.new_phase if new_phase == ReplayDebuggingPhase.Edit: - text = replay_prompt_phase_edit() - message = Message(role='user', content=[TextContent(text=text)]) + text = replay_prompt_phase_edit(obs) else: raise NotImplementedError(f'Unhandled ReplayPhaseUpdateAction: {new_phase}') + message = Message(role='user', content=[TextContent(text=text)]) else: raise NotImplementedError( f"Unhandled observation type: {obs.__class__.__name__} ({getattr(obs, 'observation', None)})" diff --git a/openhands/replay/replay_tools.py b/openhands/replay/replay_tools.py new file mode 100644 index 000000000000..820e98efb0ca --- /dev/null +++ b/openhands/replay/replay_tools.py @@ -0,0 +1,236 @@ +"""This file contains the function calling implementation for different actions. + +This is similar to the functionality of `CodeActResponseParser`. 
+""" + +import json + +from litellm import ( + ChatCompletionMessageToolCall, + ChatCompletionToolParam, + ChatCompletionToolParamFunctionChunk, +) + +from openhands.controller.state.state import State +from openhands.core.logger import openhands_logger as logger +from openhands.core.schema.replay import ReplayDebuggingPhase +from openhands.events.action.replay import ( + ReplayAction, + ReplayPhaseUpdateAction, + ReplayToolCmdRunAction, +) + + +class ReplayTool(ChatCompletionToolParam): + pass + + +def replay_tool(**kwargs): + f = ChatCompletionToolParamFunctionChunk(**kwargs) + return ReplayTool(type='function', function=f) + + +# --------------------------------------------------------- +# Tool: inspect-data +# --------------------------------------------------------- +_REPLAY_INSPECT_DATA_DESCRIPTION = """ +Explains value, data flow and origin information for `expression` at `point`. +IMPORTANT: Prefer using inspect-data over inspect-point. +""" + +ReplayInspectDataTool = replay_tool( + name='inspect-data', + description=_REPLAY_INSPECT_DATA_DESCRIPTION.strip(), + parameters={ + 'type': 'object', + 'properties': { + 'expression': { + 'type': 'string', + 'description': 'A valid JS expression. IMPORTANT: First pick the best expression. If the expression is an object: Prefer "array[0]" over "array" and "o.x" over "o" to get closer to the origin and creation site of important data points. Prefer nested object over primitive expressions.', + }, + 'point': { + 'type': 'string', + 'description': 'The point at which to inspect the runtime. 
The first point comes from the `thisPoint` in the Initial analysis.', + }, + 'explanation': { + 'type': 'string', + 'description': 'Give a concise explanation as to why you take this investigative step.', + }, + 'explanation_source': { + 'type': 'string', + 'description': 'Explain which data you saw in the previous analysis results that informs this step.', + }, + }, + 'required': ['expression', 'point', 'explanation', 'explanation_source'], + }, +) + +# --------------------------------------------------------- +# Tool: inspect-point +# --------------------------------------------------------- +_REPLAY_INSPECT_POINT_DESCRIPTION = """ +Explains dynamic control flow and data flow dependencies of the code at `point`. +Use this tool instead of `inspect-data` only when you don't have a specific data point to investigate. +""" + +ReplayInspectPointTool = replay_tool( + name='inspect-point', + description=_REPLAY_INSPECT_POINT_DESCRIPTION.strip(), + parameters={ + 'type': 'object', + 'properties': { + 'point': {'type': 'string'}, + }, + 'required': ['point'], + }, +) + +# --------------------------------------------------------- +# Tool: SubmitHypothesis +# TODO: Divide this into multiple steps - +# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps. +# 2. The second submission, after analysis has already concluded, must be as complete as possible. +# --------------------------------------------------------- +# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """ +# Your investigation has yielded a complete thin slice from symptom to root cause, +# enough proof to let the `CodeEdit` agent take over to fix the bug. +# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug, +# based on evidence you have gathered. 
+# """ + +# ReplaySubmitHypothesisTool = ReplayToolDefinition( +# name='submit-hypothesis', +# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(), +# parameters={ +# 'type': 'object', +# 'properties': { +# 'rootCauseHypothesis': {'type': 'string'}, +# 'thinSlice': { +# 'type': 'array', +# 'items': { +# 'type': 'object', +# 'properties': { +# 'point': {'type': 'string'}, +# 'code': {'type': 'string'}, +# 'role': {'type': 'string'}, +# }, +# 'required': ['point', 'code', 'role'], +# }, +# }, +# 'modifications': { +# 'type': 'array', +# 'items': { +# 'type': 'object', +# 'properties': { +# 'kind': { +# 'type': 'string', +# 'enum': ['add', 'remove', 'modify'], +# }, +# 'newCode': {'type': 'string'}, +# 'oldCode': {'type': 'string'}, +# 'location': {'type': 'string'}, +# 'point': {'type': 'string'}, +# # NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation. +# # Instead of requiring a line number, the final fix will be more involved, as explained in the issue. +# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176 +# # 'line': {'type': 'number'}, +# 'briefExplanation': {'type': 'string'}, +# 'verificationProof': {'type': 'string'}, +# }, +# 'required': [ +# 'kind', +# 'location', +# 'briefExplanation', +# # 'line', +# 'verificationProof', +# ], +# }, +# }, +# }, +# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'], +# }, +# ) +_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """ +# Use this tool to conclude your analysis and move on to code editing. 
+# """ + +ReplaySubmitHypothesisTool = replay_tool( + name='submit-hypothesis', + description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(), + parameters={ + 'type': 'object', + 'properties': { + 'problem': { + 'type': 'string', + 'description': 'One-sentence explanation of the core problem that this will solve.', + }, + 'rootCauseHypothesis': {'type': 'string'}, + 'editSuggestions': { + 'type': 'string', + 'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.', + }, + }, + 'required': ['rootCauseHypothesis'], + }, +) + +replay_tools: list[ReplayTool] = [ + ReplayInspectDataTool, + ReplayInspectPointTool, + ReplaySubmitHypothesisTool, +] +replay_tool_names: set[str] = {t['function']['name'] for t in replay_tools} + + +def is_replay_tool(tool_name: str) -> bool: + return tool_name in replay_tool_names + + +def handle_replay_tool_call( + tool_call: ChatCompletionMessageToolCall, arguments: dict, state: State +) -> ReplayAction: + logger.info( + f'[REPLAY] TOOL_CALL {tool_call.function.name} - arguments: {json.dumps(arguments, indent=2)}' + ) + action: ReplayAction + if tool_call.function.name == 'inspect-data': + # Remove explanation props. + arguments = {k: v for k, v in arguments.items() if 'explanation' not in k} + action = ReplayToolCmdRunAction( + command_name='inspect-data', + command_args=arguments | {'recordingId': state.replay_recording_id}, + ) + elif tool_call.function.name == 'inspect-point': + # if arguments['expression'] == 'wiredRules': # hackfix for 10608 experiment + # raise FunctionCallValidationError(f'wiredRules is irrelevant to the problem.
Try something else.') + action = ReplayToolCmdRunAction( + command_name='inspect-point', + command_args=arguments | {'recordingId': state.replay_recording_id}, + ) + elif tool_call.function.name == 'submit-hypothesis': + action = ReplayPhaseUpdateAction( + new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments) + ) + else: + raise ValueError( + f'Unknown Replay tool. Make sure to add them all to replay_tools: {tool_call.function.name}' + ) + return action + + +def get_replay_tools( + replay_phase: ReplayDebuggingPhase, default_tools: list[ChatCompletionToolParam] +) -> list[ChatCompletionToolParam]: + analysis_tools = [ + ReplayInspectDataTool, + ReplayInspectPointTool, + ] + if replay_phase == ReplayDebuggingPhase.Analysis: + # Analysis tools only. This phase is concluded upon submit-hypothesis. + tools = analysis_tools + [ReplaySubmitHypothesisTool] + elif replay_phase == ReplayDebuggingPhase.Edit: + # Combine default and analysis tools. + tools = default_tools + analysis_tools + else: + raise ValueError(f'Unhandled ReplayDebuggingPhase in get_tools: {replay_phase}') + return tools diff --git a/openhands/replay/replay_types.py b/openhands/replay/replay_types.py index 2628da9e414f..213bc6d8ded0 100644 --- a/openhands/replay/replay_types.py +++ b/openhands/replay/replay_types.py @@ -8,12 +8,3 @@ class AnnotatedLocation(TypedDict, total=False): class AnalysisToolMetadata(TypedDict, total=False): recordingId: str - - -class AnnotateResult(TypedDict, total=False): - point: str - commentText: str | None - annotatedRepo: str | None - annotatedLocations: list[AnnotatedLocation] | None - pointLocation: str | None - metadata: AnalysisToolMetadata | None