Skip to content

Commit

Permalink
WIP: tool refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
Domiii committed Jan 21, 2025
1 parent f7e3d8c commit 8d6649c
Show file tree
Hide file tree
Showing 8 changed files with 285 additions and 249 deletions.
4 changes: 2 additions & 2 deletions openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
)
from openhands.events.serialization.event import truncate_content
from openhands.llm.llm import LLM
from openhands.replay.replay_commands import replay_enhance_action
from openhands.replay.replay_initial_analysis import replay_enhance_action
from openhands.replay.replay_state_machine import (
get_replay_observation_message,
)
Expand Down Expand Up @@ -327,7 +327,7 @@ def replay_phase_changed(self, phase: ReplayDebuggingPhase) -> None:
codeact_enable_jupyter=self.config.codeact_enable_jupyter,
codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
codeact_enable_replay=self.config.codeact_enable_replay,
codeact_replay_phase=phase,
replay_phase=phase,
)
logger.debug(
f'[REPLAY] CodeActAgent.replay_phase_changed({phase}).'
Expand Down
217 changes: 9 additions & 208 deletions openhands/agenthub/codeact_agent/function_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,171 +26,13 @@
IPythonRunCellAction,
MessageAction,
)
from openhands.events.action.replay import (
ReplayPhaseUpdateAction,
ReplayToolCmdRunAction,
)
from openhands.events.tool import ToolCallMetadata

# ---------------------------------------------------------
# Tool: inspect-data
# ---------------------------------------------------------
# Description text for the `inspect-data` tool. `.strip()` is applied to it
# when it is passed as the tool's `description` below.
_REPLAY_INSPECT_DATA_DESCRIPTION = """
Explains value, data flow and origin information for `expression` at `point`.
IMPORTANT: Prefer using inspect-data over inspect-point.
"""

# Function-tool definition named 'inspect-data'. Its parameter schema is an
# object with four string properties (`expression`, `point`, `explanation`,
# `explanation_source`), all of which are listed under 'required'.
ReplayInspectDataTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='inspect-data',
description=_REPLAY_INSPECT_DATA_DESCRIPTION.strip(),
parameters={
'type': 'object',
'properties': {
'expression': {
'type': 'string',
'description': 'A valid JS expression. IMPORTANT: First pick the best expression. If the expression is an object: Prefer "array[0]" over "array" and "o.x" over "o" to get closer to the origin and creation site of important data points. Prefer nested object over primitive expressions.',
},
'point': {
'type': 'string',
'description': 'The point at which to inspect the runtime. The first point comes from the `thisPoint` in the Initial analysis.',
},
'explanation': {
'type': 'string',
'description': 'Give a concise explanation as to why you take this investigative step.',
},
'explanation_source': {
'type': 'string',
'description': 'Explain which data you saw in the previous analysis results that informs this step.',
},
},
'required': ['expression', 'point', 'explanation', 'explanation_source'],
},
),
)

# ---------------------------------------------------------
# Tool: inspect-point
# ---------------------------------------------------------
# Description text for the `inspect-point` tool. `.strip()` is applied to it
# when it is passed as the tool's `description` below.
_REPLAY_INSPECT_POINT_DESCRIPTION = """
Explains dynamic control flow and data flow dependencies of the code at `point`.
Use this tool instead of `inspect-data` only when you don't have a specific data point to investigate.
"""

# Function-tool definition named 'inspect-point'. It takes a single required
# string parameter, `point`.
ReplayInspectPointTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='inspect-point',
description=_REPLAY_INSPECT_POINT_DESCRIPTION.strip(),
parameters={
'type': 'object',
'properties': {
'point': {'type': 'string'},
},
'required': ['point'],
},
),
)

# ---------------------------------------------------------
# Tool: SubmitHypothesis
# TODO: Divide this into multiple steps -
# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps.
# 2. The second submission, after analysis has already concluded, must be as complete as possible.
# ---------------------------------------------------------
# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
# Your investigation has yielded a complete thin slice from symptom to root cause,
# enough proof to let the `CodeEdit` agent take over to fix the bug.
# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug,
# based on evidence you have gathered.
# """

# ReplaySubmitHypothesisTool = ChatCompletionToolParam(
# type='function',
# function=ChatCompletionToolParamFunctionChunk(
# name='submit-hypothesis',
# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
# parameters={
# 'type': 'object',
# 'properties': {
# 'rootCauseHypothesis': {'type': 'string'},
# 'thinSlice': {
# 'type': 'array',
# 'items': {
# 'type': 'object',
# 'properties': {
# 'point': {'type': 'string'},
# 'code': {'type': 'string'},
# 'role': {'type': 'string'},
# },
# 'required': ['point', 'code', 'role'],
# },
# },
# 'modifications': {
# 'type': 'array',
# 'items': {
# 'type': 'object',
# 'properties': {
# 'kind': {
# 'type': 'string',
# 'enum': ['add', 'remove', 'modify'],
# },
# 'newCode': {'type': 'string'},
# 'oldCode': {'type': 'string'},
# 'location': {'type': 'string'},
# 'point': {'type': 'string'},
# # NOTE: Even though we really want the `line` here, requiring it leads to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation.
# # Instead of requiring a line number, the final fix will be more involved, as explained in the issue.
# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176
# # 'line': {'type': 'number'},
# 'briefExplanation': {'type': 'string'},
# 'verificationProof': {'type': 'string'},
# },
# 'required': [
# 'kind',
# 'location',
# 'briefExplanation',
# # 'line',
# 'verificationProof',
# ],
# },
# },
# },
# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'],
# },
# ),
# )
_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
# Use this tool to conclude your analysis and move on to code editing.
# """

ReplaySubmitHypothesisTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='submit-hypothesis',
description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
parameters={
'type': 'object',
'properties': {
'problem': {
'type': 'string',
'description': 'One-sentence explanation of the core problem that this will solve.',
},
'rootCauseHypothesis': {'type': 'string'},
'editSuggestions': {
'type': 'string',
'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.',
},
},
'required': ['rootCauseHypothesis'],
},
),
from openhands.replay.replay_tools import (
get_replay_tools,
handle_replay_tool_call,
is_replay_tool,
)

REPLAY_TOOLS = ['inspect-data', 'inspect-point', 'submit-hypothesis']


# ---------------------------------------------------------
# OH default tools.
# ---------------------------------------------------------
Expand Down Expand Up @@ -631,36 +473,8 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]:
) from e
if tool_call.function.name == 'execute_bash':
action = CmdRunAction(**arguments)
elif tool_call.function.name in REPLAY_TOOLS:
logger.info(
f'[REPLAY] TOOL_CALL {tool_call.function.name} - arguments: {json.dumps(arguments, indent=2)}'
)
if tool_call.function.name == 'inspect-data':
# Remove explanation props.
arguments = {
k: v for k, v in arguments.items() if 'explanation' not in k
}
action = ReplayToolCmdRunAction(
command_name='inspect-data',
command_args=arguments
| {'recordingId': state.replay_recording_id},
)
elif tool_call.function.name == 'inspect-point':
# if arguments['expression'] == 'wiredRules': # hackfix for 10608 experiment
# raise FunctionCallValidationError(f'wiredRules is irrelevant to the problem. Try something else.')
action = ReplayToolCmdRunAction(
command_name='inspect-point',
command_args=arguments
| {'recordingId': state.replay_recording_id},
)
elif tool_call.function.name == 'submit-hypothesis':
action = ReplayPhaseUpdateAction(
new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments)
)
else:
raise ValueError(
f'Unknown Replay tool. Make sure to add them all to REPLAY_TOOLS: {tool_call.function.name}'
)
elif is_replay_tool(tool_call.function.name):
handle_replay_tool_call(tool_call, arguments, state)
elif tool_call.function.name == 'execute_ipython_cell':
action = IPythonRunCellAction(**arguments)
elif tool_call.function.name == 'delegate_to_browsing_agent':
Expand Down Expand Up @@ -727,31 +541,18 @@ def get_tools(
codeact_enable_llm_editor: bool = False,
codeact_enable_jupyter: bool = False,
codeact_enable_replay: bool = False,
codeact_replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal,
replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal,
) -> list[ChatCompletionToolParam]:
default_tools = get_default_tools(
codeact_enable_browsing,
codeact_enable_llm_editor,
codeact_enable_jupyter,
)
if not codeact_enable_replay or codeact_replay_phase == ReplayDebuggingPhase.Normal:
if not codeact_enable_replay or replay_phase == ReplayDebuggingPhase.Normal:
# Use the default tools when not in a Replay-specific phase.
return default_tools

if codeact_enable_replay:
analysis_tools = [
ReplayInspectDataTool,
ReplayInspectPointTool,
]
if codeact_replay_phase == ReplayDebuggingPhase.Analysis:
# Analysis tools only. This phase is concluded upon submit-hypothesis.
tools = analysis_tools + [ReplaySubmitHypothesisTool]
elif codeact_replay_phase == ReplayDebuggingPhase.Edit:
# Combine default and analysis tools.
tools = default_tools + analysis_tools
else:
raise ValueError(
f'Unhandled ReplayDebuggingPhase in get_tools: {codeact_replay_phase}'
)
tools = get_replay_tools(replay_phase, default_tools)

return tools
9 changes: 7 additions & 2 deletions openhands/events/action/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@
)


@dataclass
class ReplayAction(Action):
    """Common base class for the Replay-specific actions defined in this module."""


# NOTE: We need the same class twice because a lot of the agent logic is based on isinstance checks.
@dataclass
class ReplayCmdRunActionBase(Action):
class ReplayCmdRunActionBase(ReplayAction):
# Name of the command in @replayapi/cli.
command_name: str

Expand Down Expand Up @@ -62,7 +67,7 @@ class ReplayToolCmdRunAction(ReplayCmdRunActionBase):


@dataclass
class ReplayPhaseUpdateAction(Action):
class ReplayPhaseUpdateAction(ReplayAction):
new_phase: ReplayDebuggingPhase

thought: str = ''
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import re
from typing import Any, cast
from typing import Any, Tuple, cast

from openhands.controller.state.state import State
from openhands.core.logger import openhands_logger as logger
Expand All @@ -9,7 +9,7 @@
from openhands.events.action.replay import ReplayInternalCmdRunAction
from openhands.events.observation.replay import ReplayInternalCmdOutputObservation
from openhands.replay.replay_prompts import replay_prompt_phase_analysis
from openhands.replay.replay_types import AnalysisToolMetadata, AnnotateResult
from openhands.replay.replay_types import AnalysisToolMetadata


def scan_recording_id(issue: str) -> str | None:
Expand Down Expand Up @@ -72,20 +72,23 @@ def safe_parse_json(text: str) -> dict[str, Any] | None:
return None


def split_metadata(result: dict) -> 'Tuple[AnalysisToolMetadata, dict]':
    """Separate the `metadata` entry of a command result from the rest.

    Args:
        result: Parsed command result, with or without a `metadata` key.
            It is never modified.

    Returns:
        A `(metadata, data)` pair. `metadata` is the value stored under
        `result['metadata']`, or an empty dict when that key is absent.
        `data` is a shallow copy of `result` without the `metadata` key.
    """
    # Copy up front so that neither branch hands the caller's own dict back;
    # callers can then mutate `data` without affecting `result`.
    data = dict(result)
    metadata = data.pop('metadata', {})
    return cast('AnalysisToolMetadata', metadata), data


def handle_replay_internal_command_observation(
def on_replay_internal_command_observation(
state: State, observation: ReplayInternalCmdOutputObservation
) -> AnalysisToolMetadata | None:
"""
Enhance the user prompt with the results of the replay analysis.
Handle result for an internally sent command (not agent tool use or user action).
NOTE: Currently, the only internal command is the initial-analysis command.
Enhance the user prompt with the results of the initial analysis.
Returns the metadata needed for the agent to switch to analysis tools.
"""
enhance_action_id = state.extra_data.get('replay_enhance_prompt_id')
Expand All @@ -103,19 +106,19 @@ def handle_replay_internal_command_observation(
state.extra_data['replay_enhance_observed'] = True

# Deserialize stringified result.
result: AnnotateResult = cast(
AnnotateResult, safe_parse_json(observation.content)
)
result = safe_parse_json(observation.content)

# Get metadata and enhance prompt.
if result and 'metadata' in result:
# initial-analysis provides metadata needed for tool use.
metadata, command_result = split_metadata(result)
replay_prompt_phase_analysis(command_result, user_message)
user_message.content = replay_prompt_phase_analysis(
command_result, user_message.content
)
return metadata
else:
logger.warning(
f'[REPLAY] Replay command result cannot be interpreted. Observed content: {str(observation.content)}'
f'[REPLAY] Replay command result missing metadata. Observed content: {str(observation.content)}'
)

return None
Loading

0 comments on commit 8d6649c

Please sign in to comment.