Skip to content

Commit 8d6649c

Browse files
committed
WIP: tool refactoring
1 parent f7e3d8c commit 8d6649c

File tree

8 files changed

+285
-249
lines changed

8 files changed

+285
-249
lines changed

openhands/agenthub/codeact_agent/codeact_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
)
4141
from openhands.events.serialization.event import truncate_content
4242
from openhands.llm.llm import LLM
43-
from openhands.replay.replay_commands import replay_enhance_action
43+
from openhands.replay.replay_initial_analysis import replay_enhance_action
4444
from openhands.replay.replay_state_machine import (
4545
get_replay_observation_message,
4646
)
@@ -327,7 +327,7 @@ def replay_phase_changed(self, phase: ReplayDebuggingPhase) -> None:
327327
codeact_enable_jupyter=self.config.codeact_enable_jupyter,
328328
codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
329329
codeact_enable_replay=self.config.codeact_enable_replay,
330-
codeact_replay_phase=phase,
330+
replay_phase=phase,
331331
)
332332
logger.debug(
333333
f'[REPLAY] CodeActAgent.replay_phase_changed({phase}).'

openhands/agenthub/codeact_agent/function_calling.py

Lines changed: 9 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -26,171 +26,13 @@
2626
IPythonRunCellAction,
2727
MessageAction,
2828
)
29-
from openhands.events.action.replay import (
30-
ReplayPhaseUpdateAction,
31-
ReplayToolCmdRunAction,
32-
)
3329
from openhands.events.tool import ToolCallMetadata
34-
35-
# ---------------------------------------------------------
36-
# Tool: inspect-data
37-
# ---------------------------------------------------------
38-
_REPLAY_INSPECT_DATA_DESCRIPTION = """
39-
Explains value, data flow and origin information for `expression` at `point`.
40-
IMPORTANT: Prefer using inspect-data over inspect-point.
41-
"""
42-
43-
ReplayInspectDataTool = ChatCompletionToolParam(
44-
type='function',
45-
function=ChatCompletionToolParamFunctionChunk(
46-
name='inspect-data',
47-
description=_REPLAY_INSPECT_DATA_DESCRIPTION.strip(),
48-
parameters={
49-
'type': 'object',
50-
'properties': {
51-
'expression': {
52-
'type': 'string',
53-
'description': 'A valid JS expression. IMPORTANT: First pick the best expression. If the expression is an object: Prefer "array[0]" over "array" and "o.x" over "o" to get closer to the origin and creation site of important data points. Prefer nested object over primitive expressions.',
54-
},
55-
'point': {
56-
'type': 'string',
57-
'description': 'The point at which to inspect the runtime. The first point comes from the `thisPoint` in the Initial analysis.',
58-
},
59-
'explanation': {
60-
'type': 'string',
61-
'description': 'Give a concise explanation as to why you take this investigative step.',
62-
},
63-
'explanation_source': {
64-
'type': 'string',
65-
'description': 'Explain which data you saw in the previous analysis results that informs this step.',
66-
},
67-
},
68-
'required': ['expression', 'point', 'explanation', 'explanation_source'],
69-
},
70-
),
71-
)
72-
73-
# ---------------------------------------------------------
74-
# Tool: inspect-point
75-
# ---------------------------------------------------------
76-
_REPLAY_INSPECT_POINT_DESCRIPTION = """
77-
Explains dynamic control flow and data flow dependencies of the code at `point`.
78-
Use this tool instead of `inspect-data` only when you don't have a specific data point to investigate.
79-
"""
80-
81-
ReplayInspectPointTool = ChatCompletionToolParam(
82-
type='function',
83-
function=ChatCompletionToolParamFunctionChunk(
84-
name='inspect-point',
85-
description=_REPLAY_INSPECT_POINT_DESCRIPTION.strip(),
86-
parameters={
87-
'type': 'object',
88-
'properties': {
89-
'point': {'type': 'string'},
90-
},
91-
'required': ['point'],
92-
},
93-
),
94-
)
95-
96-
# ---------------------------------------------------------
97-
# Tool: SubmitHypothesis
98-
# TODO: Divide this into multiple steps -
99-
# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps.
100-
# 2. The second submission, after analysis has already concluded, must be as complete as possible.
101-
# ---------------------------------------------------------
102-
# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
103-
# Your investigation has yielded a complete thin slice from symptom to root cause,
104-
# enough proof to let the `CodeEdit` agent take over to fix the bug.
105-
# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug,
106-
# based on evidence you have gathered.
107-
# """
108-
109-
# ReplaySubmitHypothesisTool = ChatCompletionToolParam(
110-
# type='function',
111-
# function=ChatCompletionToolParamFunctionChunk(
112-
# name='submit-hypothesis',
113-
# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
114-
# parameters={
115-
# 'type': 'object',
116-
# 'properties': {
117-
# 'rootCauseHypothesis': {'type': 'string'},
118-
# 'thinSlice': {
119-
# 'type': 'array',
120-
# 'items': {
121-
# 'type': 'object',
122-
# 'properties': {
123-
# 'point': {'type': 'string'},
124-
# 'code': {'type': 'string'},
125-
# 'role': {'type': 'string'},
126-
# },
127-
# 'required': ['point', 'code', 'role'],
128-
# },
129-
# },
130-
# 'modifications': {
131-
# 'type': 'array',
132-
# 'items': {
133-
# 'type': 'object',
134-
# 'properties': {
135-
# 'kind': {
136-
# 'type': 'string',
137-
# 'enum': ['add', 'remove', 'modify'],
138-
# },
139-
# 'newCode': {'type': 'string'},
140-
# 'oldCode': {'type': 'string'},
141-
# 'location': {'type': 'string'},
142-
# 'point': {'type': 'string'},
143-
# # NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation.
144-
# # Instead of requiring a line number, the final fix will be more involved, as explained in the issue.
145-
# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176
146-
# # 'line': {'type': 'number'},
147-
# 'briefExplanation': {'type': 'string'},
148-
# 'verificationProof': {'type': 'string'},
149-
# },
150-
# 'required': [
151-
# 'kind',
152-
# 'location',
153-
# 'briefExplanation',
154-
# # 'line',
155-
# 'verificationProof',
156-
# ],
157-
# },
158-
# },
159-
# },
160-
# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'],
161-
# },
162-
# ),
163-
# )
164-
_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
165-
# Use this tool to conclude your analysis and move on to code editing.
166-
# """
167-
168-
ReplaySubmitHypothesisTool = ChatCompletionToolParam(
169-
type='function',
170-
function=ChatCompletionToolParamFunctionChunk(
171-
name='submit-hypothesis',
172-
description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
173-
parameters={
174-
'type': 'object',
175-
'properties': {
176-
'problem': {
177-
'type': 'string',
178-
'description': 'One-sentence explanation of the core problem that this will solve.',
179-
},
180-
'rootCauseHypothesis': {'type': 'string'},
181-
'editSuggestions': {
182-
'type': 'string',
183-
'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.',
184-
},
185-
},
186-
'required': ['rootCauseHypothesis'],
187-
},
188-
),
30+
from openhands.replay.replay_tools import (
31+
get_replay_tools,
32+
handle_replay_tool_call,
33+
is_replay_tool,
18934
)
19035

191-
REPLAY_TOOLS = ['inspect-data', 'inspect-point', 'submit-hypothesis']
192-
193-
19436
# ---------------------------------------------------------
19537
# OH default tools.
19638
# ---------------------------------------------------------
@@ -631,36 +473,8 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]:
631473
) from e
632474
if tool_call.function.name == 'execute_bash':
633475
action = CmdRunAction(**arguments)
634-
elif tool_call.function.name in REPLAY_TOOLS:
635-
logger.info(
636-
f'[REPLAY] TOOL_CALL {tool_call.function.name} - arguments: {json.dumps(arguments, indent=2)}'
637-
)
638-
if tool_call.function.name == 'inspect-data':
639-
# Remove explanation props.
640-
arguments = {
641-
k: v for k, v in arguments.items() if 'explanation' not in k
642-
}
643-
action = ReplayToolCmdRunAction(
644-
command_name='inspect-data',
645-
command_args=arguments
646-
| {'recordingId': state.replay_recording_id},
647-
)
648-
elif tool_call.function.name == 'inspect-point':
649-
# if arguments['expression'] == 'wiredRules': # hackfix for 10608 experiment
650-
# raise FunctionCallValidationError(f'wiredRules is irrelevant to the problem. Try something else.')
651-
action = ReplayToolCmdRunAction(
652-
command_name='inspect-point',
653-
command_args=arguments
654-
| {'recordingId': state.replay_recording_id},
655-
)
656-
elif tool_call.function.name == 'submit-hypothesis':
657-
action = ReplayPhaseUpdateAction(
658-
new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments)
659-
)
660-
else:
661-
raise ValueError(
662-
f'Unknown Replay tool. Make sure to add them all to REPLAY_TOOLS: {tool_call.function.name}'
663-
)
476+
elif is_replay_tool(tool_call.function.name):
477+
# Bind the handler's result: every sibling branch of this chain assigns `action`.
# NOTE(review): assumes handle_replay_tool_call returns the Action to emit — confirm.
action = handle_replay_tool_call(tool_call, arguments, state)
664478
elif tool_call.function.name == 'execute_ipython_cell':
665479
action = IPythonRunCellAction(**arguments)
666480
elif tool_call.function.name == 'delegate_to_browsing_agent':
@@ -727,31 +541,18 @@ def get_tools(
727541
codeact_enable_llm_editor: bool = False,
728542
codeact_enable_jupyter: bool = False,
729543
codeact_enable_replay: bool = False,
730-
codeact_replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal,
544+
replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal,
731545
) -> list[ChatCompletionToolParam]:
732546
default_tools = get_default_tools(
733547
codeact_enable_browsing,
734548
codeact_enable_llm_editor,
735549
codeact_enable_jupyter,
736550
)
737-
if not codeact_enable_replay or codeact_replay_phase == ReplayDebuggingPhase.Normal:
551+
if not codeact_enable_replay or replay_phase == ReplayDebuggingPhase.Normal:
738552
# Use the default tools when not in a Replay-specific phase.
739553
return default_tools
740554

741555
if codeact_enable_replay:
742-
analysis_tools = [
743-
ReplayInspectDataTool,
744-
ReplayInspectPointTool,
745-
]
746-
if codeact_replay_phase == ReplayDebuggingPhase.Analysis:
747-
# Analysis tools only. This phase is concluded upon submit-hypothesis.
748-
tools = analysis_tools + [ReplaySubmitHypothesisTool]
749-
elif codeact_replay_phase == ReplayDebuggingPhase.Edit:
750-
# Combine default and analysis tools.
751-
tools = default_tools + analysis_tools
752-
else:
753-
raise ValueError(
754-
f'Unhandled ReplayDebuggingPhase in get_tools: {codeact_replay_phase}'
755-
)
556+
tools = get_replay_tools(replay_phase, default_tools)
756557

757558
return tools

openhands/events/action/replay.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,14 @@
1111
)
1212

1313

14+
@dataclass
class ReplayAction(Action):
    """Marker base class shared by the Replay actions defined in this module.

    It declares no fields and no behavior of its own.
    """
17+
18+
1419
# NOTE: We need the same class twice because a lot of the agent logic is based on isinstance checks.
1520
@dataclass
16-
class ReplayCmdRunActionBase(Action):
21+
class ReplayCmdRunActionBase(ReplayAction):
1722
# Name of the command in @replayapi/cli.
1823
command_name: str
1924

@@ -62,7 +67,7 @@ class ReplayToolCmdRunAction(ReplayCmdRunActionBase):
6267

6368

6469
@dataclass
65-
class ReplayPhaseUpdateAction(Action):
70+
class ReplayPhaseUpdateAction(ReplayAction):
6671
new_phase: ReplayDebuggingPhase
6772

6873
thought: str = ''

openhands/replay/replay_commands.py renamed to openhands/replay/replay_initial_analysis.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22
import re
3-
from typing import Any, cast
3+
from typing import Any, Tuple, cast
44

55
from openhands.controller.state.state import State
66
from openhands.core.logger import openhands_logger as logger
@@ -9,7 +9,7 @@
99
from openhands.events.action.replay import ReplayInternalCmdRunAction
1010
from openhands.events.observation.replay import ReplayInternalCmdOutputObservation
1111
from openhands.replay.replay_prompts import replay_prompt_phase_analysis
12-
from openhands.replay.replay_types import AnalysisToolMetadata, AnnotateResult
12+
from openhands.replay.replay_types import AnalysisToolMetadata
1313

1414

1515
def scan_recording_id(issue: str) -> str | None:
@@ -72,20 +72,23 @@ def safe_parse_json(text: str) -> dict[str, Any] | None:
7272
return None
7373

7474

75-
def split_metadata(result: dict) -> 'Tuple[AnalysisToolMetadata, dict]':
    """Separate the `metadata` entry of a command result from its payload.

    Args:
        result: Parsed command result. It is never modified.

    Returns:
        A `(metadata, data)` pair. When `result` has a `metadata` key,
        `metadata` is that entry and `data` is a shallow copy of `result`
        without it. Otherwise `metadata` is an empty dict and `data` is
        `result` itself (not a copy).
    """
    if 'metadata' not in result:
        # Cast so the empty placeholder matches the declared return type.
        return cast('AnalysisToolMetadata', {}), result
    metadata = cast('AnalysisToolMetadata', result['metadata'])
    # Build the payload in one pass instead of copying and then deleting.
    data = {key: value for key, value in result.items() if key != 'metadata'}
    return metadata, data
8282

8383

84-
def handle_replay_internal_command_observation(
84+
def on_replay_internal_command_observation(
8585
state: State, observation: ReplayInternalCmdOutputObservation
8686
) -> AnalysisToolMetadata | None:
8787
"""
88-
Enhance the user prompt with the results of the replay analysis.
88+
Handle result for an internally sent command (not agent tool use or user action).
89+
90+
NOTE: Currently, the only internal command is the initial-analysis command.
91+
Enhance the user prompt with the results of the initial analysis.
8992
Returns the metadata needed for the agent to switch to analysis tools.
9093
"""
9194
enhance_action_id = state.extra_data.get('replay_enhance_prompt_id')
@@ -103,19 +106,19 @@ def handle_replay_internal_command_observation(
103106
state.extra_data['replay_enhance_observed'] = True
104107

105108
# Deserialize stringified result.
106-
result: AnnotateResult = cast(
107-
AnnotateResult, safe_parse_json(observation.content)
108-
)
109+
result = safe_parse_json(observation.content)
109110

110111
# Get metadata and enhance prompt.
111112
if result and 'metadata' in result:
112113
# initial-analysis provides metadata needed for tool use.
113114
metadata, command_result = split_metadata(result)
114-
replay_prompt_phase_analysis(command_result, user_message)
115+
user_message.content = replay_prompt_phase_analysis(
116+
command_result, user_message.content
117+
)
115118
return metadata
116119
else:
117120
logger.warning(
118-
f'[REPLAY] Replay command result cannot be interpreted. Observed content: {str(observation.content)}'
121+
f'[REPLAY] Replay command result missing metadata. Observed content: {str(observation.content)}'
119122
)
120123

121124
return None

0 commit comments

Comments
 (0)