|
26 | 26 | IPythonRunCellAction,
|
27 | 27 | MessageAction,
|
28 | 28 | )
|
29 |
| -from openhands.events.action.replay import ( |
30 |
| - ReplayPhaseUpdateAction, |
31 |
| - ReplayToolCmdRunAction, |
32 |
| -) |
33 | 29 | from openhands.events.tool import ToolCallMetadata
|
34 |
| - |
35 |
| -# --------------------------------------------------------- |
36 |
| -# Tool: inspect-data |
37 |
| -# --------------------------------------------------------- |
38 |
| -_REPLAY_INSPECT_DATA_DESCRIPTION = """ |
39 |
| -Explains value, data flow and origin information for `expression` at `point`. |
40 |
| -IMPORTANT: Prefer using inspect-data over inspect-point. |
41 |
| -""" |
42 |
| - |
43 |
| -ReplayInspectDataTool = ChatCompletionToolParam( |
44 |
| - type='function', |
45 |
| - function=ChatCompletionToolParamFunctionChunk( |
46 |
| - name='inspect-data', |
47 |
| - description=_REPLAY_INSPECT_DATA_DESCRIPTION.strip(), |
48 |
| - parameters={ |
49 |
| - 'type': 'object', |
50 |
| - 'properties': { |
51 |
| - 'expression': { |
52 |
| - 'type': 'string', |
53 |
| - 'description': 'A valid JS expression. IMPORTANT: First pick the best expression. If the expression is an object: Prefer "array[0]" over "array" and "o.x" over "o" to get closer to the origin and creation site of important data points. Prefer nested object over primitive expressions.', |
54 |
| - }, |
55 |
| - 'point': { |
56 |
| - 'type': 'string', |
57 |
| - 'description': 'The point at which to inspect the runtime. The first point comes from the `thisPoint` in the Initial analysis.', |
58 |
| - }, |
59 |
| - 'explanation': { |
60 |
| - 'type': 'string', |
61 |
| - 'description': 'Give a concise explanation as to why you take this investigative step.', |
62 |
| - }, |
63 |
| - 'explanation_source': { |
64 |
| - 'type': 'string', |
65 |
| - 'description': 'Explain which data you saw in the previous analysis results that informs this step.', |
66 |
| - }, |
67 |
| - }, |
68 |
| - 'required': ['expression', 'point', 'explanation', 'explanation_source'], |
69 |
| - }, |
70 |
| - ), |
71 |
| -) |
72 |
| - |
73 |
| -# --------------------------------------------------------- |
74 |
| -# Tool: inspect-point |
75 |
| -# --------------------------------------------------------- |
76 |
| -_REPLAY_INSPECT_POINT_DESCRIPTION = """ |
77 |
| -Explains dynamic control flow and data flow dependencies of the code at `point`. |
78 |
| -Use this tool instead of `inspect-data` only when you don't have a specific data point to investigate. |
79 |
| -""" |
80 |
| - |
81 |
| -ReplayInspectPointTool = ChatCompletionToolParam( |
82 |
| - type='function', |
83 |
| - function=ChatCompletionToolParamFunctionChunk( |
84 |
| - name='inspect-point', |
85 |
| - description=_REPLAY_INSPECT_POINT_DESCRIPTION.strip(), |
86 |
| - parameters={ |
87 |
| - 'type': 'object', |
88 |
| - 'properties': { |
89 |
| - 'point': {'type': 'string'}, |
90 |
| - }, |
91 |
| - 'required': ['point'], |
92 |
| - }, |
93 |
| - ), |
94 |
| -) |
95 |
| - |
96 |
| -# --------------------------------------------------------- |
97 |
| -# Tool: SubmitHypothesis |
98 |
| -# TODO: Divide this into multiple steps - |
99 |
| -# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps. |
100 |
| -# 2. The second submission, after analysis has already concluded, must be as complete as possible. |
101 |
| -# --------------------------------------------------------- |
102 |
| -# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """ |
103 |
| -# Your investigation has yielded a complete thin slice from symptom to root cause, |
104 |
| -# enough proof to let the `CodeEdit` agent take over to fix the bug. |
105 |
| -# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug, |
106 |
| -# based on evidence you have gathered. |
107 |
| -# """ |
108 |
| - |
109 |
| -# ReplaySubmitHypothesisTool = ChatCompletionToolParam( |
110 |
| -# type='function', |
111 |
| -# function=ChatCompletionToolParamFunctionChunk( |
112 |
| -# name='submit-hypothesis', |
113 |
| -# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(), |
114 |
| -# parameters={ |
115 |
| -# 'type': 'object', |
116 |
| -# 'properties': { |
117 |
| -# 'rootCauseHypothesis': {'type': 'string'}, |
118 |
| -# 'thinSlice': { |
119 |
| -# 'type': 'array', |
120 |
| -# 'items': { |
121 |
| -# 'type': 'object', |
122 |
| -# 'properties': { |
123 |
| -# 'point': {'type': 'string'}, |
124 |
| -# 'code': {'type': 'string'}, |
125 |
| -# 'role': {'type': 'string'}, |
126 |
| -# }, |
127 |
| -# 'required': ['point', 'code', 'role'], |
128 |
| -# }, |
129 |
| -# }, |
130 |
| -# 'modifications': { |
131 |
| -# 'type': 'array', |
132 |
| -# 'items': { |
133 |
| -# 'type': 'object', |
134 |
| -# 'properties': { |
135 |
| -# 'kind': { |
136 |
| -# 'type': 'string', |
137 |
| -# 'enum': ['add', 'remove', 'modify'], |
138 |
| -# }, |
139 |
| -# 'newCode': {'type': 'string'}, |
140 |
| -# 'oldCode': {'type': 'string'}, |
141 |
| -# 'location': {'type': 'string'}, |
142 |
| -# 'point': {'type': 'string'}, |
143 |
| -# # NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation. |
144 |
| -# # Instead of requiring a line number, the final fix will be more involved, as explained in the issue. |
145 |
| -# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176 |
146 |
| -# # 'line': {'type': 'number'}, |
147 |
| -# 'briefExplanation': {'type': 'string'}, |
148 |
| -# 'verificationProof': {'type': 'string'}, |
149 |
| -# }, |
150 |
| -# 'required': [ |
151 |
| -# 'kind', |
152 |
| -# 'location', |
153 |
| -# 'briefExplanation', |
154 |
| -# # 'line', |
155 |
| -# 'verificationProof', |
156 |
| -# ], |
157 |
| -# }, |
158 |
| -# }, |
159 |
| -# }, |
160 |
| -# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'], |
161 |
| -# }, |
162 |
| -# ), |
163 |
| -# ) |
164 |
| -_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """ |
165 |
| -# Use this tool to conclude your analysis and move on to code editing. |
166 |
| -# """ |
167 |
| - |
168 |
| -ReplaySubmitHypothesisTool = ChatCompletionToolParam( |
169 |
| - type='function', |
170 |
| - function=ChatCompletionToolParamFunctionChunk( |
171 |
| - name='submit-hypothesis', |
172 |
| - description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(), |
173 |
| - parameters={ |
174 |
| - 'type': 'object', |
175 |
| - 'properties': { |
176 |
| - 'problem': { |
177 |
| - 'type': 'string', |
178 |
| - 'description': 'One-sentence explanation of the core problem that this will solve.', |
179 |
| - }, |
180 |
| - 'rootCauseHypothesis': {'type': 'string'}, |
181 |
| - 'editSuggestions': { |
182 |
| - 'type': 'string', |
183 |
| - 'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.', |
184 |
| - }, |
185 |
| - }, |
186 |
| - 'required': ['rootCauseHypothesis'], |
187 |
| - }, |
188 |
| - ), |
| 30 | +from openhands.replay.replay_tools import ( |
| 31 | + get_replay_tools, |
| 32 | + handle_replay_tool_call, |
| 33 | + is_replay_tool, |
189 | 34 | )
|
190 | 35 |
|
191 |
| -REPLAY_TOOLS = ['inspect-data', 'inspect-point', 'submit-hypothesis'] |
192 |
| - |
193 |
| - |
194 | 36 | # ---------------------------------------------------------
|
195 | 37 | # OH default tools.
|
196 | 38 | # ---------------------------------------------------------
|
@@ -631,36 +473,8 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]:
|
631 | 473 | ) from e
|
632 | 474 | if tool_call.function.name == 'execute_bash':
|
633 | 475 | action = CmdRunAction(**arguments)
|
634 |
| - elif tool_call.function.name in REPLAY_TOOLS: |
635 |
| - logger.info( |
636 |
| - f'[REPLAY] TOOL_CALL {tool_call.function.name} - arguments: {json.dumps(arguments, indent=2)}' |
637 |
| - ) |
638 |
| - if tool_call.function.name == 'inspect-data': |
639 |
| - # Remove explanation props. |
640 |
| - arguments = { |
641 |
| - k: v for k, v in arguments.items() if 'explanation' not in k |
642 |
| - } |
643 |
| - action = ReplayToolCmdRunAction( |
644 |
| - command_name='inspect-data', |
645 |
| - command_args=arguments |
646 |
| - | {'recordingId': state.replay_recording_id}, |
647 |
| - ) |
648 |
| - elif tool_call.function.name == 'inspect-point': |
649 |
| - # if arguments['expression'] == 'wiredRules': # hackfix for 10608 experiment |
650 |
| - # raise FunctionCallValidationError(f'wiredRules is irrelevant to the problem. Try something else.') |
651 |
| - action = ReplayToolCmdRunAction( |
652 |
| - command_name='inspect-point', |
653 |
| - command_args=arguments |
654 |
| - | {'recordingId': state.replay_recording_id}, |
655 |
| - ) |
656 |
| - elif tool_call.function.name == 'submit-hypothesis': |
657 |
| - action = ReplayPhaseUpdateAction( |
658 |
| - new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments) |
659 |
| - ) |
660 |
| - else: |
661 |
| - raise ValueError( |
662 |
| - f'Unknown Replay tool. Make sure to add them all to REPLAY_TOOLS: {tool_call.function.name}' |
663 |
| - ) |
| 476 | + elif is_replay_tool(tool_call.function.name): |
| 477 | + handle_replay_tool_call(tool_call, arguments, state) |
664 | 478 | elif tool_call.function.name == 'execute_ipython_cell':
|
665 | 479 | action = IPythonRunCellAction(**arguments)
|
666 | 480 | elif tool_call.function.name == 'delegate_to_browsing_agent':
|
@@ -727,31 +541,18 @@ def get_tools(
|
727 | 541 | codeact_enable_llm_editor: bool = False,
|
728 | 542 | codeact_enable_jupyter: bool = False,
|
729 | 543 | codeact_enable_replay: bool = False,
|
730 |
| - codeact_replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal, |
| 544 | + replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal, |
731 | 545 | ) -> list[ChatCompletionToolParam]:
|
732 | 546 | default_tools = get_default_tools(
|
733 | 547 | codeact_enable_browsing,
|
734 | 548 | codeact_enable_llm_editor,
|
735 | 549 | codeact_enable_jupyter,
|
736 | 550 | )
|
737 |
| - if not codeact_enable_replay or codeact_replay_phase == ReplayDebuggingPhase.Normal: |
| 551 | + if not codeact_enable_replay or replay_phase == ReplayDebuggingPhase.Normal: |
738 | 552 | # Use the default tools when not in a Replay-specific phase.
|
739 | 553 | return default_tools
|
740 | 554 |
|
741 | 555 | if codeact_enable_replay:
|
742 |
| - analysis_tools = [ |
743 |
| - ReplayInspectDataTool, |
744 |
| - ReplayInspectPointTool, |
745 |
| - ] |
746 |
| - if codeact_replay_phase == ReplayDebuggingPhase.Analysis: |
747 |
| - # Analysis tools only. This phase is concluded upon submit-hypothesis. |
748 |
| - tools = analysis_tools + [ReplaySubmitHypothesisTool] |
749 |
| - elif codeact_replay_phase == ReplayDebuggingPhase.Edit: |
750 |
| - # Combine default and analysis tools. |
751 |
| - tools = default_tools + analysis_tools |
752 |
| - else: |
753 |
| - raise ValueError( |
754 |
| - f'Unhandled ReplayDebuggingPhase in get_tools: {codeact_replay_phase}' |
755 |
| - ) |
| 556 | + tools = get_replay_tools(replay_phase, default_tools) |
756 | 557 |
|
757 | 558 | return tools
|
0 commit comments