Skip to content

Commit bef88b0

Browse files
authored
Added bolt samples (#15)
1 parent 7d7ea11 commit bef88b0

File tree

16 files changed

+281
-68
lines changed

16 files changed

+281
-68
lines changed

openhands/agenthub/codeact_agent/codeact_agent.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json
21
import os
32
from collections import deque
43

@@ -269,7 +268,7 @@ def get_observation_message(
269268
new_phase = obs.new_phase
270269
if new_phase == ReplayDebuggingPhase.Edit:
271270
# Tell the agent to stop analyzing and start editing:
272-
text = "You have concluded the analysis. Review, then implement the hypothesized changes using the edit tools available to you. The code is available in the workspace. Don't stop. Fix the bug."
271+
text = "You have concluded the analysis.\nIMPORTANT: NOW review, then implement the hypothesized changes using tools. The code is available in the workspace.\nIMPORTANT: Don't stop. Fix the bug.\nIMPORTANT: Don't stop. Fix the bug."
273272
message = Message(role='user', content=[TextContent(text=text)])
274273
else:
275274
raise NotImplementedError(
@@ -348,7 +347,8 @@ def replay_phase_changed(self, phase: ReplayDebuggingPhase) -> None:
348347
codeact_replay_phase=phase,
349348
)
350349
logger.debug(
351-
f'[REPLAY] CodeActAgent.replay_phase_changed({phase}). New tools: {json.dumps(self.tools, indent=2)}'
350+
f'[REPLAY] CodeActAgent.replay_phase_changed({phase}).'
351+
# f'New tools: {json.dumps(self.tools, indent=2)}'
352352
)
353353

354354
def step(self, state: State) -> Action:

openhands/agenthub/codeact_agent/function_calling.py

Lines changed: 73 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,75 @@
9595

9696
# ---------------------------------------------------------
9797
# Tool: SubmitHypothesis
98+
# TODO: Divide this into multiple steps -
99+
# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps.
100+
# 2. The second submission, after analysis has already concluded, must be as complete as possible.
98101
# ---------------------------------------------------------
102+
# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
103+
# Your investigation has yielded a complete thin slice from symptom to root cause,
104+
# enough proof to let the `CodeEdit` agent take over to fix the bug.
105+
# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug,
106+
# based on evidence you have gathered.
107+
# """
108+
109+
# ReplaySubmitHypothesisTool = ChatCompletionToolParam(
110+
# type='function',
111+
# function=ChatCompletionToolParamFunctionChunk(
112+
# name='submit-hypothesis',
113+
# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
114+
# parameters={
115+
# 'type': 'object',
116+
# 'properties': {
117+
# 'rootCauseHypothesis': {'type': 'string'},
118+
# 'thinSlice': {
119+
# 'type': 'array',
120+
# 'items': {
121+
# 'type': 'object',
122+
# 'properties': {
123+
# 'point': {'type': 'string'},
124+
# 'code': {'type': 'string'},
125+
# 'role': {'type': 'string'},
126+
# },
127+
# 'required': ['point', 'code', 'role'],
128+
# },
129+
# },
130+
# 'modifications': {
131+
# 'type': 'array',
132+
# 'items': {
133+
# 'type': 'object',
134+
# 'properties': {
135+
# 'kind': {
136+
# 'type': 'string',
137+
# 'enum': ['add', 'remove', 'modify'],
138+
# },
139+
# 'newCode': {'type': 'string'},
140+
# 'oldCode': {'type': 'string'},
141+
# 'location': {'type': 'string'},
142+
# 'point': {'type': 'string'},
143+
# # NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation.
144+
# # Instead of requiring a line number, the final fix will be more involved, as explained in the issue.
145+
# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176
146+
# # 'line': {'type': 'number'},
147+
# 'briefExplanation': {'type': 'string'},
148+
# 'verificationProof': {'type': 'string'},
149+
# },
150+
# 'required': [
151+
# 'kind',
152+
# 'location',
153+
# 'briefExplanation',
154+
# # 'line',
155+
# 'verificationProof',
156+
# ],
157+
# },
158+
# },
159+
# },
160+
# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'],
161+
# },
162+
# ),
163+
# )
99164
_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
100-
Your investigation has yielded a complete thin slice from symptom to root cause,
101-
enough proof to let the `CodeEdit` agent take over to fix the bug.
102-
DO NOT GUESS. You must provide exact code in the exact right location to fix this bug,
103-
based on evidence you have gathered.
104-
"""
165+
# Use this tool to conclude your analysis and move on to code editing.
166+
# """
105167

106168
ReplaySubmitHypothesisTool = ChatCompletionToolParam(
107169
type='function',
@@ -112,49 +174,12 @@
112174
'type': 'object',
113175
'properties': {
114176
'rootCauseHypothesis': {'type': 'string'},
115-
'thinSlice': {
116-
'type': 'array',
117-
'items': {
118-
'type': 'object',
119-
'properties': {
120-
'point': {'type': 'string'},
121-
'code': {'type': 'string'},
122-
'role': {'type': 'string'},
123-
},
124-
'required': ['point', 'code', 'role'],
125-
},
126-
},
127-
'modifications': {
128-
'type': 'array',
129-
'items': {
130-
'type': 'object',
131-
'properties': {
132-
'kind': {
133-
'type': 'string',
134-
'enum': ['add', 'remove', 'modify'],
135-
},
136-
'newCode': {'type': 'string'},
137-
'oldCode': {'type': 'string'},
138-
'location': {'type': 'string'},
139-
'point': {'type': 'string'},
140-
# NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation.
141-
# Instead of requiring a line number, the final fix will be more involved, as explained in the issue.
142-
# see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176
143-
# 'line': {'type': 'number'},
144-
'briefExplanation': {'type': 'string'},
145-
'verificationProof': {'type': 'string'},
146-
},
147-
'required': [
148-
'kind',
149-
'location',
150-
'briefExplanation',
151-
# 'line',
152-
'verificationProof',
153-
],
154-
},
177+
'editSuggestions': {
178+
'type': 'string',
179+
'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.',
155180
},
156181
},
157-
'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'],
182+
'required': ['rootCauseHypothesis'],
158183
},
159184
),
160185
)
@@ -626,7 +651,7 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]:
626651
)
627652
elif tool_call.function.name == 'submit-hypothesis':
628653
action = ReplayPhaseUpdateAction(
629-
new_phase=ReplayDebuggingPhase.Edit
654+
new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments)
630655
)
631656
else:
632657
raise ValueError(
@@ -713,11 +738,10 @@ def get_tools(
713738
analysis_tools = [
714739
ReplayInspectDataTool,
715740
ReplayInspectPointTool,
716-
ReplaySubmitHypothesisTool,
717741
]
718742
if codeact_replay_phase == ReplayDebuggingPhase.Analysis:
719743
# Analysis tools only. This phase is concluded upon submit-hypothesis.
720-
tools = analysis_tools
744+
tools = analysis_tools + [ReplaySubmitHypothesisTool]
721745
elif codeact_replay_phase == ReplayDebuggingPhase.Edit:
722746
# Combine default and analysis tools.
723747
tools = default_tools + analysis_tools

openhands/core/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,5 +284,6 @@ def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
284284
config=config,
285285
initial_user_action=initial_user_action,
286286
sid=sid,
287+
exit_on_message=True,
287288
)
288289
)

openhands/events/action/replay.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ class ReplayPhaseUpdateAction(Action):
6666
new_phase: ReplayDebuggingPhase
6767

6868
thought: str = ''
69+
info: str = ''
6970

7071
action: str = ActionType.REPLAY_UPDATE_PHASE
7172
runnable: ClassVar[bool] = True
@@ -77,5 +78,5 @@ def message(self) -> str:
7778
return f'{self.__class__.__name__}: {self.new_phase}'
7879

7980
def __str__(self) -> str:
80-
ret = f'{self.message}'
81+
ret = f'[{self.message}] {self.info}'
8182
return ret

openhands/events/replay.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,6 @@ def command_annotate_execution_points(
2828
thought: str, is_workspace_repo: bool
2929
) -> ReplayInternalCmdRunAction:
3030
command_input: dict[str, Any] = dict()
31-
if is_workspace_repo:
32-
# NOTE: In the resolver workflow, the workdir path is equal to the repo path:
33-
# 1. We should not append the repo name to the path.
34-
# 2. The resolver also already hard-reset the repo, so forceDelete is not necessary.
35-
command_input['isWorkspaceRepoPath'] = True
36-
command_input['forceDelete'] = False
37-
else:
38-
command_input['isWorkspaceRepoPath'] = False
39-
command_input['forceDelete'] = True
4031
command_input['prompt'] = thought
4132

4233
action = ReplayInternalCmdRunAction(

openhands/runtime/base.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,14 @@ def setup_initial_env(self) -> None:
140140
if self.config.sandbox.runtime_startup_env_vars:
141141
self.add_env_vars(self.config.sandbox.runtime_startup_env_vars)
142142

143-
logger.debug('Maybe adding replay env vars')
144143
if self.config.replay.api_key:
145-
self.add_env_vars({'REPLAY_API_KEY': self.config.replay.api_key})
146-
logger.debug('Added REPLAY_API_KEY to environment')
144+
self.add_env_vars(
145+
{
146+
'REPLAY_API_KEY': self.config.replay.api_key,
147+
'REPLAY_DEV_MODE': os.environ.get('REPLAY_DEV_MODE', ''),
148+
'REPLAY_ENABLE_TOOL_CACHE': os.environ.get('REPLAY_DEV_MODE', ''),
149+
}
150+
)
147151
if self.config.replay.dir:
148152
self.add_env_vars({'REPLAY_DIR': self.config.replay.dir})
149153
logger.debug('Added REPLAY_DIR to environment')

openhands/runtime/replay/replay_cli.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ async def run_action(
3030
command_args['recordingId'] = action.recording_id
3131
if action.session_id != '':
3232
command_args['sessionId'] = action.session_id
33-
if action.command_name == 'initial-analysis':
34-
# Hardcode the path for now. We won't need it in the long run.
35-
command_args['workspacePath'] = self.bash_session.workdir
33+
# if action.command_name == 'initial-analysis':
34+
# # Hardcode the path for now. We won't need it in the long run.
35+
# command_args['workspacePath'] = self.bash_session.workdir
3636

3737
with (
3838
tempfile.NamedTemporaryFile(

replay_benchmarks/bolt/945/prompt.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The deviations from the median for the total network data transferred are rendered incorrectly. They render as 0.0% when they should be something else.
2+
3+
Bug recording: https://app.replay.io/recording/replay-of-localhost8040--3e0a8f68-14e6-4809-bc72-dea0e0374c77?&point=25312447185420620431941338721681417
312 KB
Binary file not shown.

replay_benchmarks/bolt/946/prompt.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
The following bug has been discovered in a web app inside a browser.
2+
NOTE: Networking errors might be caused by CORS problems. Their fix usually requires server middleware.
3+
4+
This web app should render the results of a single prompt provided to two different models.
5+
The UI renders: `anthropic API error: Connection error`
6+
Fix it.
7+
8+
Bug recording: https://app.replay.io/recording/replay-of-localhost8040--f9dd7902-5188-45d2-9d2a-aa4d2e674592?point=29531188383192074852669071103623174&time=13949.366224168605&viewMode=dev
47.8 KB
Binary file not shown.

replay_benchmarks/bolt/951/prompt.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The React app renders a blank screen.
2+
3+
Bug recording: https://app.replay.io/recording/replay-of-localhost8040--43a890bc-6f37-47e0-ba47-4d04827e4e44
44 KB
Binary file not shown.

replay_benchmarks/bolt/run-bolt.sh

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright 2020-2025 Record Replay Inc.
2+
set -e
3+
4+
if [[ -z "$1" ]]; then
5+
echo "Usage: $0 <instance-id> [prompt-name]"
6+
exit 1
7+
fi
8+
INSTANCE_ID=$1
9+
PROMPT_NAME="$2"
10+
11+
THIS_DIR="$(dirname "$0")"
12+
OH_ROOT="$THIS_DIR/.."
13+
OH_ROOT="$(node -e 'console.log(require("path").resolve(process.argv[1]))' "$OH_ROOT")"
14+
if [[ -z "$TMP_DIR" ]]; then
15+
TMP_DIR="/tmp"
16+
fi
17+
TARGET_FOLDER="$TMP_DIR/bolt/$INSTANCE_ID"
18+
WORKSPACE_ROOT="$TARGET_FOLDER/workspace"
19+
INSTANCE_DIR="$THIS_DIR/$INSTANCE_ID"
20+
21+
if [[ ! -d "$INSTANCE_DIR" ]]; then
22+
echo -e "Instance directory \"$INSTANCE_DIR\" not found.\n"
23+
echo -e "Available instance folders:\n"
24+
# List all sub folders
25+
ls -1 -d "$THIS_DIR"/*/
26+
echo -e "\n"
27+
exit 1
28+
fi
29+
30+
31+
# Load prompt.
32+
if [[ -z "$PROMPT_NAME" ]]; then
33+
PROMPT_NAME="prompt"
34+
fi
35+
PROMPT_FILE="$INSTANCE_DIR/$PROMPT_NAME.md"
36+
if [[ ! -f "$PROMPT_FILE" ]]; then
37+
echo "Prompt file \"$PROMPT_FILE\" not found."
38+
exit 1
39+
fi
40+
PROMPT=$(cat "$PROMPT_FILE")
41+
if [[ -z "$PROMPT" ]]; then
42+
echo "Prompt file found but was empty."
43+
exit 1
44+
fi
45+
46+
# (Re-load) source files.
47+
SOURCE_ZIP_FILE="$INSTANCE_DIR/source_code.zip"
48+
rm -rf "$WORKSPACE_ROOT"
49+
mkdir -p "$WORKSPACE_ROOT"
50+
if [[ -f "$SOURCE_ZIP_FILE" ]]; then
51+
unzip -q "$SOURCE_ZIP_FILE" -d "$WORKSPACE_ROOT"
52+
# If it only contains a single folder called "project", move it up.
53+
if [ -d "$WORKSPACE_ROOT/project" ] && [ $(ls -A "$WORKSPACE_ROOT" | wc -l) -eq 1 ]; then
54+
# Enable dotglob in a subshell so hidden entries (e.g. .gitignore, .env) are moved up too; the rm -rf below would otherwise delete them.
(shopt -s dotglob; mv "$WORKSPACE_ROOT/project"/* "$WORKSPACE_ROOT")
55+
rm -rf "$WORKSPACE_ROOT/project"
56+
fi
57+
pushd "$WORKSPACE_ROOT" > /dev/null
58+
git init > /dev/null
59+
git add -A > /dev/null
60+
git commit -am "initial commit" > /dev/null
61+
popd > /dev/null
62+
echo "Workspace has been set up and git initialized."
63+
else
64+
echo "Running analysis WITHOUT source code..."
65+
fi
66+
67+
# Config overrides + sanity checks.
68+
export DEBUG=1
69+
export REPLAY_DEV_MODE=1
70+
export REPLAY_ENABLE_TOOL_CACHE=1
71+
export WORKSPACE_BASE="$WORKSPACE_ROOT"
72+
export LLM_MODEL="anthropic/claude-3-5-sonnet-20241022"
73+
if [[ -z "$LLM_API_KEY" ]]; then
74+
if [[ -z "$ANTHROPIC_API_KEY" ]]; then
75+
echo "LLM_API_KEY or ANTHROPIC_API_KEY environment variable must be set."
76+
exit 1
77+
fi
78+
export LLM_API_KEY=$ANTHROPIC_API_KEY
79+
fi
80+
81+
# Logging.
82+
LOG_FILE="$TARGET_FOLDER/default.log"
83+
echo "WORKSPACE_ROOT: \"$WORKSPACE_ROOT\""
84+
echo "Logging to \"$LOG_FILE\"..."
85+
86+
# GO.
87+
cd "$OH_ROOT"
88+
poetry run python -m openhands.core.main -t "$PROMPT" \
89+
> "$LOG_FILE" 2>&1

0 commit comments

Comments
 (0)