Skip to content

Commit

Permalink
Added bolt samples (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
Domiii authored Jan 18, 2025
1 parent 7d7ea11 commit bef88b0
Show file tree
Hide file tree
Showing 16 changed files with 281 additions and 68 deletions.
6 changes: 3 additions & 3 deletions openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
from collections import deque

Expand Down Expand Up @@ -269,7 +268,7 @@ def get_observation_message(
new_phase = obs.new_phase
if new_phase == ReplayDebuggingPhase.Edit:
# Tell the agent to stop analyzing and start editing:
text = "You have concluded the analysis. Review, then implement the hypothesized changes using the edit tools available to you. The code is available in the workspace. Don't stop. Fix the bug."
text = "You have concluded the analysis.\nIMPORTANT: NOW review, then implement the hypothesized changes using tools. The code is available in the workspace.\nIMPORTANT: Don't stop. Fix the bug.\nIMPORTANT: Don't stop. Fix the bug."
message = Message(role='user', content=[TextContent(text=text)])
else:
raise NotImplementedError(
Expand Down Expand Up @@ -348,7 +347,8 @@ def replay_phase_changed(self, phase: ReplayDebuggingPhase) -> None:
codeact_replay_phase=phase,
)
logger.debug(
f'[REPLAY] CodeActAgent.replay_phase_changed({phase}). New tools: {json.dumps(self.tools, indent=2)}'
f'[REPLAY] CodeActAgent.replay_phase_changed({phase}).'
# f'New tools: {json.dumps(self.tools, indent=2)}'
)

def step(self, state: State) -> Action:
Expand Down
122 changes: 73 additions & 49 deletions openhands/agenthub/codeact_agent/function_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,75 @@

# ---------------------------------------------------------
# Tool: SubmitHypothesis
# TODO: Divide this into multiple steps -
# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps.
# 2. The second submission, after analysis has already concluded, must be as complete as possible.
# ---------------------------------------------------------
# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
# Your investigation has yielded a complete thin slice from symptom to root cause,
# enough proof to let the `CodeEdit` agent take over to fix the bug.
# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug,
# based on evidence you have gathered.
# """

# ReplaySubmitHypothesisTool = ChatCompletionToolParam(
# type='function',
# function=ChatCompletionToolParamFunctionChunk(
# name='submit-hypothesis',
# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
# parameters={
# 'type': 'object',
# 'properties': {
# 'rootCauseHypothesis': {'type': 'string'},
# 'thinSlice': {
# 'type': 'array',
# 'items': {
# 'type': 'object',
# 'properties': {
# 'point': {'type': 'string'},
# 'code': {'type': 'string'},
# 'role': {'type': 'string'},
# },
# 'required': ['point', 'code', 'role'],
# },
# },
# 'modifications': {
# 'type': 'array',
# 'items': {
# 'type': 'object',
# 'properties': {
# 'kind': {
# 'type': 'string',
# 'enum': ['add', 'remove', 'modify'],
# },
# 'newCode': {'type': 'string'},
# 'oldCode': {'type': 'string'},
# 'location': {'type': 'string'},
# 'point': {'type': 'string'},
# # NOTE: Even though we really want the `line` here, requiring it leads to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation.
# # Because no line number is required, the final fix will be more involved, as explained in the issue.
# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176
# # 'line': {'type': 'number'},
# 'briefExplanation': {'type': 'string'},
# 'verificationProof': {'type': 'string'},
# },
# 'required': [
# 'kind',
# 'location',
# 'briefExplanation',
# # 'line',
# 'verificationProof',
# ],
# },
# },
# },
# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'],
# },
# ),
# )
_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
Your investigation has yielded a complete thin slice from symptom to root cause,
enough proof to let the `CodeEdit` agent take over to fix the bug.
DO NOT GUESS. You must provide exact code in the exact right location to fix this bug,
based on evidence you have gathered.
"""
# Use this tool to conclude your analysis and move on to code editing.
# """

ReplaySubmitHypothesisTool = ChatCompletionToolParam(
type='function',
Expand All @@ -112,49 +174,12 @@
'type': 'object',
'properties': {
'rootCauseHypothesis': {'type': 'string'},
'thinSlice': {
'type': 'array',
'items': {
'type': 'object',
'properties': {
'point': {'type': 'string'},
'code': {'type': 'string'},
'role': {'type': 'string'},
},
'required': ['point', 'code', 'role'],
},
},
'modifications': {
'type': 'array',
'items': {
'type': 'object',
'properties': {
'kind': {
'type': 'string',
'enum': ['add', 'remove', 'modify'],
},
'newCode': {'type': 'string'},
'oldCode': {'type': 'string'},
'location': {'type': 'string'},
'point': {'type': 'string'},
# NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation.
# Instead of requiring a line number, the final fix will be more involved, as explained in the issue.
# see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176
# 'line': {'type': 'number'},
'briefExplanation': {'type': 'string'},
'verificationProof': {'type': 'string'},
},
'required': [
'kind',
'location',
'briefExplanation',
# 'line',
'verificationProof',
],
},
'editSuggestions': {
'type': 'string',
'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.',
},
},
'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'],
'required': ['rootCauseHypothesis'],
},
),
)
Expand Down Expand Up @@ -626,7 +651,7 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]:
)
elif tool_call.function.name == 'submit-hypothesis':
action = ReplayPhaseUpdateAction(
new_phase=ReplayDebuggingPhase.Edit
new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments)
)
else:
raise ValueError(
Expand Down Expand Up @@ -713,11 +738,10 @@ def get_tools(
analysis_tools = [
ReplayInspectDataTool,
ReplayInspectPointTool,
ReplaySubmitHypothesisTool,
]
if codeact_replay_phase == ReplayDebuggingPhase.Analysis:
# Analysis tools only. This phase is concluded upon submit-hypothesis.
tools = analysis_tools
tools = analysis_tools + [ReplaySubmitHypothesisTool]
elif codeact_replay_phase == ReplayDebuggingPhase.Edit:
# Combine default and analysis tools.
tools = default_tools + analysis_tools
Expand Down
1 change: 1 addition & 0 deletions openhands/core/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,5 +284,6 @@ def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
config=config,
initial_user_action=initial_user_action,
sid=sid,
exit_on_message=True,
)
)
3 changes: 2 additions & 1 deletion openhands/events/action/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class ReplayPhaseUpdateAction(Action):
new_phase: ReplayDebuggingPhase

thought: str = ''
info: str = ''

action: str = ActionType.REPLAY_UPDATE_PHASE
runnable: ClassVar[bool] = True
Expand All @@ -77,5 +78,5 @@ def message(self) -> str:
return f'{self.__class__.__name__}: {self.new_phase}'

def __str__(self) -> str:
ret = f'{self.message}'
ret = f'[{self.message}] {self.info}'
return ret
9 changes: 0 additions & 9 deletions openhands/events/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,6 @@ def command_annotate_execution_points(
thought: str, is_workspace_repo: bool
) -> ReplayInternalCmdRunAction:
command_input: dict[str, Any] = dict()
if is_workspace_repo:
# NOTE: In the resolver workflow, the workdir path is equal to the repo path:
# 1. We should not append the repo name to the path.
# 2. The resolver also already hard-reset the repo, so forceDelete is not necessary.
command_input['isWorkspaceRepoPath'] = True
command_input['forceDelete'] = False
else:
command_input['isWorkspaceRepoPath'] = False
command_input['forceDelete'] = True
command_input['prompt'] = thought

action = ReplayInternalCmdRunAction(
Expand Down
10 changes: 7 additions & 3 deletions openhands/runtime/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,14 @@ def setup_initial_env(self) -> None:
if self.config.sandbox.runtime_startup_env_vars:
self.add_env_vars(self.config.sandbox.runtime_startup_env_vars)

logger.debug('Maybe adding replay env vars')
if self.config.replay.api_key:
self.add_env_vars({'REPLAY_API_KEY': self.config.replay.api_key})
logger.debug('Added REPLAY_API_KEY to environment')
# Forward the Replay settings into the runtime environment.
# Each host-side flag is read from its OWN variable (previously
# REPLAY_ENABLE_TOOL_CACHE was filled from REPLAY_DEV_MODE, so the two
# could not be toggled independently). Unset host variables are passed
# through as an empty string.
self.add_env_vars(
    {
        'REPLAY_API_KEY': self.config.replay.api_key,
        'REPLAY_DEV_MODE': os.environ.get('REPLAY_DEV_MODE', ''),
        'REPLAY_ENABLE_TOOL_CACHE': os.environ.get(
            'REPLAY_ENABLE_TOOL_CACHE', ''
        ),
    }
)
if self.config.replay.dir:
self.add_env_vars({'REPLAY_DIR': self.config.replay.dir})
logger.debug('Added REPLAY_DIR to environment')
Expand Down
6 changes: 3 additions & 3 deletions openhands/runtime/replay/replay_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ async def run_action(
command_args['recordingId'] = action.recording_id
if action.session_id != '':
command_args['sessionId'] = action.session_id
if action.command_name == 'initial-analysis':
# Hardcode the path for now. We won't need it in the long run.
command_args['workspacePath'] = self.bash_session.workdir
# if action.command_name == 'initial-analysis':
# # Hardcode the path for now. We won't need it in the long run.
# command_args['workspacePath'] = self.bash_session.workdir

with (
tempfile.NamedTemporaryFile(
Expand Down
3 changes: 3 additions & 0 deletions replay_benchmarks/bolt/945/prompt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The deviations from the median for the total network data transferred are rendered incorrectly. They render as 0.0% when they should be something else.

Bug recording: https://app.replay.io/recording/replay-of-localhost8040--3e0a8f68-14e6-4809-bc72-dea0e0374c77?&point=25312447185420620431941338721681417
Binary file added replay_benchmarks/bolt/945/source_code.zip
Binary file not shown.
8 changes: 8 additions & 0 deletions replay_benchmarks/bolt/946/prompt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
The following bug has been discovered in a web app inside a browser.
NOTE: Networking errors might be caused by CORS problems. Their fix usually requires server middleware.

This web app should render the results of a single prompt provided to two different models.
The UI renders: `anthropic API error: Connection error`
Fix it.

Bug recording: https://app.replay.io/recording/replay-of-localhost8040--f9dd7902-5188-45d2-9d2a-aa4d2e674592?point=29531188383192074852669071103623174&time=13949.366224168605&viewMode=dev
Binary file added replay_benchmarks/bolt/946/source_code.zip
Binary file not shown.
3 changes: 3 additions & 0 deletions replay_benchmarks/bolt/951/prompt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The React app renders a blank screen.

Bug recording: https://app.replay.io/recording/replay-of-localhost8040--43a890bc-6f37-47e0-ba47-4d04827e4e44
Binary file added replay_benchmarks/bolt/951/source_code.zip
Binary file not shown.
89 changes: 89 additions & 0 deletions replay_benchmarks/bolt/run-bolt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env bash
# Copyright 2020-2025 Record Replay Inc.
#
# Run a single "bolt" benchmark instance through OpenHands.
#
# Usage: run-bolt.sh <instance-id> [prompt-name]
#   instance-id  Name of a sub-folder next to this script (e.g. 945).
#   prompt-name  Base name (without ".md") of the prompt file inside that
#                folder. Defaults to "prompt".
#
# Environment:
#   TMP_DIR            Parent folder for the scratch workspace (default: /tmp).
#   LLM_API_KEY        API key handed to OpenHands; falls back to
#                      ANTHROPIC_API_KEY when unset.
#
# The script uses bash-only features ([[ ]], pushd/popd, shopt), hence the
# explicit bash shebang.
set -e

if [[ -z "$1" ]]; then
  echo "Usage: $0 <instance-id> [prompt-name]"
  exit 1
fi
INSTANCE_ID="$1"
PROMPT_NAME="$2"

# Resolve to absolute paths up front: the script changes directory later on.
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
# This script lives in <repo>/replay_benchmarks/bolt/, so the OpenHands
# repository root is two levels up.
OH_ROOT="$(cd "$THIS_DIR/../.." && pwd)"
TMP_DIR="${TMP_DIR:-/tmp}"
TARGET_FOLDER="$TMP_DIR/bolt/$INSTANCE_ID"
WORKSPACE_ROOT="$TARGET_FOLDER/workspace"
INSTANCE_DIR="$THIS_DIR/$INSTANCE_ID"

if [[ ! -d "$INSTANCE_DIR" ]]; then
  echo -e "Instance directory \"$INSTANCE_DIR\" not found.\n"
  echo -e "Available instance folders:\n"
  # List all sub folders. Do not let an empty listing abort before `exit 1`.
  ls -1 -d "$THIS_DIR"/*/ || true
  echo -e "\n"
  exit 1
fi


# Load prompt.
if [[ -z "$PROMPT_NAME" ]]; then
  PROMPT_NAME="prompt"
fi
PROMPT_FILE="$INSTANCE_DIR/$PROMPT_NAME.md"
if [[ ! -f "$PROMPT_FILE" ]]; then
  echo "Prompt file \"$PROMPT_FILE\" not found."
  exit 1
fi
PROMPT="$(cat "$PROMPT_FILE")"
if [[ -z "$PROMPT" ]]; then
  echo "Prompt file found but was empty."
  exit 1
fi

# (Re-load) source files.
SOURCE_ZIP_FILE="$INSTANCE_DIR/source_code.zip"
# Quoted on purpose: an unquoted `rm -rf` on a path containing spaces or glob
# characters would delete the wrong things.
rm -rf "$WORKSPACE_ROOT"
mkdir -p "$WORKSPACE_ROOT"
if [[ -f "$SOURCE_ZIP_FILE" ]]; then
  unzip -q "$SOURCE_ZIP_FILE" -d "$WORKSPACE_ROOT"
  # If it only contains a single folder called "project", move it up.
  if [[ -d "$WORKSPACE_ROOT/project" ]] && [[ "$(ls -A "$WORKSPACE_ROOT" | wc -l)" -eq 1 ]]; then
    # Rename the wrapper first so that an entry inside it which is itself
    # called "project" cannot collide with the wrapper while being moved.
    STAGING_DIR="$WORKSPACE_ROOT/.bolt-project-staging"
    mv "$WORKSPACE_ROOT/project" "$STAGING_DIR"
    (
      # dotglob: also move hidden files (.gitignore, .env, ...).
      # nullglob: an empty wrapper folder yields no entries instead of a
      # literal "*" that would make `mv` fail.
      shopt -s dotglob nullglob
      entries=("$STAGING_DIR"/*)
      if (( ${#entries[@]} > 0 )); then
        mv "${entries[@]}" "$WORKSPACE_ROOT"
      fi
    )
    rm -rf "$STAGING_DIR"
  fi
  pushd "$WORKSPACE_ROOT" > /dev/null
  git init > /dev/null
  git add -A > /dev/null
  git commit -am "initial commit" > /dev/null
  popd > /dev/null
  echo "Workspace has been set up and git initialized."
else
  echo "Running analysis WITHOUT source code..."
fi

# Config overrides + sanity checks.
export DEBUG=1
export REPLAY_DEV_MODE=1
export REPLAY_ENABLE_TOOL_CACHE=1
export WORKSPACE_BASE="$WORKSPACE_ROOT"
export LLM_MODEL="anthropic/claude-3-5-sonnet-20241022"
if [[ -z "$LLM_API_KEY" ]]; then
  if [[ -z "$ANTHROPIC_API_KEY" ]]; then
    echo "LLM_API_KEY or ANTHROPIC_API_KEY environment variable must be set."
    exit 1
  fi
  export LLM_API_KEY="$ANTHROPIC_API_KEY"
fi

# Logging. TARGET_FOLDER already exists: it is the parent of WORKSPACE_ROOT,
# which was created with `mkdir -p` above.
LOG_FILE="$TARGET_FOLDER/default.log"
echo "WORKSPACE_ROOT: \"$WORKSPACE_ROOT\""
echo "Logging to \"$LOG_FILE\"..."

# GO.
cd "$OH_ROOT"
poetry run python -m openhands.core.main -t "$PROMPT" \
  > "$LOG_FILE" 2>&1
Loading

0 comments on commit bef88b0

Please sign in to comment.