diff --git a/camel/toolkits/hybrid_browser_toolkit/config_loader.py b/camel/toolkits/hybrid_browser_toolkit/config_loader.py
index f272ddb9f7..3dc83d5df4 100644
--- a/camel/toolkits/hybrid_browser_toolkit/config_loader.py
+++ b/camel/toolkits/hybrid_browser_toolkit/config_loader.py
@@ -58,6 +58,7 @@ class ToolkitConfig:
     log_dir: Optional[str] = None
     session_id: Optional[str] = None
     enabled_tools: Optional[list] = None
+    enable_reasoning: bool = False
 
 
 class ConfigLoader:
@@ -123,6 +124,8 @@ def from_kwargs(cls, **kwargs) -> 'ConfigLoader':
                 toolkit_kwargs["session_id"] = value
             elif key == "enabledTools":
                 toolkit_kwargs["enabled_tools"] = value
+            elif key == "enableReasoning":
+                toolkit_kwargs["enable_reasoning"] = value
 
             elif key == "fullVisualMode":
                 browser_kwargs["full_visual_mode"] = value
diff --git a/camel/toolkits/hybrid_browser_toolkit/decorators.py b/camel/toolkits/hybrid_browser_toolkit/decorators.py
new file mode 100644
index 0000000000..09a60ea330
--- /dev/null
+++ b/camel/toolkits/hybrid_browser_toolkit/decorators.py
@@ -0,0 +1,206 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import functools
+import inspect
+from typing import Dict
+
+from camel.logger import get_logger
+
+logger = get_logger(__name__)
+
+_REASON_ARG_DOC = "reason (str): The reason why this tool is called."
+_REASON_RETURN_DOC = '- "reason" (str): tool call reason.'
+
+
+def _leading_whitespace(text):
+    r"""Return the whitespace prefix of ``text``."""
+    return text[: len(text) - len(text.lstrip())]
+
+
+def _find_section(lines, header):
+    r"""Return the index of the first line containing ``header``.
+
+    ``None`` is returned when no line contains it.
+    """
+    for idx, line in enumerate(lines):
+        if header in line:
+            return idx
+    return None
+
+
+def _section_end(lines, start, indent):
+    r"""Return the index just past the docstring section at ``start``.
+
+    The section consists of the lines after ``start`` that are indented
+    deeper than ``indent``, the indentation of the section header. Blank
+    lines inside the section are skipped; blank lines trailing it are not
+    counted as part of it.
+    """
+    end = len(lines)
+    for idx in range(start + 1, len(lines)):
+        line = lines[idx]
+        if line.strip() and not line.startswith(indent + " "):
+            end = idx
+            break
+    while end > start + 1 and not lines[end - 1].strip():
+        end -= 1
+    return end
+
+
+def add_reason_field(func):
+    r"""Decorator that lets a tool function report why it was called.
+
+    The decorator:
+    1. Adds a keyword-only ``reason`` parameter to the function
+       signature, unless the function already declares one.
+    2. Documents ``reason`` in the ``Args:`` and ``Returns:`` sections
+       of the function docstring.
+    3. Wraps the function so that a ``dict`` result carries the given
+       reason under the ``"reason"`` key.
+
+    Functions whose return annotation is not a ``dict`` type are returned
+    unchanged. Coroutine functions get a coroutine wrapper and regular
+    functions get a regular wrapper.
+
+    Args:
+        func (Callable): The tool function to wrap.
+
+    Returns:
+        Callable: The wrapped function, or ``func`` itself when its
+            return annotation is not a ``dict`` type.
+    """
+    sig = inspect.signature(func)
+
+    def is_dict_type(annotation):
+        if annotation is inspect.Signature.empty:
+            return False
+        if annotation is dict or annotation is Dict:
+            return True
+        # Parameterized aliases such as Dict[str, Any] expose the runtime
+        # class through ``__origin__``.
+        origin = getattr(annotation, '__origin__', None)
+        return origin is dict or origin is Dict
+
+    if not is_dict_type(sig.return_annotation):
+        logger.info(
+            f"add_reason_field: Function '{func.__name__}' "
+            "does not have return type Dict. "
+            "Reasoning will not be applied."
+        )
+        return func
+
+    params = list(sig.parameters.values())
+    if "reason" not in sig.parameters:
+        reason_param = inspect.Parameter(
+            "reason",
+            inspect.Parameter.KEYWORD_ONLY,
+            default="",
+            annotation=str,
+        )
+        # A keyword-only parameter has to come before ``**kwargs``.
+        if params and params[-1].kind is inspect.Parameter.VAR_KEYWORD:
+            params.insert(len(params) - 1, reason_param)
+        else:
+            params.append(reason_param)
+    new_sig = sig.replace(parameters=params)
+
+    lines = (func.__doc__ or "").splitlines()
+
+    # Indentation of the docstring body (every line but the first);
+    # sections that have to be appended are aligned with it.
+    body_indents = [
+        _leading_whitespace(line) for line in lines[1:] if line.strip()
+    ]
+    base_indent = min(body_indents, key=len) if body_indents else ""
+
+    args_idx = _find_section(lines, "Args:")
+    if args_idx is None:
+        lines.extend(
+            [
+                "",
+                f"{base_indent}Args:",
+                f"{base_indent}    {_REASON_ARG_DOC}",
+            ]
+        )
+    else:
+        header = lines[args_idx]
+        indent = header[: header.index("Args:")]
+        end_idx = _section_end(lines, args_idx, indent)
+        entries = [
+            line for line in lines[args_idx + 1 : end_idx] if line.strip()
+        ]
+        # Align the new entry with the existing ones, if there are any.
+        entry_indent = (
+            _leading_whitespace(entries[0]) if entries else indent + "    "
+        )
+        lines.insert(end_idx, f"{entry_indent}{_REASON_ARG_DOC}")
+
+    returns_idx = _find_section(lines, "Returns:")
+    if returns_idx is None:
+        lines.extend(
+            [
+                "",
+                f"{base_indent}Returns:",
+                f"{base_indent}    dict: The result dictionary.",
+                f"{base_indent}        {_REASON_RETURN_DOC}",
+            ]
+        )
+    else:
+        header = lines[returns_idx]
+        indent = header[: header.index("Returns:")]
+        end_idx = _section_end(lines, returns_idx, indent)
+        # Drop stale descriptions of the "reason" key before adding it.
+        section = [
+            line
+            for line in lines[returns_idx + 1 : end_idx]
+            if '"reason"' not in line and "'reason'" not in line
+        ]
+        bullets = [line for line in section if line.lstrip().startswith("- ")]
+        # Align the new bullet with the existing ones, if there are any.
+        bullet_indent = (
+            _leading_whitespace(bullets[-1]) if bullets else indent + " " * 8
+        )
+        section.append(f"{bullet_indent}{_REASON_RETURN_DOC}")
+        lines[returns_idx + 1 : end_idx] = section
+
+    doc = "\n".join(lines)
+
+    def attach_reason(result, reason):
+        # Only a dict can carry the extra key; anything else is returned
+        # as is.
+        if isinstance(result, dict):
+            result["reason"] = reason
+        return result
+
+    # Look through ``functools.wraps``-style decorators so that an async
+    # tool hidden behind a regular wrapper is still treated as async.
+    if inspect.iscoroutinefunction(inspect.unwrap(func)):
+
+        @functools.wraps(func)
+        async def wrapper(*args, reason: str = "", **kwargs):
+            result = func(*args, **kwargs)
+            if inspect.isawaitable(result):
+                result = await result
+            return attach_reason(result, reason)
+
+    else:
+
+        @functools.wraps(func)
+        def wrapper(*args, reason: str = "", **kwargs):
+            return attach_reason(func(*args, **kwargs), reason)
+
+    wrapper.__signature__ = new_sig
+    wrapper.__doc__ = doc
+    return wrapper
diff --git a/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py b/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py
index a6ddc4b20b..536bbfe9aa 100644
--- a/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py
+++ b/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py
@@ -39,6 +39,9 @@ class HybridBrowserToolkit(BaseToolkit):
         cache_dir (str): Directory for caching. Defaults to "tmp/".
         enabled_tools (Optional[List[str]]): List of enabled tools.
             Defaults to None.
+        enable_reasoning (bool): Whether to enable reasoning when the
+            agent is using the browser toolkit. When enabled, the agent
+            provides explanations for its actions. Defaults to False.
         browser_log_to_file (bool): Whether to log browser actions to
             file. Defaults to False.
         log_dir (Optional[str]): Custom directory path for log files.
@@ -93,6 +96,7 @@ def __new__(
         stealth: bool = False,
         cache_dir: Optional[str] = None,
         enabled_tools: Optional[List[str]] = None,
+        enable_reasoning: bool = False,
         browser_log_to_file: bool = False,
         log_dir: Optional[str] = None,
         session_id: Optional[str] = None,
@@ -127,6 +131,9 @@ def __new__(
             cache_dir (str): Directory for caching. Defaults to "tmp/".
             enabled_tools (Optional[List[str]]): List of enabled tools.
                 Defaults to None.
+            enable_reasoning (bool): Whether to enable reasoning when the
+                agent is using the browser toolkit. When enabled, the agent
+                provides explanations for its actions. Defaults to False.
             browser_log_to_file (bool): Whether to log browser actions to
                 file. Defaults to False.
             log_dir (Optional[str]): Custom directory path for log files.
@@ -182,6 +189,7 @@ def __new__(
                 stealth=stealth,
                 cache_dir=cache_dir,
                 enabled_tools=enabled_tools,
+                enable_reasoning=enable_reasoning,
                 browser_log_to_file=browser_log_to_file,
                 log_dir=log_dir,
                 session_id=session_id,
diff --git a/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py b/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py
index 9d669f1284..827970e87e 100644
--- a/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py
+++ b/camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py
@@ -33,6 +33,7 @@
 from camel.utils.commons import dependencies_required
 
 from .config_loader import ConfigLoader
+from .decorators import add_reason_field
 from .ws_wrapper import WebSocketBrowserWrapper, high_level_action
 
 logger = get_logger(__name__)
@@ -99,6 +100,7 @@ def __init__(
         stealth: bool = False,
         cache_dir: Optional[str] = None,
         enabled_tools: Optional[List[str]] = None,
+        enable_reasoning: bool = False,
         browser_log_to_file: bool = False,
         log_dir: Optional[str] = None,
         session_id: Optional[str] = None,
@@ -128,6 +130,9 @@ def __init__(
             cache_dir (str): Directory for caching. Defaults to "tmp/".
             enabled_tools (Optional[List[str]]): List of enabled tools.
                 Defaults to None.
+            enable_reasoning (bool): Whether to enable reasoning when the
+                agent is using the browser toolkit. When enabled, the agent
+                provides explanations for its actions. Defaults to False.
             browser_log_to_file (bool): Whether to log browser actions to
                 file. Defaults to False.
             log_dir (Optional[str]): Custom directory path for log files.
@@ -187,6 +192,7 @@ def __init__(
             log_dir=log_dir,
             session_id=session_id,
             enabled_tools=enabled_tools,
+            enable_reasoning=enable_reasoning,
             connect_over_cdp=connect_over_cdp,
             cdp_url=cdp_url,
             cdp_keep_current_page=cdp_keep_current_page,
@@ -206,17 +212,22 @@ def __init__(
                 "is True, the browser will keep the current page and not "
                 "navigate to any URL."
             )
 
+        # toolkit settings
+        self._cache_dir = toolkit_config.cache_dir
+        self._browser_log_to_file = toolkit_config.browser_log_to_file
+        self._enabled_tools = toolkit_config.enabled_tools
+        self._enable_reasoning = toolkit_config.enable_reasoning
+        # browser settings
         self._headless = browser_config.headless
         self._user_data_dir = browser_config.user_data_dir
         self._stealth = browser_config.stealth
-        self._cache_dir = toolkit_config.cache_dir
-        self._browser_log_to_file = toolkit_config.browser_log_to_file
         self._default_start_url = browser_config.default_start_url
         self._session_id = toolkit_config.session_id or "default"
         self._viewport_limit = browser_config.viewport_limit
         self._full_visual_mode = browser_config.full_visual_mode
 
+        # timeout settings
         self._default_timeout = browser_config.default_timeout
         self._short_timeout = browser_config.short_timeout
         self._navigation_timeout = browser_config.navigation_timeout
@@ -227,24 +238,32 @@ def __init__(
             browser_config.dom_content_loaded_timeout
         )
 
-        if enabled_tools is None:
-            self.enabled_tools = self.DEFAULT_TOOLS.copy()
+        if self._enabled_tools is None:
+            self._enabled_tools = self.DEFAULT_TOOLS.copy()
         else:
             invalid_tools = [
-                tool for tool in enabled_tools if tool not in self.ALL_TOOLS
+                tool
+                for tool in self._enabled_tools
+                if tool not in self.ALL_TOOLS
             ]
             if invalid_tools:
                 raise ValueError(
                     f"Invalid tools specified: {invalid_tools}. "
                     f"Available tools: {self.ALL_TOOLS}"
                 )
-            self.enabled_tools = enabled_tools.copy()
 
-        logger.info(f"Enabled tools: {self.enabled_tools}")
+        logger.info(f"Enabled tools: {self._enabled_tools}")
 
         self._ws_wrapper: Optional[WebSocketBrowserWrapper] = None
         self._ws_config = self.config_loader.to_ws_config()
 
+        # Dynamically wrap tool methods if reasoning is enabled
+        if self._enable_reasoning:
+            for tool_name in self._enabled_tools:
+                method = getattr(self, tool_name, None)
+                if method and callable(method):
+                    setattr(self, tool_name, add_reason_field(method))
+
     async def _ensure_ws_wrapper(self):
         """Ensure WebSocket wrapper is initialized."""
         if self._ws_wrapper is None:
@@ -350,22 +369,27 @@ async def browser_open(self) -> Dict[str, Any]:
                 "total_tabs": 0,
             }
 
-    async def browser_close(self) -> str:
+    async def browser_close(self) -> Dict[str, Any]:
         r"""Closes the browser session, releasing all resources.
 
         This should be called at the end of a task for cleanup.
 
         Returns:
-            str: A confirmation message.
+            Dict[str, Any]: A dictionary with:
+                - "message" (str): A confirmation message.
+                - "success" (bool): Whether the operation succeeded.
         """
         try:
             if self._ws_wrapper:
                 await self._ws_wrapper.stop()
                 self._ws_wrapper = None
-            return "Browser session closed."
+            return {"message": "Browser session closed.", "success": True}
         except Exception as e:
             logger.error(f"Failed to close browser: {e}")
-            return f"Error closing browser: {e}"
+            return {
+                "message": f"Error closing browser: {e}",
+                "success": False,
+            }
 
     async def disconnect_websocket(self) -> str:
         r"""Disconnects the WebSocket connection without closing the browser.
@@ -528,7 +552,7 @@ async def browser_forward(self) -> Dict[str, Any]:
                 "total_tabs": 0,
             }
 
-    async def browser_get_page_snapshot(self) -> str:
+    async def browser_get_page_snapshot(self) -> Dict[str, Any]:
         r"""Gets a textual snapshot of the page's interactive elements.
 
         The snapshot lists elements like buttons, links, and inputs,
@@ -542,17 +566,23 @@ async def browser_get_page_snapshot(self) -> str:
         will be included in the snapshot.
 
         Returns:
-            str: A formatted string representing the interactive elements and
-                their `ref` IDs. For example:
-                '- link "Sign In" [ref=1]'
-                '- textbox "Username" [ref=2]'
+            Dict[str, Any]: A dictionary with:
+                - "snapshot" (str): A formatted string representing the
+                  interactive elements and their `ref` IDs. For example:
+                  '- link "Sign In" [ref=1]'
+                  '- textbox "Username" [ref=2]'
+                - "success" (bool): Whether the operation succeeded.
         """
         try:
             ws_wrapper = await self._get_ws_wrapper()
-            return await ws_wrapper.get_page_snapshot(self._viewport_limit)
+            snapshot = await ws_wrapper.get_page_snapshot(self._viewport_limit)
+            return {"snapshot": snapshot, "success": True}
         except Exception as e:
             logger.error(f"Failed to get page snapshot: {e}")
-            return f"Error capturing snapshot: {e}"
+            return {
+                "snapshot": f"Error capturing snapshot: {e}",
+                "success": False,
+            }
 
     @dependencies_required('PIL')
     async def browser_get_som_screenshot(
@@ -1875,7 +1905,12 @@ async def _await_enter():
             result_msg = f"Timeout {timeout_sec}s reached, auto-resumed."
 
         try:
-            snapshot = await self.browser_get_page_snapshot()
+            snapshot_result = await self.browser_get_page_snapshot()
+            snapshot = (
+                snapshot_result["snapshot"]
+                if snapshot_result["success"]
+                else ""
+            )
             tab_info = await self.browser_get_tab_info()
             return {"result": result_msg, "snapshot": snapshot, **tab_info}
         except Exception as e:
@@ -1913,7 +1948,10 @@ def clone_for_new_session(
             stealth=self._stealth,
             cache_dir=f"{self._cache_dir.rstrip('/')}_clone_"
             f"{new_session_id}/",
-            enabled_tools=self.enabled_tools.copy(),
+            enabled_tools=(
+                self._enabled_tools.copy() if self._enabled_tools else None
+            ),
+            enable_reasoning=self._enable_reasoning,
             browser_log_to_file=self._browser_log_to_file,
             session_id=new_session_id,
             default_start_url=self._default_start_url,
@@ -1958,16 +1996,18 @@ def get_tools(self) -> List[FunctionTool]:
             "browser_sheet_read": self.browser_sheet_read,
         }
 
-        enabled_tools = []
-
-        for tool_name in self.enabled_tools:
+        tools = []
+        enabled_tools = (
+            self._enabled_tools if self._enabled_tools else self.DEFAULT_TOOLS
+        )
+        for tool_name in enabled_tools:
             if tool_name in tool_map:
                 tool = FunctionTool(
                     cast(Callable[..., Any], tool_map[tool_name])
                 )
-                enabled_tools.append(tool)
+                tools.append(tool)
             else:
                 logger.warning(f"Unknown tool name: {tool_name}")
 
-        logger.info(f"Returning {len(enabled_tools)} enabled tools")
-        return enabled_tools
+        logger.info(f"Returning {len(tools)} enabled tools")
+        return tools
diff --git a/examples/workforce/eigent.py b/examples/workforce/eigent.py
index 6dee6f8151..c3dd4addf3 100644
--- a/examples/workforce/eigent.py
+++ b/examples/workforce/eigent.py
@@ -326,6 +326,7 @@ def search_agent_factory(
     web_toolkit_custom = HybridBrowserToolkit(
         headless=False,
         enabled_tools=custom_tools,
+        enable_reasoning=True,
         browser_log_to_file=True,
         stealth=True,
         session_id=agent_id,
diff --git a/test/toolkits/test_hybrid_browser_toolkit.py b/test/toolkits/test_hybrid_browser_toolkit.py
index 5f78d6ae9a..720997cb62 100644
--- a/test/toolkits/test_hybrid_browser_toolkit.py
+++ b/test/toolkits/test_hybrid_browser_toolkit.py
@@ -304,8 +304,10 @@ async def test_close_browser(self, browser_toolkit_fixture):
         """Test closing browser."""
         toolkit = browser_toolkit_fixture
         result = await toolkit.browser_close()
-        assert isinstance(result, str)
-        assert "closed" in result.lower()
+        assert isinstance(result, dict)
+        assert "message" in result
+        assert "success" in result
+        assert "closed" in result["message"].lower()
 
     @pytest.mark.asyncio
     async def test_visit_page_valid_url(self, browser_toolkit_fixture):
@@ -338,10 +340,13 @@ async def test_get_page_snapshot(self, browser_toolkit_fixture):
         """Test getting page snapshot."""
         toolkit = browser_toolkit_fixture
         result = await toolkit.browser_get_page_snapshot()
-        assert isinstance(result, str)
+        assert isinstance(result, dict)
+        assert "snapshot" in result
+        assert "success" in result
+        snapshot = result["snapshot"]
         assert (
-            "page snapshot" in result.lower()
-            or "test content" in result.lower()
+            "page snapshot" in snapshot.lower()
+            or "test content" in snapshot.lower()
         )
 
     @pytest.mark.asyncio