|
105 | 105 | ) |
106 | 106 | from camel.utils.commons import dependencies_required |
107 | 107 | from camel.utils.context_utils import ContextUtility |
| 108 | +from camel.utils.tool_result import ToolResult |
108 | 109 |
|
109 | 110 | TOKEN_LIMIT_ERROR_MARKERS = ( |
110 | 111 | "context_length_exceeded", |
@@ -3356,9 +3357,11 @@ def _update_token_usage_tracker( |
3356 | 3357 | tracker (Dict[str, int]): The token usage tracker to update. |
3357 | 3358 | usage_dict (Dict[str, int]): The usage dictionary with new values. |
3358 | 3359 | """ |
3359 | | - tracker["prompt_tokens"] += usage_dict.get("prompt_tokens", 0) |
3360 | | - tracker["completion_tokens"] += usage_dict.get("completion_tokens", 0) |
3361 | | - tracker["total_tokens"] += usage_dict.get("total_tokens", 0) |
| 3360 | + tracker["prompt_tokens"] += usage_dict.get("prompt_tokens") or 0 |
| 3361 | + tracker["completion_tokens"] += ( |
| 3362 | + usage_dict.get("completion_tokens") or 0 |
| 3363 | + ) |
| 3364 | + tracker["total_tokens"] += usage_dict.get("total_tokens") or 0 |
3362 | 3365 |
|
3363 | 3366 | def _convert_to_chatagent_response( |
3364 | 3367 | self, |
@@ -4036,6 +4039,65 @@ def _record_tool_calling( |
4036 | 4039 | cast(List[MemoryRecord], func_records), |
4037 | 4040 | ) |
4038 | 4041 |
|
| 4042 | + if isinstance(result, ToolResult) and result.images: |
| 4043 | + try: |
| 4044 | + import base64 |
| 4045 | + import io |
| 4046 | + |
| 4047 | + try: |
| 4048 | + from PIL import Image |
| 4049 | + except ImportError: |
| 4050 | + logger.warning( |
| 4051 | + f"Tool '{func_name}' returned images but PIL " |
| 4052 | + "is not installed. Install with: pip install " |
| 4053 | + "Pillow. Skipping visual context injection." |
| 4054 | + ) |
| 4055 | + # Continue without injecting images |
| 4056 | + result = ( |
| 4057 | + result.text if hasattr(result, 'text') else str(result) |
| 4058 | + ) |
| 4059 | + else: |
| 4060 | + logger.info( |
| 4061 | + f"Tool '{func_name}' returned ToolResult with " |
| 4062 | + f"{len(result.images)} image(s), injecting into " |
| 4063 | + "context" |
| 4064 | + ) |
| 4065 | + |
| 4066 | + # Convert base64 images to PIL Image objects |
| 4067 | + pil_images: List[Union[Image.Image, str]] = [] |
| 4068 | + for img_data in result.images: |
| 4069 | + if img_data.startswith('data:image/'): |
| 4070 | + # Extract base64 data |
| 4071 | + base64_str = img_data.split(',', 1)[1] |
| 4072 | + img_bytes = base64.b64decode(base64_str) |
| 4073 | + pil_img = Image.open(io.BytesIO(img_bytes)) |
| 4074 | + pil_images.append(pil_img) |
| 4075 | + |
| 4076 | + if pil_images: |
| 4077 | + # Create a user message with the image(s) |
| 4078 | + visual_msg = BaseMessage.make_user_message( |
| 4079 | + role_name="Tool", |
| 4080 | + content=f"[Visual output from {func_name}]", |
| 4081 | + image_list=pil_images, |
| 4082 | + ) |
| 4083 | + |
| 4084 | + # Inject into conversation context with slight |
| 4085 | + # timestamp increment |
| 4086 | + self.update_memory( |
| 4087 | + visual_msg, |
| 4088 | + OpenAIBackendRole.USER, |
| 4089 | + timestamp=base_timestamp + 2e-6, |
| 4090 | + return_records=False, |
| 4091 | + ) |
| 4092 | + logger.info( |
| 4093 | + f"Successfully injected {len(pil_images)} " |
| 4094 | + "image(s) into agent context" |
| 4095 | + ) |
| 4096 | + except Exception as e: |
| 4097 | + logger.error( |
| 4098 | + f"Failed to inject visual content from {func_name}: {e}" |
| 4099 | + ) |
| 4100 | + |
4039 | 4101 | # Record information about this tool call |
4040 | 4102 | tool_record = ToolCallingRecord( |
4041 | 4103 | tool_name=func_name, |
|
0 commit comments