Skip to content

Commit 9c92238

Browse files
committed
feat: filter images
1 parent c74438d commit 9c92238

File tree

2 files changed

+49
-4
lines changed

2 files changed

+49
-4
lines changed

src/scrapybara/client.py

+47-2
Original file line numberDiff line numberDiff line change
@@ -1667,6 +1667,7 @@ def act(
16671667
on_step: Optional[Callable[[Step], None]] = None,
16681668
temperature: Optional[float] = None,
16691669
max_tokens: Optional[int] = None,
1670+
images_to_keep: Optional[int] = 4,
16701671
request_options: Optional[RequestOptions] = None,
16711672
) -> ActResponse[SchemaT]:
16721673
"""
@@ -1682,6 +1683,7 @@ def act(
16821683
on_step: Callback for each step of the conversation
16831684
temperature: Optional temperature parameter for the model
16841685
max_tokens: Optional max tokens parameter for the model
1686+
images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
16851687
request_options: Optional request configuration
16861688
16871689
Returns:
@@ -1706,6 +1708,7 @@ def act(
17061708
on_step=on_step,
17071709
temperature=temperature,
17081710
max_tokens=max_tokens,
1711+
images_to_keep=images_to_keep,
17091712
request_options=request_options,
17101713
):
17111714
steps.append(step)
@@ -1743,6 +1746,8 @@ def act(
17431746
total_tokens=total_tokens,
17441747
)
17451748

1749+
_filter_images(result_messages, images_to_keep)
1750+
17461751
return ActResponse(
17471752
messages=result_messages, steps=steps, text=text, output=output, usage=usage
17481753
)
@@ -1759,6 +1764,7 @@ def act_stream(
17591764
on_step: Optional[Callable[[Step], None]] = None,
17601765
temperature: Optional[float] = None,
17611766
max_tokens: Optional[int] = None,
1767+
images_to_keep: Optional[int] = 4,
17621768
request_options: Optional[RequestOptions] = None,
17631769
) -> Generator[Step, None, None]:
17641770
"""
@@ -1774,6 +1780,7 @@ def act_stream(
17741780
on_step: Callback for each step of the conversation
17751781
temperature: Optional temperature parameter for the model
17761782
max_tokens: Optional max tokens parameter for the model
1783+
images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
17771784
request_options: Optional request configuration
17781785
17791786
Yields:
@@ -1813,6 +1820,8 @@ def act_stream(
18131820
while True:
18141821
# Convert tools to ApiTools
18151822
api_tools = [ApiTool.from_tool(tool) for tool in current_tools]
1823+
1824+
_filter_images(current_messages, images_to_keep)
18161825

18171826
request = SingleActRequest(
18181827
model=model,
@@ -2076,6 +2085,7 @@ async def act(
20762085
on_step: Optional[Callable[[Step], None]] = None,
20772086
temperature: Optional[float] = None,
20782087
max_tokens: Optional[int] = None,
2088+
images_to_keep: Optional[int] = 4,
20792089
request_options: Optional[RequestOptions] = None,
20802090
) -> ActResponse[SchemaT]:
20812091
"""
@@ -2091,6 +2101,7 @@ async def act(
20912101
on_step: Callback for each step of the conversation
20922102
temperature: Optional temperature parameter for the model
20932103
max_tokens: Optional max tokens parameter for the model
2104+
images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
20942105
request_options: Optional request configuration
20952106
20962107
Returns:
@@ -2112,9 +2123,10 @@ async def act(
21122123
prompt=prompt,
21132124
messages=messages,
21142125
schema=schema,
2126+
on_step=on_step,
21152127
temperature=temperature,
21162128
max_tokens=max_tokens,
2117-
on_step=on_step,
2129+
images_to_keep=images_to_keep,
21182130
request_options=request_options,
21192131
):
21202132
steps.append(step)
@@ -2152,6 +2164,8 @@ async def act(
21522164
total_tokens=total_tokens,
21532165
)
21542166

2167+
_filter_images(result_messages, images_to_keep)
2168+
21552169
return ActResponse(
21562170
messages=result_messages, steps=steps, text=text, output=output, usage=usage
21572171
)
@@ -2168,6 +2182,7 @@ async def act_stream(
21682182
on_step: Optional[Callable[[Step], None]] = None,
21692183
temperature: Optional[float] = None,
21702184
max_tokens: Optional[int] = None,
2185+
images_to_keep: Optional[int] = 4,
21712186
request_options: Optional[RequestOptions] = None,
21722187
) -> AsyncGenerator[Step, None]:
21732188
"""
@@ -2183,6 +2198,7 @@ async def act_stream(
21832198
on_step: Callback for each step of the conversation
21842199
temperature: Optional temperature parameter for the model
21852200
max_tokens: Optional max tokens parameter for the model
2201+
images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
21862202
request_options: Optional request configuration
21872203
21882204
Yields:
@@ -2223,6 +2239,8 @@ async def act_stream(
22232239
# Convert tools to ApiTools
22242240
api_tools = [ApiTool.from_tool(tool) for tool in current_tools]
22252241

2242+
_filter_images(current_messages, images_to_keep)
2243+
22262244
request = SingleActRequest(
22272245
model=model,
22282246
system=system,
@@ -2321,7 +2339,6 @@ async def act_stream(
23212339
if not has_tool_calls or has_structured_output:
23222340
break
23232341

2324-
23252342
def _create_request_from_action(action):
23262343
"""Helper function to create a request object from an action object."""
23272344
if isinstance(action, MoveMouseAction):
@@ -2369,3 +2386,31 @@ def _create_request_from_action(action):
23692386
return Request_GetCursorPosition()
23702387
else:
23712388
return None
2389+
2390+
def _filter_images(messages: List[Message], images_to_keep: Optional[int]) -> None:
    """
    Helper function to drop older base64 images from tool results, keeping only the latest ones.

    Messages are scanned from newest to oldest, and the tool results inside each
    ToolMessage from last to first. The first `images_to_keep` results that carry a
    "base_64_image" key are left untouched; the key is deleted from every older
    result. The result dicts are modified in place and nothing is returned.

    Args:
        messages: List of messages to filter
        images_to_keep: Maximum number of images to keep. None disables filtering
            entirely; zero or a negative value removes every image.
    """
    # Callers declare this parameter as Optional[int]; comparing an int with None
    # would raise TypeError, so None is treated as "no limit".
    if images_to_keep is None:
        return

    images_kept = 0

    # Walk newest-first so the most recent images are the ones that survive.
    for msg in reversed(messages):
        if not isinstance(msg, ToolMessage) or not msg.content:
            continue

        for tool_result in reversed(msg.content):
            result = getattr(tool_result, "result", None)
            if not isinstance(result, dict) or "base_64_image" not in result:
                continue

            if images_kept < images_to_keep:
                images_kept += 1
            else:
                # Only the image payload is removed; other keys in the result stay.
                del result["base_64_image"]

tests/custom/test_client.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,6 @@ def test_browser_thinking() -> None:
178178
if __name__ == "__main__":
179179
test_ubuntu()
180180
test_browser()
181-
test_ubuntu_thinking()
182-
test_browser_thinking()
181+
# test_ubuntu_thinking()
182+
# test_browser_thinking()
183183
# test_windows()

0 commit comments

Comments
 (0)