@@ -1667,6 +1667,7 @@ def act(
1667
1667
on_step : Optional [Callable [[Step ], None ]] = None ,
1668
1668
temperature : Optional [float ] = None ,
1669
1669
max_tokens : Optional [int ] = None ,
1670
+ images_to_keep : Optional [int ] = 4 ,
1670
1671
request_options : Optional [RequestOptions ] = None ,
1671
1672
) -> ActResponse [SchemaT ]:
1672
1673
"""
@@ -1682,6 +1683,7 @@ def act(
1682
1683
on_step: Callback for each step of the conversation
1683
1684
temperature: Optional temperature parameter for the model
1684
1685
max_tokens: Optional max tokens parameter for the model
1686
+ images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
1685
1687
request_options: Optional request configuration
1686
1688
1687
1689
Returns:
@@ -1706,6 +1708,7 @@ def act(
1706
1708
on_step = on_step ,
1707
1709
temperature = temperature ,
1708
1710
max_tokens = max_tokens ,
1711
+ images_to_keep = images_to_keep ,
1709
1712
request_options = request_options ,
1710
1713
):
1711
1714
steps .append (step )
@@ -1743,6 +1746,8 @@ def act(
1743
1746
total_tokens = total_tokens ,
1744
1747
)
1745
1748
1749
+ _filter_images (result_messages , images_to_keep )
1750
+
1746
1751
return ActResponse (
1747
1752
messages = result_messages , steps = steps , text = text , output = output , usage = usage
1748
1753
)
@@ -1759,6 +1764,7 @@ def act_stream(
1759
1764
on_step : Optional [Callable [[Step ], None ]] = None ,
1760
1765
temperature : Optional [float ] = None ,
1761
1766
max_tokens : Optional [int ] = None ,
1767
+ images_to_keep : Optional [int ] = 4 ,
1762
1768
request_options : Optional [RequestOptions ] = None ,
1763
1769
) -> Generator [Step , None , None ]:
1764
1770
"""
@@ -1774,6 +1780,7 @@ def act_stream(
1774
1780
on_step: Callback for each step of the conversation
1775
1781
temperature: Optional temperature parameter for the model
1776
1782
max_tokens: Optional max tokens parameter for the model
1783
+ images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
1777
1784
request_options: Optional request configuration
1778
1785
1779
1786
Yields:
@@ -1813,6 +1820,8 @@ def act_stream(
1813
1820
while True :
1814
1821
# Convert tools to ApiTools
1815
1822
api_tools = [ApiTool .from_tool (tool ) for tool in current_tools ]
1823
+
1824
+ _filter_images (current_messages , images_to_keep )
1816
1825
1817
1826
request = SingleActRequest (
1818
1827
model = model ,
@@ -2076,6 +2085,7 @@ async def act(
2076
2085
on_step : Optional [Callable [[Step ], None ]] = None ,
2077
2086
temperature : Optional [float ] = None ,
2078
2087
max_tokens : Optional [int ] = None ,
2088
+ images_to_keep : Optional [int ] = 4 ,
2079
2089
request_options : Optional [RequestOptions ] = None ,
2080
2090
) -> ActResponse [SchemaT ]:
2081
2091
"""
@@ -2091,6 +2101,7 @@ async def act(
2091
2101
on_step: Callback for each step of the conversation
2092
2102
temperature: Optional temperature parameter for the model
2093
2103
max_tokens: Optional max tokens parameter for the model
2104
+ images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
2094
2105
request_options: Optional request configuration
2095
2106
2096
2107
Returns:
@@ -2112,9 +2123,10 @@ async def act(
2112
2123
prompt = prompt ,
2113
2124
messages = messages ,
2114
2125
schema = schema ,
2126
+ on_step = on_step ,
2115
2127
temperature = temperature ,
2116
2128
max_tokens = max_tokens ,
2117
- on_step = on_step ,
2129
+ images_to_keep = images_to_keep ,
2118
2130
request_options = request_options ,
2119
2131
):
2120
2132
steps .append (step )
@@ -2152,6 +2164,8 @@ async def act(
2152
2164
total_tokens = total_tokens ,
2153
2165
)
2154
2166
2167
+ _filter_images (result_messages , images_to_keep )
2168
+
2155
2169
return ActResponse (
2156
2170
messages = result_messages , steps = steps , text = text , output = output , usage = usage
2157
2171
)
@@ -2168,6 +2182,7 @@ async def act_stream(
2168
2182
on_step : Optional [Callable [[Step ], None ]] = None ,
2169
2183
temperature : Optional [float ] = None ,
2170
2184
max_tokens : Optional [int ] = None ,
2185
+ images_to_keep : Optional [int ] = 4 ,
2171
2186
request_options : Optional [RequestOptions ] = None ,
2172
2187
) -> AsyncGenerator [Step , None ]:
2173
2188
"""
@@ -2183,6 +2198,7 @@ async def act_stream(
2183
2198
on_step: Callback for each step of the conversation
2184
2199
temperature: Optional temperature parameter for the model
2185
2200
max_tokens: Optional max tokens parameter for the model
2201
+ images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4
2186
2202
request_options: Optional request configuration
2187
2203
2188
2204
Yields:
@@ -2223,6 +2239,8 @@ async def act_stream(
2223
2239
# Convert tools to ApiTools
2224
2240
api_tools = [ApiTool .from_tool (tool ) for tool in current_tools ]
2225
2241
2242
+ _filter_images (current_messages , images_to_keep )
2243
+
2226
2244
request = SingleActRequest (
2227
2245
model = model ,
2228
2246
system = system ,
@@ -2321,7 +2339,6 @@ async def act_stream(
2321
2339
if not has_tool_calls or has_structured_output :
2322
2340
break
2323
2341
2324
-
2325
2342
def _create_request_from_action (action ):
2326
2343
"""Helper function to create a request object from an action object."""
2327
2344
if isinstance (action , MoveMouseAction ):
@@ -2369,3 +2386,31 @@ def _create_request_from_action(action):
2369
2386
return Request_GetCursorPosition ()
2370
2387
else :
2371
2388
return None
2389
+
2390
def _filter_images(messages: List[Message], images_to_keep: Optional[int]) -> None:
    """
    Strip older base64 images from tool results, keeping only the most recent ones.

    Messages are scanned from newest to oldest (and, within each ToolMessage,
    tool results from last to first). The first ``images_to_keep`` results
    whose ``result`` dict has a ``"base_64_image"`` key are left untouched;
    for every older one that key is deleted. The messages are modified in
    place and nothing is returned.

    Args:
        messages: List of messages to filter (mutated in place)
        images_to_keep: Maximum number of images to keep. ``None`` means
            "no limit" and leaves all images in place; zero or a negative
            value removes every image.
    """
    # Callers declare this parameter as Optional[int]; comparing an int with
    # None would raise TypeError, so treat None as "filtering disabled".
    if images_to_keep is None:
        return

    remaining = images_to_keep

    # Newest first, so the retained images are the most recent ones.
    for msg in reversed(messages):
        if not isinstance(msg, ToolMessage) or not msg.content:
            continue

        for tool_result in reversed(msg.content):
            result = getattr(tool_result, "result", None)
            if not isinstance(result, dict) or "base_64_image" not in result:
                continue

            if remaining > 0:
                remaining -= 1
            else:
                # Over budget: drop only the image payload and keep the
                # rest of the tool result intact.
                del result["base_64_image"]
0 commit comments