Skip to content

Commit 8b5dfb5

Browse files
Merge pull request #212 from scszcoder/dev
Dev
2 parents be062b7 + f39e5a8 commit 8b5dfb5

26 files changed

Lines changed: 4030 additions & 108 deletions

agent/ec_skills/browser_use_extension/agent_config.py

Lines changed: 557 additions & 0 deletions
Large diffs are not rendered by default.

agent/ec_skills/browser_use_extension/cloud_agent.py

Lines changed: 198 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,33 @@ def __init__(
359359
bootstrap_url: str | None = None,
360360
**kwargs,
361361
) -> None:
362+
# Use unified agent configuration for consistency across local and cloud modes
363+
from agent.ec_skills.browser_use_extension.agent_config import (
364+
get_ultra_aggressive_compaction_settings,
365+
DEFAULT_MAX_CLICKABLE_ELEMENTS
366+
)
367+
import logging
368+
logger = logging.getLogger(__name__)
369+
370+
if 'message_compaction' not in kwargs:
371+
compaction_settings = get_ultra_aggressive_compaction_settings()
372+
kwargs['message_compaction'] = compaction_settings
373+
logger.warning(
374+
f"[CloudAgent] 🔧 Using unified MessageCompaction config: "
375+
f"compact_every={compaction_settings.compact_every_n_steps}, "
376+
f"trigger={compaction_settings.trigger_token_count}, "
377+
f"keep_items={compaction_settings.keep_last_items}"
378+
)
379+
380+
# No max_history_items limit - let MessageCompaction handle it naturally
381+
382+
if 'max_clickable_elements_length' not in kwargs:
383+
kwargs['max_clickable_elements_length'] = DEFAULT_MAX_CLICKABLE_ELEMENTS
384+
logger.warning(
385+
f"[CloudAgent] 🔧 Using unified DOM config: "
386+
f"max_clickable_elements_length={DEFAULT_MAX_CLICKABLE_ELEMENTS}"
387+
)
388+
362389
super().__init__(**kwargs)
363390
self.transport = transport
364391
self.run_id = run_id
@@ -468,6 +495,9 @@ async def _prepare_context(self, step_info: AgentStepInfo | None = None) -> Brow
468495
await self._update_action_models_for_page(browser_state_summary.url)
469496

470497
page_filtered_actions = self.tools.registry.get_prompt_description(browser_state_summary.url)
498+
499+
# browser_use's MessageCompaction will automatically handle history management
500+
# No need for manual truncation - let the framework do its job
471501
self._message_manager.create_state_messages(
472502
browser_state_summary=browser_state_summary,
473503
model_output=self.state.last_model_output,
@@ -1014,10 +1044,104 @@ async def _cloud_extract_llm(
10141044
content = content[start_from_char:]
10151045

10161046
# --- Smart truncation with context preservation ---
1017-
MAX_CHAR_LIMIT = 100000
1047+
# Calculate max content limit based on model's context_length
1048+
# browser_use's MessageCompaction handles history automatically
1049+
# For CJK languages: 1 character ≈ 1.5-2.5 tokens (NOT 0.5 tokens!)
1050+
model_context_length = getattr(self.llm, 'context_length', None) or 65536
1051+
1052+
# Extraction is a SEPARATE LLM call with its own token budget:
1053+
# Request = system_prompt + extracted_content
1054+
# Response = structured extraction result
1055+
#
1056+
# Reserve tokens for:
1057+
# - System prompt: ~5000 tokens (extraction instructions + schema)
1058+
# - Response: ~4000 tokens (structured extraction output)
1059+
# - Safety margin: ~2000 tokens (for schema overhead, etc.)
1060+
# Total reserved: ~11000 tokens
1061+
EXTRACTION_SYSTEM_TOKENS = 5000
1062+
EXTRACTION_RESPONSE_TOKENS = 4000
1063+
EXTRACTION_SAFETY_MARGIN = 2000
1064+
extraction_reserved = EXTRACTION_SYSTEM_TOKENS + EXTRACTION_RESPONSE_TOKENS + EXTRACTION_SAFETY_MARGIN
1065+
1066+
available_tokens = max(model_context_length - extraction_reserved, 10000)
1067+
1068+
# Auto-detect content type for more accurate token estimation
1069+
from agent.ec_skills.browser_use_extension.token_utils import (
1070+
tokens_to_chars, detect_content_type, estimate_tokens_auto
1071+
)
1072+
1073+
# Detect content type from sample (first 2000 chars)
1074+
content_type = detect_content_type(content)
1075+
_cloud_agent_log(
1076+
f'[CloudAgent] 🔍 Detected content type: {content_type}',
1077+
level='info'
1078+
)
1079+
1080+
# Use detected content type for accurate char limit calculation
1081+
MAX_CHAR_LIMIT = tokens_to_chars(available_tokens, content_type=content_type)
1082+
1083+
_cloud_agent_log(
1084+
f'[CloudAgent] 📊 Extraction limit: model_context={model_context_length}, '
1085+
f'reserved={extraction_reserved}, available={available_tokens} tokens, '
1086+
f'max_chars={MAX_CHAR_LIMIT}, content_type={content_type}',
1087+
level='info'
1088+
)
1089+
1090+
# --- Intelligent Batched Extraction ---
1091+
# For very large content (>50K chars), use batched processing to prevent timeout
1092+
# and improve success rate. This is more efficient than pagination.
1093+
BATCH_THRESHOLD = 50000 # 50K chars threshold for batching
1094+
use_batching = len(content) > BATCH_THRESHOLD and len(content) > MAX_CHAR_LIMIT * 1.5
1095+
1096+
if use_batching and output_schema is not None:
1097+
# Calculate optimal batch size (slightly smaller than MAX_CHAR_LIMIT for safety)
1098+
batch_size = int(MAX_CHAR_LIMIT * 0.9)
1099+
num_batches = (len(content) + batch_size - 1) // batch_size # Ceiling division
1100+
1101+
_cloud_agent_log(
1102+
f'[CloudAgent] 📦 Large content detected ({len(content):,} chars), '
1103+
f'using batched extraction: {num_batches} batches × ~{batch_size:,} chars',
1104+
level='info'
1105+
)
1106+
1107+
# Process each batch
1108+
batch_results = []
1109+
for batch_idx in range(num_batches):
1110+
batch_start = batch_idx * batch_size
1111+
batch_end = min((batch_idx + 1) * batch_size, len(content))
1112+
batch_content = content[batch_start:batch_end]
1113+
1114+
_cloud_agent_log(
1115+
f'[CloudAgent] 📦 Processing batch {batch_idx + 1}/{num_batches}: '
1116+
f'chars {batch_start:,}-{batch_end:,} ({len(batch_content):,} chars)',
1117+
level='info'
1118+
)
1119+
1120+
# Extract from this batch (reuse extraction logic below)
1121+
# We'll set a flag to indicate batched mode
1122+
try:
1123+
# TODO: Process batch and collect results
1124+
# For now, fall back to normal truncation
1125+
_cloud_agent_log(
1126+
f'[CloudAgent] ⚠️ Batched extraction not fully implemented yet, '
1127+
f'falling back to truncation',
1128+
level='warning'
1129+
)
1130+
use_batching = False
1131+
break
1132+
except Exception as batch_err:
1133+
_cloud_agent_log(
1134+
f'[CloudAgent] ❌ Batch {batch_idx + 1} failed: {batch_err}, '
1135+
f'falling back to truncation',
1136+
level='error'
1137+
)
1138+
use_batching = False
1139+
break
1140+
1141+
# --- Standard Truncation (if not using batching) ---
10181142
truncated = False
10191143
next_start = None
1020-
if len(content) > MAX_CHAR_LIMIT:
1144+
if not use_batching and len(content) > MAX_CHAR_LIMIT:
10211145
truncate_at = MAX_CHAR_LIMIT
10221146
paragraph_break = content.rfind('\n\n', MAX_CHAR_LIMIT - 500, MAX_CHAR_LIMIT)
10231147
if paragraph_break > 0:
@@ -1080,12 +1204,48 @@ async def _cloud_extract_llm(
10801204
f'<webpage_content>\n{content}\n</webpage_content>'
10811205
)
10821206

1207+
# Log detailed token usage for debugging with auto-detection
1208+
from agent.ec_skills.browser_use_extension.token_utils import estimate_tokens_auto
1209+
1210+
system_chars = len(system_prompt)
1211+
prompt_chars = len(prompt)
1212+
total_chars = system_chars + prompt_chars
1213+
# Use auto-detection for accurate token estimation
1214+
estimated_tokens = estimate_tokens_auto(system_prompt + prompt)
1215+
_cloud_agent_log(
1216+
f'[CloudAgent] 📤 LLM Request: system={system_chars} chars, '
1217+
f'prompt={prompt_chars} chars (content={len(content)} chars), '
1218+
f'total={total_chars} chars ≈ {estimated_tokens} tokens',
1219+
level='info'
1220+
)
1221+
1222+
# Calculate dynamic timeout based on content size and type
1223+
base_timeout = 60.0
1224+
if len(content) < 10000:
1225+
size_multiplier = 1.0
1226+
elif len(content) < 50000:
1227+
size_multiplier = 1.5
1228+
elif len(content) < 100000:
1229+
size_multiplier = 2.0
1230+
else:
1231+
size_multiplier = 3.0
1232+
1233+
# CJK content needs more processing time
1234+
type_multiplier = 1.2 if content_type == 'cjk' else 1.0
1235+
1236+
timeout_seconds = min(base_timeout * size_multiplier * type_multiplier, 300.0)
1237+
_cloud_agent_log(
1238+
f'[CloudAgent] Extract timeout: {timeout_seconds:.1f}s '
1239+
f'(content: {len(content)} chars, type: {content_type})',
1240+
level='info'
1241+
)
1242+
10831243
response = await asyncio.wait_for(
10841244
page_extraction_llm.ainvoke(
10851245
[SystemMessage(content=system_prompt), UserMessage(content=prompt)],
10861246
output_format=structured_model,
10871247
),
1088-
timeout=120.0,
1248+
timeout=timeout_seconds,
10891249
)
10901250

10911251
result_data = response.completion.model_dump(mode='json')
@@ -1143,11 +1303,44 @@ async def _cloud_extract_llm(
11431303
f'<webpage_content>\n{content}\n</webpage_content>'
11441304
)
11451305

1306+
# Log detailed token usage for debugging with auto-detection
1307+
system_chars = len(system_prompt)
1308+
prompt_chars = len(prompt)
1309+
total_chars = system_chars + prompt_chars
1310+
# Use auto-detection for accurate token estimation
1311+
from agent.ec_skills.browser_use_extension.token_utils import estimate_tokens_auto
1312+
estimated_tokens = estimate_tokens_auto(system_prompt + prompt)
1313+
_cloud_agent_log(
1314+
f'[CloudAgent] 📤 LLM Request (free-text): system={system_chars} chars, '
1315+
f'prompt={prompt_chars} chars (content={len(content)} chars), '
1316+
f'total={total_chars} chars ≈ {estimated_tokens} tokens',
1317+
level='info'
1318+
)
1319+
1320+
# Calculate dynamic timeout (same logic as structured extraction)
1321+
base_timeout = 60.0
1322+
if len(content) < 10000:
1323+
size_multiplier = 1.0
1324+
elif len(content) < 50000:
1325+
size_multiplier = 1.5
1326+
elif len(content) < 100000:
1327+
size_multiplier = 2.0
1328+
else:
1329+
size_multiplier = 3.0
1330+
1331+
type_multiplier = 1.2 if content_type == 'cjk' else 1.0
1332+
timeout_seconds = min(base_timeout * size_multiplier * type_multiplier, 300.0)
1333+
_cloud_agent_log(
1334+
f'[CloudAgent] Extract timeout: {timeout_seconds:.1f}s '
1335+
f'(content: {len(content)} chars, type: {content_type})',
1336+
level='info'
1337+
)
1338+
11461339
response = await asyncio.wait_for(
11471340
page_extraction_llm.ainvoke(
1148-
[SystemMessage(content=system_prompt), UserMessage(content=prompt)]
1341+
[SystemMessage(content=system_prompt), UserMessage(content=prompt)],
11491342
),
1150-
timeout=120.0,
1343+
timeout=timeout_seconds,
11511344
)
11521345

11531346
extracted_content = (

0 commit comments

Comments (0)