@@ -359,6 +359,33 @@ def __init__(
359359 bootstrap_url : str | None = None ,
360360 ** kwargs ,
361361 ) -> None :
362+ # Use unified agent configuration for consistency across local and cloud modes
363+ from agent .ec_skills .browser_use_extension .agent_config import (
364+ get_ultra_aggressive_compaction_settings ,
365+ DEFAULT_MAX_CLICKABLE_ELEMENTS
366+ )
367+ import logging
368+ logger = logging .getLogger (__name__ )
369+
370+ if 'message_compaction' not in kwargs :
371+ compaction_settings = get_ultra_aggressive_compaction_settings ()
372+ kwargs ['message_compaction' ] = compaction_settings
373+ logger .warning (
374+ f"[CloudAgent] 🔧 Using unified MessageCompaction config: "
375+ f"compact_every={ compaction_settings .compact_every_n_steps } , "
376+ f"trigger={ compaction_settings .trigger_token_count } , "
377+ f"keep_items={ compaction_settings .keep_last_items } "
378+ )
379+
380+ # No max_history_items limit - let MessageCompaction handle it naturally
381+
382+ if 'max_clickable_elements_length' not in kwargs :
383+ kwargs ['max_clickable_elements_length' ] = DEFAULT_MAX_CLICKABLE_ELEMENTS
384+ logger .warning (
385+ f"[CloudAgent] 🔧 Using unified DOM config: "
386+ f"max_clickable_elements_length={ DEFAULT_MAX_CLICKABLE_ELEMENTS } "
387+ )
388+
362389 super ().__init__ (** kwargs )
363390 self .transport = transport
364391 self .run_id = run_id
@@ -468,6 +495,9 @@ async def _prepare_context(self, step_info: AgentStepInfo | None = None) -> Brow
468495 await self ._update_action_models_for_page (browser_state_summary .url )
469496
470497 page_filtered_actions = self .tools .registry .get_prompt_description (browser_state_summary .url )
498+
499+ # browser_use's MessageCompaction will automatically handle history management
500+ # No need for manual truncation - let the framework do its job
471501 self ._message_manager .create_state_messages (
472502 browser_state_summary = browser_state_summary ,
473503 model_output = self .state .last_model_output ,
@@ -1014,10 +1044,104 @@ async def _cloud_extract_llm(
10141044 content = content [start_from_char :]
10151045
10161046 # --- Smart truncation with context preservation ---
1017- MAX_CHAR_LIMIT = 100000
1047+ # Calculate max content limit based on model's context_length
1048+ # browser_use's MessageCompaction handles history automatically
1049+ # For CJK languages: 1 character ≈ 1.5-2.5 tokens (NOT 0.5 tokens!)
1050+ model_context_length = getattr (self .llm , 'context_length' , None ) or 65536
1051+
1052+ # Extraction is a SEPARATE LLM call with its own token budget:
1053+ # Request = system_prompt + extracted_content
1054+ # Response = structured extraction result
1055+ #
1056+ # Reserve tokens for:
1057+ # - System prompt: ~5000 tokens (extraction instructions + schema)
1058+ # - Response: ~4000 tokens (structured extraction output)
1059+ # - Safety margin: ~2000 tokens (for schema overhead, etc.)
1060+ # Total reserved: ~11000 tokens
1061+ EXTRACTION_SYSTEM_TOKENS = 5000
1062+ EXTRACTION_RESPONSE_TOKENS = 4000
1063+ EXTRACTION_SAFETY_MARGIN = 2000
1064+ extraction_reserved = EXTRACTION_SYSTEM_TOKENS + EXTRACTION_RESPONSE_TOKENS + EXTRACTION_SAFETY_MARGIN
1065+
1066+ available_tokens = max (model_context_length - extraction_reserved , 10000 )
1067+
1068+ # Auto-detect content type for more accurate token estimation
1069+ from agent .ec_skills .browser_use_extension .token_utils import (
1070+ tokens_to_chars , detect_content_type , estimate_tokens_auto
1071+ )
1072+
1073+ # Detect content type; the full content is passed in, so any sampling (e.g. first 2000 chars) happens inside detect_content_type - TODO confirm
1074+ content_type = detect_content_type (content )
1075+ _cloud_agent_log (
1076+ f'[CloudAgent] 🔍 Detected content type: { content_type } ' ,
1077+ level = 'info'
1078+ )
1079+
1080+ # Use detected content type for accurate char limit calculation
1081+ MAX_CHAR_LIMIT = tokens_to_chars (available_tokens , content_type = content_type )
1082+
1083+ _cloud_agent_log (
1084+ f'[CloudAgent] 📊 Extraction limit: model_context={ model_context_length } , '
1085+ f'reserved={ extraction_reserved } , available={ available_tokens } tokens, '
1086+ f'max_chars={ MAX_CHAR_LIMIT } , content_type={ content_type } ' ,
1087+ level = 'info'
1088+ )
1089+
1090+ # --- Intelligent Batched Extraction ---
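1091+ # For very large content (>50K chars and >1.5x MAX_CHAR_LIMIT), batched processing is intended to prevent
1092+ # timeouts, improve success rate, and be more efficient than pagination. NOTE: it is not implemented yet; the loop below always falls back to truncation.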
1093+ BATCH_THRESHOLD = 50000 # 50K chars threshold for batching
1094+ use_batching = len (content ) > BATCH_THRESHOLD and len (content ) > MAX_CHAR_LIMIT * 1.5
1095+
1096+ if use_batching and output_schema is not None :
1097+ # Calculate optimal batch size (slightly smaller than MAX_CHAR_LIMIT for safety)
1098+ batch_size = int (MAX_CHAR_LIMIT * 0.9 )
1099+ num_batches = (len (content ) + batch_size - 1 ) // batch_size # Ceiling division
1100+
1101+ _cloud_agent_log (
1102+ f'[CloudAgent] 📦 Large content detected ({ len (content ):,} chars), '
1103+ f'using batched extraction: { num_batches } batches × ~{ batch_size :,} chars' ,
1104+ level = 'info'
1105+ )
1106+
1107+ # Process each batch
1108+ batch_results = []
1109+ for batch_idx in range (num_batches ):
1110+ batch_start = batch_idx * batch_size
1111+ batch_end = min ((batch_idx + 1 ) * batch_size , len (content ))
1112+ batch_content = content [batch_start :batch_end ]
1113+
1114+ _cloud_agent_log (
1115+ f'[CloudAgent] 📦 Processing batch { batch_idx + 1 } /{ num_batches } : '
1116+ f'chars { batch_start :,} -{ batch_end :,} ({ len (batch_content ):,} chars)' ,
1117+ level = 'info'
1118+ )
1119+
1120+ # Extract from this batch (reuse extraction logic below)
1121+ # We'll set a flag to indicate batched mode
1122+ try :
1123+ # TODO: Process batch and collect results
1124+ # For now, fall back to normal truncation
1125+ _cloud_agent_log (
1126+ f'[CloudAgent] ⚠️ Batched extraction not fully implemented yet, '
1127+ f'falling back to truncation' ,
1128+ level = 'warning'
1129+ )
1130+ use_batching = False
1131+ break
1132+ except Exception as batch_err :
1133+ _cloud_agent_log (
1134+ f'[CloudAgent] ❌ Batch { batch_idx + 1 } failed: { batch_err } , '
1135+ f'falling back to truncation' ,
1136+ level = 'error'
1137+ )
1138+ use_batching = False
1139+ break
1140+
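1141+ # --- Standard Truncation (if not using batching) --- NOTE(review): use_batching is only reset inside the `output_schema is not None` branch, so oversized free-text content appears to skip this truncation - confirm and fix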
10181142 truncated = False
10191143 next_start = None
1020- if len (content ) > MAX_CHAR_LIMIT :
1144+ if not use_batching and len (content ) > MAX_CHAR_LIMIT :
10211145 truncate_at = MAX_CHAR_LIMIT
10221146 paragraph_break = content .rfind ('\n \n ' , MAX_CHAR_LIMIT - 500 , MAX_CHAR_LIMIT )
10231147 if paragraph_break > 0 :
@@ -1080,12 +1204,48 @@ async def _cloud_extract_llm(
10801204 f'<webpage_content>\n { content } \n </webpage_content>'
10811205 )
10821206
1207+ # Log detailed token usage for debugging with auto-detection
1208+ from agent .ec_skills .browser_use_extension .token_utils import estimate_tokens_auto
1209+
1210+ system_chars = len (system_prompt )
1211+ prompt_chars = len (prompt )
1212+ total_chars = system_chars + prompt_chars
1213+ # Use auto-detection for accurate token estimation
1214+ estimated_tokens = estimate_tokens_auto (system_prompt + prompt )
1215+ _cloud_agent_log (
1216+ f'[CloudAgent] 📤 LLM Request: system={ system_chars } chars, '
1217+ f'prompt={ prompt_chars } chars (content={ len (content )} chars), '
1218+ f'total={ total_chars } chars ≈ { estimated_tokens } tokens' ,
1219+ level = 'info'
1220+ )
1221+
1222+ # Calculate dynamic timeout based on content size and type
1223+ base_timeout = 60.0
1224+ if len (content ) < 10000 :
1225+ size_multiplier = 1.0
1226+ elif len (content ) < 50000 :
1227+ size_multiplier = 1.5
1228+ elif len (content ) < 100000 :
1229+ size_multiplier = 2.0
1230+ else :
1231+ size_multiplier = 3.0
1232+
1233+ # CJK content needs more processing time
1234+ type_multiplier = 1.2 if content_type == 'cjk' else 1.0
1235+
1236+ timeout_seconds = min (base_timeout * size_multiplier * type_multiplier , 300.0 )
1237+ _cloud_agent_log (
1238+ f'[CloudAgent] Extract timeout: { timeout_seconds :.1f} s '
1239+ f'(content: { len (content )} chars, type: { content_type } )' ,
1240+ level = 'info'
1241+ )
1242+
10831243 response = await asyncio .wait_for (
10841244 page_extraction_llm .ainvoke (
10851245 [SystemMessage (content = system_prompt ), UserMessage (content = prompt )],
10861246 output_format = structured_model ,
10871247 ),
1088- timeout = 120.0 ,
1248+ timeout = timeout_seconds ,
10891249 )
10901250
10911251 result_data = response .completion .model_dump (mode = 'json' )
@@ -1143,11 +1303,44 @@ async def _cloud_extract_llm(
11431303 f'<webpage_content>\n { content } \n </webpage_content>'
11441304 )
11451305
1306+ # Log detailed token usage for debugging with auto-detection
1307+ system_chars = len (system_prompt )
1308+ prompt_chars = len (prompt )
1309+ total_chars = system_chars + prompt_chars
1310+ # Use auto-detection for accurate token estimation
1311+ from agent .ec_skills .browser_use_extension .token_utils import estimate_tokens_auto
1312+ estimated_tokens = estimate_tokens_auto (system_prompt + prompt )
1313+ _cloud_agent_log (
1314+ f'[CloudAgent] 📤 LLM Request (free-text): system={ system_chars } chars, '
1315+ f'prompt={ prompt_chars } chars (content={ len (content )} chars), '
1316+ f'total={ total_chars } chars ≈ { estimated_tokens } tokens' ,
1317+ level = 'info'
1318+ )
1319+
1320+ # Calculate dynamic timeout (same logic as structured extraction)
1321+ base_timeout = 60.0
1322+ if len (content ) < 10000 :
1323+ size_multiplier = 1.0
1324+ elif len (content ) < 50000 :
1325+ size_multiplier = 1.5
1326+ elif len (content ) < 100000 :
1327+ size_multiplier = 2.0
1328+ else :
1329+ size_multiplier = 3.0
1330+
1331+ type_multiplier = 1.2 if content_type == 'cjk' else 1.0
1332+ timeout_seconds = min (base_timeout * size_multiplier * type_multiplier , 300.0 )
1333+ _cloud_agent_log (
1334+ f'[CloudAgent] Extract timeout: { timeout_seconds :.1f} s '
1335+ f'(content: { len (content )} chars, type: { content_type } )' ,
1336+ level = 'info'
1337+ )
1338+
11461339 response = await asyncio .wait_for (
11471340 page_extraction_llm .ainvoke (
1148- [SystemMessage (content = system_prompt ), UserMessage (content = prompt )]
1341+ [SystemMessage (content = system_prompt ), UserMessage (content = prompt )],
11491342 ),
1150- timeout = 120.0 ,
1343+ timeout = timeout_seconds ,
11511344 )
11521345
11531346 extracted_content = (
0 commit comments