8080 )
8181except Exception :
8282 _CDP_EVALUATE_RECOVERY_THRESHOLD = 2
def _feige_cooldown_seconds_from_env(name: str, default: float) -> float:
    """Read a non-negative, finite number of seconds from environment variable *name*.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, unparsable, or
            not a finite number.

    Returns:
        The parsed value clamped to ``>= 0.0``, or *default*.
    """
    import math  # function-scope: this module's import block is not visible from here

    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        seconds = float(raw)
    except (TypeError, ValueError):
        return default
    # float() happily parses "nan" and "inf". NaN would be clamped to 0.0 by
    # max() (silently disabling the cooldown) and inf would produce a deadline
    # that never expires, so treat both as configuration errors.
    if not math.isfinite(seconds):
        return default
    return max(0.0, seconds)


# Recovery threshold applied by _record_cdp_evaluate_recovery_signal to trace
# labels starting with "feige_"; a value <= 0 disables the signal for them.
try:
    _FEIGE_CDP_EVALUATE_RECOVERY_THRESHOLD = max(
        0, int(os.getenv("ECAN_FEIGE_CDP_EVALUATE_RECOVERY_THRESHOLD", "1"))
    )
except (TypeError, ValueError):
    _FEIGE_CDP_EVALUATE_RECOVERY_THRESHOLD = 1

# Default length, in seconds, of the cooldown started by mark_feige_cdp_unhealthy.
_FEIGE_CDP_HEALTH_COOLDOWN_S = _feige_cooldown_seconds_from_env(
    "ECAN_FEIGE_CDP_HEALTH_COOLDOWN_S", 25.0
)

# Consecutive evaluate-timeout counts, keyed by id(browser_session).
_CDP_EVALUATE_TIMEOUT_RECOVERY_LOCK = threading.Lock()
_CDP_EVALUATE_TIMEOUT_RECOVERY: Dict[int, int] = {}

# Send-path cooldown deadline; 0.0 means no cooldown is pending.
_FEIGE_SEND_CDP_TIMEOUT_LOCK = threading.Lock()
_FEIGE_SEND_CDP_TIMEOUT_UNTIL = 0.0

# Shared Feige CDP health state: a time.monotonic()-based deadline plus the
# reason recorded for the current cooldown. Guarded by _FEIGE_CDP_HEALTH_LOCK.
_FEIGE_CDP_HEALTH_LOCK = threading.Lock()
_FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL = 0.0
_FEIGE_CDP_HEALTH_REASON = ""
87102from agent .ec_skills .label_utils .print_label import (
88103 print_labels_async ,
89104 reformat_labels_async ,
@@ -1006,13 +1021,54 @@ def _record_feige_send_cdp_success() -> None:
10061021 _FEIGE_SEND_CDP_TIMEOUT_UNTIL = 0.0
10071022
10081023
def feige_cdp_health_cooldown_remaining() -> float:
    """Return the seconds left on the Feige CDP health cooldown.

    The monotonic clock is sampled before the health lock is taken, and the
    stored deadline is read under the lock.

    Returns:
        The positive time remaining until the stored deadline, or ``0.0``
        when the deadline has already passed (or was never set).
    """
    checked_at = _time.monotonic()
    with _FEIGE_CDP_HEALTH_LOCK:
        deadline = _FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL
    seconds_left = deadline - checked_at
    if seconds_left > 0.0:
        return seconds_left
    return 0.0
1029+
1030+
def mark_feige_cdp_unhealthy(reason: str = "", *, cooldown_s: float | None = None) -> float:
    """Start or extend the shared Feige CDP health cooldown.

    Args:
        reason: Short tag describing why CDP is considered unhealthy. A falsy
            value leaves the previously stored reason untouched.
        cooldown_s: Cooldown length in seconds. ``None`` uses
            ``_FEIGE_CDP_HEALTH_COOLDOWN_S``; negative values are clamped to 0.

    Returns:
        Seconds remaining on the cooldown after this call. ``0.0`` is returned
        without recording or logging anything when the effective cooldown
        length is zero.
    """
    global _FEIGE_CDP_HEALTH_REASON
    global _FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL
    cooldown = _FEIGE_CDP_HEALTH_COOLDOWN_S if cooldown_s is None else max(0.0, float(cooldown_s))
    if cooldown <= 0.0:
        return 0.0
    now = _time.monotonic()
    until = now + cooldown
    with _FEIGE_CDP_HEALTH_LOCK:
        # Only ever move the deadline later: a short cooldown must not shrink
        # a longer one that is already running.
        _FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL = max(_FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL, until)
        if reason:
            _FEIGE_CDP_HEALTH_REASON = str(reason)
        # Snapshot everything the log line needs while the state is consistent.
        remaining = _FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL - now
        active_reason = _FEIGE_CDP_HEALTH_REASON
    # Log from the snapshot after releasing the lock, so slow log handlers do
    # not stall cooldown readers and the shared reason is never read unguarded.
    logger.warning(
        f"[Feige] CDP health cooldown active for {remaining:.1f} s "
        f"reason={active_reason!r} "
    )
    return remaining if remaining > 0.0 else 0.0
1049+
1050+
def mark_feige_cdp_healthy() -> None:
    """Clear the Feige CDP health cooldown.

    Under the health lock, resets the stored deadline to ``0.0`` and the
    stored reason to the empty string.
    """
    global _FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL, _FEIGE_CDP_HEALTH_REASON
    with _FEIGE_CDP_HEALTH_LOCK:
        _FEIGE_CDP_HEALTH_UNHEALTHY_UNTIL, _FEIGE_CDP_HEALTH_REASON = 0.0, ""
1057+
1058+
10091059def _record_cdp_evaluate_recovery_signal (browser_session : Any , trace_label : str , phase : str ) -> None :
1010- if _CDP_EVALUATE_RECOVERY_THRESHOLD <= 0 or browser_session is None :
1060+ label = str (trace_label or "" )
1061+ threshold = (
1062+ _FEIGE_CDP_EVALUATE_RECOVERY_THRESHOLD
1063+ if label .startswith ("feige_" )
1064+ else _CDP_EVALUATE_RECOVERY_THRESHOLD
1065+ )
1066+ if threshold <= 0 or browser_session is None :
10111067 return
10121068 session_key = id (browser_session )
10131069 with _CDP_EVALUATE_TIMEOUT_RECOVERY_LOCK :
10141070 count = _CDP_EVALUATE_TIMEOUT_RECOVERY .get (session_key , 0 ) + 1
1015- if count < _CDP_EVALUATE_RECOVERY_THRESHOLD :
1071+ if count < threshold :
10161072 _CDP_EVALUATE_TIMEOUT_RECOVERY [session_key ] = count
10171073 return
10181074 _CDP_EVALUATE_TIMEOUT_RECOVERY .pop (session_key , None )
@@ -1341,6 +1397,10 @@ async def _run_with_optional_operation_lock() -> Any:
13411397 timings ["pending_pruned_on_timeout" ] = _prune_cdp_pending_requests (
13421398 cdp_client_ref
13431399 )
1400+ if str (trace_label or "" ).startswith ("feige_" ):
1401+ mark_feige_cdp_unhealthy (
1402+ f"{ trace_label or 'feige' } :{ current_phase } :timeout"
1403+ )
13441404 _record_cdp_evaluate_recovery_signal (browser_session , trace_label , current_phase )
13451405 _emit_trace (
13461406 ok = False ,
@@ -1354,6 +1414,8 @@ async def _run_with_optional_operation_lock() -> Any:
13541414 except Exception as exc :
13551415 _emit_trace (ok = False , timed_out = False , error = str (exc ))
13561416 raise
1417+ if str (trace_label or "" ).startswith ("feige_" ):
1418+ mark_feige_cdp_healthy ()
13571419 _emit_trace (ok = True , timed_out = False )
13581420 value = result .get ("result" , {}).get ("value" , "" )
13591421 if isinstance (value , str ):
@@ -3259,6 +3321,19 @@ async def feige_list_sessions(params: FeigeListSessionsAction, browser_session:
32593321)
32603322async def feige_open_session (params : FeigeOpenSessionAction , browser_session : BrowserSession ) -> ActionResult :
32613323 try :
3324+ cooldown_remaining = feige_cdp_health_cooldown_remaining ()
3325+ if cooldown_remaining > 0.0 :
3326+ logger .warning (
3327+ f"[Feige] feige_open_session: CDP health cooldown active "
3328+ f"for { cooldown_remaining :.1f} s; skipping open for "
3329+ f"{ str (params .customer_name or '' )!r} "
3330+ )
3331+ return ActionResult (
3332+ error = (
3333+ "feige_open_session: cdp_health_cooldown_active "
3334+ f"{ cooldown_remaining :.1f} s"
3335+ )
3336+ )
32623337 name_js = json .dumps (params .customer_name , ensure_ascii = False ) if params .customer_name else "null"
32633338 idx_js = str (params .session_index ) if params .session_index is not None else "-1"
32643339 js = _FEIGE_OPEN_SESSION_JS .replace ("CUSTOMER_NAME" , name_js ).replace ("SESSION_INDEX" , idx_js )
@@ -3892,10 +3967,13 @@ async def feige_send_message(params: FeigeSendMessageAction, browser_session: Br
38923967 response_preview = str (getattr (params , "text" , "" ) or "" ),
38933968 response_len = len (str (getattr (params , "text" , "" ) or "" )),
38943969 )
3895- cooldown_remaining = _feige_send_cdp_timeout_remaining ()
3970+ cooldown_remaining = max (
3971+ _feige_send_cdp_timeout_remaining (),
3972+ feige_cdp_health_cooldown_remaining (),
3973+ )
38963974 if cooldown_remaining > 0.0 :
38973975 logger .warning (
3898- f"[Feige] feige_send_message: CDP timeout cooldown active "
3976+ f"[Feige] feige_send_message: CDP cooldown active "
38993977 f"for { cooldown_remaining :.1f} s; skipping send for "
39003978 f"{ expected_customer !r} "
39013979 )
0 commit comments