1- """PG 连接池诊断:周期汇总 + 慢持连统计(默认 INFO,便于长期观测背压 )。"""
1+ """PG 连接池诊断:周期汇总 + 慢持连统计(健康态 DEBUG,异常态 INFO )。"""
22
33from __future__ import annotations
44
1212
1313from src .foundation .config .repo_settings import repo_env_raw_value
1414
15- _TICK_SEC = 60 .0
15+ _TICK_SEC_DEFAULT = 300 .0
1616_diag_task : asyncio .Task [None ] | None = None
1717_bound = False
1818
@@ -34,6 +34,37 @@ def session_hold_warn_ms() -> float:
3434 return 500.0
3535
3636
def pool_diag_tick_sec() -> float:
    """Return the interval, in seconds, between pool diagnostics ticks.

    The value is read from the ``PG_POOL_DIAG_TICK_SEC`` setting through
    ``repo_env_raw_value``. A setting that parses as a finite number is
    clamped to a minimum of 30 seconds. A missing, unparsable, or
    non-finite (``inf`` / ``nan``) setting yields ``_TICK_SEC_DEFAULT``.

    Returns:
        The tick interval in seconds; always a finite float.
    """
    import math  # local import so the block does not depend on file-level imports

    raw = repo_env_raw_value("PG_POOL_DIAG_TICK_SEC")
    if raw is None:
        return _TICK_SEC_DEFAULT
    try:
        seconds = float(str(raw).strip())
    except ValueError:
        return _TICK_SEC_DEFAULT
    # float() happily parses "inf" and "nan". An infinite interval would make
    # the diagnostics sleep never finish and makes int(<interval>) raise
    # OverflowError, so treat non-finite input like any other invalid value.
    if not math.isfinite(seconds):
        return _TICK_SEC_DEFAULT
    return max(30.0, seconds)
45+
46+
def pool_diag_tick_notable(
    *,
    under_pressure: bool,
    idle_in_tx: int | None,
    slow_sessions: int,
    remote_skipped_pressure: int,
    remote_skipped_busy: int,
    mirror_skip: int,
    learn_pool_wait: int,
) -> bool:
    """Tell whether a diagnostics tick carries anything worth highlighting.

    A tick is notable when ``under_pressure`` is set or when any of the
    supplied counters is strictly positive. ``idle_in_tx`` may be ``None``,
    in which case it is treated as zero.

    Returns:
        ``True`` if at least one signal is raised, ``False`` otherwise.
    """
    if under_pressure:
        return True
    counters = (
        slow_sessions,
        idle_in_tx or 0,  # None (unknown) counts as "nothing observed"
        remote_skipped_pressure,
        remote_skipped_busy,
        mirror_skip,
        learn_pool_wait,
    )
    return any(count > 0 for count in counters)
66+
67+
def _is_ignored_caller_path(path: str) -> bool:
    """Return True when *path* should be ignored as a caller location.

    A path is ignored if it ends with one of ``_CALLER_SCAN_SKIP_SUFFIXES``
    or contains a ``/site-packages/`` segment.
    """
    if path.endswith(_CALLER_SCAN_SKIP_SUFFIXES):
        return True
    return "/site-packages/" in path
3970
@@ -161,7 +192,20 @@ async def emit_pool_diagnostics_tick() -> None:
161192 if not slow_top :
162193 slow_top = "-"
163194
164- logger .info (
195+ skipped_pressure = int (remote .get ("skipped_pressure" , 0 ))
196+ skipped_busy = int (remote .get ("skipped_busy" , 0 ))
197+ learn_pool_wait = int (learn .get ("learn_pool_wait_spins" , 0 ) or 0 )
198+ notable = pool_diag_tick_notable (
199+ under_pressure = bool (budget .get ("under_pressure" )),
200+ idle_in_tx = idle_tx ,
201+ slow_sessions = _slow_session_total ,
202+ remote_skipped_pressure = skipped_pressure ,
203+ remote_skipped_busy = skipped_busy ,
204+ mirror_skip = _mirror_skipped_pressure ,
205+ learn_pool_wait = learn_pool_wait ,
206+ )
207+ diag_log = logger .info if notable else logger .debug
208+ diag_log (
165209 "pg pool diag: checked_out={}/{} util={} idle_in_tx={} pg_wait=[{}] "
166210 "remote_skip_pressure={} remote_skip_busy={} mirror_skip={} "
167211 "slow_sessions={} slow_max_ms={:.0f} learn_q={} learn_pool_wait={} slow_top=[{}]" ,
@@ -170,13 +214,13 @@ async def emit_pool_diagnostics_tick() -> None:
170214 util_pct ,
171215 idle_tx if idle_tx is not None else "?" ,
172216 wait_s ,
173- remote . get ( " skipped_pressure" , 0 ) ,
174- remote . get ( " skipped_busy" , 0 ) ,
217+ skipped_pressure ,
218+ skipped_busy ,
175219 _mirror_skipped_pressure ,
176220 _slow_session_total ,
177221 _slow_hold_max_ms ,
178222 learn .get ("learn_queue_size" , "?" ),
179- learn . get ( "learn_pool_wait_spins" , 0 ) ,
223+ learn_pool_wait ,
180224 slow_top ,
181225 )
182226
@@ -195,13 +239,13 @@ async def emit_pool_diagnostics_tick() -> None:
195239
196240
async def pool_diagnostics_loop() -> None:
    """Emit pool diagnostics forever, one tick per configured interval.

    Each iteration waits ``pool_diag_tick_sec()`` seconds and then awaits
    ``emit_pool_diagnostics_tick()``. The interval is re-read on every
    iteration. An ``Exception`` raised by a tick is logged as a warning and
    the loop carries on with the next iteration.
    """
    while True:
        # Sleep before the tick so the first report is also delayed by one interval.
        await asyncio.sleep(pool_diag_tick_sec())
        try:
            await emit_pool_diagnostics_tick()
        except Exception as exc:
            logger.warning("pg pool diagnostics tick failed: {}", exc)
205249
206250
207251def bind_pg_pool_diagnostics () -> None :
@@ -228,7 +272,7 @@ def start_pg_pool_diagnostics_task() -> None:
228272 _diag_task = asyncio .create_task (pool_diagnostics_loop (), name = "pg_pool_diagnostics" )
229273 logger .debug (
230274 "pg pool diagnostics started (tick={}s, session_hold_warn={}ms)" ,
231- int (_TICK_SEC ),
275+ int (pool_diag_tick_sec () ),
232276 int (session_hold_warn_ms ()),
233277 )
234278
0 commit comments