@@ -54,7 +54,7 @@ def __init__(self):
5454
5555 self ._inspector = None
5656 self ._inspector_last_time = 0
57- self ._inspector_ttl = 60 # Inspector cache time in seconds
57+ self ._inspector_ttl = 300 # Inspector cache TTL in seconds (5 minutes); a cached inspector is reused until it expires
5858 self ._inspector_lock = None
5959 self ._inspector_lock = threading .Lock ()
6060
@@ -105,7 +105,7 @@ async def stop(self):
105105 logger .info ("Data processing service stopped" )
106106
107107 def _get_celery_inspector (self ):
108- """Get Celery inspector"""
108+ """Get Celery inspector (cached for performance) """
109109 with self ._inspector_lock :
110110 now = time .time ()
111111 if self ._inspector and now - self ._inspector_last_time < self ._inspector_ttl :
@@ -117,9 +117,9 @@ def _get_celery_inspector(self):
117117 f"Celery broker URL is not configured properly, reconfiguring to { celery_app .conf .broker_url } " )
118118 try :
119119 inspector = celery_app .control .inspect ()
120- inspector .ping ()
121120 self ._inspector = inspector
122121 self ._inspector_last_time = now
122+ self ._inspector_init_time = now
123123 return inspector
124124 except Exception as e :
125125 self ._inspector = None
@@ -142,11 +142,9 @@ async def get_all_tasks(self, filter: bool = True) -> List[Dict[str, Any]]:
142142 all_tasks = []
143143 try :
144144 start_time = time .time ()
145- logger .debug (
146- "Getting inspector to check for active and reserved tasks (concurrent)" )
145+ inspector_start = time .time ()
147146 inspector = self ._get_celery_inspector ()
148- logger .debug (
149- f"⏰ Inspector initialization took { time .time () - start_time } s" )
147+ inspector_duration = time .time () - inspector_start
150148
151149 # Collect task IDs from different sources and keep runtime metadata
152150 task_ids = set ()
@@ -171,18 +169,37 @@ def _normalize_runtime_meta(task: Dict[str, Any]) -> Dict[str, Any]:
171169 'original_filename' : kwargs .get ('original_filename' , '' ),
172170 }
173171
172+ celery_start = time .time ()
173+
174+ # Use a short inspect timeout: workers typically respond in ~0.1s,
175+ # so the default 1s timeout only adds delay here
176+ short_timeout = 0.2
177+
174178 def get_active ():
175- return inspector .active ()
179+ t = time .time ()
180+ # Create fresh inspector with short timeout for each call
181+ short_inspector = celery_app .control .inspect (timeout = short_timeout )
182+ result = short_inspector .active ()
183+ elapsed = time .time () - t
184+ logger .info (f"[get_all_tasks] inspector.active() took { elapsed :.3f} s" )
185+ return result if result else {}
176186
177187 def get_reserved ():
178- return inspector .reserved ()
188+ t = time .time ()
189+ short_inspector = celery_app .control .inspect (timeout = short_timeout )
190+ result = short_inspector .reserved ()
191+ elapsed = time .time () - t
192+ logger .info (f"[get_all_tasks] inspector.reserved() took { elapsed :.3f} s" )
193+ return result if result else {}
194+
179195 with concurrent .futures .ThreadPoolExecutor (max_workers = 2 ) as executor :
180196 future_active = executor .submit (get_active )
181197 future_reserved = executor .submit (get_reserved )
182- active_tasks_dict = future_active .result ()
183- reserved_tasks_dict = future_reserved .result ()
184- logger .debug (
185- f"⏰ Get active and reserved tasks (concurrent) took { time .time () - start_time } s" )
198+ active_tasks_dict = future_active .result (timeout = short_timeout + 0.5 )
199+ reserved_tasks_dict = future_reserved .result (timeout = short_timeout + 0.5 )
200+ celery_duration = time .time () - celery_start
201+ if celery_duration > 0.5 :
202+ logger .warning (f"[get_all_tasks] Inspector took { celery_duration :.3f} s (expected <0.5s)" )
186203 if active_tasks_dict :
187204 for worker , tasks in active_tasks_dict .items ():
188205 for task in tasks :
@@ -199,23 +216,17 @@ def get_reserved():
199216 # Keep active metadata if already present
200217 runtime_task_meta .setdefault (task_id , _normalize_runtime_meta (task ))
201218
202- # Currently, we don't have scheduled tasks, so skip getting scheduled tasks here
203- start_time = time .time ()
204- logger .debug ("Getting task IDs from Redis backend" )
205- # Also get task IDs from Redis backend (covers completed/failed tasks within expiry)
219+ # Get task IDs from Redis backend (covers completed/failed tasks within expiry)
206220 try :
207221 redis_task_ids = get_all_task_ids_from_redis (self .redis_client )
208- logger .debug (
209- f"⏰ Get Redis task IDs took { time .time () - start_time } s" )
210222 for task_id in redis_task_ids :
211- # Add to the set, duplicates will be handled
212223 task_ids .add (task_id )
213224 except Exception as redis_error :
214225 logger .warning (
215226 f"Failed to query Redis for stored task IDs: { str (redis_error )} " )
216- logger .debug (
217- f"Total unique task IDs collected (inspector + Redis): { len (task_ids )} " )
227+
218228 task_id_list = list (task_ids )
229+ # Batch fetch all task info
219230 tasks = [get_task_info (task_id ) for task_id in task_id_list ]
220231 all_task_infos = await asyncio .gather (* tasks , return_exceptions = True )
221232 for idx , task_info in enumerate (all_task_infos ):
@@ -243,7 +254,6 @@ def get_reserved():
243254 if not task_info .get ('index_name' ):
244255 continue
245256 all_tasks .append (task_info )
246- logger .debug (f"Retrieved { len (all_tasks )} tasks." )
247257 except Exception as e :
248258 logger .error (f"Error retrieving all tasks: { str (e )} " )
249259 all_tasks = []
0 commit comments