3232from ....optimization .physical import optimize
3333from ....serialization import AioSerializer
3434from ....typing import BandType , ChunkType
35- from ....utils import get_chunk_key_to_data_keys , calc_data_size
35+ from ....utils import get_chunk_key_to_data_keys , calc_data_size , Timer
3636from ...context import ThreadedServiceContext
3737from ...meta .api import MetaAPI , WorkerMetaAPI
3838from ...session import SessionAPI
3939from ...storage import StorageAPI
4040from ...task import TaskAPI , task_options
41+ from ...task .task_info_collector import TaskInfoCollector
4142from ..core import Subtask , SubtaskStatus , SubtaskResult
4243from ..utils import iter_input_data_keys , iter_output_data , get_mapper_data_keys
4344
@@ -77,6 +78,7 @@ def __init__(
7778 meta_api : MetaAPI ,
7879 worker_meta_api : WorkerMetaAPI ,
7980 band : BandType ,
81+ slot_id : int ,
8082 supervisor_address : str ,
8183 engines : List [str ] = None ,
8284 ):
@@ -91,6 +93,7 @@ def __init__(
9193 ]
9294 )
9395 self ._band = band
96+ self ._slot_id = slot_id
9497 self ._supervisor_address = supervisor_address
9598 self ._engines = engines if engines is not None else task_options .runtime_engines
9699
@@ -120,6 +123,10 @@ def __init__(
120123 self ._storage_api = storage_api
121124 self ._meta_api = meta_api
122125 self ._worker_meta_api = worker_meta_api
126+ collect_task_info = subtask .extra_config and subtask .extra_config .get (
127+ "collect_task_info" , False
128+ )
129+ self ._task_info_collector = TaskInfoCollector (self ._band [0 ], collect_task_info )
123130
124131 # add metrics
125132 self ._subtask_execution_time = Metrics .gauge (
@@ -234,35 +241,37 @@ def cb(fut):
234241 to_wait .set_result (fut .result ())
235242
236243 future .add_done_callback (cb )
237-
238- try :
239- await to_wait
240- logger .debug (
241- "Finish executing operand: %s, chunk: %s, subtask id: %s" ,
242- chunk .op ,
243- chunk ,
244- self .subtask .subtask_id ,
245- )
246- except asyncio .CancelledError :
247- logger .debug (
248- "Receive cancel instruction for operand: %s,"
249- "chunk: %s, subtask id: %s" ,
250- chunk .op ,
251- chunk ,
252- self .subtask .subtask_id ,
253- )
254- # wait for this computation to finish
255- await future
256- # if cancelled, stop next computation
257- logger .debug (
258- "Cancelled operand: %s, chunk: %s, subtask id: %s" ,
259- chunk .op ,
260- chunk ,
261- self .subtask .subtask_id ,
262- )
263- self .result .status = SubtaskStatus .cancelled
264- raise
265-
244+ with Timer () as timer :
245+ try :
246+ await to_wait
247+ logger .debug (
248+ "Finish executing operand: %s, chunk: %s, subtask id: %s" ,
249+ chunk .op ,
250+ chunk ,
251+ self .subtask .subtask_id ,
252+ )
253+ except asyncio .CancelledError : # pragma: no cover
254+ logger .debug (
255+ "Receive cancel instruction for operand: %s,"
256+ "chunk: %s, subtask id: %s" ,
257+ chunk .op ,
258+ chunk ,
259+ self .subtask .subtask_id ,
260+ )
261+ # wait for this computation to finish
262+ await future
263+ # if cancelled, stop next computation
264+ logger .debug (
265+ "Cancelled operand: %s, chunk: %s, subtask id: %s" ,
266+ chunk .op ,
267+ chunk ,
268+ self .subtask .subtask_id ,
269+ )
270+ self .result .status = SubtaskStatus .cancelled
271+ raise
272+ await self ._task_info_collector .collect_runtime_operand_info (
273+ self .subtask , timer .duration , chunk , self ._processor_context
274+ )
266275 self .set_op_progress (chunk .op .key , 1.0 )
267276
268277 for inp in chunk_graph .iter_predecessors (chunk ):
@@ -541,6 +550,7 @@ async def done(self):
541550 self .is_done .set ()
542551
543552 async def run (self ):
553+ cost_times = defaultdict (dict )
544554 self .result .status = SubtaskStatus .running
545555 input_keys = None
546556 unpinned = False
@@ -561,29 +571,49 @@ async def run(self):
561571 }
562572
563573 # load inputs data
574+ cost_times ["load_data_time" ]["start_time" ] = time .time ()
564575 input_keys = await self ._load_input_data ()
576+ cost_times ["load_data_time" ]["end_time" ] = time .time ()
565577 try :
566578 # execute chunk graph
579+ cost_times ["execute_time" ]["start_time" ] = time .time ()
567580 await self ._execute_graph (chunk_graph )
581+ cost_times ["execute_time" ]["end_time" ] = time .time ()
568582 finally :
569583 # unpin inputs data
570584 unpinned = True
585+ cost_times ["unpin_time" ]["start_time" ] = time .time ()
571586 await self ._unpin_data (input_keys )
587+ cost_times ["unpin_time" ]["end_time" ] = time .time ()
572588 # store results data
589+ cost_times ["store_result_time" ]["start_time" ] = time .time ()
573590 (
574591 stored_keys ,
575592 store_sizes ,
576593 memory_sizes ,
577594 data_key_to_object_id ,
578595 ) = await self ._store_data (chunk_graph )
596+ cost_times ["store_result_time" ]["end_time" ] = time .time ()
579597 # store meta
598+ cost_times ["store_meta_time" ]["start_time" ] = time .time ()
580599 await self ._store_meta (
581600 chunk_graph ,
582601 store_sizes ,
583602 memory_sizes ,
584603 data_key_to_object_id ,
585604 update_meta_chunks ,
586605 )
606+ cost_times ["store_meta_time" ]["end_time" ] = time .time ()
607+ await self ._task_info_collector .collect_runtime_subtask_info (
608+ self .subtask ,
609+ self ._band ,
610+ self ._slot_id ,
611+ stored_keys ,
612+ store_sizes ,
613+ memory_sizes ,
614+ cost_times ,
615+ )
616+
587617 except asyncio .CancelledError :
588618 self .result .status = SubtaskStatus .cancelled
589619 self .result .progress = 1.0
@@ -708,7 +738,7 @@ async def _init_context(self, session_id: str) -> ThreadedServiceContext:
708738 await context .init ()
709739 return context
710740
711- async def run (self , subtask : Subtask ):
741+ async def run (self , subtask : Subtask , slot_id : int ):
712742 logger .info (
713743 "Start to run subtask: %r on %s. chunk graph contains %s" ,
714744 subtask ,
@@ -728,6 +758,7 @@ async def run(self, subtask: Subtask):
728758 self ._meta_api ,
729759 self ._worker_meta_api ,
730760 self ._band ,
761+ slot_id ,
731762 self ._supervisor_address ,
732763 )
733764 self ._processor = self ._last_processor = processor
0 commit comments