@@ -173,46 +173,59 @@ def __init__(self, rank: int, config: ProfilerConfig, tool_config: NPUToolConfig
173173 config = ProfilerConfig (ranks = [], enable = False )
174174 if not tool_config :
175175 assert not config .enable , "tool_config must be set when profiler is enabled"
176- self .enable : bool = config .enable
177- if not config .enable :
178- return
179- self .this_step : bool = False
180176 self .discrete : bool = tool_config .discrete
181- self .this_rank : bool = False
182- self .profile_npu = None
177+ self .e2e_profile_npu = None
178+ self .capture_profiler_npu = None
183179 self .profile_contents = tool_config .contents
184180 self .profile_level = tool_config .level
185181 self .profile_save_path = config .save_path
186182 self .analysis = tool_config .analysis
187- if config .all_ranks :
188- self .this_rank = True
189- elif config .ranks :
190- self .this_rank = rank in config .ranks
191-
192- def start (self , ** kwargs ):
193- role , profile_step = kwargs .get ("role" , None ), kwargs .get ("profile_step" , None )
194- profile_step = str (profile_step ) if profile_step is not None else None
195- if self .enable and self .this_rank :
196- self .this_step = True
197- if not self .discrete and NPUProfiler ._define_count == 0 :
198- self .profile_npu = get_npu_profiler (
199- contents = self .profile_contents ,
200- profile_level = self .profile_level ,
201- profile_save_path = self .profile_save_path ,
202- analysis = self .analysis ,
203- role = role ,
204- profile_step = profile_step ,
205- )
206- self .profile_npu .start ()
207- NPUProfiler ._define_count += 1
208-
209- def stop (self ):
210- if self .enable and self .this_rank :
211- self .this_step = False
212- if not self .discrete and NPUProfiler ._define_count == 1 :
213- self .profile_npu .step ()
214- self .profile_npu .stop ()
215- NPUProfiler ._define_count -= 1
183+
def start_e2e_profiler(self, **kwargs):
    """Open the end-to-end profiling session (non-discrete mode only).

    Nothing happens in discrete mode, or when ``NPUProfiler._define_count``
    is not 0, i.e. a session is already counted as open.

    Args:
        **kwargs: Only ``role`` is read (default ``None``); it is forwarded
            to ``get_npu_profiler``.
    """
    if self.discrete or NPUProfiler._define_count != 0:
        return

    profiler = get_npu_profiler(
        contents=self.profile_contents,
        profile_level=self.profile_level,
        profile_save_path=self.profile_save_path,
        analysis=self.analysis,
        role=kwargs.get("role", None),
    )
    # Keep the handle in the attribute the constructor initialises to None,
    # so stop_e2e_profiler can always read it safely.
    self.e2e_profile_npu = profiler
    profiler.start()
    NPUProfiler._define_count += 1


def stop_e2e_profiler(self):
    """Close the end-to-end profiling session opened by ``start_e2e_profiler``.

    Nothing happens in discrete mode, when ``NPUProfiler._define_count`` is
    not 1, or when this instance holds no profiler handle.
    """
    if self.discrete or NPUProfiler._define_count != 1:
        return

    profiler = getattr(self, "e2e_profile_npu", None)
    if profiler is None:
        # The counter says a session is open, but this instance did not open
        # it; leave the counter alone so the owning instance can still stop.
        return

    profiler.step()
    profiler.stop()
    # Drop the finished handle so a later stop cannot reuse it.
    self.e2e_profile_npu = None
    NPUProfiler._define_count -= 1
202+
def start_capture_profiler(self, **kwargs):
    """Begin an on-demand profiling segment.

    In discrete mode a dedicated profiler is created via ``get_npu_profiler``,
    stored on ``self.capture_profiler_npu`` and started. In every mode a
    range is opened with ``mark_start_range`` and its id is kept on
    ``self._capture_range_id`` for ``stop_capture_profiler`` to close.

    Args:
        **kwargs: Only ``role`` is read (default ``""``); it is used both as
            the profiler role and as the range message.
    """
    role = kwargs.get("role", "")

    if self.discrete:
        profiler_options = {
            "contents": self.profile_contents,
            "profile_level": self.profile_level,
            "profile_save_path": self.profile_save_path,
            "analysis": self.analysis,
            "role": role,
        }
        self.capture_profiler_npu = get_npu_profiler(**profiler_options)
        self.capture_profiler_npu.start()

    range_id = mark_start_range(message=role)
    self._capture_range_id = range_id
218+
def stop_capture_profiler(self):
    """Stop the on-demand profiling segment.

    Closes the range recorded on ``self._capture_range_id`` (if any) with
    ``mark_end_range``. In discrete mode it then steps and stops the profiler
    held on ``self.capture_profiler_npu``. Calling this without an active
    segment is a no-op.
    """
    try:
        if hasattr(self, "_capture_range_id"):
            range_id = self._capture_range_id
            # Forget the id before closing so a repeated stop never ends the
            # same range twice.
            del self._capture_range_id
            mark_end_range(range_id)
    finally:
        # Runs even if closing the range failed, so the capture profiler is
        # never left running.
        profiler = getattr(self, "capture_profiler_npu", None)
        if self.discrete and profiler is not None:
            # Reset to None instead of deleting the attribute, so it keeps
            # the value the constructor gave it.
            self.capture_profiler_npu = None
            profiler.step()
            profiler.stop()
216229
217230 def annotate (self , message : Optional [str ] = None , role : Optional [str ] = None , ** kwargs_outer ) -> Callable :
218231 """Decorate a Worker member function to profile the current rank in the current training step.
@@ -230,42 +243,33 @@ def annotate(self, message: Optional[str] = None, role: Optional[str] = None, **
230243 def decorator (func ):
231244 @functools .wraps (func )
232245 def wrapper (* args , ** kwargs_inner ):
233- if not self .enable :
234- return func (* args , ** kwargs_inner )
235-
236246 profile_name = message or func .__name__
237247 discrete_mode = self .discrete
238- profile_enable = self .this_step and self .enable
239-
240- if not profile_enable :
241- return func (* args , ** kwargs_inner )
242-
243- if profile_enable :
244- if not discrete_mode :
245- mark_range = mark_start_range (message = profile_name )
246- else :
247- profile_npu = get_npu_profiler (
248- contents = self .profile_contents ,
249- profile_level = self .profile_level ,
250- profile_save_path = self .profile_save_path ,
251- analysis = self .analysis ,
252- role = role ,
253- )
254- profile_npu .start ()
255- mark_range = mark_start_range (message = profile_name )
248+
249+ if not discrete_mode :
250+ mark_range = mark_start_range (message = profile_name )
251+ else :
252+ profile_npu = get_npu_profiler (
253+ contents = self .profile_contents ,
254+ profile_level = self .profile_level ,
255+ profile_save_path = self .profile_save_path ,
256+ analysis = self .analysis ,
257+ role = role ,
258+ )
259+ profile_npu .start ()
260+ mark_range = mark_start_range (message = profile_name )
256261
257262 result = func (* args , ** kwargs_inner )
258263
259- if profile_enable :
260- if not discrete_mode :
261- mark_end_range (mark_range )
262- else :
263- mark_end_range (mark_range )
264- profile_npu .step ()
265- profile_npu .stop ()
264+ if not discrete_mode :
265+ mark_end_range (mark_range )
266+ else :
267+ mark_end_range (mark_range )
268+ profile_npu .step ()
269+ profile_npu .stop ()
266270
267271 return result
268272
269273 return wrapper
270274
271- return decorator
275+ return decorator
0 commit comments