@@ -173,46 +173,58 @@ def __init__(self, rank: int, config: ProfilerConfig, tool_config: NPUToolConfig
173173 config = ProfilerConfig (ranks = [], enable = False )
174174 if not tool_config :
175175 assert not config .enable , "tool_config must be set when profiler is enabled"
176- self .enable : bool = config .enable
177- if not config .enable :
178- return
179- self .this_step : bool = False
180176 self .discrete : bool = tool_config .discrete
181- self .this_rank : bool = False
182- self .profile_npu = None
177+ self .e2e_profile_npu = None
183178 self .profile_contents = tool_config .contents
184179 self .profile_level = tool_config .level
185180 self .profile_save_path = config .save_path
186181 self .analysis = tool_config .analysis
187- if config .all_ranks :
188- self .this_rank = True
189- elif config .ranks :
190- self .this_rank = rank in config .ranks
191-
192- def start (self , ** kwargs ):
193- role , profile_step = kwargs .get ("role" , None ), kwargs .get ("profile_step" , None )
194- profile_step = str (profile_step ) if profile_step is not None else None
195- if self .enable and self .this_rank :
196- self .this_step = True
197- if not self .discrete and NPUProfiler ._define_count == 0 :
198- self .profile_npu = get_npu_profiler (
199- contents = self .profile_contents ,
200- profile_level = self .profile_level ,
201- profile_save_path = self .profile_save_path ,
202- analysis = self .analysis ,
203- role = role ,
204- profile_step = profile_step ,
205- )
206- self .profile_npu .start ()
207- NPUProfiler ._define_count += 1
208-
209- def stop (self ):
210- if self .enable and self .this_rank :
211- self .this_step = False
212- if not self .discrete and NPUProfiler ._define_count == 1 :
213- self .profile_npu .step ()
214- self .profile_npu .stop ()
215- NPUProfiler ._define_count -= 1
182+
def start_e2e_profiler(self, **kwargs):
    """Start the end-to-end NPU profiler when running in non-discrete mode.

    Does nothing when ``self.discrete`` is truthy or when
    ``NPUProfiler._define_count`` is not 0. Otherwise a profiler is built
    with ``get_npu_profiler`` from the settings stored on this instance,
    kept on ``self.e2e_profile_npu``, started, and the class-level counter
    is incremented.

    Args:
        **kwargs: Only ``role`` is read (default ``None``); it is forwarded
            to ``get_npu_profiler``.
    """
    if self.discrete or NPUProfiler._define_count != 0:
        return

    self.e2e_profile_npu = get_npu_profiler(
        contents=self.profile_contents,
        profile_level=self.profile_level,
        profile_save_path=self.profile_save_path,
        analysis=self.analysis,
        role=kwargs.get("role", None),
    )
    self.e2e_profile_npu.start()
    # The counter lives on the class, not on the instance.
    NPUProfiler._define_count += 1
195+
def stop_e2e_profiler(self):
    """Stop the end-to-end NPU profiler started by ``start_e2e_profiler``.

    Acts only in non-discrete mode, when ``NPUProfiler._define_count`` is 1
    and this instance holds a profiler handle. In that case the profiler is
    stepped, stopped, and the class-level counter is decremented. In every
    other case the call is a no-op.
    """
    if self.discrete or NPUProfiler._define_count != 1:
        return

    profiler = self.e2e_profile_npu
    if profiler is None:
        # ``_define_count`` is read through the class while the handle is
        # stored per instance (and initialised to None), so the counter can
        # be 1 without this instance having started anything. Leave the
        # counter alone instead of failing on ``None.step()``.
        return

    profiler.step()
    profiler.stop()
    NPUProfiler._define_count -= 1
201+
def start_capture_profiler(self, **kwargs):
    """Begin an on-demand profiling segment.

    In discrete mode a dedicated profiler is built with ``get_npu_profiler``,
    stored on ``self.capture_profiler_npu`` and started. In both modes a
    range is then opened with ``mark_start_range`` and its return value is
    stored on ``self._capture_range_id`` for ``stop_capture_profiler``.

    Args:
        **kwargs: Only ``role`` is read (default ``""``); it is passed to
            ``get_npu_profiler`` and used as the range message.
    """
    segment_role = kwargs.get("role", "")

    if self.discrete:
        self.capture_profiler_npu = get_npu_profiler(
            contents=self.profile_contents,
            profile_level=self.profile_level,
            profile_save_path=self.profile_save_path,
            analysis=self.analysis,
            role=segment_role,
        )
        self.capture_profiler_npu.start()

    # The range marker is opened regardless of the discrete setting.
    self._capture_range_id = mark_start_range(message=segment_role)
217+
def stop_capture_profiler(self):
    """End the on-demand profiling segment.

    Closes the range stored on ``self._capture_range_id`` (if that attribute
    exists) and removes the attribute. In discrete mode, if a truthy
    ``self.capture_profiler_npu`` is present, it is stepped, stopped and the
    attribute is removed. Missing attributes are tolerated, so calling this
    without a preceding start does nothing.
    """
    if hasattr(self, "_capture_range_id"):
        mark_end_range(self._capture_range_id)
        del self._capture_range_id

    if not self.discrete:
        return

    # The attribute only exists after a discrete-mode start, hence getattr.
    segment_profiler = getattr(self, "capture_profiler_npu", None)
    if not segment_profiler:
        return

    self.capture_profiler_npu.step()
    self.capture_profiler_npu.stop()
    del self.capture_profiler_npu
216228
217229 def annotate (self , message : Optional [str ] = None , role : Optional [str ] = None , ** kwargs_outer ) -> Callable :
218230 """Decorate a Worker member function to profile the current rank in the current training step.
@@ -230,42 +242,33 @@ def annotate(self, message: Optional[str] = None, role: Optional[str] = None, **
230242 def decorator (func ):
231243 @functools .wraps (func )
232244 def wrapper (* args , ** kwargs_inner ):
233- if not self .enable :
234- return func (* args , ** kwargs_inner )
235-
236245 profile_name = message or func .__name__
237246 discrete_mode = self .discrete
238- profile_enable = self .this_step and self .enable
239-
240- if not profile_enable :
241- return func (* args , ** kwargs_inner )
242-
243- if profile_enable :
244- if not discrete_mode :
245- mark_range = mark_start_range (message = profile_name )
246- else :
247- profile_npu = get_npu_profiler (
248- contents = self .profile_contents ,
249- profile_level = self .profile_level ,
250- profile_save_path = self .profile_save_path ,
251- analysis = self .analysis ,
252- role = role ,
253- )
254- profile_npu .start ()
255- mark_range = mark_start_range (message = profile_name )
247+
248+ if not discrete_mode :
249+ mark_range = mark_start_range (message = profile_name )
250+ else :
251+ profile_npu = get_npu_profiler (
252+ contents = self .profile_contents ,
253+ profile_level = self .profile_level ,
254+ profile_save_path = self .profile_save_path ,
255+ analysis = self .analysis ,
256+ role = role ,
257+ )
258+ profile_npu .start ()
259+ mark_range = mark_start_range (message = profile_name )
256260
257261 result = func (* args , ** kwargs_inner )
258262
259- if profile_enable :
260- if not discrete_mode :
261- mark_end_range (mark_range )
262- else :
263- mark_end_range (mark_range )
264- profile_npu .step ()
265- profile_npu .stop ()
263+ if not discrete_mode :
264+ mark_end_range (mark_range )
265+ else :
266+ mark_end_range (mark_range )
267+ profile_npu .step ()
268+ profile_npu .stop ()
266269
267270 return result
268271
269272 return wrapper
270273
271- return decorator
274+ return decorator
0 commit comments