@@ -475,6 +475,7 @@ <h1>Source code for torchtune.training._profiler</h1><div class="highlight"><pre
475
475
< span class ="n "> DEFAULT_PROFILER_ACTIVITIES</ span > < span class ="o "> =</ span > < span class ="p "> {</ span >
476
476
< span class ="n "> torch</ span > < span class ="o "> .</ span > < span class ="n "> profiler</ span > < span class ="o "> .</ span > < span class ="n "> ProfilerActivity</ span > < span class ="o "> .</ span > < span class ="n "> CPU</ span > < span class ="p "> ,</ span >
477
477
< span class ="n "> torch</ span > < span class ="o "> .</ span > < span class ="n "> profiler</ span > < span class ="o "> .</ span > < span class ="n "> ProfilerActivity</ span > < span class ="o "> .</ span > < span class ="n "> CUDA</ span > < span class ="p "> ,</ span >
478
+ < span class ="n "> torch</ span > < span class ="o "> .</ span > < span class ="n "> profiler</ span > < span class ="o "> .</ span > < span class ="n "> ProfilerActivity</ span > < span class ="o "> .</ span > < span class ="n "> XPU</ span > < span class ="p "> ,</ span >
478
479
< span class ="p "> }</ span >
479
480
480
481
< span class ="n "> DEFAULT_SCHEDULE</ span > < span class ="p "> :</ span > < span class ="nb "> dict</ span > < span class ="o "> =</ span > < span class ="p "> {</ span >
@@ -559,7 +560,7 @@ <h1>Source code for torchtune.training._profiler</h1><div class="highlight"><pre
559
560
< span class ="n "> log</ span > < span class ="o "> .</ span > < span class ="n "> info</ span > < span class ="p "> (</ span > < span class ="sa "> f</ span > < span class ="s2 "> "Finished dumping traces in </ span > < span class ="si "> {</ span > < span class ="n "> time</ span > < span class ="o "> .</ span > < span class ="n "> monotonic</ span > < span class ="p "> ()</ span > < span class ="w "> </ span > < span class ="o "> -</ span > < span class ="w "> </ span > < span class ="n "> begin</ span > < span class ="si "> :</ span > < span class ="s2 "> .2f</ span > < span class ="si "> }</ span > < span class ="s2 "> seconds"</ span > < span class ="p "> )</ span >
560
561
561
562
< span class ="c1 "> # Memory timeline sometimes fails to export</ span >
562
- < span class ="k "> if</ span > < span class ="n "> prof</ span > < span class ="o "> .</ span > < span class ="n "> profile_memory</ span > < span class ="p "> :</ span >
563
+ < span class ="k "> if</ span > < span class ="n "> prof</ span > < span class ="o "> .</ span > < span class ="n "> profile_memory</ span > < span class ="ow " > and </ span > < span class =" n " > torch </ span > < span class =" o " > . </ span > < span class =" n " > cuda </ span > < span class =" o " > . </ span > < span class =" n " > is_available </ span > < span class =" p "> () :</ span >
563
564
< span class ="k "> if</ span > < span class ="n "> rank</ span > < span class ="o "> ==</ span > < span class ="mi "> 0</ span > < span class ="p "> :</ span >
564
565
< span class ="k "> try</ span > < span class ="p "> :</ span >
565
566
< span class ="n "> prof</ span > < span class ="o "> .</ span > < span class ="n "> export_memory_timeline</ span > < span class ="p "> (</ span >
@@ -633,6 +634,7 @@ <h1>Source code for torchtune.training._profiler</h1><div class="highlight"><pre
633
634
< span class ="n "> enabled</ span > < span class ="p "> :</ span > < span class ="nb "> bool</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span > < span class ="p "> ,</ span >
634
635
< span class ="n "> cpu</ span > < span class ="p "> :</ span > < span class ="nb "> bool</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span > < span class ="p "> ,</ span >
635
636
< span class ="n "> cuda</ span > < span class ="p "> :</ span > < span class ="nb "> bool</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span > < span class ="p "> ,</ span >
637
+ < span class ="n "> xpu</ span > < span class ="p "> :</ span > < span class ="nb "> bool</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span > < span class ="p "> ,</ span >
636
638
< span class ="n "> profile_memory</ span > < span class ="p "> :</ span > < span class ="nb "> bool</ span > < span class ="o "> =</ span > < span class ="n "> DEFAULT_TRACE_OPTS</ span > < span class ="p "> [</ span > < span class ="s2 "> "profile_memory"</ span > < span class ="p "> ],</ span >
637
639
< span class ="n "> with_stack</ span > < span class ="p "> :</ span > < span class ="nb "> bool</ span > < span class ="o "> =</ span > < span class ="n "> DEFAULT_TRACE_OPTS</ span > < span class ="p "> [</ span > < span class ="s2 "> "with_stack"</ span > < span class ="p "> ],</ span >
638
640
< span class ="n "> record_shapes</ span > < span class ="p "> :</ span > < span class ="nb "> bool</ span > < span class ="o "> =</ span > < span class ="n "> DEFAULT_TRACE_OPTS</ span > < span class ="p "> [</ span > < span class ="s2 "> "record_shapes"</ span > < span class ="p "> ],</ span >
@@ -700,6 +702,7 @@ <h1>Source code for torchtune.training._profiler</h1><div class="highlight"><pre
700
702
< span class ="sd "> enabled (bool): Enable pytorch profiler. Default is False.</ span >
701
703
< span class ="sd "> cpu (bool): Enable cpu profiling. Default is True.</ span >
702
704
< span class ="sd "> cuda (bool): Enable cuda profiling. Default is True.</ span >
705
+ < span class ="sd "> xpu (bool): Enable xpu profiling. Default is True.</ span >
703
706
< span class ="sd "> profile_memory (bool): Profile memory usage. Default is False.</ span >
704
707
< span class ="sd "> with_stack (bool): Profile stack. Default is False.</ span >
705
708
< span class ="sd "> record_shapes (bool): Record shapes. Default is True.</ span >
@@ -724,10 +727,12 @@ <h1>Source code for torchtune.training._profiler</h1><div class="highlight"><pre
724
727
< span class ="n "> activities</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> (</ span > < span class ="n "> torch</ span > < span class ="o "> .</ span > < span class ="n "> profiler</ span > < span class ="o "> .</ span > < span class ="n "> ProfilerActivity</ span > < span class ="o "> .</ span > < span class ="n "> CPU</ span > < span class ="p "> )</ span >
725
728
< span class ="k "> if</ span > < span class ="n "> cuda</ span > < span class ="p "> :</ span >
726
729
< span class ="n "> activities</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> (</ span > < span class ="n "> torch</ span > < span class ="o "> .</ span > < span class ="n "> profiler</ span > < span class ="o "> .</ span > < span class ="n "> ProfilerActivity</ span > < span class ="o "> .</ span > < span class ="n "> CUDA</ span > < span class ="p "> )</ span >
730
+ < span class ="k "> if</ span > < span class ="n "> xpu</ span > < span class ="p "> :</ span >
731
+ < span class ="n "> activities</ span > < span class ="o "> .</ span > < span class ="n "> append</ span > < span class ="p "> (</ span > < span class ="n "> torch</ span > < span class ="o "> .</ span > < span class ="n "> profiler</ span > < span class ="o "> .</ span > < span class ="n "> ProfilerActivity</ span > < span class ="o "> .</ span > < span class ="n "> XPU</ span > < span class ="p "> )</ span >
727
732
< span class ="k "> if</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> activities</ span > < span class ="p "> )</ span > < span class ="o "> ==</ span > < span class ="mi "> 0</ span > < span class ="p "> :</ span >
728
733
< span class ="n "> _warn</ span > < span class ="p "> (</ span > < span class ="s2 "> "No activities specified, defaulting to CPU + CUDA"</ span > < span class ="p "> )</ span >
729
734
< span class ="n "> activities</ span > < span class ="o "> =</ span > < span class ="n "> DEFAULT_PROFILER_ACTIVITIES</ span >
730
- < span class ="n "> cpu</ span > < span class ="o "> =</ span > < span class ="n "> cuda</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span >
735
+ < span class ="n "> cpu</ span > < span class ="o "> =</ span > < span class ="n "> cuda</ span > < span class ="o "> =</ span > < span class ="n " > xpu </ span > < span class =" o " > = </ span > < span class =" kc "> True</ span >
731
736
732
737
< span class ="c1 "> # Check for schedule</ span >
733
738
< span class ="c1 "> # 1) If no schedule is provided, set to DEFAULT_SCHEDULE</ span >
@@ -820,6 +825,7 @@ <h1>Source code for torchtune.training._profiler</h1><div class="highlight"><pre
820
825
< span class ="s2 "> "output_dir"</ span > < span class ="p "> :</ span > < span class ="n "> output_dir</ span > < span class ="p "> ,</ span >
821
826
< span class ="s2 "> "cpu"</ span > < span class ="p "> :</ span > < span class ="n "> cpu</ span > < span class ="p "> ,</ span >
822
827
< span class ="s2 "> "cuda"</ span > < span class ="p "> :</ span > < span class ="n "> cuda</ span > < span class ="p "> ,</ span >
828
+ < span class ="s2 "> "xpu"</ span > < span class ="p "> :</ span > < span class ="n "> xpu</ span > < span class ="p "> ,</ span >
823
829
< span class ="s2 "> "profile_memory"</ span > < span class ="p "> :</ span > < span class ="n "> profile_memory</ span > < span class ="p "> ,</ span >
824
830
< span class ="s2 "> "with_stack"</ span > < span class ="p "> :</ span > < span class ="n "> with_stack</ span > < span class ="p "> ,</ span >
825
831
< span class ="s2 "> "record_shapes"</ span > < span class ="p "> :</ span > < span class ="n "> record_shapes</ span > < span class ="p "> ,</ span >
0 commit comments