1717
1818import argparse
1919import asyncio
20+ import copy
2021import glob
2122import json
2223import os
@@ -314,8 +315,8 @@ def __getitem__(self, idx: int) -> RequestFuncInput:
314315 height = self .args .height ,
315316 num_frames = self .args .num_frames ,
316317 fps = self .args .fps ,
317- num_inference_steps = getattr ( self .args , " num_inference_steps" , None ) ,
318- guidance_scale = getattr ( self .args , " guidance_scale" , None ) ,
318+ num_inference_steps = self .args . num_inference_steps ,
319+ guidance_scale = self .args . guidance_scale ,
319320 image_paths = image_paths ,
320321 )
321322
@@ -374,7 +375,7 @@ async def async_request_image_sglang(
374375 data .add_field ("guidance_scale" , str (input .guidance_scale ))
375376
376377 # Add profiling and other extra parameters
377- extra_params = input .extra_body . copy ( )
378+ extra_params = copy . deepcopy ( input .extra_body )
378379 if extra_params .pop ("profile" , None ):
379380 data .add_field ("profile" , "true" )
380381 for key , value in extra_params .items ():
@@ -766,9 +767,9 @@ async def limited_request_func(req, session, pbar):
766767 api_url = f"{ args .base_url } /start_profile"
767768 )
768769 if profile_output .success :
769- print (f"Profiler started: { profile_output .message } " )
770+ logger . info (f"Profiler started: { profile_output .message } " )
770771 else :
771- print (f"Warning: Failed to start profiler: { profile_output .error } " )
772+ logger . warning (f"Failed to start profiler: { profile_output .error } " )
772773
773774 # Run benchmark
774775 pbar = tqdm (total = len (requests_list ), disable = args .disable_tqdm )
@@ -792,77 +793,81 @@ async def limited_request_func(req, session, pbar):
792793
793794 # Stop profiler if it was started
794795 if args .profile :
795- print ("Stopping profiler and saving traces..." )
796+ logger . info ("Stopping profiler and saving traces..." )
796797 profile_output = await async_request_profile (
797798 api_url = f"{ args .base_url } /stop_profile"
798799 )
799800 if profile_output .success :
800- print (f"Profiler stopped: { profile_output .message } " )
801+ logger . info (f"Profiler stopped: { profile_output .message } " )
801802 else :
802- print (f"Warning: Failed to stop profiler: { profile_output .error } " )
803+ logger . warning (f"Failed to stop profiler: { profile_output .error } " )
803804
804805 # Calculate metrics
805806 metrics = calculate_metrics (outputs , total_duration )
806807
807- print ("\n {s:{c}^{n}}" .format (s = " Serving Benchmark Result " , n = 60 , c = "=" ))
808+ logger . info ("\n {s:{c}^{n}}" .format (s = " Serving Benchmark Result " , n = 60 , c = "=" ))
808809
809810 # Section 1: Configuration
810- print ("{:<40} {:<15}" .format ("Task:" , task_name ))
811- print ("{:<40} {:<15}" .format ("Model:" , args .model ))
812- print ("{:<40} {:<15}" .format ("Dataset:" , args .dataset ))
811+ logger . info ("{:<40} {:<15}" .format ("Task:" , task_name ))
812+ logger . info ("{:<40} {:<15}" .format ("Model:" , args .model ))
813+ logger . info ("{:<40} {:<15}" .format ("Dataset:" , args .dataset ))
813814
814815 # Section 2: Execution & Traffic
815- print (f"{ '-' * 50 } " )
816- print ("{:<40} {:<15.2f}" .format ("Benchmark duration (s):" , metrics ["duration" ]))
817- print ("{:<40} {:<15}" .format ("Request rate:" , str (args .request_rate )))
818- print (
816+ logger .info (f"{ '-' * 50 } " )
817+ logger .info (
818+ "{:<40} {:<15.2f}" .format ("Benchmark duration (s):" , metrics ["duration" ])
819+ )
820+ logger .info ("{:<40} {:<15}" .format ("Request rate:" , str (args .request_rate )))
821+ logger .info (
819822 "{:<40} {:<15}" .format (
820823 "Max request concurrency:" ,
821824 str (args .max_concurrency ) if args .max_concurrency else "not set" ,
822825 )
823826 )
824- print (
827+ logger . info (
825828 "{:<40} {}/{:<15}" .format (
826829 "Successful requests:" , metrics ["completed_requests" ], len (requests_list )
827830 )
828831 )
829832
830833 # Section 3: Performance Metrics
831- print (f"{ '-' * 50 } " )
834+ logger . info (f"{ '-' * 50 } " )
832835
833- print (
836+ logger . info (
834837 "{:<40} {:<15.2f}" .format (
835838 "Request throughput (req/s):" , metrics ["throughput_qps" ]
836839 )
837840 )
838- print ("{:<40} {:<15.4f}" .format ("Latency Mean (s):" , metrics ["latency_mean" ]))
839- print ("{:<40} {:<15.4f}" .format ("Latency Median (s):" , metrics ["latency_median" ]))
840- print ("{:<40} {:<15.4f}" .format ("Latency P99 (s):" , metrics ["latency_p99" ]))
841+ logger .info ("{:<40} {:<15.4f}" .format ("Latency Mean (s):" , metrics ["latency_mean" ]))
842+ logger .info (
843+ "{:<40} {:<15.4f}" .format ("Latency Median (s):" , metrics ["latency_median" ])
844+ )
845+ logger .info ("{:<40} {:<15.4f}" .format ("Latency P99 (s):" , metrics ["latency_p99" ]))
841846
842847 if metrics ["peak_memory_mb_max" ] > 0 :
843- print (f"{ '-' * 50 } " )
844- print (
848+ logger . info (f"{ '-' * 50 } " )
849+ logger . info (
845850 "{:<40} {:<15.2f}" .format (
846851 "Peak Memory Max (MB):" , metrics ["peak_memory_mb_max" ]
847852 )
848853 )
849- print (
854+ logger . info (
850855 "{:<40} {:<15.2f}" .format (
851856 "Peak Memory Mean (MB):" , metrics ["peak_memory_mb_mean" ]
852857 )
853858 )
854- print (
859+ logger . info (
855860 "{:<40} {:<15.2f}" .format (
856861 "Peak Memory Median (MB):" , metrics ["peak_memory_mb_median" ]
857862 )
858863 )
859864
860- print ("=" * 60 )
865+ logger . info ("=" * 60 )
861866
862867 if args .output_file :
863868 with open (args .output_file , "w" ) as f :
864869 json .dump (metrics , f , indent = 2 )
865- print (f"Metrics saved to { args .output_file } " )
870+ logger . info (f"Metrics saved to { args .output_file } " )
866871
867872
868873if __name__ == "__main__" :
0 commit comments