diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 5d56c7f96..bab913c5c 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -11,6 +11,7 @@ def preprocess(i):
     if os_info['platform'] == 'windows':
         return {'return': 1, 'error': 'Windows is not supported in this script yet'}
     env = i['env']
+    state = i['state']
 
     if is_true(env.get('MLC_RUN_STATE_DOCKER', '')):
         return {'return': 0}
@@ -110,7 +111,8 @@ def preprocess(i):
                     shutil.rmtree(target_data_path)
                 if not os.path.exists(tsv_file):
                     os.makedirs(target_data_path, exist_ok=True)
-                    # cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'")
+                    # cmds.append("make download_data
+                    # BENCHMARKS='stable-diffusion-xl'")
                     env['MLC_REQUIRE_COCO2014_DOWNLOAD'] = 'yes'
                     cmds.append(
                         f"""cp -r \\$MLC_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv""")
@@ -154,7 +156,8 @@ def preprocess(i):
 
             if not os.path.exists(target_data_path) or not os.path.exists(
                     inference_cases_json_path) or not os.path.exists(calibration_cases_json_path):
-                # cmds.append(f"ln -sf {env['MLC_DATASET_PATH']} {target_data_path}")
+                # cmds.append(f"ln -sf {env['MLC_DATASET_PATH']}
+                # {target_data_path}")
                 cmds.append("make download_data BENCHMARKS='3d-unet'")
 
             model_path = os.path.join(
@@ -174,7 +177,8 @@ def preprocess(i):
             if not os.path.exists(target_data_path_base_dir):
                 cmds.append(f"mkdir -p {target_data_path_base_dir}")
             if not os.path.exists(target_data_path):
-                # cmds.append(f"ln -sf {env['MLC_DATASET_LIBRISPEECH_PATH']} {target_data_path}")
+                # cmds.append(f"ln -sf {env['MLC_DATASET_LIBRISPEECH_PATH']}
+                # {target_data_path}")
                 cmds.append("make download_data BENCHMARKS='rnnt'")
 
             model_path = os.path.join(
@@ -522,79 +526,79 @@ def preprocess(i):
         if gpu_batch_size:
             run_config += f" --gpu_batch_size={gpu_batch_size}"
 
-        dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
+        dla_batch_size=env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
         if dla_batch_size:
             run_config += f" --dla_batch_size={dla_batch_size}"
 
-        input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
+        input_format=env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
         if input_format:
             run_config += f" --input_format={input_format}"
 
-        performance_sample_count = env.get(
+        performance_sample_count=env.get(
             'MLC_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT')
         if performance_sample_count:
             run_config += f" --performance_sample_count={performance_sample_count}"
 
-        devices = env.get('MLC_MLPERF_NVIDIA_HARNESS_DEVICES')
+        devices=env.get('MLC_MLPERF_NVIDIA_HARNESS_DEVICES')
         if devices:
             run_config += f" --devices={devices}"
 
-        audio_batch_size = env.get(
+        audio_batch_size=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BATCH_SIZE')
         if audio_batch_size:
             run_config += f" --audio_batch_size={audio_batch_size}"
 
-        disable_encoder_plugin = str(
+        disable_encoder_plugin=str(
             env.get('MLC_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN', ''))
         if disable_encoder_plugin and disable_encoder_plugin.lower() not in [
                 "no", "false", "0", ""]:
             run_config += " --disable_encoder_plugin"
 
-        disable_beta1_smallk = str(
+        disable_beta1_smallk=str(
            env.get('MLC_MLPERF_NVIDIA_HARNESS_DISABLE_BETA1_SMALLK', ''))
         if disable_beta1_smallk and disable_beta1_smallk.lower() in [
                 "yes", "true", "1"]:
             run_config += " --disable_beta1_smallk"
 
-        workspace_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE')
+        workspace_size=env.get('MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE')
         if workspace_size:
             run_config += f" --workspace_size={workspace_size}"
 
         if env.get('MLC_MLPERF_LOADGEN_LOGS_DIR'):
-            env['MLPERF_LOADGEN_LOGS_DIR'] = env['MLC_MLPERF_LOADGEN_LOGS_DIR']
+            env['MLPERF_LOADGEN_LOGS_DIR']=env['MLC_MLPERF_LOADGEN_LOGS_DIR']
 
-        log_dir = env.get('MLC_MLPERF_NVIDIA_HARNESS_LOG_DIR')
+        log_dir=env.get('MLC_MLPERF_NVIDIA_HARNESS_LOG_DIR')
         if log_dir:
             run_config += f" --log_dir={log_dir}"
 
-        use_graphs = str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS', ''))
+        use_graphs=str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS', ''))
         if use_graphs and use_graphs.lower() not in ["no", "false", "0", ""]:
             run_config += " --use_graphs"
 
-        use_deque_limit = str(
+        use_deque_limit=str(
             env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT'))
         if use_deque_limit and use_deque_limit.lower() not in [
                 "no", "false", "0"]:
             run_config += " --use_deque_limit"
 
-        deque_timeout_usec = env.get(
+        deque_timeout_usec=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC')
         if deque_timeout_usec:
             run_config += f" --deque_timeout_usec={deque_timeout_usec}"
 
-        use_cuda_thread_per_device = str(
+        use_cuda_thread_per_device=str(
             env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE', ''))
         if use_cuda_thread_per_device and use_cuda_thread_per_device.lower() not in [
                 "no", "false", "0", ""]:
             run_config += " --use_cuda_thread_per_device"
 
-        run_infer_on_copy_streams = str(
+        run_infer_on_copy_streams=str(
             env.get('MLC_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS', ''))
         if run_infer_on_copy_streams and not is_false(
                 run_infer_on_copy_streams):
             run_config += " --run_infer_on_copy_streams"
 
-        start_from_device = str(
+        start_from_device=str(
             env.get(
                 'MLC_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE',
                 ''))
@@ -602,7 +606,7 @@ def preprocess(i):
                 "no", "false", "0", ""]:
             run_config += " --start_from_device"
 
-        end_on_device = str(
+        end_on_device=str(
             env.get(
                 'MLC_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE',
                 ''))
@@ -610,36 +614,36 @@ def preprocess(i):
                 "no", "false", "0", ""]:
             run_config += " --end_on_device"
 
-        max_dlas = env.get('MLC_MLPERF_NVIDIA_HARNESS_MAX_DLAS')
+        max_dlas=env.get('MLC_MLPERF_NVIDIA_HARNESS_MAX_DLAS')
         if max_dlas:
             run_config += f" --max_dlas={max_dlas}"
 
-        graphs_max_seqlen = env.get(
+        graphs_max_seqlen=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_GRAPHS_MAX_SEQLEN')
         if graphs_max_seqlen:
             run_config += f" --graphs_max_seqlen={graphs_max_seqlen}"
 
-        num_issue_query_threads = env.get(
+        num_issue_query_threads=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_NUM_ISSUE_QUERY_THREADS')
         if num_issue_query_threads:
             run_config += f" --num_issue_query_threads={num_issue_query_threads}"
 
-        soft_drop = env.get('MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP')
+        soft_drop=env.get('MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP')
         if soft_drop:
             run_config += f" --soft_drop={soft_drop}"
 
-        use_small_tile_gemm_plugin = str(
+        use_small_tile_gemm_plugin=str(
             env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN', ''))
         if use_small_tile_gemm_plugin and use_small_tile_gemm_plugin.lower() not in [
                 "no", "false", "0", ""]:
             run_config += f" --use_small_tile_gemm_plugin"
 
-        audio_buffer_num_lines = env.get(
+        audio_buffer_num_lines=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES')
         if audio_buffer_num_lines:
             run_config += f" --audio_buffer_num_lines={audio_buffer_num_lines}"
 
-        use_fp8 = str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_FP8', ''))
+        use_fp8=str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_FP8', ''))
         if use_fp8 and not is_false(use_fp8):
             run_config += f" --use_fp8"
 
@@ -647,30 +651,30 @@ def preprocess(i):
             run_config += f" --fp8_quant_model_path={fp8_model_path}"
             run_config += f" --tensor_parallelism={tmp_tp_size}"
 
-        enable_sort = env.get('MLC_MLPERF_NVIDIA_HARNESS_ENABLE_SORT')
+        enable_sort=env.get('MLC_MLPERF_NVIDIA_HARNESS_ENABLE_SORT')
         if enable_sort and not is_false(enable_sort):
             run_config += f" --enable_sort"
 
-        sdxl_server_batcher_time_limit = env.get(
+        sdxl_server_batcher_time_limit=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_ENABLE_SORT')
         if sdxl_server_batcher_time_limit:
             run_config += f" --sdxl_batcher_time_limit {sdxl_server_batcher_time_limit}"
 
-        num_sort_segments = env.get(
+        num_sort_segments=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS')
         if num_sort_segments:
             run_config += f" --num_sort_segments={num_sort_segments}"
 
-        embedding_weights_on_gpu_part = env.get(
+        embedding_weights_on_gpu_part=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART', '')
         if embedding_weights_on_gpu_part != '':
             run_config += f" --embedding_weights_on_gpu_part={embedding_weights_on_gpu_part}"
 
-        num_warmups = env.get('MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS', '')
+        num_warmups=env.get('MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS', '')
         if num_warmups != '':
             run_config += f" --num_warmups={num_warmups}"
 
-        skip_postprocess = str(
+        skip_postprocess=str(
             env.get(
                 'MLC_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS',
                 ''))
@@ -678,14 +682,14 @@ def preprocess(i):
             run_config += f" --skip_postprocess"
 
         if test_mode:
-            test_mode_string = " --test_mode={}".format(test_mode)
+            test_mode_string=" --test_mode={}".format(test_mode)
         else:
-            test_mode_string = ""
+            test_mode_string=""
 
-        extra_build_engine_options_string = env.get(
+        extra_build_engine_options_string=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_EXTRA_BUILD_ENGINE_OPTIONS', '')
 
-        extra_run_options_string = env.get(
+        extra_run_options_string=env.get(
             'MLC_MLPERF_NVIDIA_HARNESS_EXTRA_RUN_OPTIONS', '')
         # will be ignored during build engine
 
@@ -700,13 +704,13 @@ def preprocess(i):
         cmds.append(f"""make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'""")
 
-    run_cmd = " && ".join(cmds)
-    env['MLC_MLPERF_RUN_CMD'] = run_cmd
-    env['MLC_RUN_CMD'] = run_cmd
-    env['MLC_RUN_DIR'] = env['MLC_MLPERF_INFERENCE_NVIDIA_CODE_PATH']
+    run_cmd=" && ".join(cmds)
+    env['MLC_MLPERF_RUN_CMD']=run_cmd
+    env['MLC_RUN_CMD']=run_cmd
+    env['MLC_RUN_DIR']=env['MLC_MLPERF_INFERENCE_NVIDIA_CODE_PATH']
 
     if '+LD_LIBRARY_PATH' not in env:
-        env['+LD_LIBRARY_PATH'] = []
+        env['+LD_LIBRARY_PATH']=[]
 
     if os.path.exists("/opt/hpcx/ucx/lib"):
         env['+LD_LIBRARY_PATH'].append("/opt/hpcx/ucx/lib")
@@ -721,7 +725,7 @@ def preprocess(i):
 
 def postprocess(i):
 
-    env = i['env']
-    state = i['state']
+    env=i['env']
+    state=i['state']
 
     return {'return': 0}
diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 00aa41969..ea1025b13 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -350,6 +350,7 @@ post_deps:
 
 # Variations to customize dependencies
 variations:
+  pre5.0: {}
   # MLPerf inference version
   v5.0:
     group: version
@@ -363,8 +364,6 @@ variations:
         tags: _for-nvidia-mlperf-inference-v5.0
       pycuda:
         version: "2024.1"
-      nvidia-inference-server:
-        tags: _mlcommons,_v5.0
   v4.1:
     group: version
     env:
@@ -595,6 +594,8 @@ variations:
         names:
           - onnx
         version: "1.17.0"
+      - tags: get,generic-python-lib,_package.onnx-graphsurgeon
+        version: "0.5.2"
       - tags: get,generic-python-lib,_package.numpy
         names:
          - numpy
@@ -1595,22 +1596,42 @@ variations:
     env:
       MLC_NVIDIA_CUSTOM_GPU: "yes"
 
-  rtx_a6000,resnet50,offline,run_harness:
+  rtx_a6000,v5.0,sdxl,offline,run_harness,batch_size.1:
+    default_variations:
+      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+
+  rtx_a6000,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.64
+
+  rtx_a6000,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:64"
 
-  rtx_a6000,resnet50,server,run_harness:
+  rtx_a6000,pre5.0,resnet50,server,run_harness:
     default_variations:
       batch-size: batch_size.32
 
-  rtx_a6000,retinanet,offline,run_harness:
+  rtx_a6000,v5.0,resnet50,server,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:32"
+
+  rtx_a6000,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
 
-  rtx_a6000,retinanet,server,run_harness:
+  rtx_a6000,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  rtx_a6000,pre5.0,retinanet,server,run_harness:
     default_variations:
       batch-size: batch_size.2
 
+  rtx_a6000,v5.0,retinanet,server,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
   rtx_a6000,bert_,offline,run_harness:
     default_variations:
       batch-size: batch_size.256
@@ -1619,14 +1640,22 @@ variations:
     default_variations:
       batch-size: batch_size.256
 
-  rtx_a6000,3d-unet_,offline,run_harness:
+  rtx_a6000,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
+
+  rtx_a6000,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
 
-  rtx_a6000,3d-unet_,server,run_harness:
+  rtx_a6000,pre5.0,3d-unet_,server,run_harness:
     default_variations:
       batch-size: batch_size.8
 
+  rtx_a6000,v5.0,3d-unet_,server,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
   rtx_a6000,rnnt,offline,run_harness:
     default_variations:
       batch-size: batch_size.2048
@@ -1635,31 +1664,51 @@ variations:
     default_variations:
       batch-size: batch_size.512
 
-  rtx_a6000,dlrm_,offline,run_harness:
+  rtx_a6000,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
+
+  rtx_a6000,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm-v2:1400"
 
   rtx_6000_ada:
     group: gpu-name
     env:
       MLC_NVIDIA_CUSTOM_GPU: "yes"
 
-  rtx_6000_ada,resnet50,offline,run_harness:
+  rtx_6000_ada,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.64
+
+  rtx_6000_ada,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:64"
 
-  rtx_6000_ada,resnet50,server,run_harness:
+  rtx_6000_ada,pre5.0,resnet50,server,run_harness:
    default_variations:
      batch-size: batch_size.32
+
+  rtx_6000_ada,v5.0,resnet50,server,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:32"
 
-  rtx_6000_ada,retinanet,offline,run_harness:
+  rtx_6000_ada,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
 
-  rtx_6000_ada,retinanet,server,run_harness:
+  rtx_6000_ada,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  rtx_6000_ada,pre5.0,retinanet,server,run_harness:
     default_variations:
       batch-size: batch_size.2
 
+  rtx_6000_ada,v5.0,retinanet,server,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
   rtx_6000_ada,bert_,offline,run_harness:
     default_variations:
       batch-size: batch_size.256
@@ -1668,14 +1717,22 @@ variations:
     default_variations:
       batch-size: batch_size.256
 
-  rtx_6000_ada,3d-unet_,offline,run_harness:
+  rtx_6000_ada,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
 
-  rtx_6000_ada,3d-unet_,server,run_harness:
+  rtx_6000_ada,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  rtx_6000_ada,pre5.0,3d-unet_,server,run_harness:
     default_variations:
       batch-size: batch_size.8
 
+  rtx_6000_ada,v5.0,3d-unet_,server,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
   rtx_6000_ada,rnnt,offline,run_harness:
     default_variations:
       batch-size: batch_size.512
@@ -1684,45 +1741,73 @@ variations:
     default_variations:
       batch-size: batch_size.512
 
-  rtx_6000_ada,dlrm_,offline,run_harness:
+  rtx_6000_ada,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
 
+  rtx_6000_ada,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm-v2:1400"
+
   l4:
     group: gpu-name
     env:
       MLC_NVIDIA_CUSTOM_GPU: "yes"
 
   l4,sdxl,offline,run_harness:
-    default_variations:
-      batch-size: batch_size.1
     env:
       MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True'
       MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 0.6
-
-  l4,sdxl,offline,run_harness,num-gpu.8:
+
+  l4,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.1
+
+  l4,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+
+  l4,sdxl,offline,run_harness,num-gpu.8:
     env:
       MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True'
       MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 4.8
-
-  l4,sdxl,server,run_harness,num-gpu.1:
+
+  l4,pre5.0,sdxl,offline,run_harness,num-gpu.8:
     default_variations:
       batch-size: batch_size.1
+
+  l4,v5.0,sdxl,offline,run_harness,num-gpu.8:
+    default_variations:
+      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+
+  l4,sdxl,server,run_harness,num-gpu.1:
     env:
       MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True'
       MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 0.55
       MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0
-
-  l4,sdxl,server,run_harness,num-gpu.8:
+
+  l4,pre5.0,sdxl,server,run_harness,num-gpu.1:
     default_variations:
       batch-size: batch_size.1
+
+  l4,v5.0,sdxl,server,run_harness,num-gpu.1:
+    default_variations:
+      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+
+  l4,sdxl,server,run_harness,num-gpu.8:
     env:
       MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True'
       MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 5.05
       MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0
 
+  l4,pre5.0,sdxl,server,run_harness,num-gpu.8:
+    default_variations:
+      batch-size: batch_size.1
+
+  l4,v5.0,sdxl,server,run_harness,num-gpu.8:
+    default_variations:
+      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+
   l4,resnet50:
     default_env:
       MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 10500
@@ -1731,16 +1816,20 @@ variations:
       MLC_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 1
 
   l4,resnet50,offline,run_harness:
-    default_variations:
-      batch-size: batch_size.32
     env:
       MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
       MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "1"
       MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True'
 
-  l4,resnet50,server,run_harness:
+  l4,pre5.0,resnet50,offline,run_harness:
     default_variations:
-      batch-size: batch_size.16
+      batch-size: batch_size.32
+
+  l4,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:32"
+
+  l4,resnet50,server,run_harness:
     env:
       MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "9"
       MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
@@ -1749,13 +1838,23 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000
       MLC_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: 'True'
 
-  l4,retinanet,offline,run_harness:
+  l4,pre5.0,resnet50,server,run_harness:
     default_variations:
-      batch-size: batch_size.2
+      batch-size: batch_size.16
 
-  l4,retinanet,server,run_harness:
+  l4,v5.0,resnet50,server,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:16"
+
+  l4,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
+
+  l4,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  l4,retinanet,server,run_harness:
     env:
       MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
       MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
@@ -1763,6 +1862,14 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 30000
       MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE: 20000000000
 
+  l4,pre5.0,retinanet,server,run_harness:
+    default_variations:
+      batch-size: batch_size.2
+
+  l4,v5.0,retinanet,server,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
   l4,bert_,offline,run_harness:
     default_variations:
       batch-size: batch_size.16
@@ -1776,10 +1883,14 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP: "1.0"
       MLC_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN: "True"
 
-  l4,3d-unet_,offline,run_harness:
+  l4,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1
 
+  l4,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:1"
+
   l4,rnnt,offline,run_harness:
     default_variations:
       batch-size: batch_size.512
@@ -1792,9 +1903,14 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES: "1024"
       MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS: "1024"
 
-  l4,dlrm_,offline,run_harness:
+  l4,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
+
+  l4,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm-v2:1400"
+
   t4:
     group: gpu-name
     env:
diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py
index 5eb368fb1..0912cb803 100644
--- a/script/app-mlperf-inference/customize.py
+++ b/script/app-mlperf-inference/customize.py
@@ -18,7 +18,16 @@ def preprocess(i):
 
     env = i['env']
     state = i['state']
+    logger = i['automation'].logger
 
+    if env.get('MLC_MLPERF_IMPLEMENTATION', '') == 'nvidia':
+        if "nvidia" in env.get('MLC_CUDA_DEVICE_PROP_GPU_NAME', '').lower() and env.get(
+                'MLC_NVIDIA_GPU_NAME', '') == '':
+            # extract the Nvidia GPU model name automatically
+            env['MLC_NVIDIA_GPU_NAME'] = env['MLC_CUDA_DEVICE_PROP_GPU_NAME'].lower(
+            ).split()[-1].strip()
+            logger.info(
+                f"Extracted Nvidia GPU name: {env['MLC_NVIDIA_GPU_NAME']}")
     if env.get('MLC_NVIDIA_GPU_NAME', '') in [
             "rtx_4090", "a100", "t4", "l4", "orin", "custom"]:
         env['MLC_NVIDIA_HARNESS_GPU_VARIATION'] = "_" + \
diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index abf480eff..4fa893b71 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -1715,6 +1715,8 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_TEST_QUERY_COUNT: '100'
@@ -1733,6 +1735,8 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
 
@@ -1746,6 +1750,8 @@ variations:
       nvidia-inference-server:
         version: r3.0
         tags: _nvidia-only
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1766,6 +1772,8 @@ variations:
         tags: _v3.1
       nvidia-scratch-space:
         tags: _version.4_0-dev
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1784,6 +1792,8 @@ variations:
         tags: _ctuning
       intel-harness:
         tags: _v3.1
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1806,6 +1816,8 @@ variations:
         tags: _v4.0
       nvidia-scratch-space:
         tags: _version.4_1-dev
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1824,6 +1836,8 @@ variations:
         tags: _v4.1
       nvidia-scratch-space:
         tags: _version.4_1
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1844,7 +1858,9 @@ variations:
       intel-harness:
         tags: _v4.1
       inference-src:
-        version: r5.0
+        version: r5.0
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
       nvidia-scratch-space:
         tags: _version.5.0-dev
     default_env:
@@ -1870,6 +1886,8 @@ variations:
         tags: _version.5.0
       pycuda:
         version: "2024.1"
+      nvidia-harness:
+        tags: _v5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'