From 4baae86d87b9ea77ba9ee1eff4836c741b724069 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 02:03:49 +0530 Subject: [PATCH 01/18] Update batch size to state --- script/app-mlperf-inference-nvidia/meta.yaml | 71 ++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 00aa41969..1dabc2ed9 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -1699,6 +1699,11 @@ variations: env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 0.6 + + l4,sdxl,offline,run_harness,batch_size.1: + state: + batch_size: + sdxl: 1 l4,sdxl,offline,run_harness,num-gpu.8: default_variations: @@ -1706,6 +1711,11 @@ variations: env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 4.8 + + l4,sdxl,offline,run_harness,num-gpu.8,batch_size.1: + state: + batch_size: + sdxl: 1 l4,sdxl,server,run_harness,num-gpu.1: default_variations: @@ -1714,6 +1724,11 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 0.55 MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0 + + l4,sdxl,server,run_harness,num-gpu.1,batch_size.1: + state: + batch_size: + sdxl: 1 l4,sdxl,server,run_harness,num-gpu.8: default_variations: @@ -1723,6 +1738,11 @@ variations: MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 5.05 MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0 + l4,sdxl,server,run_harness,num-gpu.8,batch_size.1: + state: + batch_size: + sdxl: 1 + l4,resnet50: default_env: MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 10500 @@ -1738,6 +1758,11 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "1" MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' + l4,resnet50,offline,run_harness,batch_size.32: + state: + batch_size: + resnet50: 32 + l4,resnet50,server,run_harness: default_variations: batch-size: batch_size.16 @@ -1749,10 +1774,20 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000 MLC_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: 'True' + l4,resnet50,server,run_harness,batch_size.16: + state: + batch_size: + resnet50: 16 + l4,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 + l4,retinanet,offline,run_harness,batch_size.2: + state: + batch_size: + retinanet: 2 + l4,retinanet,server,run_harness: default_variations: batch-size: batch_size.2 @@ -1763,10 +1798,20 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 30000 MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE: 20000000000 + l4,retinanet,server,run_harness,batch_size.2: + state: + batch_size: + retinanet: 2 + l4,bert_,offline,run_harness: default_variations: batch-size: batch_size.16 + l4,bert_,offline,run_harness,batch_size.16: + state: + batch_size: + bert: 16 + l4,bert_,server,run_harness: default_variations: batch-size: batch_size.16 @@ -1776,14 +1821,29 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP: "1.0" MLC_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN: "True" + l4,bert_,server,run_harness,batch_size.16: + state: + batch_size: + bert: 16 + l4,3d-unet_,offline,run_harness: default_variations: batch-size: batch_size.1 + l4,3d-unet_,offline,run_harness,batch_size.1: + state: + batch_size: + 3d-unet: 1 + l4,rnnt,offline,run_harness: default_variations: batch-size: batch_size.512 + l4,rnnt,offline,run_harness,batch_size.512: + state: + batch_size: + rnnt: 512 + 
l4,rnnt,server,run_harness: default_variations: batch-size: batch_size.512 @@ -1792,9 +1852,20 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES: "1024" MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS: "1024" + l4,rnnt,server,run_harness,batch_size.512: + state: + batch_size: + rnnt: 512 + l4,dlrm_,offline,run_harness: default_variations: batch-size: batch_size.1400 + + l4,dlrm_,offline,run_harness,batch_size.1400: + state: + batch_size: + dlrm: 1400 + t4: group: gpu-name env: From 7cfca342c38b8201875d5b8e3bf41bc24be9ccde Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 02:09:00 +0530 Subject: [PATCH 02/18] batch size - priority to state dict --- script/app-mlperf-inference-nvidia/customize.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index 5d56c7f96..b38abd057 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -11,6 +11,7 @@ def preprocess(i): if os_info['platform'] == 'windows': return {'return': 1, 'error': 'Windows is not supported in this script yet'} env = i['env'] + state = i['state'] if is_true(env.get('MLC_RUN_STATE_DOCKER', '')): return {'return': 0} @@ -518,11 +519,11 @@ def preprocess(i): if dla_inference_streams: run_config += f" --dla_inference_streams={dla_inference_streams}" - gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE') + gpu_batch_size = state.get('batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) if gpu_batch_size: run_config += f" --gpu_batch_size={gpu_batch_size}" - dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE') + dla_batch_size = state.get('dla_batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) if dla_batch_size: run_config += f" --dla_batch_size={dla_batch_size}" From 06d81d6739f7696303e65833aaf315fa8e42ca6c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 19 May 2025 20:39:12 +0000 Subject: [PATCH 03/18] [Automated Commit] Format Codebase [skip ci] --- script/app-mlperf-inference-nvidia/customize.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index b38abd057..5bb2e6c30 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -519,11 +519,13 @@ def preprocess(i): if dla_inference_streams: run_config += f" --dla_inference_streams={dla_inference_streams}" - gpu_batch_size = state.get('batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) + gpu_batch_size = state.get('batch_size', env.get( + 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) if gpu_batch_size: run_config += f" --gpu_batch_size={gpu_batch_size}" - dla_batch_size = state.get('dla_batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) + dla_batch_size = state.get('dla_batch_size', env.get( + 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) if dla_batch_size: run_config += f" --dla_batch_size={dla_batch_size}" From 61e8eaa4f19099ab7ec02669e8518a25f0f9ef4e Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 12:05:32 +0530 Subject: [PATCH 04/18] update run command arguments for v5.0 --- script/app-mlperf-inference-nvidia/customize.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index 5bb2e6c30..909561fc4 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -522,11 +522,15 @@ def preprocess(i): gpu_batch_size = state.get('batch_size', env.get( 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) if gpu_batch_size: + if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": + gpu_batch_size = ",".join(f"{key}:{value}" for key, value in gpu_batch_size.items()) run_config += f" --gpu_batch_size={gpu_batch_size}" dla_batch_size = state.get('dla_batch_size', env.get( 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) if dla_batch_size: + if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": + dla_batch_size = ",".join(f"{key}:{value}" for key, value in dla_batch_size.items()) run_config += f" --dla_batch_size={dla_batch_size}" input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT') From 24ff3a02cf60915a2e83e1229fb1d68e655bef5c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 20 May 2025 06:35:44 +0000 Subject: [PATCH 05/18] [Automated Commit] Format Codebase [skip ci] --- script/app-mlperf-inference-nvidia/customize.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index 909561fc4..c4c520d02 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -523,14 +523,18 @@ def preprocess(i): 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) if gpu_batch_size: if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": - gpu_batch_size = ",".join(f"{key}:{value}" for key, value in gpu_batch_size.items()) + gpu_batch_size = ",".join( + f"{key}:{value}" for key, + value in gpu_batch_size.items()) run_config += f" --gpu_batch_size={gpu_batch_size}" dla_batch_size = state.get('dla_batch_size', env.get( 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) if dla_batch_size: if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": - dla_batch_size = ",".join(f"{key}:{value}" for key, value in dla_batch_size.items()) + dla_batch_size = ",".join( + f"{key}:{value}" for key, + value in dla_batch_size.items()) run_config += f" --dla_batch_size={dla_batch_size}" input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT') From 618996a6e5f1e7bdae6a5c2ea86a8ee993b681d5 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 12:24:36 +0530 Subject: [PATCH 06/18] extract nvidia gpu name automatically --- script/app-mlperf-inference/customize.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index 5eb368fb1..68334a854 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -18,7 +18,13 @@ def preprocess(i): env = i['env'] state = i['state'] + logger = i['automation'].logger + if env.get('MLC_MLPERF_IMPLEMENTATION', '') == 'nvidia': + if "nvidia" in env.get('MLC_CUDA_DEVICE_PROP_GPU_NAME', '') and env.get('MLC_NVIDIA_GPU_NAME', '') == '': + # extract the Nvidia GPU model name automatically + env['MLC_NVIDIA_GPU_NAME'] = env['MLC_CUDA_DEVICE_PROP_GPU_NAME'].lower().split()[-1].strip() + logger.info(f"Extracted Nvidia GPU name: {env['MLC_NVIDIA_GPU_NAME']}") if env.get('MLC_NVIDIA_GPU_NAME', '') in [ "rtx_4090", "a100", "t4", "l4", "orin", "custom"]: 
env['MLC_NVIDIA_HARNESS_GPU_VARIATION'] = "_" + \ From 537e6471103cfe5b01c901a37432b3bd6d093d20 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 20 May 2025 06:54:48 +0000 Subject: [PATCH 07/18] [Automated Commit] Format Codebase [skip ci] --- script/app-mlperf-inference/customize.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index 68334a854..361e3ff79 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -21,10 +21,13 @@ def preprocess(i): logger = i['automation'].logger if env.get('MLC_MLPERF_IMPLEMENTATION', '') == 'nvidia': - if "nvidia" in env.get('MLC_CUDA_DEVICE_PROP_GPU_NAME', '') and env.get('MLC_NVIDIA_GPU_NAME', '') == '': + if "nvidia" in env.get('MLC_CUDA_DEVICE_PROP_GPU_NAME', '') and env.get( + 'MLC_NVIDIA_GPU_NAME', '') == '': # extract the Nvidia GPU model name automatically - env['MLC_NVIDIA_GPU_NAME'] = env['MLC_CUDA_DEVICE_PROP_GPU_NAME'].lower().split()[-1].strip() - logger.info(f"Extracted Nvidia GPU name: {env['MLC_NVIDIA_GPU_NAME']}") + env['MLC_NVIDIA_GPU_NAME'] = env['MLC_CUDA_DEVICE_PROP_GPU_NAME'].lower( + ).split()[-1].strip() + logger.info( + f"Extracted Nvidia GPU name: {env['MLC_NVIDIA_GPU_NAME']}") if env.get('MLC_NVIDIA_GPU_NAME', '') in [ "rtx_4090", "a100", "t4", "l4", "orin", "custom"]: env['MLC_NVIDIA_HARNESS_GPU_VARIATION'] = "_" + \ From 8b2ccfc7300173e3e4b2ffa3e57ef4429cbf4512 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 12:38:34 +0530 Subject: [PATCH 08/18] Update customize.py --- script/app-mlperf-inference/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index 361e3ff79..0912cb803 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -21,7 +21,7 @@ def preprocess(i): logger = i['automation'].logger if env.get('MLC_MLPERF_IMPLEMENTATION', '') == 'nvidia': - if "nvidia" in env.get('MLC_CUDA_DEVICE_PROP_GPU_NAME', '') and env.get( + if "nvidia" in env.get('MLC_CUDA_DEVICE_PROP_GPU_NAME', '').lower() and env.get( 'MLC_NVIDIA_GPU_NAME', '') == '': # extract the Nvidia GPU model name automatically env['MLC_NVIDIA_GPU_NAME'] = env['MLC_CUDA_DEVICE_PROP_GPU_NAME'].lower( From 796c363439f3551c383db08aa6155548039fdec6 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 13:02:41 +0530 Subject: [PATCH 09/18] update batch size extraction logic --- script/app-mlperf-inference-nvidia/customize.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index c4c520d02..b4f6fc437 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -519,17 +519,20 @@ def preprocess(i): if dla_inference_streams: run_config += f" --dla_inference_streams={dla_inference_streams}" - gpu_batch_size = state.get('batch_size', env.get( - 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) + if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": + gpu_batch_size = state.get('batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) + dla_batch_size = state.get('dla_batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) + else: + 
gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE') + dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE') + if gpu_batch_size: if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": gpu_batch_size = ",".join( f"{key}:{value}" for key, value in gpu_batch_size.items()) run_config += f" --gpu_batch_size={gpu_batch_size}" - - dla_batch_size = state.get('dla_batch_size', env.get( - 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) + if dla_batch_size: if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": dla_batch_size = ",".join( From cdca7194f3251ae8e75a0d2be336864f20665a83 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 20 May 2025 07:32:53 +0000 Subject: [PATCH 10/18] [Automated Commit] Format Codebase [skip ci] --- script/app-mlperf-inference-nvidia/customize.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index b4f6fc437..f403fbad9 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -520,19 +520,23 @@ def preprocess(i): run_config += f" --dla_inference_streams={dla_inference_streams}" if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": - gpu_batch_size = state.get('batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) - dla_batch_size = state.get('dla_batch_size', env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) + gpu_batch_size = state.get('batch_size', env.get( + 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) + dla_batch_size = state.get('dla_batch_size', env.get( + 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) else: - gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE') - dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE') - + gpu_batch_size = env.get( + 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE') + dla_batch_size = env.get( + 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE') + if gpu_batch_size: if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": gpu_batch_size = ",".join( f"{key}:{value}" for key, value in gpu_batch_size.items()) run_config += f" --gpu_batch_size={gpu_batch_size}" - + if dla_batch_size: if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": dla_batch_size = ",".join( From 90ec0df9bf8530265eba1e5f32107106e1e8d0e4 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 17:37:05 +0530 Subject: [PATCH 11/18] update packages --- script/app-mlperf-inference-nvidia/meta.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 1dabc2ed9..7d5f6dbed 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -586,15 +586,19 @@ variations: sdxl,v5.0: # nvidia-ammo is decommisioned and model-opt is being used which is built with TRTLLM deps: + - tags: get,generic-python-lib,_package.torch + version: "2.7.0" - tags: get,generic-python-lib,_package.torchrec - version: "0.6.0" + version: "1.1.0" - tags: get,generic-python-lib,_package.torchmetrics - version: "1.0.3" + version: "1.7.1" - tags: get,generic-python-lib,_package.typeguard - tags: get,generic-python-lib,_package.onnx names: - onnx version: "1.17.0" + - tags: get,generic-python-lib,_package.onnx-graphsurgeon + version: "0.5.2" - tags: get,generic-python-lib,_package.numpy names: - numpy From 
c27999073d8435b64e9ac8e8426478860217541a Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 19:47:45 +0530 Subject: [PATCH 12/18] add version for nvidia harness --- script/app-mlperf-inference/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml index abf480eff..ef73da3b2 100644 --- a/script/app-mlperf-inference/meta.yaml +++ b/script/app-mlperf-inference/meta.yaml @@ -1870,6 +1870,8 @@ variations: tags: _version.5.0 pycuda: version: "2024.1" + nvidia-harness: + tags: _v5.0 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' From f707c9f69ff826824e58a920e25464cb2c043239 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 19:53:36 +0530 Subject: [PATCH 13/18] Update meta.yaml --- script/app-mlperf-inference-nvidia/meta.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 7d5f6dbed..cefc32316 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -363,8 +363,6 @@ variations: tags: _for-nvidia-mlperf-inference-v5.0 pycuda: version: "2024.1" - nvidia-inference-server: - tags: _mlcommons,_v5.0 v4.1: group: version env: From 07046e7218f1c9635c0217b84f424ce82cc63eea Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 20 May 2025 20:11:14 +0530 Subject: [PATCH 14/18] updated package version --- script/app-mlperf-inference-nvidia/meta.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index cefc32316..5637509de 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -589,7 +589,11 @@ variations: - tags: get,generic-python-lib,_package.torchrec version: "1.1.0" - tags: get,generic-python-lib,_package.torchmetrics - version: "1.7.1" + version: "1.0.3" + - tags: get,generic-python-lib,_package.torchvision + version: "0.22.0" + - tags: get,generic-python-lib,_package.torch-tensorrt + version: "2.7.0" - tags: get,generic-python-lib,_package.typeguard - tags: get,generic-python-lib,_package.onnx names: From ec27be0d4783b9eb4daa75013699f6001cac8a1d Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sat, 24 May 2025 18:51:47 +0530 Subject: [PATCH 15/18] modify batch size format based on inference version --- script/app-mlperf-inference-nvidia/meta.yaml | 227 +++++++++++-------- script/app-mlperf-inference/meta.yaml | 18 +- 2 files changed, 153 insertions(+), 92 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 5637509de..11f771b0f 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -350,6 +350,7 @@ post_deps: # Variations to customize dependencies variations: + _pre5.0: {} # MLPerf inference version v5.0: group: version @@ -1601,22 +1602,42 @@ variations: env: MLC_NVIDIA_CUSTOM_GPU: "yes" - rtx_a6000,resnet50,offline,run_harness: + rtx_a6000,v5.0,sdxl,offline,run_harness,batch_size.1: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" + + rtx_a6000,pre5.0,resnet50,offline,run_harness: default_variations: 
batch-size: batch_size.64 + + rtx_a6000,v5.0,resnet50,offline,run_harness: + default_variations: + batch-size: batch_size."resnet50:64" - rtx_a6000,resnet50,server,run_harness: + rtx_a6000,pre5.0,resnet50,server,run_harness: default_variations: batch-size: batch_size.32 - rtx_a6000,retinanet,offline,run_harness: + rtx_a6000,v5.0,resnet50,server,run_harness: + default_variations: + batch-size: batch_size."resnet50:32" + + rtx_a6000,pre5.0,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 - rtx_a6000,retinanet,server,run_harness: + rtx_a6000,v5.0,retinanet,offline,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + + rtx_a6000,pre5.0,retinanet,server,run_harness: default_variations: batch-size: batch_size.2 + rtx_a6000,v5.0,retinanet,server,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + rtx_a6000,bert_,offline,run_harness: default_variations: batch-size: batch_size.256 @@ -1625,14 +1646,22 @@ variations: default_variations: batch-size: batch_size.256 - rtx_a6000,3d-unet_,offline,run_harness: + rtx_a6000,pre5.0,3d-unet_,offline,run_harness: default_variations: batch-size: batch_size.8 + + rtx_a6000,v5.0,3d-unet_,offline,run_harness: + default_variations: + batch-size: batch_size."3d-unet:8" - rtx_a6000,3d-unet_,server,run_harness: + rtx_a6000,pre5.0,3d-unet_,server,run_harness: default_variations: batch-size: batch_size.8 + rtx_a6000,v5.0,3d-unet_,server,run_harness: + default_variations: + batch-size: batch_size."3d-unet:8" + rtx_a6000,rnnt,offline,run_harness: default_variations: batch-size: batch_size.2048 @@ -1641,31 +1670,51 @@ variations: default_variations: batch-size: batch_size.512 - rtx_a6000,dlrm_,offline,run_harness: + rtx_a6000,pre5.0,dlrm_,offline,run_harness: default_variations: batch-size: batch_size.1400 + + rtx_a6000,v5.0,dlrm_,offline,run_harness: + default_variations: + batch-size: batch_size."dlrm-v2:1400" rtx_6000_ada: group: gpu-name env: MLC_NVIDIA_CUSTOM_GPU: "yes" - rtx_6000_ada,resnet50,offline,run_harness: + rtx_6000_ada,pre5.0,resnet50,offline,run_harness: default_variations: batch-size: batch_size.64 + + rtx_a6000_ada,v5.0,resnet50,offline,run_harness: + default_variations: + batch-size: batch_size."resnet50:64" - rtx_6000_ada,resnet50,server,run_harness: + rtx_6000_ada,pre5.0,resnet50,server,run_harness: default_variations: batch-size: batch_size.32 + + rtx_a6000_ada,v5.0,resnet50,server,run_harness: + default_variations: + batch-size: batch_size."resnet50:32" - rtx_6000_ada,retinanet,offline,run_harness: + rtx_6000_ada,pre5.0,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 - rtx_6000_ada,retinanet,server,run_harness: + rtx_a6000_ada,v5.0,retinanet,offline,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + + rtx_6000_ada,pre5.0,retinanet,server,run_harness: default_variations: batch-size: batch_size.2 + rtx_a6000_ada,v5.0,retinanet,server,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + rtx_6000_ada,bert_,offline,run_harness: default_variations: batch-size: batch_size.256 @@ -1674,14 +1723,22 @@ variations: default_variations: batch-size: batch_size.256 - rtx_6000_ada,3d-unet_,offline,run_harness: + rtx_6000_ada,pre5.0,3d-unet_,offline,run_harness: default_variations: batch-size: batch_size.8 - rtx_6000_ada,3d-unet_,server,run_harness: + rtx_a6000_ada,v5.0,3d-unet_,offline,run_harness: + default_variations: + batch-size: batch_size."3d-unet:8" + + rtx_6000_ada,pre5.0,3d-unet_,server,run_harness: 
default_variations: batch-size: batch_size.8 + rtx_a6000_ada,v5.0,3d-unet_,offline,run_harness: + default_variations: + batch-size: batch_size."3d-unet:8" + rtx_6000_ada,rnnt,offline,run_harness: default_variations: batch-size: batch_size.512 @@ -1690,64 +1747,72 @@ variations: default_variations: batch-size: batch_size.512 - rtx_6000_ada,dlrm_,offline,run_harness: + rtx_6000_ada,pre5.0,dlrm_,offline,run_harness: default_variations: batch-size: batch_size.1400 + rtx_a6000_ada,v5.0,dlrm_,offline,run_harness: + default_variations: + batch-size: batch_size."dlrm-v2:1400" + l4: group: gpu-name env: MLC_NVIDIA_CUSTOM_GPU: "yes" l4,sdxl,offline,run_harness: - default_variations: - batch-size: batch_size.1 env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 0.6 + + l4,pre5.0,sdxl,offline,run_harness: + default_variations: + batch-size: batch_size.1 - l4,sdxl,offline,run_harness,batch_size.1: - state: - batch_size: - sdxl: 1 + l4,v5.0,sdxl,offline,run_harness: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" l4,sdxl,offline,run_harness,num-gpu.8: - default_variations: - batch-size: batch_size.1 env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 4.8 + + l4,pre5.0,sdxl,offline,run_harness,num-gpu.8: + default_variations: + batch-size: batch_size.1 - l4,sdxl,offline,run_harness,num-gpu.8,batch_size.1: - state: - batch_size: - sdxl: 1 + l4,v5.0,sdxl,offline,run_harness,num-gpu.8: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" l4,sdxl,server,run_harness,num-gpu.1: - default_variations: - batch-size: batch_size.1 env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 0.55 MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0 - l4,sdxl,server,run_harness,num-gpu.1,batch_size.1: - state: - batch_size: - sdxl: 1 - - l4,sdxl,server,run_harness,num-gpu.8: + l4,pre5.0,sdxl,server,run_harness,num-gpu.1: default_variations: batch-size: batch_size.1 + + l4,v5.0,sdxl,server,run_harness,num-gpu.1: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" + + l4,sdxl,server,run_harness,num-gpu.8: env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 5.05 MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0 - l4,sdxl,server,run_harness,num-gpu.8,batch_size.1: - state: - batch_size: - sdxl: 1 + l4,pre5.0,sdxl,server,run_harness,num-gpu.8: + default_variations: + batch-size: batch_size.1 + + l4,v5.0,sdxl,server,run_harness,num-gpu.8: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" l4,resnet50: default_env: @@ -1757,21 +1822,20 @@ variations: MLC_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 1 l4,resnet50,offline,run_harness: - default_variations: - batch-size: batch_size.32 env: MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "1" MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' - l4,resnet50,offline,run_harness,batch_size.32: - state: - batch_size: - resnet50: 32 + l4,pre5.0,resnet50,offline,run_harness: + default_variations: + batch-size: batch_size.32 - l4,resnet50,server,run_harness: + l4,v5.0,resnet50,offline,run_harness: default_variations: - batch-size: batch_size.16 + batch-size: batch_size."resnet50:32" + + l4,resnet50,server,run_harness: env: MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "9" MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" @@ -1780,23 +1844,23 @@ variations: 
MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000 MLC_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: 'True' - l4,resnet50,server,run_harness,batch_size.16: - state: - batch_size: - resnet50: 16 + l4,pre5.0,resnet50,server,run_harness: + default_variations: + batch-size: batch_size.16 + + l4,v5.0,resnet50,server,run_harness: + default_variations: + batch-size: batch_size."resnet50:16" - l4,retinanet,offline,run_harness: + l4,pre5.0,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 - l4,retinanet,offline,run_harness,batch_size.2: - state: - batch_size: - retinanet: 2 + l4,v5.0,retinanet,offline,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" l4,retinanet,server,run_harness: - default_variations: - batch-size: batch_size.2 env: MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" @@ -1804,20 +1868,18 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 30000 MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE: 20000000000 - l4,retinanet,server,run_harness,batch_size.2: - state: - batch_size: - retinanet: 2 + l4,pre5.0,retinanet,server,run_harness: + default_variations: + batch-size: batch_size.2 + + l4,v5.0,retinanet,server,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" l4,bert_,offline,run_harness: default_variations: batch-size: batch_size.16 - l4,bert_,offline,run_harness,batch_size.16: - state: - batch_size: - bert: 16 - l4,bert_,server,run_harness: default_variations: batch-size: batch_size.16 @@ -1827,29 +1889,18 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP: "1.0" MLC_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN: "True" - l4,bert_,server,run_harness,batch_size.16: - state: - batch_size: - bert: 16 - - l4,3d-unet_,offline,run_harness: + l4,pre5.0,3d-unet_,offline,run_harness: default_variations: batch-size: batch_size.1 - l4,3d-unet_,offline,run_harness,batch_size.1: - state: - batch_size: - 3d-unet: 1 + l4,v5.0,3d-unet_,offline,run_harness: + default_variations: + batch-size: batch_size."3d-unet:1" l4,rnnt,offline,run_harness: default_variations: batch-size: batch_size.512 - l4,rnnt,offline,run_harness,batch_size.512: - state: - batch_size: - rnnt: 512 - l4,rnnt,server,run_harness: default_variations: batch-size: batch_size.512 @@ -1858,19 +1909,13 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES: "1024" MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS: "1024" - l4,rnnt,server,run_harness,batch_size.512: - state: - batch_size: - rnnt: 512 - - l4,dlrm_,offline,run_harness: + l4,pre5.0,dlrm_,offline,run_harness: default_variations: batch-size: batch_size.1400 - l4,dlrm_,offline,run_harness,batch_size.1400: - state: - batch_size: - dlrm: 1400 + l4,v5.0,dlrm_,offline,run_harness: + default_variations: + batch-size: batch_size."dlrm-v2:1400" t4: group: gpu-name diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml index ef73da3b2..4fa893b71 100644 --- a/script/app-mlperf-inference/meta.yaml +++ b/script/app-mlperf-inference/meta.yaml @@ -1715,6 +1715,8 @@ variations: nvidia-inference-server: version: r2.1 tags: _custom + nvidia-original-mlperf-inference: + tags: _pre5.0 env: MLC_SKIP_SYS_UTILS: 'yes' MLC_TEST_QUERY_COUNT: '100' @@ -1733,6 +1735,8 @@ variations: nvidia-inference-server: version: r2.1 tags: _custom + nvidia-original-mlperf-inference: + tags: _pre5.0 env: MLC_SKIP_SYS_UTILS: 'yes' @@ -1746,6 +1750,8 @@ variations: nvidia-inference-server: version: r3.0 tags: _nvidia-only + 
nvidia-original-mlperf-inference: + tags: _pre5.0 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1766,6 +1772,8 @@ variations: tags: _v3.1 nvidia-scratch-space: tags: _version.4_0-dev + nvidia-original-mlperf-inference: + tags: _pre5.0 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1784,6 +1792,8 @@ variations: tags: _ctuning intel-harness: tags: _v3.1 + nvidia-original-mlperf-inference: + tags: _pre5.0 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1806,6 +1816,8 @@ variations: tags: _v4.0 nvidia-scratch-space: tags: _version.4_1-dev + nvidia-original-mlperf-inference: + tags: _pre5.0 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1824,6 +1836,8 @@ variations: tags: _v4.1 nvidia-scratch-space: tags: _version.4_1 + nvidia-original-mlperf-inference: + tags: _pre5.0 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1844,7 +1858,9 @@ variations: intel-harness: tags: _v4.1 inference-src: - version: r5.0 + version: r5.0 + nvidia-original-mlperf-inference: + tags: _pre5.0 nvidia-scratch-space: tags: _version.5.0-dev default_env: From 572d5386895ecd11a6cb5e435b3f389d8ca6a528 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 25 May 2025 12:26:13 +0530 Subject: [PATCH 16/18] revert gpu and dla info extraction from state --- .../app-mlperf-inference-nvidia/customize.py | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index f403fbad9..71ba3dc86 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -518,30 +518,13 @@ def preprocess(i): 'MLC_MLPERF_NVIDIA_HARNESS_DLA_INFERENCE_STREAMS') if dla_inference_streams: run_config += f" --dla_inference_streams={dla_inference_streams}" - - if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": - gpu_batch_size = state.get('batch_size', env.get( - 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')) - dla_batch_size = state.get('dla_batch_size', env.get( - 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')) - else: - gpu_batch_size = env.get( - 'MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE') - dla_batch_size = env.get( - 'MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE') - + + gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE') if gpu_batch_size: - if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": - gpu_batch_size = ",".join( - f"{key}:{value}" for key, - value in gpu_batch_size.items()) run_config += f" --gpu_batch_size={gpu_batch_size}" + dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE' if dla_batch_size: - if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": - dla_batch_size = ",".join( - f"{key}:{value}" for key, - value in dla_batch_size.items()) run_config += f" --dla_batch_size={dla_batch_size}" input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT') From 7605329b43aafa19d728a49e9cd11ecd64199e49 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 25 May 2025 06:56:25 +0000 Subject: [PATCH 17/18] [Automated Commit] Format Codebase [skip ci] --- .../app-mlperf-inference-nvidia/customize.py | 91 ++++++++++--------- 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index 71ba3dc86..bab913c5c 100644 --- 
a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -111,7 +111,8 @@ def preprocess(i): shutil.rmtree(target_data_path) if not os.path.exists(tsv_file): os.makedirs(target_data_path, exist_ok=True) - # cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'") + # cmds.append("make download_data + # BENCHMARKS='stable-diffusion-xl'") env['MLC_REQUIRE_COCO2014_DOWNLOAD'] = 'yes' cmds.append( f"""cp -r \\$MLC_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv""") @@ -155,7 +156,8 @@ def preprocess(i): if not os.path.exists(target_data_path) or not os.path.exists( inference_cases_json_path) or not os.path.exists(calibration_cases_json_path): - # cmds.append(f"ln -sf {env['MLC_DATASET_PATH']} {target_data_path}") + # cmds.append(f"ln -sf {env['MLC_DATASET_PATH']} + # {target_data_path}") cmds.append("make download_data BENCHMARKS='3d-unet'") model_path = os.path.join( @@ -175,7 +177,8 @@ def preprocess(i): if not os.path.exists(target_data_path_base_dir): cmds.append(f"mkdir -p {target_data_path_base_dir}") if not os.path.exists(target_data_path): - # cmds.append(f"ln -sf {env['MLC_DATASET_LIBRISPEECH_PATH']} {target_data_path}") + # cmds.append(f"ln -sf {env['MLC_DATASET_LIBRISPEECH_PATH']} + # {target_data_path}") cmds.append("make download_data BENCHMARKS='rnnt'") model_path = os.path.join( @@ -518,7 +521,7 @@ def preprocess(i): 'MLC_MLPERF_NVIDIA_HARNESS_DLA_INFERENCE_STREAMS') if dla_inference_streams: run_config += f" --dla_inference_streams={dla_inference_streams}" - + gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE') if gpu_batch_size: run_config += f" --gpu_batch_size={gpu_batch_size}" @@ -527,75 +530,75 @@ def preprocess(i): if dla_batch_size: run_config += f" --dla_batch_size={dla_batch_size}" - input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT') + input_format=env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT') if input_format: run_config += f" --input_format={input_format}" - performance_sample_count = env.get( + performance_sample_count=env.get( 'MLC_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT') if performance_sample_count: run_config += f" --performance_sample_count={performance_sample_count}" - devices = env.get('MLC_MLPERF_NVIDIA_HARNESS_DEVICES') + devices=env.get('MLC_MLPERF_NVIDIA_HARNESS_DEVICES') if devices: run_config += f" --devices={devices}" - audio_batch_size = env.get( + audio_batch_size=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BATCH_SIZE') if audio_batch_size: run_config += f" --audio_batch_size={audio_batch_size}" - disable_encoder_plugin = str( + disable_encoder_plugin=str( env.get('MLC_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN', '')) if disable_encoder_plugin and disable_encoder_plugin.lower() not in [ "no", "false", "0", ""]: run_config += " --disable_encoder_plugin" - disable_beta1_smallk = str( + disable_beta1_smallk=str( env.get('MLC_MLPERF_NVIDIA_HARNESS_DISABLE_BETA1_SMALLK', '')) if disable_beta1_smallk and disable_beta1_smallk.lower() in [ "yes", "true", "1"]: run_config += " --disable_beta1_smallk" - workspace_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE') + workspace_size=env.get('MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE') if workspace_size: run_config += f" --workspace_size={workspace_size}" if env.get('MLC_MLPERF_LOADGEN_LOGS_DIR'): - env['MLPERF_LOADGEN_LOGS_DIR'] = env['MLC_MLPERF_LOADGEN_LOGS_DIR'] + env['MLPERF_LOADGEN_LOGS_DIR']=env['MLC_MLPERF_LOADGEN_LOGS_DIR'] - log_dir = 
env.get('MLC_MLPERF_NVIDIA_HARNESS_LOG_DIR') + log_dir=env.get('MLC_MLPERF_NVIDIA_HARNESS_LOG_DIR') if log_dir: run_config += f" --log_dir={log_dir}" - use_graphs = str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS', '')) + use_graphs=str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS', '')) if use_graphs and use_graphs.lower() not in ["no", "false", "0", ""]: run_config += " --use_graphs" - use_deque_limit = str( + use_deque_limit=str( env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT')) if use_deque_limit and use_deque_limit.lower() not in [ "no", "false", "0"]: run_config += " --use_deque_limit" - deque_timeout_usec = env.get( + deque_timeout_usec=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC') if deque_timeout_usec: run_config += f" --deque_timeout_usec={deque_timeout_usec}" - use_cuda_thread_per_device = str( + use_cuda_thread_per_device=str( env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE', '')) if use_cuda_thread_per_device and use_cuda_thread_per_device.lower() not in [ "no", "false", "0", ""]: run_config += " --use_cuda_thread_per_device" - run_infer_on_copy_streams = str( + run_infer_on_copy_streams=str( env.get('MLC_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS', '')) if run_infer_on_copy_streams and not is_false( run_infer_on_copy_streams): run_config += " --run_infer_on_copy_streams" - start_from_device = str( + start_from_device=str( env.get( 'MLC_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE', '')) @@ -603,7 +606,7 @@ def preprocess(i): "no", "false", "0", ""]: run_config += " --start_from_device" - end_on_device = str( + end_on_device=str( env.get( 'MLC_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE', '')) @@ -611,36 +614,36 @@ def preprocess(i): "no", "false", "0", ""]: run_config += " --end_on_device" - max_dlas = env.get('MLC_MLPERF_NVIDIA_HARNESS_MAX_DLAS') + max_dlas=env.get('MLC_MLPERF_NVIDIA_HARNESS_MAX_DLAS') if max_dlas: run_config += f" --max_dlas={max_dlas}" - graphs_max_seqlen = env.get( + graphs_max_seqlen=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_GRAPHS_MAX_SEQLEN') if graphs_max_seqlen: run_config += f" --graphs_max_seqlen={graphs_max_seqlen}" - num_issue_query_threads = env.get( + num_issue_query_threads=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_NUM_ISSUE_QUERY_THREADS') if num_issue_query_threads: run_config += f" --num_issue_query_threads={num_issue_query_threads}" - soft_drop = env.get('MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP') + soft_drop=env.get('MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP') if soft_drop: run_config += f" --soft_drop={soft_drop}" - use_small_tile_gemm_plugin = str( + use_small_tile_gemm_plugin=str( env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN', '')) if use_small_tile_gemm_plugin and use_small_tile_gemm_plugin.lower() not in [ "no", "false", "0", ""]: run_config += f" --use_small_tile_gemm_plugin" - audio_buffer_num_lines = env.get( + audio_buffer_num_lines=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES') if audio_buffer_num_lines: run_config += f" --audio_buffer_num_lines={audio_buffer_num_lines}" - use_fp8 = str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_FP8', '')) + use_fp8=str(env.get('MLC_MLPERF_NVIDIA_HARNESS_USE_FP8', '')) if use_fp8 and not is_false(use_fp8): run_config += f" --use_fp8" @@ -648,30 +651,30 @@ def preprocess(i): run_config += f" --fp8_quant_model_path={fp8_model_path}" run_config += f" --tensor_parallelism={tmp_tp_size}" - enable_sort = env.get('MLC_MLPERF_NVIDIA_HARNESS_ENABLE_SORT') + enable_sort=env.get('MLC_MLPERF_NVIDIA_HARNESS_ENABLE_SORT') if enable_sort and not is_false(enable_sort): run_config += 
f" --enable_sort" - sdxl_server_batcher_time_limit = env.get( + sdxl_server_batcher_time_limit=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_ENABLE_SORT') if sdxl_server_batcher_time_limit: run_config += f" --sdxl_batcher_time_limit {sdxl_server_batcher_time_limit}" - num_sort_segments = env.get( + num_sort_segments=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS') if num_sort_segments: run_config += f" --num_sort_segments={num_sort_segments}" - embedding_weights_on_gpu_part = env.get( + embedding_weights_on_gpu_part=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART', '') if embedding_weights_on_gpu_part != '': run_config += f" --embedding_weights_on_gpu_part={embedding_weights_on_gpu_part}" - num_warmups = env.get('MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS', '') + num_warmups=env.get('MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS', '') if num_warmups != '': run_config += f" --num_warmups={num_warmups}" - skip_postprocess = str( + skip_postprocess=str( env.get( 'MLC_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS', '')) @@ -679,14 +682,14 @@ def preprocess(i): run_config += f" --skip_postprocess" if test_mode: - test_mode_string = " --test_mode={}".format(test_mode) + test_mode_string=" --test_mode={}".format(test_mode) else: - test_mode_string = "" + test_mode_string="" - extra_build_engine_options_string = env.get( + extra_build_engine_options_string=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_EXTRA_BUILD_ENGINE_OPTIONS', '') - extra_run_options_string = env.get( + extra_run_options_string=env.get( 'MLC_MLPERF_NVIDIA_HARNESS_EXTRA_RUN_OPTIONS', '') # will be ignored during build engine @@ -701,13 +704,13 @@ def preprocess(i): cmds.append(f"""make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'""") - run_cmd = " && ".join(cmds) - env['MLC_MLPERF_RUN_CMD'] = run_cmd - env['MLC_RUN_CMD'] = run_cmd - env['MLC_RUN_DIR'] = env['MLC_MLPERF_INFERENCE_NVIDIA_CODE_PATH'] + run_cmd=" && ".join(cmds) + env['MLC_MLPERF_RUN_CMD']=run_cmd + env['MLC_RUN_CMD']=run_cmd + env['MLC_RUN_DIR']=env['MLC_MLPERF_INFERENCE_NVIDIA_CODE_PATH'] if '+LD_LIBRARY_PATH' not in env: - env['+LD_LIBRARY_PATH'] = [] + env['+LD_LIBRARY_PATH']=[] if os.path.exists("/opt/hpcx/ucx/lib"): env['+LD_LIBRARY_PATH'].append("/opt/hpcx/ucx/lib") @@ -722,7 +725,7 @@ def preprocess(i): def postprocess(i): - env = i['env'] - state = i['state'] + env=i['env'] + state=i['state'] return {'return': 0} From f41b1522817f534fe960473e52affb5df7c33614 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 25 May 2025 12:35:59 +0530 Subject: [PATCH 18/18] Update meta.yaml --- script/app-mlperf-inference-nvidia/meta.yaml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 11f771b0f..ea1025b13 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -350,7 +350,7 @@ post_deps: # Variations to customize dependencies variations: - _pre5.0: {} + pre5.0: {} # MLPerf inference version v5.0: group: version @@ -585,16 +585,10 @@ variations: sdxl,v5.0: # nvidia-ammo is decommisioned and model-opt is being used which is built with TRTLLM deps: - - tags: get,generic-python-lib,_package.torch - version: "2.7.0" - tags: get,generic-python-lib,_package.torchrec - version: "1.1.0" + version: "0.6.0" - tags: 
get,generic-python-lib,_package.torchmetrics version: "1.0.3" - - tags: get,generic-python-lib,_package.torchvision - version: "0.22.0" - - tags: get,generic-python-lib,_package.torch-tensorrt - version: "2.7.0" - tags: get,generic-python-lib,_package.typeguard - tags: get,generic-python-lib,_package.onnx names:
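
Editor's note on the series (appended after the patches; not part of any commit): the patches first route per-model batch sizes through the `state` dict and serialize them for MLPerf inference v5.0, then revert that logic in favour of encoding the `model:batch` pairs directly in `batch_size."..."` variation names, and separately add automatic GPU-name detection. The sketch below is a minimal, self-contained illustration of those two behaviours as they appear in the diffs; the function names and example values are illustrative only and do not exist in the repository.

def format_gpu_batch_size(batch_size, inference_version):
    """Render the value passed to --gpu_batch_size for the Nvidia harness.

    For MLPerf inference v5.0 the harness expects per-model pairs such as
    "resnet50:64" or "clip1:2,clip2:2,unet:2,vae:1"; earlier versions take a
    plain integer (see patches 04/05 and the variation names in patch 15).
    """
    if inference_version == "5.0" and isinstance(batch_size, dict):
        # join {"clip1": 2, "vae": 1} into "clip1:2,vae:1"
        return ",".join(f"{key}:{value}" for key, value in batch_size.items())
    return str(batch_size)


def extract_gpu_name(cuda_device_prop_gpu_name):
    """Derive the short GPU name (e.g. 'l4') from the CUDA device property
    string (e.g. 'NVIDIA L4'), mirroring the logic added to
    script/app-mlperf-inference/customize.py in patches 06-08."""
    name = cuda_device_prop_gpu_name.lower()
    if "nvidia" in name:
        # last whitespace-separated token is taken as the model name
        return name.split()[-1].strip()
    return ""


# Illustrative usage (values are examples, not repository defaults):
#   format_gpu_batch_size({"clip1": 2, "clip2": 2, "unet": 2, "vae": 1}, "5.0")
#       -> "clip1:2,clip2:2,unet:2,vae:1"
#   format_gpu_batch_size(64, "4.1") -> "64"
#   extract_gpu_name("NVIDIA L4") -> "l4"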