
Commit f840da4

Merge pull request #144 from mlcommons/dev
Stable rev-20240729
2 parents a7ce736 + 03b2964 commit f840da4

File tree

13 files changed: +60 -30 lines changed

COPYRIGHT.txt

Lines changed: 3 additions & 1 deletion
@@ -1,3 +1,5 @@
 Copyright (c) 2021-2024 MLCommons
 
-The cTuning foundation donated this project to MLCommons in 2021 to benefit everyone.
+The cTuning foundation and OctoML donated this project to MLCommons to benefit everyone.
+
+Copyright (c) 2014-2021 cTuning foundation

README.md

Lines changed: 2 additions & 0 deletions
@@ -22,6 +22,8 @@ and web services adaptable to continuously changing models, data sets, software
 We develop and test [CM scripts](script) as a community effort to support the following projects:
 * [CM for MLPerf](https://docs.mlcommons.org/inference): modularize and automate MLPerf benchmarks
   (maintained by [MLCommons](https://mlcommons.org) and originally developed by [cKnowledge.org](https://cKnowledge.org), [OctoML](https://octoml.ai) and [cTuning.org](https://cTuning.org))
+* [Modular C++ harness for MLPerf loadgen](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-cpp)
+* [Modular Python harness for MLPerf loadgen](https://github.com/mlcommons/cm4mlops/tree/main/script/app-loadgen-generic-python)
 * [CM for research and education](https://cTuning.org/ae): provide a common interface to automate and reproduce results from research papers
   and MLPerf benchmarks (maintained by [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org))
 * [CM for ABTF](https://github.com/mlcommons/cm4abtf): provide a unified CM interface to run automotive benchmarks

script/app-mlperf-inference-mlcommons-python/_cm.yaml

Lines changed: 3 additions & 1 deletion
@@ -910,6 +910,8 @@ variations:
       version: 1.10.1
 
   llama2-70b_:
+    env:
+      CM_MLPERF_MODEL_SKIP_BATCHING: false
     deps:
     - tags: get,generic-python-lib,_package.transformers
       names:
@@ -951,7 +953,7 @@ variations:
 
   llama2-70b_,cuda:
     default_env:
-      CM_MLPERF_LOADGEN_BATCH_SIZE: 8
+      CM_MLPERF_LOADGEN_MAX_BATCHSIZE: 8
 
   llama2-70b-99.9:
     group: models

script/app-mlperf-inference-mlcommons-python/customize.py

Lines changed: 2 additions & 1 deletion
@@ -54,7 +54,7 @@ def preprocess(i):
     else:
         env['CM_NUM_THREADS'] = env.get('CM_HOST_CPU_TOTAL_CORES', '1')
 
-    if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and not env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False):
+    if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE','') != '' and str(env.get('CM_MLPERF_MODEL_SKIP_BATCHING', False)).lower() not in [ "true", "1", "yes" ]:
         env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'])
 
     if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE','') != '':
@@ -318,6 +318,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_options
             cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}"
 
         cmd = cmd.replace("--count", "--total-sample-count")
+        cmd = cmd.replace("--max-batchsize", "--batch-size")
 
     elif "mixtral-8x7b" in env['CM_MODEL']:
         env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "mixtral-8x7b")
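
The stricter check above guards against string-valued flags: if CM_MLPERF_MODEL_SKIP_BATCHING reaches the script as the string "false" (for example, after the YAML env above is flattened into the CM environment), the old `not env.get(...)` test would evaluate it as truthy and wrongly skip the batching option. A minimal, self-contained sketch of the string-aware pattern; the sample env dict is illustrative, not from the repository:

    # Sketch of the truthy-string check used above; sample values are illustrative.
    def is_true(value) -> bool:
        """Interpret "true", "1" or "yes" (any case) as True; anything else as False."""
        return str(value).lower() in ["true", "1", "yes"]

    env = {"CM_MLPERF_MODEL_SKIP_BATCHING": "false"}   # env values arrive as strings

    # Naive check: the non-empty string "false" is truthy, so batching would be skipped.
    assert bool(env.get("CM_MLPERF_MODEL_SKIP_BATCHING", False))

    # String-aware check: "false" correctly means "do not skip batching".
    assert not is_true(env.get("CM_MLPERF_MODEL_SKIP_BATCHING", False))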

script/app-mlperf-inference-nvidia/_cm.yaml

Lines changed: 3 additions & 0 deletions
@@ -262,6 +262,9 @@ deps:
       CM_MLPERF_NVIDIA_HARNESS_RUN_MODE:
       - run_harness
 
+- tags: get,generic-python-lib,_package.pycuda
+  version: "2022.2.2"
+
 - tags: get,generic-python-lib,_package.nvmitten
   update_tags_from_env_with_prefix:
     _path.:

script/app-mlperf-inference/customize.py

Lines changed: 13 additions & 9 deletions
@@ -33,6 +33,9 @@ def preprocess(i):
     run_state = i['run_script_input']['run_state']
     state['mlperf-inference-implementation']['script_id'] = run_state['script_id']+":"+",".join(run_state['script_variation_tags'])
 
+    if env.get('CM_VLLM_SERVER_MODEL_NAME', '') != '' and env.get('CM_ML_MODEL_FULL_NAME', '') == '':
+        env['CM_ML_MODEL_FULL_NAME'] = env['CM_VLLM_SERVER_MODEL_NAME'].replace("/", "_")
+
     return {'return':0}
 
 def postprocess(i):
@@ -288,20 +291,20 @@ def postprocess(i):
     cmd = ""
     xcmd = ""
 
-    readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n"
+    readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/cm4mlops).\n\n"
 
-    readme_init+= "*Check [CM MLPerf docs](https://mlcommons.github.io/inference) for more details.*\n\n"
+    readme_init+= "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n"
 
     readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(),
                       platform.processor(), sys.version, cm.__version__)
 
     x = repo_name
     if repo_hash!='': x+=' --checkout='+str(repo_hash)
 
-    readme_body += "## CM Run Command\n\nSee [CM installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md).\n\n"+ \
+    readme_body += "## CM Run Command\n\nSee [CM installation guide](https://docs.mlcommons.org/inference/install/).\n\n"+ \
         "```bash\npip install -U cmind\n\ncm rm cache -f\n\ncm pull repo {}\n\n{}\n```".format(x, xcmd)
 
-    readme_body += "\n*Note that if you want to use the [latest automation recipes](https://access.cknowledge.org/playground/?action=scripts) for MLPerf (CM scripts),\n"+ \
+    readme_body += "\n*Note that if you want to use the [latest automation recipes](https://docs.mlcommons.org/inference) for MLPerf (CM scripts),\n"+ \
         " you should simply reload {} without checkout and clean CM cache as follows:*\n\n".format(repo_name) + \
         "```bash\ncm rm repo {}\ncm pull repo {}\ncm rm cache -f\n\n```".format(repo_name, repo_name)
@@ -360,7 +363,11 @@ def postprocess(i):
             OUTPUT_DIR = os.path.dirname(COMPLIANCE_DIR)
 
             SCRIPT_PATH = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "compliance", "nvidia", test, "run_verification.py")
-            cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " " + SCRIPT_PATH + " -r " + RESULT_DIR + " -c " + COMPLIANCE_DIR + " -o "+ OUTPUT_DIR
+            if test == "TEST06":
+                cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR} --scenario {scenario} --dtype int32"
+            else:
+                cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -r {RESULT_DIR} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR}"
+
             print(cmd)
             os.system(cmd)
 
@@ -412,12 +419,9 @@ def postprocess(i):
                 r = automation.run_native_script({'run_script_input':run_script_input, 'env':env, 'script_name':'verify_accuracy'})
                 if r['return']>0: return r
                 import submission_checker as checker
-                is_valid = checker.check_compliance_perf_dir(COMPLIANCE_DIR)
+                is_valid = checker.check_compliance_perf_dir(COMPLIANCE_DIR) if test != "TEST06" else True
                 state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model][scenario][test] = "passed" if is_valid else "failed"
 
-            else:
-                print(test)
-
 
     if state.get('mlperf-inference-implementation') and state['mlperf-inference-implementation'].get('version_info'):
         with open(os.path.join(output_dir, "cm-version-info.json"), "w") as f:
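
The TEST06 special case appears twice in this commit (here and in generate-mlperf-inference-user-conf below). Judging from the flags added, TEST06 verification runs against the compliance output alone, with an explicit scenario and int32 dtype, rather than against a reference performance run. A hedged, standalone sketch of that branching; the paths and values below are placeholders, not repository values:

    # Illustrative helper mirroring the branch above; all values are placeholders.
    def build_verification_cmd(test, python_bin, script_path,
                               result_dir, compliance_dir, output_dir, scenario):
        if test == "TEST06":
            # TEST06 takes no reference result dir, but needs scenario and dtype.
            return (f"{python_bin} {script_path} -c {compliance_dir} "
                    f"-o {output_dir} --scenario {scenario} --dtype int32")
        return (f"{python_bin} {script_path} -r {result_dir} "
                f"-c {compliance_dir} -o {output_dir}")

    print(build_verification_cmd("TEST06", "python3", "run_verification.py",
                                 "results", "compliance", "output", "Offline"))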

script/build-mlperf-inference-server-nvidia/_cm.yaml

Lines changed: 1 addition & 0 deletions
@@ -111,6 +111,7 @@ deps:
 
   # Detect pycuda
   - tags: get,generic-python-lib,_pycuda
+    version: "2022.2.2"
    skip_if_env:
      CM_RUN_STATE_DOCKER:
      - 'yes'

script/generate-mlperf-inference-user-conf/customize.py

Lines changed: 5 additions & 1 deletion
@@ -403,7 +403,11 @@ def run_files_exist(mode, OUTPUT_DIR, run_files, env):
         test = env['CM_MLPERF_LOADGEN_COMPLIANCE_TEST']
 
         SCRIPT_PATH = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "compliance", "nvidia", test, "run_verification.py")
-        cmd = env['CM_PYTHON_BIN'] + " " + SCRIPT_PATH + " -r " + RESULT_DIR + " -c " + COMPLIANCE_DIR + " -o "+ OUTPUT_DIR
+        if test == "TEST06":
+            cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR} --scenario {scenario} --dtype int32"
+        else:
+            cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} {SCRIPT_PATH} -r {RESULT_DIR} -c {COMPLIANCE_DIR} -o {OUTPUT_DIR}"
+
         print(cmd)
         os.system(cmd)
script/get-mlperf-inference-utils/mlperf_utils.py

Lines changed: 17 additions & 9 deletions
@@ -70,16 +70,22 @@ def get_accuracy_metric(config, model, path):
     acc_upper_limit = config.get_accuracy_upper_limit(model)
     patterns = []
     acc_targets = []
-    acc_limits = []
-    up_patterns = []
+    acc_limits = [None] * (len(target)//2)
+    up_patterns = [None] * (len(target)//2)
     acc_types = []
 
     if acc_upper_limit is not None:
         acc_limit_check = True
-        for i in range(0, len(acc_upper_limit), 2):
-            acc_type, acc_target = acc_upper_limit[i:i+2]
-            acc_limits.append(acc_target)
-            up_patterns.append(checker.ACC_PATTERN[acc_type])
+
+        for ii in range(0, len(target), 2):
+            acc_type1, tmp = target[ii:ii+2]
+            for i in range(0, len(acc_upper_limit), 2):
+                acc_type, acc_target = acc_upper_limit[i:i+2]
+                if acc_type != acc_type1:
+                    continue
+                acc_limits[ii//2] = acc_target
+                up_patterns[ii//2] = checker.ACC_PATTERN[acc_type]
+
 
     for i in range(0, len(target), 2):
         acc_type, acc_target = target[i:i+2]
@@ -109,6 +115,8 @@ def get_accuracy_metric(config, model, path):
             acc = None
             if acc_upper_limit is not None:
                 for i, (pattern, acc_limit) in enumerate(zip(up_patterns, acc_limits)):
+                    if not pattern:
+                        continue
                     m = re.match(pattern, line)
                     if m:
                         acc = m.group(1)
@@ -168,13 +176,13 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res,
         result['power'] = power_result
         result['power_efficiency'] = power_efficiency_result
 
-    compliance_list = [ "TEST01", "TEST05", "TEST04" ]
+    compliance_list = [ "TEST01", "TEST05", "TEST04", "TEST06" ]
     if division == "closed":
         for test in compliance_list:
             test_path = os.path.join(result_path, test)
             if os.path.exists(test_path): #We dont consider missing test folders now - submission checker will do that
                 #test_pass = checker.check_compliance_dir(test_path, mlperf_model, scenario, config, "closed", system_json, sub_res)
-                test_pass = checker.check_compliance_perf_dir(test_path)
+                test_pass = checker.check_compliance_perf_dir(test_path) if test != "TEST06" else True
                 if test_pass and test in [ "TEST01", "TEST06" ]:
                     #test_pass = checker.check_compliance_acc_dir(test_path, mlperf_model, config)
                     pass # accuracy truncation script is done after submission generation. We assume here that it'll pass
@@ -197,7 +205,7 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res,
         for i, acc in enumerate(acc_results):
             accuracy_results.append(str(round(float(acc_results[acc]), 5)))
             accuracy_result_string += f"`{acc}`: `{round(float(acc_results[acc]), 5)}`"
-            if not acc_limits:
+            if not acc_limits or not acc_limits[i]:
                 accuracy_result_string += f", Required accuracy for closed division `>= {round(acc_targets[i], 5)}`"
             else:
                 accuracy_result_string += f", Required accuracy for closed division `>= {round(acc_targets[i], 5)}` and `<= {round(acc_limits[i], 5)}`"
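
The rewritten alignment exists because `target` and `acc_upper_limit` are flat `[metric1, value1, metric2, value2, ...]` lists that need not cover the same metrics in the same order: each upper limit must land in the slot of the matching accuracy type, and slots without a limit stay `None` so the later `if not pattern: continue` and `not acc_limits[i]` guards can skip them. A self-contained illustration of that pairing, with made-up metric names and values:

    # Illustrative only: align upper limits to target slots by accuracy type.
    target = ["ROUGE1", 44.43, "ROUGE2", 22.04, "TOKENS_PER_SAMPLE", 294.45]  # hypothetical
    acc_upper_limit = ["TOKENS_PER_SAMPLE", 323.89]  # limit defined for one metric only

    acc_limits = [None] * (len(target) // 2)
    for ii in range(0, len(target), 2):
        acc_type1 = target[ii]
        for i in range(0, len(acc_upper_limit), 2):
            acc_type, acc_target = acc_upper_limit[i:i + 2]
            if acc_type == acc_type1:
                acc_limits[ii // 2] = acc_target

    print(acc_limits)  # [None, None, 323.89]: unmatched slots stay None and are skipped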

script/get-nvidia-mitten/_cm.json

Lines changed: 2 additions & 1 deletion
@@ -17,7 +17,8 @@
       "tags": "get,python3"
     },
     {
-      "tags": "get,generic-python-lib,_pycuda"
+      "tags": "get,generic-python-lib,_pycuda",
+      "version": "2022.2.2"
     },
     {
       "tags": "get,git,_repo.https://github.com/NVIDIA/mitten",
