 PORT = int(os.environ.get("VLLM_PORT", "8000"))
 BASE_URL = f"http://{HOST}:{PORT}/v1"
 
+
 class SUT:
     def __init__(
         self,
@@ -64,7 +65,7 @@ def __init__(
6465 "temperature" : 0.0 ,
6566 "max_tokens" : 1024 ,
6667 }
67-
68+
6869 if scenario == "offline" :
6970 from vllm import SamplingParams
7071 from transformers import AutoProcessor
@@ -109,23 +110,23 @@ def process_queries(self):
         prompts = []
         for item in qitems:
             question = self.data_object.prompts[item.index]
-
-            placeholders = [{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64img}"}} for b64img in self.data_object.images[item.index]]
+
+            placeholders = [{"type": "image_url", "image_url": {
+                "url": f"data:image/png;base64,{b64img}"}} for b64img in self.data_object.images[item.index]]
             messages = [
                 {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": [*placeholders, {"type": "text", "text": question}]},
+                {"role": "user", "content": [
+                    *placeholders, {"type": "text", "text": question}]},
             ]
-
+
             prompt = self.processor.apply_chat_template(
                 messages, tokenize=False, add_generation_prompt=True
             )
             prompts.append({
                 "prompt": prompt,
                 "multi_modal_data": {"image": self.data_object.images[item.index]}
             })
-
-
-
+
         tik2 = time.time()
         outputs = self.model.generate(
             prompts=prompts, sampling_params=self.sampling_params
@@ -168,10 +169,10 @@ def load_model(self):
         from vllm import LLM
         log.info("Loading model...")
         self.model = LLM(
-                self.model_path,
-                dtype=self.dtype,
-                tensor_parallel_size=self.tensor_parallel_size,
-                )
+            self.model_path,
+            dtype=self.dtype,
+            tensor_parallel_size=self.tensor_parallel_size,
+        )
         log.info("Loaded model")
 
     def get_sut(self):
@@ -199,7 +200,6 @@ def issue_queries(self, query_samples):
     def flush_queries(self):
         pass
 
-
     def __del__(self):
         pass
 
@@ -231,11 +231,9 @@ def __init__(
             api_key="EMPTY"
         )
 
-
     def start(self):
         pass
 
-
     async def _issue_one(
         self,
         sample: Dict[str, Any],
@@ -244,7 +242,8 @@ async def _issue_one(
         log.info("CALLED _issue_one")
         """Send one streaming chat.completion request and record timings."""
 
-        contents = [{"type": "text", "text": self.data_object.prompts[sample.index]}]
+        contents = [
+            {"type": "text", "text": self.data_object.prompts[sample.index]}]
         for img_b64 in self.data_object.images[sample.index]:
             contents.append({
                 "type": "image_url",
@@ -274,32 +273,40 @@ async def _issue_one(
             text = getattr(delta, "content", None)
             if text:
                 if ttft_set is False:
-                    text_int32 = np.array([ord(c) for c in text], dtype=np.int32)
+                    text_int32 = np.array([ord(c)
+                                           for c in text], dtype=np.int32)
                     response_data = array.array("B", text_int32.tobytes())
                     bi = response_data.buffer_info()
-                    response = [lg.QuerySampleResponse(sample.id, bi[0], bi[1])]
+                    response = [
+                        lg.QuerySampleResponse(
+                            sample.id, bi[0], bi[1])]
                     lg.FirstTokenComplete(response)
                     ttft_set = True
                 out.append(text)
 
-        # when the stream ends, total latency
+        # when the stream ends, total latency
         final_tokens = "".join(out)
-        final_tokens_int32 = np.array([ord(c) for c in final_tokens], dtype=np.int32)
+        final_tokens_int32 = np.array(
+            [ord(c) for c in final_tokens], dtype=np.int32)
         n_tokens = len(final_tokens_int32)
         response_array = array.array("B", final_tokens_int32.tobytes())
         bi = response_array.buffer_info()
-        response = [lg.QuerySampleResponse(sample.id, bi[0], bi[1], n_tokens)]
+        response = [
+            lg.QuerySampleResponse(
+                sample.id,
+                bi[0],
+                bi[1],
+                n_tokens)]
         lg.QuerySamplesComplete(response)
 
-
     async def _issue_queries_async(self, query_samples):
         """Async internal version used by the sync wrapper."""
-        log.info(f"CALLED _issue_queries_async, num workers: {self.num_workers}")
+        log.info(
+            f"CALLED _issue_queries_async, num workers: {self.num_workers}")
         semaphore = asyncio.Semaphore(self.num_workers)
         tasks = [self._issue_one(s, semaphore) for s in query_samples]
         return await asyncio.gather(*tasks)
 
-
     def issue_queries(self, query_samples):
         try:
             loop = asyncio.get_running_loop()
@@ -314,4 +321,4 @@ def issue_queries(self, query_samples):
             asyncio.run(self._issue_queries_async(query_samples))
 
     def stop(self):
-        pass
+        pass