Merge branch 'wangshangsam/fix-req-timeout' of github.com:CentML/mlperf-inference into wangshangsam/fix-req-timeout

wangshangsam · wangshangsam · commit 55a8cf10ce41 · 2025-12-16T13:22:07.000-05:00
diff --git a/multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/deploy.py b/multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/deploy.py
@@ -208,7 +208,9 @@ def _startup(self) -> None:
         """Start the local process."""
         cmd = self._build_command()
         logger.info("Starting local process with command: {}", cmd)
-        logger.info("Starting local process with environment variables: {}", os.environ)
+        logger.info(
+            "Starting local process with environment variables: {}",
+            os.environ)
 
         # Start the server
         process = subprocess.Popen(  # noqa: S603
@@ -251,7 +253,8 @@ def _shutdown(self) -> None:
         # Try graceful termination first
         self._process.terminate()
         try:
-            self._process.wait(timeout=self.endpoint.shutdown_timeout.total_seconds())
+            self._process.wait(
+                timeout=self.endpoint.shutdown_timeout.total_seconds())
             logger.info("Local process terminated gracefully")
         except subprocess.TimeoutExpired:
             logger.warning(
diff --git a/multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/evaluation.py b/multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/evaluation.py
@@ -62,7 +62,8 @@ def get_hierarchical_components(
     intersection_count = 0
 
     # Iterate through the paths simultaneously
-    for pred_cat, true_cat in zip(predicted_categories, true_categories, strict=False):
+    for pred_cat, true_cat in zip(
+            predicted_categories, true_categories, strict=False):
         if pred_cat == true_cat:
             intersection_count += 1
         else:
@@ -228,7 +229,8 @@ def _process_chunk(args: tuple[list[dict], int]) -> dict[str, Any]:
                     ),
                 ),
                 brand=_PRED_BRAND_PAD,
-                is_secondhand=local_rng.choice([True, False], size=1).tolist()[0],
+                is_secondhand=local_rng.choice(
+                    [True, False], size=1).tolist()[0],
             )
             error_messages.append(
                 (
@@ -251,14 +253,16 @@ def _process_chunk(args: tuple[list[dict], int]) -> dict[str, Any]:
         )
         # random category selection
         # Uniform distribution is the default
-        rand_cat = local_rng.choice(ground_truth_item["potential_product_categories"])
+        rand_cat = local_rng.choice(
+            ground_truth_item["potential_product_categories"])
         category_rand_pred_src.append(
             (rand_cat, ground_truth_item["ground_truth_category"]),
         )
         # random is_secondhand selection
         rand_is_secondhand = local_rng.choice([True, False])
         is_secondhand_rand_pred_src.append(
-            (rand_is_secondhand, ground_truth_item["ground_truth_is_secondhand"]),
+            (rand_is_secondhand,
+             ground_truth_item["ground_truth_is_secondhand"]),
         )
 
     return {
@@ -273,7 +277,8 @@ def _process_chunk(args: tuple[list[dict], int]) -> dict[str, Any]:
     }
 
 
-def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) -> None:
+def run_evaluation(random_seed: int, filename: FilePath,
+                   dataset: DatasetCLI) -> None:
     """Main function to run the evaluation."""
     master_rng = np.random.default_rng(seed=random_seed)
     with Path.open(filename) as f:
@@ -290,7 +295,7 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
     chunk_size = max(len(model_output) // cpu_count, 1)
     # Create chunks
     output_chunks = [
-        model_output[i : i + chunk_size]
+        model_output[i: i + chunk_size]
         for i in range(0, len(model_output), chunk_size)
     ]
 
@@ -326,7 +331,8 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
         category_dataset_pred_src.extend(chunk["category_dataset_pred_src"])
         category_rand_pred_src.extend(chunk["category_rand_pred_src"])
         is_secondhand_pred_src.extend(chunk["is_secondhand_pred_src"])
-        is_secondhand_rand_pred_src.extend(chunk["is_secondhand_rand_pred_src"])
+        is_secondhand_rand_pred_src.extend(
+            chunk["is_secondhand_rand_pred_src"])
         brand_pred_src.extend(chunk["brand_pred_src"])
         all_possible_brands.extend(chunk["all_possible_brands"])
 
@@ -339,7 +345,8 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
 
     rand_cat_f1_score = calculate_hierarchical_f1(category_rand_pred_src)
 
-    rand_is_seconhand_f1_score = calculate_secondhand_f1(is_secondhand_rand_pred_src)
+    rand_is_seconhand_f1_score = calculate_secondhand_f1(
+        is_secondhand_rand_pred_src)
 
     all_brands_list = list(set(all_possible_brands))
     random_brand_predictions = master_rng.choice(
@@ -354,7 +361,10 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
 
     with ProcessPoolExecutor() as executor:
         rand_brand_data = list(
-            executor.map(_process_chunk_rnd_brand, args_list, chunksize=chunk_size),
+            executor.map(
+                _process_chunk_rnd_brand,
+                args_list,
+                chunksize=chunk_size),
         )
 
     rand_brand_score = calculate_brand_f1_score(
diff --git a/multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/task.py b/multimodal/vl2l/src/mlperf_inference_multimodal_vl2l/task.py
@@ -67,7 +67,8 @@ def __init__(
         self.openai_api_client = AsyncOpenAI(
             base_url=endpoint.url,
             http_client=DefaultAioHttpClient(
-                timeout=httpx.Timeout(timeout=request_timeout_seconds, connect=5.0),
+                timeout=httpx.Timeout(
+                    timeout=request_timeout_seconds, connect=5.0),
             ),
             api_key=endpoint.api_key,
             timeout=request_timeout_seconds,
@@ -187,7 +188,9 @@ def estimated_num_performance_samples(self) -> int:
         """
         estimation_indices = random.sample(
             range(self.total_num_samples),
-            k=min(MAX_NUM_ESTIMATION_PERFORMANCE_SAMPLES, self.total_num_samples),
+            k=min(
+                MAX_NUM_ESTIMATION_PERFORMANCE_SAMPLES,
+                self.total_num_samples),
         )
         estimation_samples = [
             self.formulate_loaded_sample(
@@ -274,7 +277,8 @@ def _unload_samples_from_ram(query_sample_indices: list[int]) -> None:
             _unload_samples_from_ram,
         )
 
-    async def _query_endpoint_async_batch(self, query_sample: lg.QuerySample) -> None:
+    async def _query_endpoint_async_batch(
+            self, query_sample: lg.QuerySample) -> None:
         """Query the endpoint through the async OpenAI API client."""
         try:
             sample = self.loaded_samples[query_sample.index]
@@ -351,7 +355,8 @@ async def _query_endpoint_async_batch(self, query_sample: lg.QuerySample) -> Non
                 ],
             )
 
-    async def _query_endpoint_async_stream(self, query_sample: lg.QuerySample) -> None:
+    async def _query_endpoint_async_stream(
+            self, query_sample: lg.QuerySample) -> None:
         """Query the endpoint through the async OpenAI API client."""
         ttft_set = False
         try: