@@ -62,7 +62,8 @@ def get_hierarchical_components(
6262 intersection_count = 0
6363
6464 # Iterate through the paths simultaneously
65- for pred_cat , true_cat in zip (predicted_categories , true_categories , strict = False ):
65+ for pred_cat , true_cat in zip (
66+ predicted_categories , true_categories , strict = False ):
6667 if pred_cat == true_cat :
6768 intersection_count += 1
6869 else :
@@ -228,7 +229,8 @@ def _process_chunk(args: tuple[list[dict], int]) -> dict[str, Any]:
228229 ),
229230 ),
230231 brand = _PRED_BRAND_PAD ,
231- is_secondhand = local_rng .choice ([True , False ], size = 1 ).tolist ()[0 ],
232+ is_secondhand = local_rng .choice (
233+ [True , False ], size = 1 ).tolist ()[0 ],
232234 )
233235 error_messages .append (
234236 (
@@ -251,14 +253,16 @@ def _process_chunk(args: tuple[list[dict], int]) -> dict[str, Any]:
251253 )
252254 # random category selection
253255 # Uniform distribution is the default
254- rand_cat = local_rng .choice (ground_truth_item ["potential_product_categories" ])
256+ rand_cat = local_rng .choice (
257+ ground_truth_item ["potential_product_categories" ])
255258 category_rand_pred_src .append (
256259 (rand_cat , ground_truth_item ["ground_truth_category" ]),
257260 )
258261 # random is_secondhand selection
259262 rand_is_secondhand = local_rng .choice ([True , False ])
260263 is_secondhand_rand_pred_src .append (
261- (rand_is_secondhand , ground_truth_item ["ground_truth_is_secondhand" ]),
264+ (rand_is_secondhand ,
265+ ground_truth_item ["ground_truth_is_secondhand" ]),
262266 )
263267
264268 return {
@@ -273,7 +277,8 @@ def _process_chunk(args: tuple[list[dict], int]) -> dict[str, Any]:
273277 }
274278
275279
276- def run_evaluation (random_seed : int , filename : FilePath , dataset : DatasetCLI ) -> None :
280+ def run_evaluation (random_seed : int , filename : FilePath ,
281+ dataset : DatasetCLI ) -> None :
277282 """Main function to run the evaluation."""
278283 master_rng = np .random .default_rng (seed = random_seed )
279284 with Path .open (filename ) as f :
@@ -290,7 +295,7 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
290295 chunk_size = max (len (model_output ) // cpu_count , 1 )
291296 # Create chunks
292297 output_chunks = [
293- model_output [i : i + chunk_size ]
298+ model_output [i : i + chunk_size ]
294299 for i in range (0 , len (model_output ), chunk_size )
295300 ]
296301
@@ -326,7 +331,8 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
326331 category_dataset_pred_src .extend (chunk ["category_dataset_pred_src" ])
327332 category_rand_pred_src .extend (chunk ["category_rand_pred_src" ])
328333 is_secondhand_pred_src .extend (chunk ["is_secondhand_pred_src" ])
329- is_secondhand_rand_pred_src .extend (chunk ["is_secondhand_rand_pred_src" ])
334+ is_secondhand_rand_pred_src .extend (
335+ chunk ["is_secondhand_rand_pred_src" ])
330336 brand_pred_src .extend (chunk ["brand_pred_src" ])
331337 all_possible_brands .extend (chunk ["all_possible_brands" ])
332338
@@ -339,7 +345,8 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
339345
340346 rand_cat_f1_score = calculate_hierarchical_f1 (category_rand_pred_src )
341347
342- rand_is_seconhand_f1_score = calculate_secondhand_f1 (is_secondhand_rand_pred_src )
348+ rand_is_seconhand_f1_score = calculate_secondhand_f1 (
349+ is_secondhand_rand_pred_src )
343350
344351 all_brands_list = list (set (all_possible_brands ))
345352 random_brand_predictions = master_rng .choice (
@@ -354,7 +361,10 @@ def run_evaluation(random_seed: int, filename: FilePath, dataset: DatasetCLI) ->
354361
355362 with ProcessPoolExecutor () as executor :
356363 rand_brand_data = list (
357- executor .map (_process_chunk_rnd_brand , args_list , chunksize = chunk_size ),
364+ executor .map (
365+ _process_chunk_rnd_brand ,
366+ args_list ,
367+ chunksize = chunk_size ),
358368 )
359369
360370 rand_brand_score = calculate_brand_f1_score (
0 commit comments