3434 vqa_evaluation_case_sensitive ,
3535)
3636
37- # Add the following functions to your existing utils.py file
38- OCRBench_v2_score = {
39- "text_recognition_en" : [],
40- "text_detection_en" : [],
41- "text_spotting_en" : [],
42- "relationship_extraction_en" : [],
43- "element_parsing_en" : [],
44- "mathematical_calculation_en" : [],
45- "visual_text_understanding_en" : [],
46- "knowledge_reasoning_en" : [],
47- "text_recognition_cn" : [],
48- "relationship_extraction_cn" : [],
49- "element_parsing_cn" : [],
50- "visual_text_understanding_cn" : [],
51- "knowledge_reasoning_cn" : [],
52- }
37+
38+ def _make_score_buckets ():
39+ return {
40+ "text_recognition_en" : [],
41+ "text_detection_en" : [],
42+ "text_spotting_en" : [],
43+ "relationship_extraction_en" : [],
44+ "element_parsing_en" : [],
45+ "mathematical_calculation_en" : [],
46+ "visual_text_understanding_en" : [],
47+ "knowledge_reasoning_en" : [],
48+ "text_recognition_cn" : [],
49+ "relationship_extraction_cn" : [],
50+ "element_parsing_cn" : [],
51+ "visual_text_understanding_cn" : [],
52+ "knowledge_reasoning_cn" : [],
53+ }
5354
5455
5556teds = TEDS (n_jobs = 32 )
@@ -253,7 +254,7 @@ def ocrbench_v2_process_results(doc, results):
253254 else :
254255 pred_chart_html = dict_to_html (pred_chart_dict )
255256 if isinstance (answer , str ):
256- answer = convert_str_to_multi_dict (pred )
257+ answer = convert_str_to_multi_dict (answer )
257258 gt_chart_html = dict_to_html (answer )
258259 score = teds .evaluate (pred_chart_html , gt_chart_html )
259260 else :
@@ -332,7 +333,7 @@ def ocrbench_v2_process_results(doc, results):
332333 score = (get_value_or_zero (ocr_metric ["bleu" ]) + get_value_or_zero (ocr_metric ["meteor" ]) + get_value_or_zero (ocr_metric ["f_measure" ]) + (1 - get_value_or_zero (ocr_metric ["edit_dist" ]))) / 4
333334 elif data_type == "full-page OCR en" :
334335 if not pred :
335- score == 0
336+ score = 0
336337 else :
337338 ocr_metric = cal_per_metrics (pred , gt_ans [0 ])
338339 score = (get_value_or_zero (ocr_metric ["bleu" ]) + get_value_or_zero (ocr_metric ["meteor" ]) + get_value_or_zero (ocr_metric ["f_measure" ]) + (1 - get_value_or_zero (ocr_metric ["edit_dist" ]))) / 4
@@ -372,12 +373,13 @@ def ocrbench_v2_process_results(doc, results):
372373 }
373374
374375
375- def calculate_average_score (categories ):
376- return sum (sum (OCRBench_v2_score [cat ]) / len (OCRBench_v2_score [cat ]) if len (OCRBench_v2_score [cat ]) > 0 else 0 for cat in categories ) / len (categories )
376+ def calculate_average_score (categories , score_buckets ):
377+ return sum (sum (score_buckets [cat ]) / len (score_buckets [cat ]) if len (score_buckets [cat ]) > 0 else 0 for cat in categories ) / len (categories )
377378
378379
379380def ocrbench_v2_aggregate_accuracy (results , args ):
380381 question_type_scores = {}
382+ score_buckets = _make_score_buckets ()
381383
382384 for result in results :
383385 if "ignore" in result .keys () and result ["ignore" ] == "True" :
@@ -391,43 +393,43 @@ def ocrbench_v2_aggregate_accuracy(results, args):
391393 question_type_scores [question_type ].append (score )
392394
393395 if question_type in ["text recognition en" , "fine-grained text recognition en" , "full-page OCR en" ]:
394- OCRBench_v2_score ["text_recognition_en" ].append (score )
396+ score_buckets ["text_recognition_en" ].append (score )
395397
396398 elif question_type in ["text grounding en" , "VQA with position en" ]:
397- OCRBench_v2_score ["text_detection_en" ].append (score )
399+ score_buckets ["text_detection_en" ].append (score )
398400
399401 elif question_type == "text spotting en" :
400- OCRBench_v2_score ["text_spotting_en" ].append (score )
402+ score_buckets ["text_spotting_en" ].append (score )
401403
402404 elif question_type in ["key information extraction en" , "key information mapping en" ]:
403- OCRBench_v2_score ["relationship_extraction_en" ].append (score )
405+ score_buckets ["relationship_extraction_en" ].append (score )
404406
405407 elif question_type in ["document parsing en" , "chart parsing en" , "table parsing en" , "formula recognition en" ]:
406- OCRBench_v2_score ["element_parsing_en" ].append (score )
408+ score_buckets ["element_parsing_en" ].append (score )
407409
408410 elif question_type in ["math QA en" , "text counting en" ]:
409- OCRBench_v2_score ["mathematical_calculation_en" ].append (score )
411+ score_buckets ["mathematical_calculation_en" ].append (score )
410412
411413 elif question_type in ["document classification en" , "cognition VQA en" , "diagram QA en" ]:
412- OCRBench_v2_score ["visual_text_understanding_en" ].append (score )
414+ score_buckets ["visual_text_understanding_en" ].append (score )
413415
414416 elif question_type in ["reasoning VQA en" , "science QA en" , "APP agent en" , "ASCII art classification en" ]:
415- OCRBench_v2_score ["knowledge_reasoning_en" ].append (score )
417+ score_buckets ["knowledge_reasoning_en" ].append (score )
416418
417419 elif question_type == "full-page OCR cn" :
418- OCRBench_v2_score ["text_recognition_cn" ].append (score )
420+ score_buckets ["text_recognition_cn" ].append (score )
419421
420422 elif question_type in ["key information extraction cn" , "handwritten answer extraction cn" ]:
421- OCRBench_v2_score ["relationship_extraction_cn" ].append (score )
423+ score_buckets ["relationship_extraction_cn" ].append (score )
422424
423425 elif question_type in ["document parsing cn" , "table parsing cn" , "formula recognition cn" ]:
424- OCRBench_v2_score ["element_parsing_cn" ].append (score )
426+ score_buckets ["element_parsing_cn" ].append (score )
425427
426428 elif question_type == "cognition VQA cn" :
427- OCRBench_v2_score ["visual_text_understanding_cn" ].append (score )
429+ score_buckets ["visual_text_understanding_cn" ].append (score )
428430
429431 elif question_type in ["reasoning VQA cn" , "text translation cn" ]:
430- OCRBench_v2_score ["knowledge_reasoning_cn" ].append (score )
432+ score_buckets ["knowledge_reasoning_cn" ].append (score )
431433
432434 else :
433435 print ("No such task!" )
@@ -437,8 +439,8 @@ def ocrbench_v2_aggregate_accuracy(results, args):
437439
438440 chinese_tasks = ["text_recognition_cn" , "relationship_extraction_cn" , "element_parsing_cn" , "visual_text_understanding_cn" , "knowledge_reasoning_cn" ]
439441
440- OCRBench_v2_English_subset_score = calculate_average_score (english_tasks )
441- OCRBench_v2_Chinese_subset_score = calculate_average_score (chinese_tasks )
442+ OCRBench_v2_English_subset_score = calculate_average_score (english_tasks , score_buckets )
443+ OCRBench_v2_Chinese_subset_score = calculate_average_score (chinese_tasks , score_buckets )
442444
443445 Final_score = (OCRBench_v2_English_subset_score + OCRBench_v2_Chinese_subset_score ) / 2
444446 file_name = generate_submission_file ("ocrbench_v2_results.txt" , args , subpath = "results" )
@@ -450,14 +452,14 @@ def ocrbench_v2_aggregate_accuracy(results, args):
450452 print (f"{ q_type } (sample number: { len (scores )} ): { avg_score :.2f} " , file = f )
451453 print ("######################### English Subsets ######################" , file = f )
452454 for task in english_tasks :
453- num_samples = len (OCRBench_v2_score [task ])
454- avg_score = sum (OCRBench_v2_score [task ]) / num_samples if num_samples > 0 else 0
455+ num_samples = len (score_buckets [task ])
456+ avg_score = sum (score_buckets [task ]) / num_samples if num_samples > 0 else 0
455457 print (f"{ task .replace ('_' , ' ' ).title ()} (Total { num_samples } ): { avg_score :.2f} " , file = f )
456458 print (f"Overall English Score: { OCRBench_v2_English_subset_score :.2f} " , file = f )
457459 print ("######################### Chinese Subsets ######################" , file = f )
458460 for task in chinese_tasks :
459- num_samples = len (OCRBench_v2_score [task ])
460- avg_score = sum (OCRBench_v2_score [task ]) / num_samples if num_samples > 0 else 0
461+ num_samples = len (score_buckets [task ])
462+ avg_score = sum (score_buckets [task ]) / num_samples if num_samples > 0 else 0
461463 print (f"{ task .replace ('_' , ' ' ).title ()} (Total { num_samples } ): { avg_score :.2f} " , file = f )
462464 print (f"Overall Chinese Score: { OCRBench_v2_Chinese_subset_score :.2f} " , file = f )
463465 print ("######################### Final Score ##########################" , file = f )
0 commit comments