diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index f2512b31b..c3c30c14d 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -735,7 +735,7 @@ ACC_PATTERN = { "acc": r"^(?:\{\"accuracy|accuracy)[\": ]*=?\s*([\d\.]+).*", "AUC": r"^AUC=([\d\.]+).*", - "mAP": r".*(?:mAP=|Total:)\s*([\d.]+)", + "mAP": r".*(?:mAP=|'Total':)\s*([\d.]+)", "bleu": r"^BLEU\:\s*([\d\.]+).*", "F1": r"^{[\"\']exact_match[\"\']\:\s*[\d\.]+,\s*[\"\']f1[\"\']\:\s*([\d\.]+)}", "WER": r"Word Error Rate\:.*, accuracy=([0-9\.]+)%", @@ -746,8 +746,8 @@ "ROUGELSUM": r".*'rougeLsum':\s([\d.]+).*", "GEN_LEN": r".*'gen_len':\s([\d.]+).*", "TOKENS_PER_SAMPLE": r".*'tokens_per_sample':\s([\d.]+).*", - "CLIP_SCORE": r".*'CLIP_SCORE':\s.([\d.]+).*", - "FID_SCORE": r".*'FID_SCORE':\s.([\d.]+).*", + "CLIP_SCORE": r".*'CLIP_SCORE':\s+'?([\d.]+).*", + "FID_SCORE": r".*'FID_SCORE':\s+'?([\d.]+).*", "gsm8k_accuracy": r".*'gsm8k':\s([\d.]+).*", "mbxp_accuracy": r".*'mbxp':\s([\d.]+).*", "exact_match": r".*'exact_match':\s([\d.]+).*" @@ -2407,7 +2407,7 @@ def log_result( .replace('"', "") .replace("{", "") .replace("}", "") - ) + ).strip() if mlperf_model in REQUIRED_ACC_BENCHMARK: if ( config.version @@ -2425,7 +2425,7 @@ def log_result( missing_files, ) accuracy_is_valid = False - if not accuracy_is_valid and not is_closed_or_network: + if not accuracy_is_valid and acc and not is_closed_or_network: if debug: log.warning( "%s, accuracy not valid but taken for open",