Skip to content

Commit 02ced58

Browse files
committed
update
1 parent fb95811 commit 02ced58

1 file changed

Lines changed: 66 additions & 2 deletions

File tree

autotest/utils/compare_results.py

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
# Matches 'summary_' followed by an 8-digit group, '_' and a 6-digit group
# (case-insensitive); group 1 captures the '<8 digits>_<6 digits>' stamp.
# NOTE(review): presumably a YYYYMMDD_HHMMSS timestamp -- confirm with callers.
_SUMMARY_TS_RE = re.compile(r'summary_(\d{8}_\d{6})', re.IGNORECASE)
# NOTE(review): looks like the extensions of summary files that get compared;
# the code using this tuple is not visible here -- confirm.
_SUMMARY_COMPARE_EXTS = ('csv', 'md')
# Substrings that _path_is_lcb_results searches for in the lower-cased,
# hyphen-to-underscore normalized relative path. Matching is by substring, so
# 'livecodebench_pro' is already covered by 'livecodebench'.
_LCB_PATH_MARKERS = ('lcb', 'livecodebench', 'livecodebench_pro')
# Matches the repr of a traceback object, e.g. '<traceback object at 0x7f12ab>'.
_TRACEBACK_OBJ_RE = re.compile(r'<traceback object at 0x[0-9a-fA-F]+>')
# Matches any '0x'-prefixed hexadecimal literal (upper- or lower-case digits).
_MEM_ADDR_RE = re.compile(r'0x[0-9a-fA-F]+')
1215

1316

1417
def _load_json(path: str) -> Any:
@@ -42,6 +45,55 @@ def _path_uses_do_sample(rel_path: str) -> bool:
4245
return 'do_sample' in normalized
4346

4447

48+
def _path_is_lcb_results(rel_path: str, compare_type: str) -> bool:
    """Tell whether ``rel_path`` should get LCB-specific result normalization.

    Only the ``'results'`` compare type qualifies. For that type the path is
    lower-cased, hyphens are turned into underscores, and the outcome is True
    as soon as any entry of ``_LCB_PATH_MARKERS`` occurs in it as a substring.

    Args:
        rel_path: Path of the file relative to the compared folder roots.
        compare_type: Kind of comparison being run.

    Returns:
        True if the compare type is ``'results'`` and a marker is found in
        the normalized path; False otherwise.
    """
    if compare_type == 'results':
        # Normalize case and separators so 'Live-Code-Bench' style spellings
        # are matched the same way as the underscore form.
        haystack = rel_path.lower().replace('-', '_')
        for marker in _LCB_PATH_MARKERS:
            if marker in haystack:
                return True
    return False
53+
54+
55+
def _normalize_trace_text(text: str) -> str:
    """Replace run-dependent traceback reprs and hex addresses in ``text``.

    Two substitutions are applied in order: every match of
    ``_TRACEBACK_OBJ_RE`` becomes ``'<traceback>'``, then every remaining
    match of ``_MEM_ADDR_RE`` becomes ``'0x0'``.

    Args:
        text: Text that may embed traceback object reprs or hex addresses.

    Returns:
        The text with both kinds of match replaced by fixed placeholders.
    """
    # Order matters: traceback reprs must be collapsed before the generic
    # address pattern rewrites the address inside them.
    substitutions = (
        (_TRACEBACK_OBJ_RE, '<traceback>'),
        (_MEM_ADDR_RE, '0x0'),
    )
    for pattern, placeholder in substitutions:
        text = pattern.sub(placeholder, text)
    return text
58+
59+
60+
def _normalize_final_metadata(value: Any) -> Any:
    """Strip volatile traceback addresses from an LCB ``final_metadata`` value.

    The value is handled by type:

    * ``str``: if the whitespace-stripped text starts with ``'{'`` and parses
      as JSON, the decoded object is normalized recursively and returned in
      place of the string. Otherwise the original string is returned with
      traceback reprs and hex addresses replaced via
      ``_normalize_trace_text``.
    * ``dict``: a new dict is built. A string under ``'error'`` is scrubbed as
      plain text (no JSON decoding), ``'error_code'`` and ``'error_message'``
      are copied untouched, and every other value is normalized recursively.
    * ``list``: each element is normalized recursively into a new list.
    * anything else is returned unchanged.

    Args:
        value: A ``final_metadata`` payload or any nested part of one.

    Returns:
        The normalized counterpart of ``value``; inputs are not mutated.
    """
    if isinstance(value, str):
        candidate = value.strip()
        if candidate.startswith('{'):
            try:
                decoded = json.loads(candidate)
            except json.JSONDecodeError:
                # Only looked like JSON; fall through and treat it as text.
                pass
            else:
                return _normalize_final_metadata(decoded)
        return _normalize_trace_text(value)

    if isinstance(value, dict):
        cleaned = {}
        for field, payload in value.items():
            if field in ('error_code', 'error_message'):
                # Copied through without any rewriting.
                cleaned[field] = payload
            elif field == 'error' and isinstance(payload, str):
                cleaned[field] = _normalize_trace_text(payload)
            else:
                cleaned[field] = _normalize_final_metadata(payload)
        return cleaned

    if isinstance(value, list):
        return [_normalize_final_metadata(entry) for entry in value]

    return value
83+
84+
85+
def _normalize_lcb_results(obj: Any) -> Any:
    """Return a copy of an LCB results tree with ``final_metadata`` scrubbed.

    Dicts and lists are walked recursively and rebuilt. Whenever a dict key
    equals ``'final_metadata'``, its value is handed to
    ``_normalize_final_metadata`` instead of being walked further by this
    function. Values that are neither dict nor list are returned as-is.

    Args:
        obj: Parsed JSON content (or any nested part of it).

    Returns:
        A structure of the same shape with ``final_metadata`` values
        normalized; the input is not mutated.
    """
    if isinstance(obj, list):
        return [_normalize_lcb_results(element) for element in obj]
    if not isinstance(obj, dict):
        return obj

    rebuilt = {}
    for name, child in obj.items():
        if name == 'final_metadata':
            rebuilt[name] = _normalize_final_metadata(child)
        else:
            rebuilt[name] = _normalize_lcb_results(child)
    return rebuilt
95+
96+
4597
def _sort_keys_for_report(keys):
4698
"""Sort keys: numeric strings by int value, else lexically."""
4799

@@ -218,6 +270,7 @@ def _json_pair_compare_reason(
218270
path2: str,
219271
rel_path: str,
220272
json_diff_max_lines: int,
273+
compare_type: str = '',
221274
) -> Optional[str]:
222275
"""None if pair matches; else multi-line reason (with header). Loads each file once.""" # noqa: F401, E501
223276
try:
@@ -228,6 +281,10 @@ def _json_pair_compare_reason(
228281
except OSError as e:
229282
return f'Could not read JSON file: {e}'
230283

284+
if _path_is_lcb_results(rel_path, compare_type):
285+
left = _normalize_lcb_results(left)
286+
right = _normalize_lcb_results(right)
287+
231288
label1, label2 = os.path.basename(path1), os.path.basename(path2)
232289
do_sample = _path_uses_do_sample(rel_path)
233290

@@ -391,6 +448,7 @@ def compare_results(
391448
results_ignore_list=results_ignore_list,
392449
raise_on_diff=raise_on_diff,
393450
json_diff_max_lines=json_diff_max_lines,
451+
compare_type=compare_type,
394452
)
395453

396454

@@ -400,6 +458,7 @@ def compare_folders(
400458
results_ignore_list: Optional[list] = None,
401459
raise_on_diff: bool = True,
402460
json_diff_max_lines: int = 10,
461+
compare_type: str = '',
403462
) -> Optional[List[Tuple[str, str]]]:
404463
"""
405464
Walk both trees; same rel_path must match (JSON per module rules, else binary). # noqa: F401, E501
@@ -431,8 +490,13 @@ def compare_folders(
431490
continue
432491

433492
if _is_json_file(file):
434-
reason = _json_pair_compare_reason(path1, path2, rel_path,
435-
json_diff_max_lines)
493+
reason = _json_pair_compare_reason(
494+
path1,
495+
path2,
496+
rel_path,
497+
json_diff_max_lines,
498+
compare_type=compare_type,
499+
)
436500
if reason is not None:
437501
diff_files.append((rel_path, reason))
438502
elif not filecmp.cmp(path1, path2, shallow=False):

0 commit comments

Comments
 (0)