Skip to content

Commit a2682af

Browse files
fix: round floats to 4 decimal places in CSV output (#1080)
Previously, raw IEEE 754 values such as 0.4650000000000001 were written to the CSV unchanged. In addition, values with exactly 3 decimal digits (e.g. 0.575) were misread by LibreOffice in European locales as integers (575), because the dot is treated as a thousands separator there. Formatting all floats as fixed 4-decimal strings (e.g. 0.4650, 0.5750) eliminates both the precision noise and the thousands-separator ambiguity.

Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent 1d76e91 commit a2682af

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

scripts/condense_mass_eval_jsonl.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,13 @@ def _collect_backend_names(input_path: Path) -> list[str]:
4444
return sorted(backend_names)
4545

4646

47+
def _fmt_float(value: Any) -> Any:
48+
"""Round float to 4 decimal places to avoid IEEE 754 noise and LibreOffice misreads."""
49+
if isinstance(value, float):
50+
return f"{round(value, 4):.4f}"
51+
return value
52+
53+
4754
def _build_base_row(record: dict[str, Any]) -> dict[str, Any]:
4855
"""Build CSV row with condensed top-level fields."""
4956
predatory_list_hits = record.get("predatory_list_hits", [])
@@ -80,8 +87,8 @@ def _build_base_row(record: dict[str, Any]) -> dict[str, Any]:
8087
"issn": record.get("issn"),
8188
"eissn": record.get("eissn"),
8289
"final_assessment": record.get("final_assessment"),
83-
"confidence": record.get("confidence"),
84-
"overall_score": record.get("overall_score"),
90+
"confidence": _fmt_float(record.get("confidence")),
91+
"overall_score": _fmt_float(record.get("overall_score")),
8592
"is_suspicious": record.get("is_suspicious"),
8693
"has_conflict": record.get("has_conflict"),
8794
"predatory_votes": record.get("predatory_votes"),
@@ -182,7 +189,7 @@ def condense_jsonl_to_csv(
182189
if backend_columns == 2:
183190
confidence_value = backend_result.get("confidence")
184191
row[confidence_key] = (
185-
"" if confidence_value is None else confidence_value
192+
"" if confidence_value is None else _fmt_float(confidence_value)
186193
)
187194

188195
writer.writerow(row)

0 commit comments

Comments
 (0)