Skip to content

Commit 969d406

Browse files
authored
feat: trim flanking whitespace when reading a metric (#217)
Fixes #216. Also strips any flanking whitespace when formatting a float into a string.
1 parent 11f46ed commit 969d406

File tree

2 files changed

+50
-3
lines changed

2 files changed

+50
-3
lines changed

fgpyo/util/metric.py

+9-3
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,9 @@ def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
210210
return {}
211211

212212
@classmethod
213-
def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
213+
def read(
214+
cls, path: Path, ignore_extra_fields: bool = True, strip_whitespace: bool = False
215+
) -> Iterator[Any]:
214216
"""Reads in zero or more metrics from the given path.
215217
216218
The metric file must contain a matching header.
@@ -221,6 +223,8 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
221223
Args:
222224
path: the path to the metrics file.
223225
ignore_extra_fields: True to ignore any extra columns, False to raise an exception.
226+
strip_whitespace: True to strip leading and trailing whitespace from each field,
227+
False to keep as-is.
224228
"""
225229
parsers = cls._parsers()
226230
with io.to_reader(path) as reader:
@@ -263,6 +267,8 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
263267
for lineno, line in enumerate(reader, 2):
264268
# parse the raw values
265269
values: List[str] = line.rstrip("\r\n").split("\t")
270+
if strip_whitespace:
271+
values = [v.strip() for v in values]
266272

267273
# raise an exception if there aren't the same number of values as the header
268274
if len(header) != len(values):
@@ -353,11 +359,11 @@ def format_value(cls, value: Any) -> str: # noqa: C901
353359
+ "}"
354360
)
355361
elif isinstance(value, float):
356-
return str(round(value, 5))
362+
return f"{round(value, 5)}"
357363
elif value is None:
358364
return ""
359365
else:
360-
return str(value)
366+
return f"{value}"
361367

362368
@classmethod
363369
def to_list(cls, value: str) -> List[Any]:

tests/fgpyo/util/test_metric.py

+41
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,11 @@ class PersonDefault(Metric["PersonDefault"]):
154154
name: str
155155
age: int = 0
156156

157+
@make_dataclass(use_attr=use_attr)
158+
class PersonAgeFloat(Metric["PersonAgeFloat"]):
159+
name: Optional[str]
160+
age: Optional[float]
161+
157162
@make_dataclass(use_attr=use_attr)
158163
class ListPerson(Metric["ListPerson"]):
159164
name: List[Optional[str]]
@@ -403,6 +408,42 @@ def test_metric_values(data_and_classes: DataBuilder) -> None:
403408
assert list(data_and_classes.Person(name="name", age=42).values()) == ["name", 42]
404409

405410

411+
@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
412+
def test_metric_round_floats(data_and_classes: DataBuilder) -> None:
413+
assert list(data_and_classes.Person(name="John Doe", age=42.123456).formatted_values()) == [
414+
"John Doe",
415+
"42.12346",
416+
]
417+
418+
419+
@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
420+
def test_metric_strips_trailing_whitespace(tmp_path: Path, data_and_classes: DataBuilder) -> None:
421+
test_tsv = tmp_path / "test.tsv"
422+
with test_tsv.open("w") as fout:
423+
fout.write("name\tage\n")
424+
fout.write(" John Doe \t42\n") # whitespace around name
425+
fout.write("Jane Doe\t 35 \n") # whitespace around age
426+
fout.write(" Someone Else \t 47 \n") # whitespace around both
427+
428+
persons = list(data_and_classes.Person.read(test_tsv))
429+
assert len(persons) == 3
430+
assert persons[0].name == " John Doe "
431+
assert persons[0].age == 42
432+
assert persons[1].name == "Jane Doe"
433+
assert persons[1].age == 35
434+
assert persons[2].name == " Someone Else "
435+
assert persons[2].age == 47
436+
437+
persons = list(data_and_classes.Person.read(test_tsv, strip_whitespace=True))
438+
assert len(persons) == 3
439+
assert persons[0].name == "John Doe"
440+
assert persons[0].age == 42
441+
assert persons[1].name == "Jane Doe"
442+
assert persons[1].age == 35
443+
assert persons[2].name == "Someone Else"
444+
assert persons[2].age == 47
445+
446+
406447
@pytest.mark.parametrize("data_and_classes", (attr_data_and_classes, dataclasses_data_and_classes))
407448
def test_metric_items(data_and_classes: DataBuilder) -> None:
408449
"""`metric.items()` should return a list of (key, value) tuples."""

0 commit comments

Comments
 (0)