Skip to content

Commit 16c395d

Browse files
committed
fix: check for all or none value fields
Signed-off-by: Henry Schreiner <henryfs@princeton.edu>
1 parent 4ec9735 commit 16c395d

2 files changed

Lines changed: 149 additions & 1 deletion

File tree

src/boost_histogram/serialization/__init__.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,44 @@ def __dir__() -> list[str]:
2323
return __all__
2424

2525

26+
def _storage_has_data_keys(storage_data: dict[str, Any], storage_type: str) -> bool:
27+
"""
28+
Check if storage data dict has the required keys for the given storage type.
29+
30+
Returns True if all required data keys are present, False if it's structure-only.
31+
Raises ValueError if required keys are missing (malformed/partial data).
32+
"""
33+
match storage_type:
34+
case "int" | "double":
35+
required_keys = {"values"}
36+
case "weighted":
37+
required_keys = {"values", "variances"}
38+
case "mean":
39+
required_keys = {"counts", "values", "variances"}
40+
case "weighted_mean":
41+
required_keys = {
42+
"sum_of_weights",
43+
"sum_of_weights_squared",
44+
"values",
45+
"variances",
46+
}
47+
case _:
48+
msg = f"Unknown storage type: {storage_type}"
49+
raise ValueError(msg)
50+
51+
present_keys = required_keys & set(storage_data.keys())
52+
53+
if not present_keys:
54+
return False
55+
56+
if present_keys != required_keys:
57+
missing = required_keys - present_keys
58+
msg = f"{storage_type.capitalize()} storage missing required keys: {missing}"
59+
raise ValueError(msg)
60+
61+
return True
62+
63+
2664
def to_uhi(
2765
h: histogram.Histogram[Any], /, *, keep_storage: bool = True
2866
) -> dict[str, Any]:
@@ -61,7 +99,11 @@ def from_uhi(data: dict[str, Any], /) -> histogram.Histogram[Any]:
6199
h.__dict__ = data.get("metadata", {})
62100

63101
# Check if storage has data (if not, it's a structure-only histogram)
64-
if "values" not in storage_data:
102+
# Validate required keys per storage type before deciding to skip data loading
103+
storage_type = storage_data["type"]
104+
has_data_keys = _storage_has_data_keys(storage_data, storage_type)
105+
106+
if not has_data_keys:
65107
return h
66108

67109
raw_data = _data_from_dict(storage_data)

tests/test_serialization_uhi.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,3 +502,109 @@ def test_round_trip_3d_histogram_json_constructor() -> None:
502502

503503
assert h.ndim == h2.ndim
504504
assert h == h2
505+
506+
507+
def test_from_uhi_malformed_weight_storage() -> None:
    """A "weighted" storage that has values but no variances raises ValueError."""
    regular_axis = {
        "type": "regular",
        "lower": 0,
        "upper": 10,
        "bins": 5,
        "underflow": True,
        "overflow": True,
        "circular": False,
    }
    # Only one of the two weighted data keys is supplied.
    partial_storage = {"type": "weighted", "values": [1, 2, 3, 4, 5]}
    payload = {
        "uhi_schema": 1,
        "axes": [regular_axis],
        "storage": partial_storage,
        "metadata": {},
    }

    with pytest.raises(ValueError, match="Weighted storage missing required keys"):
        from_uhi(payload)
531+
532+
533+
def test_from_uhi_malformed_mean_storage() -> None:
    """A "mean" storage that has counts and values but no variances raises ValueError."""
    regular_axis = {
        "type": "regular",
        "lower": 0,
        "upper": 10,
        "bins": 5,
        "underflow": True,
        "overflow": True,
        "circular": False,
    }
    # "variances" is deliberately left out.
    partial_storage = {
        "type": "mean",
        "counts": [1, 2, 3, 4, 5],
        "values": [1, 2, 3, 4, 5],
    }
    payload = {
        "uhi_schema": 1,
        "axes": [regular_axis],
        "storage": partial_storage,
        "metadata": {},
    }

    with pytest.raises(ValueError, match="Mean storage missing required keys"):
        from_uhi(payload)
558+
559+
560+
def test_from_uhi_malformed_weighted_mean_storage() -> None:
    """A "weighted_mean" storage with only some of its data keys raises ValueError."""
    regular_axis = {
        "type": "regular",
        "lower": 0,
        "upper": 10,
        "bins": 5,
        "underflow": True,
        "overflow": True,
        "circular": False,
    }
    # "sum_of_weights_squared" and "variances" are deliberately left out.
    partial_storage = {
        "type": "weighted_mean",
        "sum_of_weights": [1, 2, 3, 4, 5],
        "values": [1, 2, 3, 4, 5],
    }
    payload = {
        "uhi_schema": 1,
        "axes": [regular_axis],
        "storage": partial_storage,
        "metadata": {},
    }

    with pytest.raises(ValueError, match="Weighted_mean storage missing required keys"):
        from_uhi(payload)
585+
586+
587+
def test_from_uhi_structure_only_no_error() -> None:
    """A storage dict with a type but no data keys loads as a zero-filled histogram."""
    regular_axis = {
        "type": "regular",
        "lower": 0,
        "upper": 10,
        "bins": 5,
        "underflow": True,
        "overflow": True,
        "circular": False,
    }
    payload = {
        "uhi_schema": 1,
        "axes": [regular_axis],
        # Structure only: the storage names its type and nothing else.
        "storage": {"type": "double"},
        "metadata": {},
    }

    loaded = from_uhi(payload)

    assert loaded.storage_type is bh.storage.Double
    assert np.asarray(loaded) == pytest.approx(np.zeros(5))

0 commit comments

Comments
 (0)