Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions src/boost_histogram/serialization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
from .. import histogram, version
from ._axis import _axis_from_dict, _axis_to_dict
from ._common import serialize_metadata
from ._storage import _data_from_dict, _storage_from_dict, _storage_to_dict
from ._storage import (
_data_from_dict,
_storage_from_dict,
_storage_to_dict,
_storage_type_to_str,
)

__all__ = ["from_uhi", "remove_writer_info", "to_uhi"]

Expand All @@ -18,34 +23,46 @@ def __dir__() -> list[str]:
return __all__


def to_uhi(
    h: histogram.Histogram[Any], /, *, keep_storage: bool = True
) -> dict[str, Any]:
    """Convert a Histogram to a dictionary.

    Args:
        h: The histogram to serialize.
        keep_storage: When true (the default), the ``"storage"`` entry is
            produced by ``_storage_to_dict`` from the flow-inclusive view of
            ``h``. When false, the ``"storage"`` entry holds only the
            ``"type"`` string, so the result describes the histogram's
            structure without its bin contents.

    Returns:
        A dictionary with the keys ``"uhi_schema"``, ``"writer_info"``,
        ``"axes"``, ``"storage"`` and ``"metadata"``, in that order.
    """
    storage_type = h.storage_type()

    # Build everything in one literal so "storage" is computed exactly once
    # and the key order of the output stays stable.
    data: dict[str, Any] = {
        "uhi_schema": 1,
        "writer_info": {"boost-histogram": {"version": version.version}},
        "axes": [_axis_to_dict(axis) for axis in h.axes],
        "storage": (
            _storage_to_dict(storage_type, h.view(flow=True))
            if keep_storage
            else {"type": _storage_type_to_str(storage_type)}
        ),
        "metadata": serialize_metadata(h.__dict__),
    }
    return data


def from_uhi(data: dict[str, Any], /) -> histogram.Histogram[Any]:
"""Convert a dictionary to an Histogram."""
# One time use
axis = (_axis_from_dict(ax) for ax in data["axes"])

storage_data = data["storage"]
storage = _storage_from_dict(storage_data)
h = histogram.Histogram[Any](*axis, storage=storage)

# Check if storage has data (if not, it's a structure-only histogram)
if "values" not in storage_data:
h.__dict__ = data.get("metadata", {})
return h
Comment thread
henryiii marked this conversation as resolved.

h = histogram.Histogram(
*(_axis_from_dict(ax) for ax in data["axes"]),
storage=_storage_from_dict(data["storage"]),
)
raw_data = _data_from_dict(data["storage"])
raw_data = _data_from_dict(storage_data)
view_shape = h.view(flow=True).shape
# Reshape raw_data to the expected shape. This is necessary because JSON
# serialization can collapse empty dimensions (e.g. (5, 0, 0) -> (5, 0)),
# so we must restore the correct number of dimensions.
storage_type = data["storage"]["type"]
storage_type = storage_data["type"]
if storage_type in {"weighted", "mean", "weighted_mean"}:
raw_data = np.asarray(raw_data)
raw_data = raw_data.reshape(view_shape + raw_data.shape[-1:])
Expand Down
42 changes: 32 additions & 10 deletions src/boost_histogram/serialization/_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,50 @@

from .. import storage

__all__ = ["_data_from_dict", "_storage_from_dict", "_storage_to_dict"]
__all__ = [
"_data_from_dict",
"_storage_from_dict",
"_storage_to_dict",
"_storage_type_to_str",
]


def __dir__() -> list[str]:
    """Return the names listed in ``__all__`` as this module's ``dir()`` result."""
    return __all__


def _storage_type_to_str(_storage: storage.Storage, /) -> str:
"""Return the canonical storage type string for a storage object."""
match _storage:
case storage.Int64():
return "int"
case storage.Double():
return "double"
case storage.AtomicInt64():
return "int"
case storage.Unlimited():
return "double"
case storage.Weight():
return "weighted"
case storage.Mean():
return "mean"
case storage.WeightedMean():
return "weighted_mean"
case _:
raise TypeError(f"Unsupported storage type: {_storage}")


@functools.singledispatch
def _storage_to_dict(_storage: storage.Storage, /, data: Any) -> dict[str, Any]:  # noqa: ARG001
    """Convert a storage to a dictionary.

    This is the single-dispatch fallback: it runs only when no handler has
    been registered for the type of ``_storage``.

    Args:
        _storage: The storage object whose type selects the handler.
        data: The storage contents to serialize; unused by this fallback.

    Raises:
        TypeError: Always, because reaching the fallback means the storage
            type is unsupported.
    """
    msg = f"Unsupported storage type: {_storage}"
    raise TypeError(msg)


@_storage_to_dict.register(storage.Int64)
def _(_storage: storage.Int64, /, data: Any) -> dict[str, Any]:
    """Serialize an ``Int64`` storage as its type string plus ``data``."""
    # Take the type string from the shared helper, like the sibling handlers,
    # so the mapping lives in one place ("int" for Int64).
    return {"type": _storage_type_to_str(_storage), "values": data}


@_storage_to_dict.register(storage.Double)
def _(_storage: storage.Double, /, data: Any) -> dict[str, Any]:
    """Serialize a ``Double`` storage as its type string plus ``data``."""
    return {"type": _storage_type_to_str(_storage), "values": data}


@_storage_to_dict.register(storage.AtomicInt64)
Expand All @@ -48,7 +70,7 @@ def _(
@_storage_to_dict.register(storage.Weight)
def _(_storage: storage.Weight, /, data: Any) -> dict[str, Any]:
    """Serialize a ``Weight`` storage.

    The result holds the type string together with the ``value`` and
    ``variance`` fields of ``data``.
    """
    return {
        "type": _storage_type_to_str(_storage),
        "values": data.value,
        "variances": data.variance,
    }
Expand All @@ -57,7 +79,7 @@ def _(_storage: storage.Weight, /, data: Any) -> dict[str, Any]:
@_storage_to_dict.register(storage.Mean)
def _(_storage: storage.Mean, /, data: Any) -> dict[str, Any]:
return {
"type": "mean",
"type": _storage_type_to_str(_storage),
"counts": data.count,
"values": data.value,
"variances": data.variance,
Expand All @@ -67,7 +89,7 @@ def _(_storage: storage.Mean, /, data: Any) -> dict[str, Any]:
@_storage_to_dict.register(storage.WeightedMean)
def _(_storage: storage.WeightedMean, /, data: Any) -> dict[str, Any]:
return {
"type": "weighted_mean",
"type": _storage_type_to_str(_storage),
"sum_of_weights": data.sum_of_weights,
"sum_of_weights_squared": data.sum_of_weights_squared,
"values": data.value,
Expand Down
35 changes: 32 additions & 3 deletions tests/test_serialization_uhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,38 @@ def test_round_trip_native() -> None:

assert h == h2

assert isinstance(h2.axes[0], bh.axis.Integer)
assert h2.storage_type is bh.storage.AtomicInt64
assert h2.axes[0].traits.growth == h.axes[0].traits.growth

def test_to_uhi_keep_storage_option() -> None:
    """``keep_storage`` decides whether the storage entry carries values."""
    hist = bh.Histogram(bh.axis.Regular(3, 0, 1), storage=bh.storage.Double())

    full = to_uhi(hist)
    structure_only = to_uhi(hist, keep_storage=False)

    # Both variants must carry a "storage" entry.
    assert "storage" in full
    assert "storage" in structure_only

    # The default output includes the "values" key.
    full_storage = full["storage"]
    assert "values" in full_storage

    # With keep_storage=False only the type information remains.
    bare_storage = structure_only["storage"]
    assert "values" not in bare_storage
    assert bare_storage["type"] == "double"

Comment thread
henryiii marked this conversation as resolved.
Outdated

def test_from_uhi_missing_storage_data() -> None:
    """A storage entry without data still loads, yielding zeroed contents."""
    original = bh.Histogram(
        bh.axis.Regular(4, 0.0, 1.0),
        storage=bh.storage.Double(),
    )

    # Serialize the structure only: storage type, no bin contents.
    payload = to_uhi(original, keep_storage=False)
    restored = from_uhi(payload)

    # The axis and the storage type must survive the round trip.
    expected_axis = np.array(original.axes[0])
    assert pytest.approx(expected_axis) == np.array(restored.axes[0])
    assert restored.storage_type is bh.storage.Double

    # With no data supplied, every bin must be zero.
    contents = np.asarray(restored)
    assert contents == pytest.approx(np.zeros_like(contents))


@pytest.mark.parametrize(
Expand Down
Loading