Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shorten text repr for DataTree #10139

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions xarray/core/datatree_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def __init__(
style=None,
childiter: type = list,
maxlevel: int | None = None,
maxchildren: int | None = None,
):
"""
Render tree starting at `node`.
Expand All @@ -88,6 +89,10 @@ def __init__(
Iterables that change the order of children cannot be used
(e.g., `reversed`).
maxlevel: Limit rendering to this depth.
maxchildren: Limit number of children to roughly this number. In practice,
for an arbitrarily large DataTree the number of children returned
will be about maxchildren * (maxchildren - 1) / 2. The last child is also
included.
:any:`RenderDataTree` is an iterator, returning a tuple with 3 items:
`pre`
tree prefix.
Expand Down Expand Up @@ -160,6 +165,14 @@ def __init__(
root
├── sub0
└── sub1

# `maxchildren` roughly limits the total number of children

>>> print(RenderDataTree(root, maxchildren=3).by_attr())
root
├── sub0
│ ├── sub0B
└── sub1
"""
if style is None:
style = ContStyle()
Expand All @@ -169,20 +182,36 @@ def __init__(
self.style = style
self.childiter = childiter
self.maxlevel = maxlevel
self.maxchildren = maxchildren

def __iter__(self) -> Iterator[Row]:
    """Start the row traversal at ``self.node`` with an empty ``continues`` tuple."""
    no_continues: tuple[bool, ...] = ()
    return self.__next(self.node, no_continues)

def __next(
self, node: DataTree, continues: tuple[bool, ...], level: int = 0
self,
node: DataTree,
continues: tuple[bool, ...],
level: int = 0,
nchildren: int = 0,
) -> Iterator[Row]:
yield RenderDataTree.__item(node, continues, self.style)
children = node.children.values()
level += 1
if children and (self.maxlevel is None or level < self.maxlevel):
children = self.childiter(children)
for child, is_last in _is_last(children):
yield from self.__next(child, continues + (not is_last,), level=level)
nchildren += 1
if (
self.maxchildren is None
or nchildren < self.maxchildren
or (not any(continues) and is_last)
):
yield from self.__next(
child,
continues + (not is_last,),
level=level,
nchildren=nchildren,
)

@staticmethod
def __item(
Expand Down
14 changes: 12 additions & 2 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,14 +1137,24 @@ def _datatree_node_repr(node: DataTree, show_inherited: bool) -> str:

def datatree_repr(dt: DataTree) -> str:
"""A printable representation of the structure of this entire tree."""
renderer = RenderDataTree(dt)
max_rows = OPTIONS["display_max_rows"]

renderer = RenderDataTree(dt, maxchildren=max_rows)

name_info = "" if dt.name is None else f" {dt.name!r}"
header = f"<xarray.DataTree{name_info}>"

lines = [header]
show_inherited = True
for pre, fill, node in renderer:

rendered_items = list(renderer)
for i, (pre, fill, node) in enumerate(rendered_items):
if len(rendered_items) > max_rows:
if i == max_rows:
lines.append("...")
if i >= max_rows and i != (len(rendered_items) - 1):
continue

node_repr = _datatree_node_repr(node, show_inherited=show_inherited)
show_inherited = False # only show inherited coords on the root

Expand Down
31 changes: 18 additions & 13 deletions xarray/core/formatting_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
inline_variable_array_repr,
short_data_repr,
)
from xarray.core.options import _get_boolean_with_default
from xarray.core.options import OPTIONS, _get_boolean_with_default

STATIC_FILES = (
("xarray.static.html", "icons-svg-inline.html"),
Expand Down Expand Up @@ -192,16 +192,27 @@ def collapsible_section(


def _mapping_section(
mapping, name, details_func, max_items_collapse, expand_option_name, enabled=True
mapping,
name,
details_func,
max_items_collapse,
expand_option_name,
enabled=True,
max_items_truncate: int | None = None,
) -> str:
n_items = len(mapping)
expanded = _get_boolean_with_default(
expand_option_name, n_items < max_items_collapse
)
collapsed = not expanded
truncated = max_items_truncate is not None and n_items > max_items_truncate
inline_details = (
f"Only first {max_items_truncate} will show in dropdown" if truncated else ""
)

return collapsible_section(
name,
inline_details=inline_details,
details=details_func(mapping),
n_items=n_items,
enabled=enabled,
Expand Down Expand Up @@ -349,19 +360,12 @@ def dataset_repr(ds) -> str:

def summarize_datatree_children(children: Mapping[str, DataTree]) -> str:
N_CHILDREN = len(children) - 1

# Get result from datatree_node_repr and wrap it
lines_callback = lambda n, c, end: _wrap_datatree_repr(
datatree_node_repr(n, c), end=end
)
MAX_CHILDREN = OPTIONS["display_max_rows"]

children_html = "".join(
(
lines_callback(n, c, end=False) # Long lines
if i < N_CHILDREN
else lines_callback(n, c, end=True)
) # Short lines
_wrap_datatree_repr(datatree_node_repr(n, c), end=i == N_CHILDREN)
for i, (n, c) in enumerate(children.items())
if i < MAX_CHILDREN
)

return "".join(
Expand All @@ -378,6 +382,7 @@ def summarize_datatree_children(children: Mapping[str, DataTree]) -> str:
name="Groups",
details_func=summarize_datatree_children,
max_items_collapse=1,
max_items_truncate=OPTIONS["display_max_rows"],
expand_option_name="display_expand_groups",
)

Expand Down Expand Up @@ -422,7 +427,7 @@ def datatree_node_repr(group_title: str, node: DataTree, show_inherited=False) -
return _obj_repr(ds, header_components, sections)


def _wrap_datatree_repr(r: str, end: bool = False) -> str:
def _wrap_datatree_repr(r: str, end: bool = False, skipped_some: bool = False) -> str:
"""
Wrap HTML representation with a tee to the left of it.

Expand Down
86 changes: 86 additions & 0 deletions xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,92 @@ def test_repr_two_children(self) -> None:
).strip()
assert result == expected

def test_repr_truncates_nodes(self) -> None:
# Checks that repr() of a DataTree with many nodes is shortened: the leading
# rows are printed, then a "..." marker, then the very last node of the tree.
# Build 10 "file" nodes, each with 5 "group" children (50 leaf groups).
number_of_files = 10
number_of_groups = 5
tree_dict = {}
for f in range(number_of_files):
for g in range(number_of_groups):
tree_dict[f"file_{f}/group_{g}"] = Dataset({"g": f * g})

tree = DataTree.from_dict(tree_dict)
result = repr(tree)
# With the options in effect by default, the expected output keeps 12 node
# rows (root, file_0 with 5 groups, file_1 with 4 groups) before the "...".
# NOTE(review): presumably this matches the default `display_max_rows` -- confirm.
expected = dedent(
"""
<xarray.DataTree>
Group: /
├── Group: /file_0
│ ├── Group: /file_0/group_0
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 0
│ ├── Group: /file_0/group_1
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 0
│ ├── Group: /file_0/group_2
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 0
│ ├── Group: /file_0/group_3
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 0
│ └── Group: /file_0/group_4
│ Dimensions: ()
│ Data variables:
│ g int64 8B 0
├── Group: /file_1
│ ├── Group: /file_1/group_0
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 0
│ ├── Group: /file_1/group_1
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 1
│ ├── Group: /file_1/group_2
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 2
│ ├── Group: /file_1/group_3
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 3
...
└── Group: /file_9/group_4
Dimensions: ()
Data variables:
g int64 8B 36
"""
).strip()
assert expected == result

# Lowering display_max_rows to 4 keeps only 4 node rows (root, file_0 and
# its first two groups) before the "..." marker and the final node.
with xr.set_options(display_max_rows=4):
result = repr(tree)
expected = dedent(
"""
<xarray.DataTree>
Group: /
├── Group: /file_0
│ ├── Group: /file_0/group_0
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 0
│ ├── Group: /file_0/group_1
│ │ Dimensions: ()
│ │ Data variables:
│ │ g int64 8B 0
...
└── Group: /file_9/group_4
Dimensions: ()
Data variables:
g int64 8B 36
"""
).strip()
assert expected == result

def test_repr_inherited_dims(self) -> None:
tree = DataTree.from_dict(
{
Expand Down
38 changes: 38 additions & 0 deletions xarray/tests/test_formatting_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,44 @@ def test_two_children(
)


class TestDataTreeTruncatesNodes:
    """Check that the HTML repr of a large DataTree truncates its child groups."""

    def test_many_nodes(self):
        """Children beyond ``display_max_rows`` are dropped and a notice is shown."""
        # 20 "file" nodes, each holding 25 "group" children (500 leaf groups).
        number_of_files = 20
        number_of_groups = 25
        tree_dict = {
            f"file_{f}/group_{g}": xr.Dataset({"g": f * g})
            for f in range(number_of_files)
            for g in range(number_of_groups)
        }

        tree = xr.DataTree.from_dict(tree_dict)
        with xr.set_options(display_style="html"):
            result = tree._repr_html_()

        # The notice must be searched for in the rendered HTML; asserting the
        # bare string literal is always true and checks nothing.
        assert "Only first 12 will show in dropdown" in result
        assert "file_0" in result
        assert "file_11" in result
        assert "file_12" not in result
        assert "file_19" not in result
        assert "group_0" in result
        assert "group_11" in result
        assert "group_12" not in result
        assert "group_24" not in result

        with xr.set_options(display_style="html", display_max_rows=4):
            result = tree._repr_html_()

        # Same check with an explicit, smaller row limit.
        assert "Only first 4 will show in dropdown" in result
        assert "file_0" in result
        assert "file_3" in result
        assert "file_4" not in result
        assert "file_19" not in result
        assert "group_0" in result
        assert "group_3" in result
        assert "group_4" not in result
        assert "group_24" not in result


class TestDataTreeInheritance:
def test_inherited_section_present(self) -> None:
dt = xr.DataTree.from_dict(
Expand Down
Loading