Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/coffea/nanoevents/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ def from_root(
persistent_cache,
schemaclass,
metadata,
tree,
mode=mode,
)

Expand Down Expand Up @@ -703,6 +704,7 @@ def _from_mapping(
persistent_cache,
schemaclass,
metadata,
tree,
mode,
):
"""Quickly build NanoEvents from a root file
Expand All @@ -726,6 +728,8 @@ def _from_mapping(
A schema class deriving from `BaseSchema` and implementing the desired view of the file
metadata : dict
Arbitrary metadata to add to the `base.NanoEvents` object
tree : TTree
Pass TTree directly to schema
mode:
Nanoevents will use "eager", "virtual", or "dask" as a backend.

Expand All @@ -736,7 +740,10 @@ def _from_mapping(
base_form["parameters"]["metadata"] = metadata
if not callable(schemaclass):
raise ValueError("Invalid schemaclass type")
schema = schemaclass(base_form)
if schemaclass.__name__ == "NanoAODSchema":
schema = schemaclass(base_form, tree)
else:
schema = schemaclass(base_form)
if not isinstance(schema, BaseSchema):
raise RuntimeError("Invalid schema type")
return cls(
Expand Down
175 changes: 175 additions & 0 deletions src/coffea/nanoevents/schemas/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import awkward
import numpy

from coffea.nanoevents import transforms
from coffea.nanoevents.util import concat, quote

Expand Down Expand Up @@ -96,6 +99,178 @@ def nest_jagged_forms(parent, child, counts_name, name):
parent["content"]["contents"].append(inner)


# move to transforms?
def local2globalindex(index, counts):
"""
Convert a local index to a global index

This is the same as local2global(index, counts2offsets(counts))
where local2global and counts2offsets are as in coffea.nanoevents.transforms

TO DO: dask_awkward.map_partitions implementation
"""
if awkward.backend(index) == "typetracer":
return index
offsets = numpy.empty(len(counts) + 1, dtype=numpy.int64)
offsets[0] = 0
numpy.cumsum(counts, out=offsets[1:])
index = index.mask[index >= 0] + offsets[:-1]
index = index.mask[index < offsets[1:]] # guard against out of bounds
# workaround ValueError: can not (unsafe) zip ListOffsetArrays with non-NumpyArray contents
# index.type is N * var * int32?
index = awkward.fill_none(index, -1)
return index


# move to transforms?
def nestedindex_form(indices):
"""
Concatenate a list of indices along a new axis
Outputs a jagged array with same outer shape as index arrays
Add examples to documentation?

"""
if not all(
isinstance(index.layout, awkward.contents.listoffsetarray.ListOffsetArray)
for index in indices
):
raise RuntimeError
# return awkward.concatenate([idx[:, None] for idx in indexers], axis=1)

# store offsets to later reapply them to the arrays
offsets_stored = indices[0].layout.offsets
# also store parameters
parameters = {}
for i, idx in enumerate(indices):
if "__doc__" in parameters:
parameters["__doc__"] += " and "
parameters["__doc__"] += awkward.parameters(idx)["__doc__"]
else:
parameters["__doc__"] = "nested from "
parameters["__doc__"] += awkward.parameters(idx)["__doc__"]
# flatten the index
indices[i] = awkward.Array(idx.layout.content)

n = len(indices)
out = numpy.empty(n * len(indices[0]), dtype="int64")
for i, idx in enumerate(indices):
# index arrays should all be same shape flat arrays
out[i::n] = idx
offsets = numpy.arange(0, len(out) + 1, n, dtype=numpy.int64)
out = awkward.Array(
awkward.contents.ListOffsetArray(
awkward.index.Index64(offsets),
awkward.contents.NumpyArray(out),
)
)
# reapply the offsets
out = awkward.Array(
awkward.contents.ListOffsetArray(
offsets_stored,
out.layout,
parameters=parameters,
)
)
return out


# move to transforms?
def counts2nestedindex_form(local_counts, target_offsets):
"""Turn jagged local counts into doubly-jagged global index into a target
Outputs a jagged array with same axis-0 shape as counts axis-1
"""
if not isinstance(
local_counts.layout, awkward.contents.listoffsetarray.ListOffsetArray
):
raise RuntimeError
if not isinstance(target_offsets.layout, awkward.contents.numpyarray.NumpyArray):
raise RuntimeError

# count offsets the same way as with counts2offsets in coffea.nanoevents.transforms
offsets = numpy.empty(len(target_offsets) + 1, dtype=numpy.int64)
offsets[0] = 0
numpy.cumsum(target_offsets, out=offsets[1:])

# store offsets to later reapply them to the arrays
offsets_stored = local_counts.layout.offsets

out = awkward.unflatten(
numpy.arange(offsets[-1], dtype=numpy.int64),
awkward.flatten(local_counts),
)
# reapply the offsets
out = awkward.Array(
awkward.contents.ListOffsetArray(
offsets_stored,
out.layout,
)
)
return out


# move to transforms?
def counts2offsets(counts):
# Cumulative sum of counts
offsets = numpy.empty(len(counts) + 1, dtype=numpy.int64)
offsets[0] = 0
numpy.cumsum(counts, out=offsets[1:])
return offsets


# move to transforms?
def check_equal_lengths(
contents: list[awkward.contents.Content],
) -> int | awkward._nplikes.shape.UnknownLength:
length = contents[0].length
for layout in contents:
if layout.length != length:
raise ValueError("all arrays must have the same length")
return length


def zip_depth2(content, offsets, with_name, behavior, parameters=None):
# if with_name is not None:
# if parameters is None:
# parameters = {}
# else:
# parameters = dict(parameters)
# parameters["__record__"] = with_name

fields = list(content.keys())
contents = [
# take contents 2 layers deep
v.layout.content
for v in content.values()
]
length = check_equal_lengths(contents)
out = awkward.contents.ListOffsetArray(
offsets=offsets,
content=awkward.contents.RecordArray(contents, fields, length=length),
)
out = awkward.Array(out, behavior=behavior, with_name=with_name)
return out


def zip_depth1(content, with_name, behavior, parameters=None):
# if with_name is not None:
# if parameters is None:
# parameters = {}
# else:
# parameters = dict(parameters)
# parameters["__record__"] = with_name

fields = list(content.keys())
contents = [
# take contents 1 layer deep
v.layout
for v in content.values()
]
length = check_equal_lengths(contents)
out = awkward.contents.RecordArray(contents, fields, length)
out = awkward.Array(out, behavior=behavior, with_name=with_name)
return out


class BaseSchema:
"""Base schema builder

Expand Down
Loading
Loading