scikit-hep · maxymnaumchyk · Apr 15, 2025 · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/src/coffea/nanoevents/factory.py b/src/coffea/nanoevents/factory.py
@@ -436,6 +436,7 @@ def from_root(
             persistent_cache,
             schemaclass,
             metadata,
+            tree,
             mode=mode,
         )
 
@@ -703,6 +704,7 @@ def _from_mapping(
         persistent_cache,
         schemaclass,
         metadata,
+        tree,
         mode,
     ):
         """Quickly build NanoEvents from a root file
@@ -726,6 +728,8 @@ def _from_mapping(
                 A schema class deriving from `BaseSchema` and implementing the desired view of the file
             metadata : dict
                 Arbitrary metadata to add to the `base.NanoEvents` object
+            tree : TTree
+                Pass TTree directly to schema
             mode:
                 Nanoevents will use "eager", "virtual", or "dask" as a backend.
 
@@ -736,7 +740,10 @@ def _from_mapping(
             base_form["parameters"]["metadata"] = metadata
         if not callable(schemaclass):
             raise ValueError("Invalid schemaclass type")
-        schema = schemaclass(base_form)
+        if schemaclass.__name__ == "NanoAODSchema":
+            schema = schemaclass(base_form, tree)
+        else:
+            schema = schemaclass(base_form)
         if not isinstance(schema, BaseSchema):
             raise RuntimeError("Invalid schema type")
         return cls(

diff --git a/src/coffea/nanoevents/schemas/base.py b/src/coffea/nanoevents/schemas/base.py
@@ -1,3 +1,6 @@
+import awkward
+import numpy
+
 from coffea.nanoevents import transforms
 from coffea.nanoevents.util import concat, quote
 
@@ -96,6 +99,178 @@ def nest_jagged_forms(parent, child, counts_name, name):
     parent["content"]["contents"].append(inner)
 
 
+# move to transforms?
+def local2globalindex(index, counts):
+    """
+    Convert a local index to a global index
+
+    This is the same as local2global(index, counts2offsets(counts))
+    where local2global and counts2offsets are as in coffea.nanoevents.transforms
+
+    TO DO: dask_awkward.map_partitions implementation
+    """
+    if awkward.backend(index) == "typetracer":
+        return index
+    offsets = numpy.empty(len(counts) + 1, dtype=numpy.int64)
+    offsets[0] = 0
+    numpy.cumsum(counts, out=offsets[1:])
+    index = index.mask[index >= 0] + offsets[:-1]
+    index = index.mask[index < offsets[1:]]  # guard against out of bounds
+    # workaround ValueError: can not (unsafe) zip ListOffsetArrays with non-NumpyArray contents
+    # index.type is N * var * int32?
+    index = awkward.fill_none(index, -1)
+    return index
+
+
+# move to transforms?
+def nestedindex_form(indices):
+    """
+    Concatenate a list of indices along a new axis
+    Outputs a jagged array with same outer shape as index arrays
+    Add examples to documentation?
+
+    """
+    if not all(
+        isinstance(index.layout, awkward.contents.listoffsetarray.ListOffsetArray)
+        for index in indices
+    ):
+        raise RuntimeError
+    # return awkward.concatenate([idx[:, None] for idx in indexers], axis=1)
+
+    # store offsets to later reapply them to the arrays
+    offsets_stored = indices[0].layout.offsets
+    # also store parameters
+    parameters = {}
+    for i, idx in enumerate(indices):
+        if "__doc__" in parameters:
+            parameters["__doc__"] += " and "
+            parameters["__doc__"] += awkward.parameters(idx)["__doc__"]
+        else:
+            parameters["__doc__"] = "nested from "
+            parameters["__doc__"] += awkward.parameters(idx)["__doc__"]
+        # flatten the index
+        indices[i] = awkward.Array(idx.layout.content)
+
+    n = len(indices)
+    out = numpy.empty(n * len(indices[0]), dtype="int64")
+    for i, idx in enumerate(indices):
+        #  index arrays should all be same shape flat arrays
+        out[i::n] = idx
+    offsets = numpy.arange(0, len(out) + 1, n, dtype=numpy.int64)
+    out = awkward.Array(
+        awkward.contents.ListOffsetArray(
+            awkward.index.Index64(offsets),
+            awkward.contents.NumpyArray(out),
+        )
+    )
+    # reapply the offsets
+    out = awkward.Array(
+        awkward.contents.ListOffsetArray(
+            offsets_stored,
+            out.layout,
+            parameters=parameters,
+        )
+    )
+    return out
+
+
+# move to transforms?
+def counts2nestedindex_form(local_counts, target_offsets):
+    """Turn jagged local counts into doubly-jagged global index into a target
+    Outputs a jagged array with same axis-0 shape as counts axis-1
+    """
+    if not isinstance(
+        local_counts.layout, awkward.contents.listoffsetarray.ListOffsetArray
+    ):
+        raise RuntimeError
+    if not isinstance(target_offsets.layout, awkward.contents.numpyarray.NumpyArray):
+        raise RuntimeError
+
+    # count offsets the same way as with counts2offsets in coffea.nanoevents.transforms
+    offsets = numpy.empty(len(target_offsets) + 1, dtype=numpy.int64)
+    offsets[0] = 0
+    numpy.cumsum(target_offsets, out=offsets[1:])
+
+    # store offsets to later reapply them to the arrays
+    offsets_stored = local_counts.layout.offsets
+
+    out = awkward.unflatten(
+        numpy.arange(offsets[-1], dtype=numpy.int64),
+        awkward.flatten(local_counts),
+    )
+    # reapply the offsets
+    out = awkward.Array(
+        awkward.contents.ListOffsetArray(
+            offsets_stored,
+            out.layout,
+        )
+    )
+    return out
+
+
+# move to transforms?
+def counts2offsets(counts):
+    # Cumulative sum of counts
+    offsets = numpy.empty(len(counts) + 1, dtype=numpy.int64)
+    offsets[0] = 0
+    numpy.cumsum(counts, out=offsets[1:])
+    return offsets
+
+
+# move to transforms?
+def check_equal_lengths(
+    contents: list[awkward.contents.Content],
+) -> int | awkward._nplikes.shape.UnknownLength:
+    length = contents[0].length
+    for layout in contents:
+        if layout.length != length:
+            raise ValueError("all arrays must have the same length")
+    return length
+
+
+def zip_depth2(content, offsets, with_name, behavior, parameters=None):
+    # if with_name is not None:
+    #     if parameters is None:
+    #         parameters = {}
+    #     else:
+    #         parameters = dict(parameters)
+    #     parameters["__record__"] = with_name
+
+    fields = list(content.keys())
+    contents = [
+        # take contents 2 layers deep
+        v.layout.content
+        for v in content.values()
+    ]
+    length = check_equal_lengths(contents)
+    out = awkward.contents.ListOffsetArray(
+        offsets=offsets,
+        content=awkward.contents.RecordArray(contents, fields, length=length),
+    )
+    out = awkward.Array(out, behavior=behavior, with_name=with_name)
+    return out
+
+
+def zip_depth1(content, with_name, behavior, parameters=None):
+    # if with_name is not None:
+    #     if parameters is None:
+    #         parameters = {}
+    #     else:
+    #         parameters = dict(parameters)
+    #     parameters["__record__"] = with_name
+
+    fields = list(content.keys())
+    contents = [
+        # take contents 1 layer deep
+        v.layout
+        for v in content.values()
+    ]
+    length = check_equal_lengths(contents)
+    out = awkward.contents.RecordArray(contents, fields, length)
+    out = awkward.Array(out, behavior=behavior, with_name=with_name)
+    return out
+
+
 class BaseSchema:
     """Base schema builder