Skip to content

Commit 7b39448

Browse files
nikoladze, pre-commit-ci[bot], and jpivarski
authored
feat: Add known forth for ATLAS (#1282)
* add test for known forth of 2D vector ElementLinks
* add known forth for 2D vector ElementLinks
* style: pre-commit fixes
* rename test file
* update form instead of replacing (fixes tests for assumed forms)
* add treatment for branch=None in known_forth discovery
* switch to hardcoded dict of typenames
* delay awkward import
* make PrimaryVerticesAuxDyn.neutralParticleLinks work
* make if-statement for known_forth and awkward_form more clear

  Co-authored-by: Jim Pivarski <[email protected]>
* add xAOD::MuonSegment_v1 to dict of known_forth with VectorVectorElementLink and sort keys
* always convert self._form from dict in AsObjects.awkward_form
* treat more cases for typename
* flatten directory structure and add docstring for VectorVectorElementLink

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jim Pivarski <[email protected]>
1 parent 95b998b commit 7b39448

File tree

4 files changed

+208
-2
lines changed

4 files changed

+208
-2
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
2+
3+
"""
4+
This module provides known forth code and awkward forms for types where it is known a priori.
5+
6+
See :doc:`uproot.interpretation.known_forth.known_forth_of` for the function
7+
that provides the lookup of known forth codes and :doc:`uproot.interpretation.known_forth.atlas.VectorVectorElementLink` for an
8+
implementation used in ATLAS (D)AODs.
9+
"""
10+
from __future__ import annotations
11+
12+
import uproot
13+
from uproot.interpretation.known_forth.atlas import VectorVectorElementLink
14+
15+
# Lookup table from C++ typename to the class providing its known forth code
# and awkward form.  All typenames listed here are handled by the same class;
# the keys are kept in sorted order.
KNOWN_FORTH_DICT = dict.fromkeys(
    (
        "std::vector<std::vector<ElementLink<DataVector<xAOD::CaloCluster_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::IParticle>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::MuonSegment_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::NeutralParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TauTrack_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TrackParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::TruthParticle_v1>>>>",
        "std::vector<std::vector<ElementLink<DataVector<xAOD::Vertex_v1>>>>",
    ),
    VectorVectorElementLink,
)
25+
26+
27+
def known_forth_of(model):
    """
    Args:
        model: The :doc:`uproot.model.Model` to look up known forth for

    The typename used for the lookup is taken from ``model.typename``; if that
    attribute does not exist, from ``model.classname``; and if that does not
    exist either, it is decoded from ``model.__name__``.

    Returns an object with attributes `forth_code` and `awkward_form` if a known
    special case exists, else None
    """
    try:
        typename = model.typename
    except AttributeError:
        try:
            typename = model.classname
        except AttributeError:
            # classname_decode returns a (classname, version) pair; only the
            # classname can match the string keys of KNOWN_FORTH_DICT.
            typename, _version = uproot.model.classname_decode(model.__name__)

    try:
        known_forth_cls = KNOWN_FORTH_DICT[typename]
    except KeyError:
        return None

    return known_forth_cls(typename)
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
2+
3+
"""
4+
This module defines ATLAS specific known forth code
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import re
10+
11+
12+
class VectorVectorElementLink:
    """
    Known forth and awkward form for ``std::vector<std::vector<ElementLink<T>>>`` types in ATLAS (D)AODs

    The forth code was adjusted from what was provided in
    ``branch._complete_forth_code`` after running ``.array()`` once.

    The binary data of one ``vector<vector<ElementLink<T>>>`` looks as follows:

    * 6 bytes header for the outer vector
    * 4 bytes big endian uint for the size of the outer vector (node1)
    * for each outer vector element:
        * 4 bytes big endian uint for the size of the inner vector (node2)
        * for each inner vector element:
            * 20 bytes header for the ElementLink object
            * 4 bytes big endian uint for the ``m_persKey`` member (node3)
            * 4 bytes big endian uint for the ``m_persIndex`` member (node4)

    Args:
        typename (str): The full C++ typename; everything inside the two outer
            ``std::vector<...>`` wrappers becomes ``inner_typename``, which is
            used as the ``__record__`` name in :attr:`awkward_form`.
    """

    # The three nested "0 do ... loop" sections iterate over entries, outer
    # vector elements and inner vector elements, in that order.
    forth_code = """
input stream
input byteoffsets
input bytestops
output node1-offsets int64
output node2-offsets int64
output node3-data uint32
output node4-data uint32

0 node1-offsets <- stack
0 node2-offsets <- stack

0 do
byteoffsets I-> stack
stream seek
6 stream skip
stream !I-> stack
dup node1-offsets +<- stack
0 do
stream !I-> stack
dup node2-offsets +<- stack
0 do
20 stream skip
stream !I-> node3-data
stream !I-> node4-data
loop
loop
loop
"""

    def __init__(self, typename):
        self.typename = typename
        # Strip the two outer std::vector<...> layers; a typename that does
        # not contain the pattern is left unchanged by re.sub.
        outer_vectors = r"std::vector<std::vector<(.*)>>"
        self.inner_typename = re.sub(outer_vectors, r"\1", self.typename)

    @property
    def awkward_form(self):
        """
        The awkward form, as a freshly built nested dict: two levels of
        ``ListOffsetArray`` around a ``RecordArray`` with the ``uint32``
        fields ``m_persKey`` and ``m_persIndex``.
        """

        def uint32_leaf(form_key):
            # One flat uint32 column of the ElementLink record.
            return {
                "class": "NumpyArray",
                "primitive": "uint32",
                "inner_shape": [],
                "parameters": {},
                "form_key": form_key,
            }

        element_link = {
            "class": "RecordArray",
            "fields": ["m_persKey", "m_persIndex"],
            "contents": [uint32_leaf("node3"), uint32_leaf("node4")],
            "parameters": {"__record__": f"{self.inner_typename}"},
        }
        inner_vector = {
            "class": "ListOffsetArray",
            "offsets": "i64",
            "form_key": "node2",
            "content": element_link,
        }
        return {
            "class": "ListOffsetArray",
            "offsets": "i64",
            "form_key": "node1",
            "content": inner_vector,
        }

src/uproot/interpretation/objects.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import uproot
3030
import uproot._awkwardforth
31+
from uproot.interpretation.known_forth import known_forth_of
3132

3233

3334
class AsObjects(uproot.interpretation.Interpretation):
@@ -45,14 +46,22 @@ class AsObjects(uproot.interpretation.Interpretation):
4546
:ref:`uproot.interpretation.objects.AsObjects.simplify` attempts to
4647
replace this interpretation with a faster-to-read equivalent, but not all
4748
data types can be simplified.
49+
50+
:doc:`uproot.interpretation.known_forth` defines forth code and forms for
51+
special cases that will be picked up here as well
4852
"""
4953

5054
def __init__(self, model, branch=None):
    self._model = model
    self._branch = branch
    self._forth = True
    self._forth_lock = threading.Lock()

    # Default: no forth code and no form are known up front.
    self._complete_forth_code = None
    self._form = None

    # Types with a registered special case start out with their known forth
    # code and form already filled in.
    special_case = known_forth_of(self._model)
    if special_case is not None:
        self._complete_forth_code = special_case.forth_code
        self._form = special_case.awkward_form
5766

5867
@property
@@ -122,6 +131,10 @@ def awkward_form(
122131
tobject_header=False,
123132
breadcrumbs=(),
124133
):
134+
if self._form is not None:
135+
awkward = uproot.extras.awkward()
136+
return awkward.forms.from_dict(self._form)
137+
125138
context = self._make_context(
126139
context, index_format, header, tobject_header, breadcrumbs
127140
)
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env python3
2+
3+
import awkward
4+
import pytest
5+
import skhep_testdata
6+
import uproot
7+
8+
# Branches of "CollectionTree" in uproot-issue-123a.root that the tests below
# read; each is expected to be picked up by the known forth code.
VECTOR_VECTOR_ELEMENTLINK_BRANCHES = [
    "AnalysisHLT_e12_lhloose_nod0_2mu10AuxDyn.TrigMatchedObjects",
    "AnalysisElectronsAuxDyn.caloClusterLinks",
    "AnalysisPhotonsAuxDyn.vertexLinks",
    "TruthMuonsAuxDyn.childLinks",
    "AnalysisElectronsAuxDyn.trackParticleLinks",
    "PrimaryVerticesAuxDyn.neutralParticleLinks",
    "AnalysisTauJetsAuxDyn.tauTrackLinks",
]
17+
18+
19+
@pytest.mark.parametrize("key", VECTOR_VECTOR_ELEMENTLINK_BRANCHES)
def test_pickup_vector_vector_elementlink(key):
    """The interpretation of each listed branch has forth code and a form before any read."""
    root_file = skhep_testdata.data_path("uproot-issue-123a.root")
    with uproot.open({root_file: "CollectionTree"}) as tree:
        interpretation = tree[key].interpretation
        assert interpretation._complete_forth_code is not None
        assert interpretation._form is not None
27+
28+
29+
def test_consistent_library_np_vector_vector_elementlink():
    """Reading with ``library="np"`` and with the default library gives equal field values."""
    root_file = skhep_testdata.data_path("uproot-issue-123a.root")

    # Each library gets its own freshly opened tree.
    with uproot.open({root_file: "CollectionTree"}) as tree:
        arrays_np = {
            key: tree[key].array(library="np")
            for key in VECTOR_VECTOR_ELEMENTLINK_BRANCHES
        }
    with uproot.open({root_file: "CollectionTree"}) as tree:
        arrays_ak = {key: tree[key].array() for key in VECTOR_VECTOR_ELEMENTLINK_BRANCHES}

    for key, objects_np in arrays_np.items():
        array_ak = arrays_ak[key]
        # Convert the numpy object array using the form of the awkward result
        # so that both sides can be compared field by field.
        array_np = uproot.interpretation.library._object_to_awkward_array(
            awkward, array_ak.layout.form.to_dict(), objects_np
        )
        for field in array_ak.fields:
            assert awkward.all(array_np[field] == array_ak[field])

0 commit comments

Comments
 (0)