@@ -133,6 +133,55 @@ def _arrays(
133133 )[entry_start :entry_stop ]
134134
135135
def _num_entries_for(in_ntuple, target_num_bytes, filter_name):
    """
    Estimate how many entries of the selected columns add up to roughly
    ``target_num_bytes``.

    Args:
        in_ntuple: Object exposing ``ntuple`` (which provides ``num_entries``,
            ``cluster_summaries`` and ``page_list_envelopes.pagelinklist``)
            and ``to_akform()``.
        target_num_bytes (int): Desired size of one step, in bytes.
        filter_name: Column selection handed to ``select_columns`` on the
            form returned by ``to_akform()``.

    Returns:
        int: The estimated number of entries per step, never less than 1.
        When no bytes are attributed to the selection the result is 1.
    """
    # TODO: part of this is also done in _arrays, so we should refactor this
    # TODO: there might be a better way to estimate the number of entries
    ntuple = in_ntuple.ntuple
    entry_stop = ntuple.num_entries

    clusters = ntuple.cluster_summaries
    cluster_starts = numpy.array([c.num_first_entry for c in clusters])

    # searchsorted(...) - 1 is -1 when there are no clusters (or none starts
    # at entry 0); clamp it so the range below can never produce a negative
    # index, which would wrap around to the last cluster or raise IndexError
    # on an empty page list.
    start_cluster_idx = max(
        int(numpy.searchsorted(cluster_starts, 0, side="right")) - 1, 0
    )
    stop_cluster_idx = int(
        numpy.searchsorted(cluster_starts, entry_stop, side="right")
    )

    form = in_ntuple.to_akform().select_columns(
        filter_name, prune_unions_and_records=False
    )
    target_cols = []
    # NOTE(review): _recursive_find presumably appends the form keys of the
    # selected columns to target_cols in place -- confirm against its
    # definition.
    _recursive_find(form, target_cols)

    # Loop-invariant lookup, hoisted out of the nested loops.
    pagelinklist = ntuple.page_list_envelopes.pagelinklist

    total_bytes = 0
    for key in target_cols:
        if "column" not in key or "union" in key:
            continue
        # The column number is the second "-"-separated token of the key.
        key_nr = int(key.split("-")[1])
        for cluster in range(start_cluster_idx, stop_cluster_idx):
            pages = pagelinklist[cluster][key_nr]
            total_bytes += sum(page.locator.num_bytes for page in pages)

    if total_bytes == 0:
        # Nothing to scale by; fall back to the minimum step of one entry.
        return 1

    num_entries = int(round(target_num_bytes * entry_stop / total_bytes))
    return max(num_entries, 1)
172+
173+
def _regularize_step_size(in_ntuple, step_size, filter_name):
    """
    Turn ``step_size`` into a number of entries.

    Args:
        in_ntuple: Object forwarded unchanged to ``_num_entries_for``.
        step_size: Either an integer (as judged by ``uproot._util.isint``),
            which is returned as-is, or a value that
            ``uproot._util.memory_size`` can interpret (the message below
            gives ``'100 MB'`` as an example).
        filter_name: Column selection forwarded to ``_num_entries_for``.

    Returns:
        ``step_size`` itself when it is an integer, otherwise the result of
        ``_num_entries_for`` for the interpreted memory size.
    """
    if not uproot._util.isint(step_size):
        # Message handed to memory_size; presumably used when step_size
        # cannot be interpreted -- confirm against uproot._util.memory_size.
        requirement = (
            "number of entries or memory size string with units "
            f"(such as '100 MB') required, not {step_size!r} "
        )
        byte_budget = uproot._util.memory_size(step_size, requirement)
        return _num_entries_for(in_ntuple, byte_budget, filter_name)

    return step_size
183+
184+
136185class Model_ROOT_3a3a_Experimental_3a3a_RNTuple (uproot .model .Model ):
137186 """
138187 A versionless :doc:`uproot.model.Model` for ``ROOT::Experimental::RNTuple``.
@@ -742,6 +791,13 @@ def arrays(
742791 array_cache = array_cache ,
743792 )
744793
def iterate(self, filter_name="*", *args, step_size="100 MB", **kwargs):
    """
    Yield the result of ``self.arrays`` for consecutive entry ranges.

    Args:
        filter_name: Column selection. It is used both to estimate the
            number of entries per step and as the first positional argument
            of every ``self.arrays`` call, so the data that is read matches
            the selection the step size was computed for.
        *args: Further positional arguments forwarded to ``self.arrays``
            after ``filter_name``.
        step_size: Number of entries per step, or a memory size such as
            ``"100 MB"`` that ``_regularize_step_size`` converts into a
            number of entries.
        **kwargs: Keyword arguments forwarded to ``self.arrays``.
            ``entry_start`` and ``entry_stop`` are supplied by this method.

    Yields:
        Whatever ``self.arrays`` returns for each range
        ``[start, start + step_size)``, with ``start`` running from 0 up to
        ``self.num_entries`` in increments of ``step_size``.
    """
    step_size = _regularize_step_size(self, step_size, filter_name)
    for start in range(0, self.num_entries, step_size):
        # NOTE(review): assumes ``arrays`` takes ``filter_name`` as its first
        # positional parameter, mirroring this method's own signature --
        # confirm against the ``arrays`` definition.
        yield self.arrays(
            filter_name,
            *args,
            entry_start=start,
            entry_stop=start + step_size,
            **kwargs,
        )
800+
745801
746802# Supporting function and classes
747803def _split_switch_bits (content ):
@@ -1215,6 +1271,13 @@ def __array__(self, *args, **kwargs):
12151271 else :
12161272 return numpy .array (out , * args , ** kwargs )
12171273
def iterate(self, filter_name="*", *args, step_size="100 MB", **kwargs):
    """
    Yield the result of ``self.array`` for consecutive entry ranges.

    Args:
        filter_name: Only passed to ``_regularize_step_size`` for the
            step-size estimate; it is not forwarded to ``self.array``.
        *args: Positional arguments forwarded to ``self.array``.
        step_size: Number of entries per step, or a memory size such as
            ``"100 MB"`` that ``_regularize_step_size`` converts into a
            number of entries.
        **kwargs: Keyword arguments forwarded to ``self.array``.
            ``entry_start`` and ``entry_stop`` are supplied by this method.

    Yields:
        Whatever ``self.array`` returns for each range
        ``[begin, begin + entries_per_step)``, with ``begin`` running from 0
        up to ``self.ntuple.num_entries``.
    """
    entries_per_step = _regularize_step_size(self, step_size, filter_name)
    total_entries = self.ntuple.num_entries
    for begin in range(0, total_entries, entries_per_step):
        end = begin + entries_per_step
        yield self.array(*args, entry_start=begin, entry_stop=end, **kwargs)
1280+
12181281
12191282uproot .classes ["ROOT::Experimental::RNTuple" ] = (
12201283 Model_ROOT_3a3a_Experimental_3a3a_RNTuple
0 commit comments