55
66from yt .data_objects .index_subobjects .grid_patch import AMRGridPatch
77from yt .data_objects .static_output import Dataset
8- from yt .funcs import setdefaultattr
8+ from yt .funcs import get_pbar , setdefaultattr
99from yt .geometry .api import Geometry
1010from yt .geometry .grid_geometry_handler import GridIndex
1111from yt .utilities .on_demand_imports import _h5py as h5py
1212
1313from .fields import ChollaFieldInfo
1414
1515
def _split_fname_proc_suffix(filename: str):
    """Split a trailing process-id suffix off of ``filename``.

    When Cholla is compiled with MPI and writes its data files directly,
    every process appends ``.<process-id>`` to the name of the file that it
    writes (e.g. the MPI-compiled version might write ``'0.h5.0'``). This
    function splits such a name at that final ``'.'``.

    Parameters
    ----------
    filename : str
        Name of (or path to) a data file. Only ``'/'`` is recognized as a
        directory separator.

    Returns
    -------
    tuple
        The 2-tuple ``(prefix, suffix)``; ``'0.h5.0'`` gives
        ``('0.h5', '0')``. When the text after the last ``'.'`` is missing
        or is not made up entirely of decimal digits, the name is treated as
        having no suffix and ``(filename, '')`` is returned. This might come
        up if the user concatenated the output files, which is common
        practice.

    Raises
    ------
    ValueError
        If stripping the suffix would leave an empty file name (for example
        ``'.0'`` or ``'some_dir/.0'``).
    """
    # At this time, the suffix is expected to be the shortest decimal
    # representation of the process id. For flexibility, any all-digit
    # suffix is accepted, which tolerates extra zero-padding.
    prefix, dot, suffix = filename.rpartition(".")

    # ``dot`` is empty when ``filename`` contains no '.' at all, and
    # ``"".isdecimal()`` is False, which covers a name ending in '.'
    if not dot or not suffix.isdecimal():
        return (filename, "")

    # an all-digit suffix can't contain '/', so a prefix that is empty or
    # ends in '/' means that nothing would remain of the file's base name
    if prefix == "" or prefix.endswith("/"):
        raise ValueError(
            f"can't split a process-suffix off of {filename!r} "
            "since the remaining filename would be empty"
        )

    return (prefix, suffix)
45+
46+
1647class ChollaGrid (AMRGridPatch ):
1748 _id_offset = 0
1849
19- def __init__ (self , id , index , level , dims ):
20- super ().__init__ (id , filename = index . index_filename , index = index )
50+ def __init__ (self , id , index , level , dims , filename ):
51+ super ().__init__ (id , filename = filename , index = index )
2152 self .Parent = None
2253 self .Children = []
2354 self .Level = level
@@ -42,23 +73,92 @@ def _detect_output_fields(self):
4273 self .field_list = [("cholla" , k ) for k in h5f .keys ()]
4374
def _count_grids(self):
    """Determine ``self.num_grids`` from the index file's "nprocs" attribute.

    The number of grids is the product of the entries of the "nprocs" hdf5
    attribute of ``self.index_filename``. When that attribute is absent, a
    single grid is assumed.

    Raises
    ------
    ValueError
        If there is more than one grid and ``self.index_filename`` does not
        end in a process-id suffix that is equal to 0.
    """
    # the number of grids is equal to the number of processes, unless the
    # dataset has been concatenated. But, when the dataset is concatenated
    # (a common post-processing step), the "nprocs" hdf5 attribute is
    # usually dropped.

    with h5py.File(self.index_filename, mode="r") as h5f:
        nprocs = h5f.attrs.get("nprocs", np.array([1, 1, 1]))[:].astype("=i8")
    self.num_grids = np.prod(nprocs)

    if self.num_grids > 1:
        # When there's more than 1 grid, we expect the user to
        # - have not changed the names of the output files
        # - have passed the file written by process 0 to ``yt.load``
        # Let's perform a sanity-check that self.index_filename has the
        # expected suffix for a file written by mpi-process 0
        suffix = _split_fname_proc_suffix(self.index_filename)[1]

        # ``suffix`` is '' when the filename carries no process-id at all.
        # That case is checked explicitly because ``int('')`` would fail
        # with an unrelated "invalid literal" message rather than the
        # explanation given below.
        if suffix == "" or int(suffix) != 0:
            raise ValueError(
                "the primary file associated with a "
                "distributed cholla dataset must end in '.0'"
            )
4696
def _parse_index(self):
    """Create one grid object per data file and record the grid geometry.

    Fills ``self.grids`` and writes each grid's edges, dimensions, level
    and particle count into the ``self.grid_*`` arrays, then sets
    ``self.max_level`` to 0. Every grid is placed on level 0 with a
    particle count of 0.

    NOTE(review): the ``self.grid_*`` arrays are indexed here without being
    created, so they are presumably allocated by the caller before this
    method runs -- confirm against the base class.
    """
    self.grids = np.empty(self.num_grids, dtype="object")

    # construct an iterable over the pairs of grid-index and corresponding
    # filename
    if self.num_grids == 1:
        ind_fname_pairs = [(0, self.index_filename)]
    else:
        # index_fname should have the form f'{self.directory}/<prefix>.0'
        # strip off the '.0' and determine the contents of <prefix>
        pref, suf = _split_fname_proc_suffix(self.index_filename)
        assert int(suf) == 0  # sanity check!

        # The suffix of the primary file is allowed to be zero-padded
        # (e.g. '.000'). Pad every generated suffix to that same width so
        # that the sibling files of such a dataset can be found. For the
        # usual '.0' suffix the width is 1 and no padding is introduced.
        width = len(suf)
        ind_fname_pairs = (
            (i, f"{pref}.{i:0{width}d}") for i in range(self.num_grids)
        )

    dims_global = self.ds.domain_dimensions[:]
    pbar = get_pbar("Parsing Hierarchy", self.num_grids)

    # It would be nice if we could avoid reading in every hdf5 file during
    # this step... (to do this, Cholla could probably encode how the blocks
    # are sorted in an hdf5 attribute)

    for i, fname in ind_fname_pairs:
        if self.num_grids == 1:
            # if the file was concatenated, we might be missing attributes
            # that are accessed in the other branch. To avoid issues, we use
            # hardcoded values
            left_frac, right_frac, dims_local = 0.0, 1.0, dims_global
        else:
            with h5py.File(fname, "r") as f:
                offset = f.attrs["offset"][:].astype("=i8")
                dims_local = f.attrs["dims_local"][:].astype("=i8")
            # express the edges of this grid as fractions of the full
            # domain (they are rescaled after the loop)
            left_frac = offset / dims_global
            right_frac = (offset + dims_local) / dims_global

        level = 0

        self.grids[i] = self.grid(
            i,
            index=self,
            level=level,
            dims=dims_local,
            filename=fname,
        )

        self.grid_left_edge[i] = left_frac
        self.grid_right_edge[i] = right_frac
        self.grid_dimensions[i] = dims_local
        self.grid_levels[i, 0] = level
        self.grid_particle_count[i, 0] = 0

        pbar.update(i + 1)
    pbar.finish()

    # convert the fractional edges stored above into positions, by scaling
    # with the domain width and shifting by the domain's left edge
    slope = self.ds.domain_width / self.ds.arr(np.ones(3), "code_length")
    self.grid_left_edge = self.grid_left_edge * slope + self.ds.domain_left_edge
    self.grid_right_edge = self.grid_right_edge * slope + self.ds.domain_left_edge

    self.max_level = 0
54156
def _populate_grid_objects(self):
    """Finish setting up each of the first ``self.num_grids`` grids.

    The grid objects themselves are taken from ``self.grids``; this method
    only calls ``_prepare_grid`` and then ``_setup_dx`` on each of them, in
    index order.
    """
    grids = self.grids
    for grid_index in range(self.num_grids):
        grids[grid_index]._prepare_grid()
        grids[grid_index]._setup_dx()
62162
63163
64164class ChollaDataset (Dataset ):
@@ -103,9 +203,11 @@ def _parse_parameter_file(self):
103203 attrs = h5f .attrs
104204 self .parameters = dict (attrs .items ())
105205 self .domain_left_edge = attrs ["bounds" ][:].astype ("=f8" )
106- self .domain_right_edge = attrs ["domain" ][:].astype ("=f8" )
206+ self .domain_right_edge = self .domain_left_edge + attrs ["domain" ][:].astype (
207+ "=f8"
208+ )
107209 self .dimensionality = len (attrs ["dims" ][:])
108- self .domain_dimensions = attrs ["dims" ][:].astype ("=f8 " )
210+ self .domain_dimensions = attrs ["dims" ][:].astype ("=i8 " )
109211 self .current_time = attrs ["t" ][:]
110212 self ._periodicity = tuple (attrs .get ("periodicity" , (False , False , False )))
111213 self .gamma = attrs .get ("gamma" , 5.0 / 3.0 )
0 commit comments