diff --git a/hagelslag/data/ModelOutput.py b/hagelslag/data/ModelOutput.py
index 19de456..7d83808 100644
--- a/hagelslag/data/ModelOutput.py
+++ b/hagelslag/data/ModelOutput.py
@@ -6,6 +6,7 @@
 from .HRRRModelGrid import HRRRModelGrid
 from .HRRREModelGrid import HRRREModelGrid
 from .HREFv2ModelGrid import HREFv2ModelGrid
+from .HRRRZarrModelGrid import HRRRZarrModelGrid
 from .NCARStormEventModelGrid import NCARStormEventModelGrid
 from hagelslag.util.make_proj_grids import make_proj_grids, read_arps_map_file, read_ncar_map_file, get_proj_obj
 from hagelslag.util.derived_vars import relative_humidity_pressure_level, melting_layer_height
diff --git a/hagelslag/data/ZarrModelGrid.py b/hagelslag/data/ZarrModelGrid.py
index d8f61e5..9ea770f 100644
--- a/hagelslag/data/ZarrModelGrid.py
+++ b/hagelslag/data/ZarrModelGrid.py
@@ -22,7 +22,6 @@ class ZarrModelGrid(object):
         freqency (str): spacing between model time steps.
         valid_dates: DatetimeIndex of all model timesteps
         forecast_hours: array of all hours in the forecast
-        file_objects (list): List of the file objects for each model time step
     """
     def __init__(self,
                  path,
@@ -37,29 +36,31 @@ def __init__(self,
         self.start_date = pd.to_datetime(start_date)
         self.end_date = pd.to_datetime(end_date)
         self.frequency = frequency
-        self.valid_dates = date_range(start=self.start_date,
-                                      end=self.end_date,
-                                      freq=self.frequency)
-        print(self.run_date)
-        print(type(self.run_date))
+        self.valid_dates = date_range(start=self.start_date, end=self.end_date, freq=self.frequency)
         self.forecast_hours = (self.valid_dates - self.run_date).astype("timedelta64[h]").astype(int)
-
     def load_data(self):
         units = ""
         level = self.variable.split('-')[1]
         self.variable = self.variable.split('-')[0]
         fs = s3fs.S3FileSystem(anon=True)
-        files = []
         run_date_str = self.run_date.strftime("%Y%m%d")
-        forecast_hour = self.run_date.strftime("%H")
-        path = join(self.path, run_date_str, f'{run_date_str}_{forecast_hour}z_fcst.zarr', level, self.variable, level)
+        run_hour = self.run_date.strftime("%H")
+        path = join(self.path, run_date_str, f'{run_date_str}_{run_hour}z_fcst.zarr', level, self.variable, level)
         f = s3fs.S3Map(root=path, s3=fs, check=False)
-        files.append(f)
+        ds = xr.open_mfdataset([f], engine='zarr', parallel=True).load()
-        ds = xr.open_mfdataset(files, engine='zarr').load()
-        array = ds[self.variable].values.astype('float32')
+        if self.run_date in self.valid_dates:
+            arr = ds[self.variable].values[self.forecast_hours[0]:self.forecast_hours[-1] + 1].astype('float32')
+            forecast_hour_00_path = join(self.path, run_date_str, f'{run_date_str}_{run_hour}z_anl.zarr', level,
+                                         self.variable.replace('1hr_', ''), level)
+            fh_0_file = s3fs.S3Map(root=forecast_hour_00_path, s3=fs, check=False)
+            fh_0_ds = xr.open_mfdataset([fh_0_file], engine='zarr', parallel=True).expand_dims('time')
+            fh_0_arr = fh_0_ds[self.variable.replace('1hr_', '')].values
+            array = np.concatenate([fh_0_arr, arr])[self.forecast_hours[0]:self.forecast_hours[-1] + 1, :, :]
+        else:
+            array = ds[self.variable].values[self.forecast_hours[0] - 1:self.forecast_hours[-1]].astype('float32')
         if hasattr(ds[self.variable], 'units'):
             units = ds[self.variable].attrs['units']