From 220bda80195977178bd5f8d24782c4ad74d9610c Mon Sep 17 00:00:00 2001
From: Charlie Becker
Date: Mon, 13 Sep 2021 16:06:05 -0600
Subject: [PATCH 1/4] Add missing import

---
 hagelslag/data/ModelOutput.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hagelslag/data/ModelOutput.py b/hagelslag/data/ModelOutput.py
index 19de456..7d83808 100644
--- a/hagelslag/data/ModelOutput.py
+++ b/hagelslag/data/ModelOutput.py
@@ -6,6 +6,7 @@
 from .HRRRModelGrid import HRRRModelGrid
 from .HRRREModelGrid import HRRREModelGrid
 from .HREFv2ModelGrid import HREFv2ModelGrid
+from .HRRRZarrModelGrid import HRRRZarrModelGrid
 from .NCARStormEventModelGrid import NCARStormEventModelGrid
 from hagelslag.util.make_proj_grids import make_proj_grids, read_arps_map_file, read_ncar_map_file, get_proj_obj
 from hagelslag.util.derived_vars import relative_humidity_pressure_level, melting_layer_height

From 8f8b82810b6415014c82d3cfe36ab7969458dc1c Mon Sep 17 00:00:00 2001
From: Charlie Becker
Date: Mon, 13 Sep 2021 16:06:56 -0600
Subject: [PATCH 2/4] Correct bug for missing forecast hour 00

---
 hagelslag/data/ZarrModelGrid.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hagelslag/data/ZarrModelGrid.py b/hagelslag/data/ZarrModelGrid.py
index d8f61e5..8af6a59 100644
--- a/hagelslag/data/ZarrModelGrid.py
+++ b/hagelslag/data/ZarrModelGrid.py
@@ -58,8 +58,10 @@ def load_data(self):
         f = s3fs.S3Map(root=path, s3=fs, check=False)
         files.append(f)
 
-        ds = xr.open_mfdataset(files, engine='zarr').load()
-        array = ds[self.variable].values.astype('float32')
+        ds = xr.open_mfdataset(files, engine='zarr', parallel=True).load()
+        arr = ds[self.variable].values.astype('float32')
+        dummy_forecast_hour_00 = np.zeros((1, arr.shape[1], arr.shape[2]))
+        array = np.concatenate([dummy_forecast_hour_00, arr])[self.forecast_hours[0]:self.forecast_hours[-1] + 1, :, :]
 
         if hasattr(ds[self.variable], 'units'):
             units = ds[self.variable].attrs['units']

From c390c93b4fa38a8242ff83e4734387b7a976bdb2 Mon Sep 17 00:00:00 2001
From: Charlie Becker
Date: Tue, 14 Sep 2021 12:18:19 -0600
Subject: [PATCH 3/4] Change to only load forecast hour 00 when needed.

---
 hagelslag/data/ZarrModelGrid.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hagelslag/data/ZarrModelGrid.py b/hagelslag/data/ZarrModelGrid.py
index 8af6a59..ea4444e 100644
--- a/hagelslag/data/ZarrModelGrid.py
+++ b/hagelslag/data/ZarrModelGrid.py
@@ -41,10 +41,8 @@ def __init__(self,
                                       end=self.end_date,
                                       freq=self.frequency)
         print(self.run_date)
-        print(type(self.run_date))
         self.forecast_hours = (self.valid_dates - self.run_date).astype("timedelta64[h]").astype(int)
-
 
     def load_data(self):
 
         units = ""
@@ -57,11 +55,15 @@ def load_data(self):
         path = join(self.path, run_date_str, f'{run_date_str}_{forecast_hour}z_fcst.zarr', level, self.variable, level)
         f = s3fs.S3Map(root=path, s3=fs, check=False)
         files.append(f)
-
         ds = xr.open_mfdataset(files, engine='zarr', parallel=True).load()
-        arr = ds[self.variable].values.astype('float32')
-        dummy_forecast_hour_00 = np.zeros((1, arr.shape[1], arr.shape[2]))
-        array = np.concatenate([dummy_forecast_hour_00, arr])[self.forecast_hours[0]:self.forecast_hours[-1] + 1, :, :]
+
+
+        if self.run_date not in self.valid_dates:
+            array = ds[self.variable].values[self.forecast_hours[0] - 1:self.forecast_hours[-1]].astype('float32')
+        elif self.run_date in self.valid_dates:
+            arr = ds[self.variable].values[self.forecast_hours[0]:self.forecast_hours[-1] + 1].astype('float32')
+            dummy_forecast_hour_00 = np.zeros((1, arr.shape[1], arr.shape[2]))
+            array = np.concatenate([dummy_forecast_hour_00, arr])[self.forecast_hours[0]:self.forecast_hours[-1] + 1, :, :]
 
         if hasattr(ds[self.variable], 'units'):
             units = ds[self.variable].attrs['units']

From eeca18f873a45f404076c4ed2667768e71121959 Mon Sep 17 00:00:00 2001
From: Charlie Becker
Date: Tue, 14 Sep 2021 23:16:34 -0600
Subject: [PATCH 4/4] Replace dummy forecast hour with HRRR analysis and account for variable name differences

---
 hagelslag/data/ZarrModelGrid.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/hagelslag/data/ZarrModelGrid.py b/hagelslag/data/ZarrModelGrid.py
index ea4444e..9ea770f 100644
--- a/hagelslag/data/ZarrModelGrid.py
+++ b/hagelslag/data/ZarrModelGrid.py
@@ -22,7 +22,6 @@ class ZarrModelGrid(object):
         freqency (str): spacing between model time steps.
         valid_dates: DatetimeIndex of all model timesteps
         forecast_hours: array of all hours in the forecast
-        file_objects (list): List of the file objects for each model time step
     """
     def __init__(self,
                  path,
@@ -37,10 +36,7 @@ def __init__(self,
         self.start_date = pd.to_datetime(start_date)
         self.end_date = pd.to_datetime(end_date)
         self.frequency = frequency
-        self.valid_dates = date_range(start=self.start_date,
-                                      end=self.end_date,
-                                      freq=self.frequency)
-        print(self.run_date)
+        self.valid_dates = date_range(start=self.start_date, end=self.end_date, freq=self.frequency)
         self.forecast_hours = (self.valid_dates - self.run_date).astype("timedelta64[h]").astype(int)
 
     def load_data(self):
@@ -49,21 +45,22 @@ def load_data(self):
         level = self.variable.split('-')[1]
         self.variable = self.variable.split('-')[0]
         fs = s3fs.S3FileSystem(anon=True)
-        files = []
         run_date_str = self.run_date.strftime("%Y%m%d")
-        forecast_hour = self.run_date.strftime("%H")
-        path = join(self.path, run_date_str, f'{run_date_str}_{forecast_hour}z_fcst.zarr', level, self.variable, level)
+        run_hour = self.run_date.strftime("%H")
+        path = join(self.path, run_date_str, f'{run_date_str}_{run_hour}z_fcst.zarr', level, self.variable, level)
         f = s3fs.S3Map(root=path, s3=fs, check=False)
-        files.append(f)
-        ds = xr.open_mfdataset(files, engine='zarr', parallel=True).load()
+        ds = xr.open_mfdataset([f], engine='zarr', parallel=True).load()
 
-
-        if self.run_date not in self.valid_dates:
-            array = ds[self.variable].values[self.forecast_hours[0] - 1:self.forecast_hours[-1]].astype('float32')
-        elif self.run_date in self.valid_dates:
+        if self.run_date in self.valid_dates:
             arr = ds[self.variable].values[self.forecast_hours[0]:self.forecast_hours[-1] + 1].astype('float32')
-            dummy_forecast_hour_00 = np.zeros((1, arr.shape[1], arr.shape[2]))
-            array = np.concatenate([dummy_forecast_hour_00, arr])[self.forecast_hours[0]:self.forecast_hours[-1] + 1, :, :]
+            forecast_hour_00_path = join(self.path, run_date_str, f'{run_date_str}_{run_hour}z_anl.zarr', level,
+                                         self.variable.replace('1hr_', ''), level)
+            fh_0_file = s3fs.S3Map(root=forecast_hour_00_path, s3=fs, check=False)
+            fh_0_ds = xr.open_mfdataset([fh_0_file], engine='zarr', parallel=True).expand_dims('time')
+            fh_0_arr = fh_0_ds[self.variable.replace('1hr_', '')].values
+            array = np.concatenate([fh_0_arr, arr])[self.forecast_hours[0]:self.forecast_hours[-1] + 1, :, :]
+        else:
+            array = ds[self.variable].values[self.forecast_hours[0] - 1:self.forecast_hours[-1]].astype('float32')
 
         if hasattr(ds[self.variable], 'units'):
             units = ds[self.variable].attrs['units']
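
Note (not part of the patches above): the hour-0 handling introduced in PATCH 4 can be sketched on its own. This minimal Python snippet mirrors the new load_data() logic for a single run; the bucket prefix, run date, level, and variable name are placeholder assumptions, not values taken from the patch series, and running it requires access to the public HRRR-Zarr S3 archive.

    import numpy as np
    import s3fs
    import xarray as xr
    from os.path import join

    # Placeholder values; in hagelslag these come from the ZarrModelGrid instance.
    bucket = 'hrrrzarr/sfc'
    run_date_str, run_hour = '20210901', '00'
    level, variable = 'entire_atmosphere', '1hr_max_fcst_REFC'

    fs = s3fs.S3FileSystem(anon=True)

    # Hourly forecast steps live in the *_fcst.zarr store under variable/level groups.
    fcst_path = join(bucket, run_date_str, f'{run_date_str}_{run_hour}z_fcst.zarr', level, variable, level)
    fcst_ds = xr.open_mfdataset([s3fs.S3Map(root=fcst_path, s3=fs, check=False)],
                                engine='zarr', parallel=True).load()
    fcst_arr = fcst_ds[variable].values.astype('float32')

    # Forecast hour 00 comes from the matching *_anl.zarr analysis store, whose
    # variable names drop the '1hr_' prefix used by the forecast store.
    anl_variable = variable.replace('1hr_', '')
    anl_path = join(bucket, run_date_str, f'{run_date_str}_{run_hour}z_anl.zarr', level, anl_variable, level)
    anl_ds = xr.open_mfdataset([s3fs.S3Map(root=anl_path, s3=fs, check=False)],
                               engine='zarr', parallel=True).expand_dims('time')
    fh_0_arr = anl_ds[anl_variable].values

    # Prepend the analysis slice so index 0 of the stacked array is forecast hour 00.
    full = np.concatenate([fh_0_arr, fcst_arr])
    print(full.shape)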