44import requests
55import s3fs
66import xarray as xr
7- from dagster import AssetExecutionContext , AssetIn , Config , EnvVar , asset
7+ import dagster as dg
88from dagster_aws .s3 import S3Resource
99
1010import downscaled_climate_data
1111
1212
class Loca2Config(dg.Config):
    """Run configuration for the ``loca2_raw_netcdf`` asset.

    Identifies a single LOCA2 NetCDF file to download and the S3 key
    that the download is associated with.
    """

    # S3 key for the downloaded file. It is logged by ``loca2_raw_netcdf``
    # and forwarded to downstream conversion via the ``zarr_config``
    # materialization metadata. Required: there is no default.
    s3_key: str
    # Source URL fetched with a streaming HTTP GET. Defaults to one
    # ACCESS-CM2 historical ``tasmax`` file from the LOCA2 archive.
    url: str = "https://cirrus.ucsd.edu/~pierce/LOCA2/CONUS_regions_split/ACCESS-CM2/cent/0p0625deg/r2i1p1f1/historical/tasmax/tasmax.ACCESS-CM2.historical.r2i1p1f1.1950-2014.LOCA_16thdeg_v20220413.cent.nc"  # NOQA E501
1616
1717
18- @asset (
18+ @dg . asset (
1919 name = "loca2_raw_netcdf" ,
2020 description = "Raw LOCA2 data downloaded from the web" ,
2121 code_version = downscaled_climate_data .__version__ ,
2222 group_name = "loca2"
2323)
24- def loca2_raw_netcdf (context : AssetExecutionContext ,
24+ def loca2_raw_netcdf (context : dg . AssetExecutionContext ,
2525 config : Loca2Config ,
26- s3 : S3Resource ) -> dict [ str , str ] :
26+ s3 : S3Resource ) -> dg . Output :
2727
28- destination_bucket = EnvVar ("LOCA2_BUCKET" ).get_value ()
29- destination_path_root = EnvVar ("LOCA2_RAW_PATH_ROOT" ).get_value ()
28+ destination_bucket = dg . EnvVar ("LOCA2_BUCKET" ).get_value ()
29+ destination_path_root = dg . EnvVar ("LOCA2_RAW_PATH_ROOT" ).get_value ()
3030
3131 with requests .get (config .url , stream = True ) as response :
3232 # Raise an exception for bad HTTP responses
@@ -44,24 +44,34 @@ def loca2_raw_netcdf(context: AssetExecutionContext,
4444 )
4545
4646 context .log .info (f"Downloading data to { config .s3_key } " )
47- return {
48- "bucket" : destination_bucket ,
49- "s3_key" : config .s3_key ,
50- }
47+ zarr_config = ZarrConfig (
48+ s3_key = config .s3_key ,
49+ bucket = destination_bucket ,
50+ )
51+ return dg .MaterializeResult (
52+ metadata = {
53+ "zarr_config" : dg .MetadataValue .json (zarr_config .__dict__ ),
54+ }
55+ )
5156
5257
53- @asset (
class ZarrConfig(dg.Config):
    """Hand-off record between ``loca2_raw_netcdf`` and ``loca2_zarr``.

    ``loca2_raw_netcdf`` builds one of these and attaches its fields as
    JSON under the ``zarr_config`` metadata key of its materialization.
    ``loca2_zarr`` reads that metadata from the latest materialization
    and reconstructs the object to locate the file it should convert.
    """

    # Key of the raw NetCDF object. It is appended to the raw path root to
    # form the input path; the output key is this value with '.nc'
    # replaced by '.zarr'.
    s3_key: str
    # Name of the S3 bucket used for both the input and the output path.
    bucket: str
61+
62+
63+ @dg .asset (
5464 name = "loca2_zarr" ,
55- ins = {
56- "loca2_raw_netcdf" : AssetIn ()
57- },
65+ deps = ["loca2_raw_netcdf" ],
5866 group_name = "loca2" ,
5967 description = "LOCA2 data converted to Zarr format" ,
6068 code_version = downscaled_climate_data .__version__ )
61- def loca2_zarr (context ,
62- loca2_raw_netcdf ,
63- s3 : S3Resource ):
64- context .log .info (f"Converting { loca2_raw_netcdf ['s3_key' ]} to zarr" )
69+ def loca2_zarr (context : dg .AssetExecutionContext , s3 : S3Resource ):
70+ upstream_metadata = context .instance .get_latest_materialization_event (
71+ dg .AssetKey ("loca2_raw_netcdf" )).asset_materialization .metadata
72+
73+ config = ZarrConfig (** upstream_metadata ['zarr_config' ].data )
74+ context .log .info (f"Converting { config .s3_key } to zarr" )
6575
6676 # Initialize s3fs with the same credentials as the S3Resource
6777 fs = s3fs .S3FileSystem (
@@ -70,14 +80,14 @@ def loca2_zarr(context,
7080 endpoint_url = s3 .endpoint_url
7181 )
7282
73- raw_root = EnvVar ("LOCA2_RAW_PATH_ROOT" ).get_value ()
74- zarr_root = EnvVar ("LOCA2_ZARR_PATH_ROOT" ).get_value ()
83+ raw_root = dg . EnvVar ("LOCA2_RAW_PATH_ROOT" ).get_value ()
84+ zarr_root = dg . EnvVar ("LOCA2_ZARR_PATH_ROOT" ).get_value ()
7585 # Construct S3 paths
76- input_path = f"s3://{ loca2_raw_netcdf [ ' bucket' ] } /{ raw_root } { loca2_raw_netcdf [ ' s3_key' ] } " # NOQA E501
86+ input_path = f"s3://{ config . bucket } /{ raw_root } { config . s3_key } " # NOQA E501
7787 context .log .info (f"Reading from { input_path } " )
7888
79- zarr_key = loca2_raw_netcdf [ ' s3_key' ] .replace ('.nc' , '.zarr' )
80- output_path = f"s3://{ loca2_raw_netcdf [ ' bucket' ] } /{ zarr_root } { zarr_key } "
89+ zarr_key = config . s3_key .replace ('.nc' , '.zarr' )
90+ output_path = f"s3://{ config . bucket } /{ zarr_root } { zarr_key } "
8191 context .log .info (f"Writing to { output_path } " )
8292
8393 # Read NetCDF file from S3
@@ -103,7 +113,7 @@ def loca2_zarr(context,
103113 ds .close ()
104114
105115
106- class ESMCatalogConfig (Config ):
116+ class ESMCatalogConfig (dg . Config ):
107117 data_format : str = "zarr"
108118 id : str = "loca2_zarr_monthly_esm_catalog"
109119 description : str = "LOCA2 Zarr data catalog"
@@ -143,21 +153,21 @@ def parse_key(relative_path: str, bucket: str, full_key: str) -> dict[str, str]:
143153 }
144154
145155
146- @asset (
156+ @dg . asset (
147157 name = "loca2_esm_catalog" ,
148158 group_name = "loca2" ,
149159 description = "Generate an Intake-ESM Catalog for LOCA2 datasets" ,
150160 code_version = downscaled_climate_data .__version__ )
151- def loca2_esm_catalog (context : AssetExecutionContext ,
161+ def loca2_esm_catalog (context : dg . AssetExecutionContext ,
152162 config : ESMCatalogConfig ,
153163 s3 : S3Resource ):
154164
155- bucket = EnvVar ("LOCA2_BUCKET" ).get_value ()
165+ bucket = dg . EnvVar ("LOCA2_BUCKET" ).get_value ()
156166
157167 if config .is_zarr ():
158- prefix = EnvVar ("LOCA2_ZARR_PATH_ROOT" ).get_value ()
168+ prefix = dg . EnvVar ("LOCA2_ZARR_PATH_ROOT" ).get_value ()
159169 else :
160- prefix = EnvVar ("LOCA2_RAW_PATH_ROOT" ).get_value ()
170+ prefix = dg . EnvVar ("LOCA2_RAW_PATH_ROOT" ).get_value ()
161171
162172 if config .is_monthly ():
163173 prefix += "/monthly"
0 commit comments