1+ import os
12from pathlib import Path
23
34import pandas as pd
1213
1314import ilamb3
1415import ilamb3 .meta as meta
16+ import ilamb3 .regions as ilr
1517from ilamb3 .run import parse_benchmark_setup , run_study
1618
1719app = typer .Typer (name = "ilamb" , no_args_is_help = True )
1820
1921
2022def _dataframe_reference (
21- root : Path = Path ("/ home/nate/ .cache/ilamb3/") ,
23+ root : Path = Path (). home () / " .cache/ilamb3/" ,
2224 cache_file : Path = Path ("df_reference.csv" ),
2325) -> pd .DataFrame :
2426 if cache_file .exists ():
2527 df = pd .read_csv (cache_file )
2628 df = df .set_index ("key" )
2729 return df
30+ if "ILAMB_ROOT" in os .environ :
31+ root = Path (os .environ ["ILAMB_ROOT" ])
2832 df = []
2933 for dirpath , _ , files in root .walk ():
3034 for fname in files :
3135 if not fname .endswith (".nc" ):
3236 continue
33- path = ( dirpath / fname ). absolute ()
37+ path = dirpath / fname
3438 df .append (
3539 {
36- "key" : str (path .parent ). split ( "/" )[ - 1 ] + f"/ { path . name } " ,
37- "path" : str (path ),
40+ "key" : str (Path ( * path .relative_to ( root ). parts [ 1 :])) ,
41+ "path" : str (path . absolute () ),
3842 }
3943 )
4044 df = pd .DataFrame (df )
@@ -44,12 +48,17 @@ def _dataframe_reference(
4448
4549
4650def _dataframe_cmip (
47- root : Path = Path ( "/home/nate/esgf-data/CMIP6/CMIP/" ) ,
51+ root : Path | None = None ,
4852 cache_file : Path = Path ("df_cmip.csv" ),
4953) -> pd .DataFrame :
5054 if cache_file .exists ():
5155 df = pd .read_csv (cache_file )
5256 return df
57+ if root is None :
58+ if "ESGF_ROOT" in os .environ :
59+ root = Path (os .environ ["ESGF_ROOT" ])
60+ else :
61+ root = Path .home () / ".esgf"
5362 df = []
5463 for dirpath , _ , files in root .walk ():
5564 for fname in files :
@@ -79,6 +88,8 @@ def _dataframe_cmip(
7988def run (
8089 config : Path ,
8190 regions : str | None = None ,
91+ region_sources : list [str ] | None = None ,
92+ df_comparison : Path | None = None ,
8293 output_path : Path = Path ("_build" ),
8394 cache : bool = True ,
8495 central_longitude : float = 0.0 ,
@@ -89,33 +100,28 @@ def run(
89100 regions = [None ]
90101 else :
91102 regions = [None if r .lower () == "none" else r for r in regions .split ("," )]
103+ if region_sources is not None :
104+ cat = ilamb3 .ilamb_catalog ()
105+ for source in region_sources :
106+ ilr .Regions ().add_netcdf (cat .fetch (source ))
107+ ilamb3 .conf ["region_sources" ] = region_sources
92108
93109 # set options
94110 ilamb3 .conf .set (
95111 regions = regions ,
96112 use_cached_results = cache ,
97113 use_uncertainty = True ,
98114 plot_central_longitude = central_longitude ,
115+ comparison_groupby = ["source_id" , "grid_label" ],
116+ model_name_facets = ["source_id" ],
99117 )
100118
101119 # load local databases, need a better way
102120 df_ref = _dataframe_reference ()
103- df_com = _dataframe_cmip ()
104- df_com = df_com [df_com ["source_id" ] == "CanESM5" ]
105- df_com = df_com [df_com ["member_id" ] == "r1i1p1f1" ]
106- df_com = df_com [
107- df_com ["variable_id" ].apply (lambda v : v not in ["areacello" , "sftof" ])
108- ]
109-
110- # add a few CESM2 variables that CanESM5 does not have
111- df = _dataframe_cmip ()
112- df = df [df ["source_id" ] == "CESM2" ]
113- df = df [
114- df ["variable_id" ].apply (
115- lambda v : v in ["areacella" , "sftlf" , "fBNF" , "burntFractionAll" ]
116- )
117- ]
118- df_com = pd .concat ([df_com , df ])
121+ if df_comparison is None :
122+ df_com = _dataframe_cmip ()
123+ else :
124+ df_com = pd .read_csv (df_comparison )
119125
120126 # execute
121127 if HAS_MPI4PY :
@@ -166,9 +172,13 @@ def _extract_sources(current: dict):
166172 ilamb3 .ilamb3_catalog (),
167173 ]
168174 for source in sources :
175+ found = False
169176 for reg in registries :
170177 if source in reg .registry_files :
171178 reg .fetch (source )
179+ found = True
180+ if not found :
181+ raise ValueError (f"Could not find '{ source } ' in the data registries." )
172182
173183
174184if __name__ == "__main__" :
0 commit comments