1616import xarray as xr
1717from scipy .stats import binned_statistic
1818
19- from e3sm_diags import INSTALL_PATH
19+ from e3sm_diags import INSTALL_PATH , LEGACY_XARRAY_MERGE_KWARGS
2020from e3sm_diags .driver .utils .dataset_xr import Dataset
2121from e3sm_diags .logger import _setup_child_logger
2222from e3sm_diags .plot .mp_partition_plot import plot
@@ -60,19 +60,27 @@ def compute_lcf(cice, cliq, temp, landfrac):
6060
6161
6262def run_diag (parameter : MPpartitionParameter ) -> MPpartitionParameter :
63- """Runs the mixed-phase partition/T5050 diagnostic.
64-
65- :param parameter: Parameters for the run
66- :type parameter: CoreParameter
67- :raises ValueError: Invalid run type
68- :return: Parameters for the run
69- :rtype: CoreParameter
63+ """
64+ Runs the mixed-phase partition/T5050 diagnostic.
65+
66+ Parameters
67+ ----------
68+ parameter : CoreParameter
69+ Parameters for the run.
70+
71+ Raises
72+ ------
73+ ValueError
74+ If the run type is invalid.
75+
76+ Returns
77+ -------
78+ CoreParameter
79+ Parameters for the run.
7080 """
7181 run_type = parameter .run_type
7282 season = "ANN"
7383
74- # Read reference data first
75-
7684 benchmark_data_path = os .path .join (
7785 INSTALL_PATH ,
7886 "control_runs" ,
@@ -82,35 +90,20 @@ def run_diag(parameter: MPpartitionParameter) -> MPpartitionParameter:
8290 with open (benchmark_data_path , "r" ) as myfile :
8391 lcf_file = myfile .read ()
8492
85- # parse file
8693 metrics_dict = json .loads (lcf_file )
8794
8895 test_data = Dataset (parameter , data_type = "test" )
89- # test = test_data.get_timeseries_variable("LANDFRAC")
90- # print(dir(test))
91- # landfrac = test_data.get_timeseries_variable("LANDFRAC")(cdutil.region.domain(latitude=(-70.0, -30, "ccb")))
92- # temp = test_data.get_timeseries_variable("T")(cdutil.region.domain(latitude=(-70.0, -30, "ccb")))
93- # cice = test_data.get_timeseries_variable("CLDICE")(cdutil.region.domain(latitude=(-70.0, -30, "ccb")))
94- # cliq = test_data.get_timeseries_variable("CLDLIQ")(cdutil.region.domain(latitude=(-70.0, -30, "ccb")))
95-
9696 test_data_path = parameter .test_data_path
97- start_year = parameter .test_start_yr
98- end_year = parameter .test_end_yr
97+ start_year = int (parameter .test_start_yr )
98+
99+ end_year = int (parameter .test_end_yr )
100+
99101 # TODO the time subsetting and variable derivation should be replaced during cdat revamp
100102 try :
101- # xr.open_mfdataset() can accept an explicit list of files.
102- landfrac = xr .open_mfdataset (glob .glob (f"{ test_data_path } /LANDFRAC_*" )).sel (
103- lat = slice (- 70 , - 30 ), time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" )
104- )["LANDFRAC" ]
105- temp = xr .open_mfdataset (glob .glob (f"{ test_data_path } /T_*.nc" )).sel (
106- lat = slice (- 70 , - 30 ), time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" )
107- )["T" ]
108- cice = xr .open_mfdataset (glob .glob (f"{ test_data_path } /CLDICE_*.nc" )).sel (
109- lat = slice (- 70 , - 30 ), time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" )
110- )["CLDICE" ]
111- cliq = xr .open_mfdataset (glob .glob (f"{ test_data_path } /CLDLIQ_*.nc" )).sel (
112- lat = slice (- 70 , - 30 ), time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" )
113- )["CLDLIQ" ]
103+ landfrac = _open_mfdataset (test_data_path , "LANDFRAC" , start_year , end_year )
104+ temp = _open_mfdataset (test_data_path , "T" , start_year , end_year )
105+ cice = _open_mfdataset (test_data_path , "CLDICE" , start_year , end_year )
106+ cliq = _open_mfdataset (test_data_path , "CLDLIQ" , start_year , end_year )
114107 except OSError :
115108 logger .info (
116109 f"No files to open for variables within { start_year } and { end_year } from { test_data_path } ."
@@ -126,46 +119,19 @@ def run_diag(parameter: MPpartitionParameter) -> MPpartitionParameter:
126119
127120 if run_type == "model-vs-model" :
128121 ref_data = Dataset (parameter , data_type = "ref" )
129-
130122 ref_data_path = parameter .reference_data_path
131- start_year = parameter .ref_start_yr
132- end_year = parameter .ref_end_yr
133- # xr.open_mfdataset() can accept an explicit list of files.
123+
134124 try :
135- landfrac = xr .open_mfdataset (glob .glob (f"{ ref_data_path } /LANDFRAC_*" )).sel (
136- lat = slice (- 70 , - 30 ),
137- time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" ),
138- )["LANDFRAC" ]
139- temp = xr .open_mfdataset (glob .glob (f"{ ref_data_path } /T_*.nc" )).sel (
140- lat = slice (- 70 , - 30 ),
141- time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" ),
142- )["T" ]
143- cice = xr .open_mfdataset (glob .glob (f"{ ref_data_path } /CLDICE_*.nc" )).sel (
144- lat = slice (- 70 , - 30 ),
145- time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" ),
146- )["CLDICE" ]
147- cliq = xr .open_mfdataset (glob .glob (f"{ ref_data_path } /CLDLIQ_*.nc" )).sel (
148- lat = slice (- 70 , - 30 ),
149- time = slice (f"{ start_year } -01-01" , f"{ end_year } -12-31" ),
150- )["CLDLIQ" ]
125+ landfrac = _open_mfdataset (ref_data_path , "LANDFRAC" , start_year , end_year )
126+ temp = _open_mfdataset (ref_data_path , "T" , start_year , end_year )
127+ cice = _open_mfdataset (ref_data_path , "CLDICE" , start_year , end_year )
128+ cliq = _open_mfdataset (ref_data_path , "CLDLIQ" , start_year , end_year )
151129 except OSError :
152130 logger .info (
153131 f"No files to open for variables within { start_year } and { end_year } from { ref_data_path } ."
154132 )
155133 raise
156134
157- # landfrac = ref_data.get_timeseries_variable("LANDFRAC")(
158- # cdutil.region.domain(latitude=(-70.0, -30, "ccb"))
159- # )
160- # temp = ref_data.get_timeseries_variable("T")(
161- # cdutil.region.domain(latitude=(-70.0, -30, "ccb"))
162- # )
163- # cice = ref_data.get_timeseries_variable("CLDICE")(
164- # cdutil.region.domain(latitude=(-70.0, -30, "ccb"))
165- # )
166- # cliq = ref_data.get_timeseries_variable("CLDLIQ")(
167- # cdutil.region.domain(latitude=(-70.0, -30, "ccb"))
168- # )
169135 parameter .ref_name_yrs = ref_data .get_name_yrs_attr (season )
170136 metrics_dict ["ref" ] = {}
171137 metrics_dict ["ref" ]["T" ], metrics_dict ["ref" ]["LCF" ] = compute_lcf (
@@ -177,3 +143,47 @@ def run_diag(parameter: MPpartitionParameter) -> MPpartitionParameter:
177143 plot (metrics_dict , parameter )
178144
179145 return parameter
146+
147+
def _open_mfdataset(
    data_path: str, var: str, start_year: int, end_year: int
) -> xr.DataArray:
    """
    Open the NetCDF files for one variable and subset by time and latitude.

    Files under ``data_path`` whose names match ``{var}_*.nc`` are opened
    together with ``xr.open_mfdataset``. The combined dataset is then
    restricted to ``lat`` labels between -70 and -30 and to ``time`` labels
    from ``{start_year}-01-01`` through ``{end_year}-12-31``, and the
    variable named ``var`` is returned.

    Parameters
    ----------
    data_path : str
        The path to the directory containing the NetCDF files.
    var : str
        The variable name, used both in the file pattern and as the key
        selected from the opened dataset.
    start_year : int
        The starting year for the time subsetting.
    end_year : int
        The ending year for the time subsetting.

    Returns
    -------
    xr.DataArray
        The DataArray for ``var``, subsetted by time and latitude.

    Raises
    ------
    FileNotFoundError
        If no file matches ``{data_path}/{var}_*.nc``. This is a subclass of
        ``OSError``, so callers that catch ``OSError`` keep working.
    """
    file_pattern = f"{data_path}/{var}_*.nc"

    # The order of glob.glob() results depends on the file system; sort so
    # the file list handed to xarray is deterministic across runs.
    filepaths = sorted(glob.glob(file_pattern))

    # Fail early with a message that names the pattern instead of relying on
    # a generic error from xarray when the file list is empty.
    if not filepaths:
        raise FileNotFoundError(f"No files found matching '{file_pattern}'.")

    ds = xr.open_mfdataset(
        filepaths,
        data_vars="minimal",
        **LEGACY_XARRAY_MERGE_KWARGS,  # type: ignore[arg-type]
    )

    ds_sub = ds.sel(
        lat=slice(-70, -30),
        time=slice(f"{start_year}-01-01", f"{end_year}-12-31"),
    )

    return ds_sub[var]
0 commit comments