Skip to content

Commit 66f68bb

Browse files
committed
Update run scripts to capture performance benchmarks
1 parent 85c820e commit 66f68bb

File tree

2 files changed

+301
-1
lines changed

2 files changed

+301
-1
lines changed

auxiliary_tools/cdat_regression_testing/843-migration-phase3/run-script-model-vs-obs/run_script.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
ZonalMean2dStratosphereParameter,
3939
)
4040
from e3sm_diags.run import runner
41+
import timeit
4142

4243

4344
class MachinePaths(TypedDict):
@@ -66,7 +67,7 @@ def run_all_sets():
6667
"ANN",
6768
"JJA",
6869
] # Default setting: seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
69-
param.results_dir = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/843-migration-phase3-model-vs-obs"
70+
param.results_dir = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/843-migration-phase3-perf-benchmark"
7071
param.multiprocessing = True
7172
param.num_workers = 24
7273

@@ -281,4 +282,8 @@ def _get_test_data_dirs(machine: str) -> Tuple[str, str]:
281282

282283

283284
if __name__ == "__main__":
    # Measure the wall-clock duration of one complete diagnostics run.
    start_time = timeit.default_timer()
    run_all_sets()
    elapsed_time = timeit.default_timer() - start_time
    print(f"Elapsed time: {elapsed_time} seconds")
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
"""
2+
Make sure to run the machine-specific commands below before
3+
running this script:
4+
5+
Compy:
6+
srun --pty --nodes=1 --time=01:00:00 /bin/bash
7+
source /share/apps/E3SM/conda_envs/load_latest_e3sm_unified_compy.sh
8+
9+
LCRC:
10+
srun --pty --nodes=1 --time=01:00:00 /bin/bash
11+
source /lcrc/soft/climate/e3sm-unified/load_latest_e3sm_unified_chrysalis.sh
12+
Or: source /lcrc/soft/climate/e3sm-unified/load_latest_e3sm_unified_anvil.sh
13+
14+
NERSC perlmutter cpu:
15+
salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=e3sm
16+
source /global/common/software/e3sm/anaconda_envs/load_latest_e3sm_unified_pm-cpu.sh
17+
"""
18+
# flake8: noqa E501
19+
20+
import os
21+
from typing import Tuple, TypedDict
22+
23+
from mache import MachineInfo
24+
25+
from e3sm_diags.parameter.annual_cycle_zonal_mean_parameter import ACzonalmeanParameter
26+
from e3sm_diags.parameter.area_mean_time_series_parameter import (
27+
AreaMeanTimeSeriesParameter,
28+
)
29+
from e3sm_diags.parameter.arm_diags_parameter import ARMDiagsParameter
30+
from e3sm_diags.parameter.core_parameter import CoreParameter
31+
from e3sm_diags.parameter.diurnal_cycle_parameter import DiurnalCycleParameter
32+
from e3sm_diags.parameter.enso_diags_parameter import EnsoDiagsParameter
33+
from e3sm_diags.parameter.mp_partition_parameter import MPpartitionParameter
34+
from e3sm_diags.parameter.qbo_parameter import QboParameter
35+
from e3sm_diags.parameter.streamflow_parameter import StreamflowParameter
36+
from e3sm_diags.parameter.tc_analysis_parameter import TCAnalysisParameter
37+
from e3sm_diags.parameter.zonal_mean_2d_stratosphere_parameter import (
38+
ZonalMean2dStratosphereParameter,
39+
)
40+
from e3sm_diags.run import runner
41+
import timeit
42+
43+
44+
class MachinePaths(TypedDict):
    """Typed mapping of the machine-specific paths used by this run script.

    Every value is a path string. ``_get_machine_paths`` builds the mapping
    for the machine the script is running on.
    """

    # Per-user directory under the machine's web portal base path.
    html_path: str

    # Climatology inputs (observations, then test data).
    obs_climo: str
    test_climo: str

    # Time-series inputs.
    obs_ts: str
    test_ts: str

    # Diurnal cycle climatology inputs.
    dc_obs_climo: str
    dc_test_climo: str

    # ARM diagnostics inputs.
    arm_obs: str
    arm_test: str

    # TC analysis inputs.
    tc_obs: str
    tc_test: str
56+
57+
58+
def run_all_sets():
    """Configure every diagnostic set and run them with the e3sm_diags runner.

    A core parameter object holds the shared settings, and the sets that
    need their own data paths or year ranges get a dedicated parameter
    object. All of them are handed to ``runner.run_diags``.

    Returns
    -------
    str
        The results directory assigned to the core parameter object.
    """
    paths: MachinePaths = _get_machine_paths()

    # Several sets read from the same time-series directories.
    obs_ts = paths["obs_ts"]
    test_ts = paths["test_ts"]

    # Core settings.
    core = CoreParameter()
    core.reference_data_path = paths["obs_climo"]
    core.test_data_path = paths["test_climo"]
    core.test_name = "20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis"
    # Default setting: seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
    core.seasons = ["ANN", "JJA"]
    core.results_dir = (
        "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/843-main-perf-benchmark"
    )
    core.multiprocessing = True
    core.num_workers = 24

    # ENSO diagnostics. Enso obs data range from year 1979 to 2016.
    enso = EnsoDiagsParameter()
    enso.reference_data_path = obs_ts
    enso.test_data_path = test_ts
    enso.test_name = "e3sm_v2"
    enso.test_start_yr = "0051"
    enso.test_end_yr = "0060"
    enso.ref_start_yr = "2001"
    enso.ref_end_yr = "2010"

    # QBO. Qbo obs data range from year 1979 to 2019, and the number of
    # years of test and ref should match.
    qbo = QboParameter()
    qbo.reference_data_path = obs_ts
    qbo.test_data_path = test_ts
    qbo.test_name = "e3sm_v2"
    qbo.start_yr = "0051"
    qbo.end_yr = "0060"
    qbo.ref_start_yr = "2001"
    qbo.ref_end_yr = "2010"

    # Area mean time series.
    area_mean_ts = AreaMeanTimeSeriesParameter()
    area_mean_ts.reference_data_path = obs_ts
    area_mean_ts.test_data_path = test_ts
    area_mean_ts.test_name = "e3sm_v2"
    area_mean_ts.start_yr = "0051"
    area_mean_ts.end_yr = "0060"

    # Diurnal cycle.
    diurnal = DiurnalCycleParameter()
    diurnal.reference_data_path = paths["dc_obs_climo"]
    diurnal.test_data_path = paths["dc_test_climo"]
    diurnal.short_test_name = "e3sm_v2"
    # Plot the diurnal cycle amplitude on different scales. Default is True.
    diurnal.normalize_test_amp = False

    # Streamflow. Gauge station data range from year 1986 to 1995.
    streamflow = StreamflowParameter()
    streamflow.reference_data_path = obs_ts
    streamflow.test_data_path = test_ts
    streamflow.short_test_name = "e3sm_v2"
    streamflow.test_start_yr = "0051"
    streamflow.test_end_yr = "0060"
    streamflow.ref_start_yr = "1986"
    streamflow.ref_end_yr = "1995"

    # ARM diagnostics.
    arm = ARMDiagsParameter()
    arm.reference_data_path = paths["arm_obs"]
    arm.ref_name = "armdiags"
    arm.test_data_path = paths["arm_test"]
    arm.test_name = "e3sm_v2"
    # Alternative test year range (disabled): "1996" to "2010".
    arm.test_start_yr = "1985"
    arm.test_end_yr = "2014"
    # For model vs obs, the ref start and end year can be any four digit
    # strings. For now, use all available years from obs.
    arm.ref_start_yr = "0001"
    arm.ref_end_yr = "0001"

    # TC analysis.
    tc = TCAnalysisParameter()
    tc.reference_data_path = paths["tc_obs"]
    tc.test_data_path = paths["tc_test"]
    tc.short_test_name = "e3sm_v2"
    tc.test_start_yr = "0051"
    tc.test_end_yr = "0060"
    # For model vs obs, the ref start and end year can be any four digit
    # strings. For now, use all available years from obs by default.
    tc.ref_start_yr = "1979"
    tc.ref_end_yr = "2018"

    # These two sets are run with their parameter defaults.
    ac_zonal_mean = ACzonalmeanParameter()
    zm_stratosphere = ZonalMean2dStratosphereParameter()

    # Mixed-phase partition. No reference data path is assigned here (the
    # assignment from the obs time-series path is disabled).
    mp_partition = MPpartitionParameter()
    mp_partition.test_data_path = test_ts
    mp_partition.short_test_name = "e3sm_v2"
    mp_partition.test_start_yr = "0051"
    mp_partition.test_end_yr = "0060"

    core.save_netcdf = True
    runner.sets_to_run = [
        "lat_lon",
        "zonal_mean_xy",
        "zonal_mean_2d",
        "zonal_mean_2d_stratosphere",
        "polar",
        "cosp_histogram",
        "meridional_mean_2d",
        "annual_cycle_zonal_mean",
        "enso_diags",
        "qbo",
        "area_mean_time_series",
        "diurnal_cycle",
        "streamflow",
        "arm_diags",
        "tc_analysis",
        "aerosol_aeronet",
        "aerosol_budget",
        "mp_partition",
    ]

    all_params = [
        core,
        zm_stratosphere,
        ac_zonal_mean,
        enso,
        qbo,
        area_mean_ts,
        diurnal,
        streamflow,
        arm,
        tc,
        mp_partition,
    ]
    runner.run_diags(all_params)

    return core.results_dir
197+
198+
199+
def _get_machine_paths() -> MachinePaths:
    """Build the machine-specific paths needed to run e3sm_diags.

    Returns
    -------
    MachinePaths
        A mapping from path type to the path string for the current machine.

    Raises
    ------
    ValueError
        If the detected machine is not in the list of supported machines.
    """
    # Look up the configuration of the machine this script is running on.
    info = MachineInfo()
    machine = info.machine

    supported_machines = [
        "anvil",
        "chrysalis",
        "compy",
        "pm-cpu",
        "cori-haswell",
        "cori-knl",
    ]
    if machine not in supported_machines:
        raise ValueError(f"e3sm_diags is not supported on this machine ({machine}).")

    # HTML outputs go under the current user's web portal directory.
    portal_root = info.config.get("web_portal", "base_path")
    user_html_dir = f"{portal_root}/{info.username}/"

    # Reference (observational) data root.
    obs_root = f"{info.diagnostics_base}/observations/Atm"

    # Test data roots; the second one is used only for the ARM test data.
    test_root, arm_test_root = _get_test_data_dirs(machine)

    return {
        "html_path": user_html_dir,
        "obs_climo": f"{obs_root}/climatology",
        "test_climo": f"{test_root}/climatology/rgr/",
        "obs_ts": f"{obs_root}/time-series/",
        "test_ts": f"{test_root}/time-series/rgr/",
        "dc_obs_climo": f"{obs_root}/climatology",
        "dc_test_climo": f"{test_root}/diurnal_climatology/rgr",
        "arm_obs": f"{obs_root}/arm-diags-data/",
        "arm_test": f"{arm_test_root}/arm-diags-data/",
        "tc_obs": f"{obs_root}/tc-analysis/",
        "tc_test": f"{test_root}/tc-analysis/",
    }
249+
250+
251+
def _get_test_data_dirs(machine: str) -> Tuple[str, str]:
252+
"""Get the directories for test data based on the machine.
253+
254+
The second path is for using the high frequency grid box output at ARM sites
255+
from another simulation when the output is available.
256+
257+
Parameters
258+
----------
259+
machine : str
260+
The name of the machine.
261+
262+
Returns
263+
-------
264+
Tuple[str, str]
265+
A tuple of two strings, each representing a test data directory path.
266+
"""
267+
test_data_dirs = None
268+
269+
# TODO: Update this function to use `mache` after the directories are updated.
270+
if machine in ["chrysalis", "anvil"]:
271+
base = "/lcrc/group/e3sm/public_html/e3sm_diags_test_data/postprocessed_e3sm_v2_data_for_e3sm_diags"
272+
elif machine in ["compy"]:
273+
base = "/compyfs/e3sm_diags_data/postprocessed_e3sm_v2_data_for_e3sm_diags"
274+
elif machine in ["cori-haswell", "cori-knl", "pm-cpu"]:
275+
base = "/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags"
276+
277+
test_data_dirs = (
278+
f"{base}/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis",
279+
# f"{base}/20210719.PhaseII.F20TR-P3.NGD.ne30pg2.compy",
280+
f"{base}/20221103.v2.LR.amip.NGD_v3atm.chrysalis",
281+
)
282+
283+
return test_data_dirs # type: ignore
284+
285+
286+
if __name__ == "__main__":
    # Run the full diagnostics 3 times and measure each run's wall-clock time.
    # `timeit.default_timer` is used instead of `timeit.timeit` because the
    # latter turns garbage collection off while timing, which changes the
    # runtime conditions of the workload being benchmarked.
    execution_times = []
    for _ in range(3):
        start_time = timeit.default_timer()
        run_all_sets()
        execution_times.append(timeit.default_timer() - start_time)

    # Calculate the average execution time.
    average_execution_time = sum(execution_times) / len(execution_times)
    print(f"Average execution time: {average_execution_time} seconds")

0 commit comments

Comments
 (0)