Skip to content

Commit 0417310

Browse files
committed
make cmip restart test more strict
1 parent 6c0eaa0 commit 0417310

File tree

4 files changed

+148
-92
lines changed

4 files changed

+148
-92
lines changed

experiments/ClimaEarth/test/compare.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import ClimaComms
55
import ClimaAtmos as CA
66
import ClimaCore as CC
7+
import Oceananigans as OC
8+
import ClimaSeaIce as CSI
79
import NCDatasets
810

911
"""
@@ -50,8 +52,8 @@ function compare(
5052
name = "",
5153
ignore = Set([:rc]),
5254
) where {
53-
T1 <: Union{CC.Fields.FieldVector, CC.Spaces.AbstractSpace, NamedTuple, CA.AtmosCache},
54-
T2 <: Union{CC.Fields.FieldVector, CC.Spaces.AbstractSpace, NamedTuple, CA.AtmosCache},
55+
T1 <: Union{CC.Fields.FieldVector, CC.Spaces.AbstractSpace, NamedTuple, CA.AtmosCache, OC.Models.HydrostaticFreeSurfaceModels.HydrostaticFreeSurfaceModel, CSI.SeaIceModel},
56+
T2 <: Union{CC.Fields.FieldVector, CC.Spaces.AbstractSpace, NamedTuple, CA.AtmosCache, OC.Models.HydrostaticFreeSurfaceModels.HydrostaticFreeSurfaceModel, CSI.SeaIceModel},
5557
}
5658
pass = true
5759
return _compare(pass, v1, v2; name, ignore)
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# This test runs a small CMIP simulation four times.
2+
#
3+
# - The first run takes four steps; the second run restarts from its checkpoint
4+
# - The third run takes two steps
5+
# - The fourth run takes two more steps, restarting from the third run's
6+
#   checkpoint
7+
#
8+
# After all these simulations are run, we compare the first and last runs. They
9+
# should be bit-wise identical.
10+
#
11+
# The content of the simulation is not the most important, but it helps if it
12+
# has all of the complexity possible.
13+
14+
import ClimaComms
15+
ClimaComms.@import_required_backends
16+
import ClimaUtilities.OutputPathGenerator: maybe_wait_filesystem
17+
import YAML
18+
import Logging
19+
using Test
20+
21+
# Uncomment the following for cleaner output (but more difficult debugging)
22+
# Logging.disable_logging(Logging.Warn)
23+
24+
include("compare.jl")
25+
include("../code_loading.jl")
26+
27+
comms_ctx = ClimaComms.context()
28+
@info "Context: $(comms_ctx)"
29+
ClimaComms.init(comms_ctx)
30+
31+
# Make sure that all MPI processes agree on the output_loc
32+
tmpdir = ClimaComms.iamroot(comms_ctx) ? mktempdir(pwd()) : ""
33+
tmpdir = ClimaComms.bcast(comms_ctx, tmpdir)
34+
# Sometimes the shared filesystem doesn't work properly and the folder is not
35+
# synced across MPI processes. Let's add an additional check here.
36+
maybe_wait_filesystem(ClimaComms.context(), tmpdir)
37+
38+
# Parse the input config file as a dictionary
39+
config_file = joinpath(@__DIR__, "restart_cmip.yml")
40+
config_dict = Input.get_coupler_config_dict(config_file)
41+
42+
# Four steps
43+
four_steps = deepcopy(config_dict)
44+
45+
four_steps["t_end"] = "960secs"
46+
four_steps["coupler_output_dir"] = tmpdir
47+
four_steps["checkpoint_dt"] = "960secs"
48+
four_steps["job_id"] = "four_steps"
49+
50+
println("Simulating four steps")
51+
cs_four_steps = setup_and_run(four_steps)
52+
53+
# Check that we can pick up a simulation by providing restart_t and restart_dir
54+
println("Simulating four steps, options from command line")
55+
four_steps_reading = deepcopy(four_steps)
56+
57+
four_steps_reading["t_end"] = "1200secs"
58+
four_steps_reading["detect_restart_files"] = true
59+
four_steps_reading["restart_dir"] = cs_four_steps.dir_paths.checkpoints_dir
60+
four_steps_reading["restart_t"] = 960
61+
four_steps_reading["job_id"] = "four_steps_reading"
62+
Input.update_t_start_for_restarts!(four_steps_reading)
63+
64+
cs_four_steps_reading = setup_and_run(four_steps_reading)
65+
@testset "CMIP restarts (state and cache)" begin
66+
@test cs_four_steps_reading.tspan[1] == cs_four_steps.tspan[2]
67+
end
68+
69+
# Now, two steps plus two
70+
two_steps = deepcopy(config_dict)
71+
72+
two_steps["t_end"] = "480secs"
73+
two_steps["coupler_output_dir"] = tmpdir
74+
two_steps["checkpoint_dt"] = "480secs"
75+
two_steps["job_id"] = "two_steps"
76+
77+
# Copying since setup_and_run changes its content
78+
println("Simulating two steps")
79+
cs_two_steps1 = setup_and_run(two_steps)
80+
81+
println("Reading and simulating last two steps")
82+
# Two additional steps
83+
two_steps["t_end"] = "960secs"
84+
two_steps["detect_restart_files"] = true
85+
Input.update_t_start_for_restarts!(two_steps)
86+
cs_two_steps2 = setup_and_run(two_steps)
87+
88+
@testset "Restarts" begin
89+
# We put cs_four_steps.fields in a NamedTuple so that we can start the recursion in compare
90+
@test compare(
91+
(; coupler_fields = cs_four_steps.fields),
92+
(; coupler_fields = cs_two_steps2.fields),
93+
)
94+
95+
@test compare(
96+
cs_four_steps.model_sims.atmos_sim.integrator.u,
97+
cs_two_steps2.model_sims.atmos_sim.integrator.u,
98+
)
99+
100+
@test compare(
101+
cs_four_steps.model_sims.atmos_sim.integrator.p,
102+
cs_two_steps2.model_sims.atmos_sim.integrator.p,
103+
ignore = [
104+
:walltime_estimate, # Stateful
105+
:output_dir, # Changes across runs
106+
:scratch, # Irrelevant
107+
:ghost_buffer, # Irrelevant
108+
:hyperdiffusion_ghost_buffer, # Irrelevant
109+
:data_handler, # Stateful
110+
:face_clear_sw_direct_flux_dn, # Not filled by RRTMGP
111+
:face_sw_direct_flux_dn, # Not filled by RRTMGP
112+
:rc, # CUDA internal object
113+
],
114+
)
115+
116+
@test compare(
117+
cs_four_steps.model_sims.land_sim.integrator.u,
118+
cs_two_steps2.model_sims.land_sim.integrator.u,
119+
)
120+
@test compare(
121+
cs_four_steps.model_sims.land_sim.integrator.p,
122+
cs_two_steps2.model_sims.land_sim.integrator.p,
123+
ignore = [:dss_buffer_3d, :dss_buffer_2d, :rc],
124+
)
125+
126+
@test compare(
127+
cs_four_steps.model_sims.ice_sim.ice.model,
128+
cs_two_steps2.model_sims.ice_sim.ice.model,
129+
ignore = [:clock, :parent, :ptr]
130+
)
131+
132+
@test compare(
133+
cs_four_steps.model_sims.ocean_sim.ocean.model,
134+
cs_two_steps2.model_sims.ocean_sim.ocean.model,
135+
ignore = [:clock, :parent, :ptr]
136+
)
137+
end

experiments/ClimaEarth/test/restart_state_only_cmip.yml renamed to experiments/ClimaEarth/test/restart_cmip.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ albedo_model: "CouplerAlbedo"
33
atmos_config_file: "config/atmos_configs/climaatmos_edonly.yml"
44
coupler_toml: ["toml/amip_edonly.toml"]
55
dt_atmos: "120secs"
6-
dt_cpl: "360secs"
6+
dt_cpl: "240secs"
77
dt_land: "120secs"
8-
dt_ocean: "360secs"
9-
dt_seaice: "360secs"
8+
dt_ocean: "240secs"
9+
dt_seaice: "240secs"
10+
dt_rad: "120secs"
1011
dz_bottom: 100.0
1112
energy_check: false
1213
h_elem: 8
13-
checkpoint_dt: "720hours"
14+
checkpoint_dt: "480secs"
1415
ice_model: "clima_seaice"
1516
land_model: "integrated"
1617
land_spun_up_ic: false
@@ -20,9 +21,10 @@ ocean_model: "oceananigans"
2021
output_default_diagnostics: true
2122
rayleigh_sponge: true
2223
simple_ocean: true
24+
save_cache: true
2325
start_date: "20100101"
2426
surface_setup: "PrescribedSurface"
25-
t_end: "720secs"
27+
t_end: "480secs"
2628
topo_smoothing: true
2729
topography: "Earth"
2830
turbconv: ~

experiments/ClimaEarth/test/restart_state_only_cmip.jl

Lines changed: 0 additions & 85 deletions
This file was deleted.

0 commit comments

Comments
 (0)