Skip to content

CRM Testing Framework Setup on Summit

Benjamin R. Hillman edited this page Jan 8, 2020 · 11 revisions

Generate the baselines

Create the baseline cases

For convenience, you'll want to specify your project on Summit and the root path for ACME-ECP with environment variables (the copy and comparison commands below rely on $PROJ and $E3SMHOME), for example:

export PROJ=cli115
export E3SMHOME=/ccs/home/$USER/ACME-ECP

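The following script, which I call create_baselines.sh, will create the baseline cases for you. Run it from the cime/scripts directory of your checkout (it calls ./create_newcase), and set PROJ inside the script to the same project you exported above so that the scratch paths used later match: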

#!/bin/bash
# create_baselines.sh
# Creates and sets up the 2D and 3D baseline cases. Must be run from the
# directory that contains ./create_newcase; the cases are created there.

E3SM_HOME=~/ACME-ECP   # not referenced below; kept for parity with the other scripts
COMPILER=gnu
MACH=summit-cpu
PES=84x1
RES=ne4_ne4
PROJ=stf006

# create_case NAME CRM_NX CRM_NY
#   NAME    case directory to create (an existing one is replaced, per
#           --handle-preexisting-dirs r)
#   CRM_NX  value passed to -crm_nx in CAM_CONFIG_OPTS
#   CRM_NY  value passed to -crm_ny in CAM_CONFIG_OPTS
# Exits the script if the case cannot be created or entered, so the
# xmlchange / user_nl_cam steps never run in the wrong directory.
# Returns to the starting directory when done.
create_case() {
  local case_name=$1
  local crm_nx=$2
  local crm_ny=$3

  ./create_newcase -compset FC5AV1C-H01A -case $case_name -compiler $COMPILER -mach $MACH -project $PROJ -pecount $PES -res $RES --handle-preexisting-dirs r || exit 1
  cd $case_name || exit 1

  ./xmlchange CAM_CONFIG_OPTS="-phys cam5 -use_SPCAM -crm_nx $crm_nx -crm_ny $crm_ny -crm_nx_rad 1 -crm_ny_rad 1 -crm_adv MPDATA -nlev 30 -crm_nz 28 -crm_dx 2000 -crm_dt 10 -microphys mg2 -SPCAM_microp_scheme sam1mom -cppdefs ' -DSP_DIR_NS -DSP_MCICA_RAD ' -rad rrtmg -chem none -pcols 16"
  ./xmlchange ATM_NCPL=96,STOP_N=1
  ./xmlchange CHARGE_ACCOUNT=$PROJ

  # Quoted delimiter: the namelist is written verbatim, with no shell expansion.
  cat > user_nl_cam << 'eof'
prescribed_aero_cycle_yr = 2000
prescribed_aero_file = 'mam3_1.9x2.5_L30_2000clim_c130319.nc'
prescribed_aero_datapath = '/gpfs/alpine/world-shared/csc190/e3sm/cesm/inputdata/atm/cam/chem/trop_mam/aero'
use_hetfrz_classnuc = .false.
prescribed_aero_type = 'CYCLICAL'
aerodep_flx_type = 'CYCLICAL'
aerodep_flx_datapath = '/gpfs/alpine/world-shared/csc190/e3sm/cesm/inputdata/atm/cam/chem/trop_mam/aero'
aerodep_flx_file = 'mam3_1.9x2.5_L30_2000clim_c130319.nc'
aerodep_flx_cycle_yr = 2000
srf_flux_avg = 1
use_crm_accel = .true.
crm_accel_factor = 2.
crm_accel_uv = .true.
eof

  ./case.setup

  cd ..
}

# 2D CRM: 16 x 1 columns
create_case sp1vfast2d_baseline 16 1

# 3D CRM: 4 x 4 columns
create_case sp1vfast3d_baseline 4 4

Change compiler flags to -O0

cd sp1vfast2d_baseline

Now change the gnu compiler flags to use -O0 when ("${DEBUG}" STREQUAL "FALSE") in Macros.cmake. This will change it from:

 70 if("${COMPILER}" STREQUAL "gnu")
 71   set(LDFLAGS "${LDFLAGS} -L$ENV{NETCDF_C_PATH}/lib -lnetcdf -L$ENV{NETCDF_FORTRAN_PATH}/lib -lnetcdff -L$ENV{ESSL_PATH}/lib64 -lessl -L$ENV{NETLIB_LAPACK_PATH}/lib -llapack")
 72   set(CPPDEFS "${CPPDEFS}  -DFORTRANUNDERSCORE -DNO_R16 -DCPRGNU")
 73   if("${compile_threaded}" STREQUAL "TRUE")
 74     set(CFLAGS "${CFLAGS}  -fopenmp")
 75   endif()
 76   if("${MODEL}" STREQUAL "csm_share")
 77     set(CFLAGS "${CFLAGS}  -std=c99")
 78   endif()
 79   if("${compile_threaded}" STREQUAL "TRUE")
 80     set(FFLAGS "${FFLAGS}  -fopenmp")
 81   endif()
 82   if("${DEBUG}" STREQUAL "TRUE")
 83     set(FFLAGS "${FFLAGS}  -g -Wall -Og -fbacktrace -fcheck=bounds -ffpe-trap=invalid,zero,overflow")
 84     set(CFLAGS "${CFLAGS}  -g -Wall -Og -fbacktrace -fcheck=bounds -ffpe-trap=invalid,zero,overflow")
 85   endif()
 86   if("${DEBUG}" STREQUAL "FALSE")
 87     set(FFLAGS "${FFLAGS}  -O")
 88     set(FFLAGS "${FFLAGS}  -O2")
 89     set(CFLAGS "${CFLAGS}  -O")
 90     set(CFLAGS "${CFLAGS}  -O2")
 91   endif()
 92   if("${MODEL}" STREQUAL "cism")
 93     set(CMAKE_OPTS "${CMAKE_OPTS}  -D CISM_GNU=ON")
 94   endif()
 95   if("${compile_threaded}" STREQUAL "TRUE")
 96     set(LDFLAGS "${LDFLAGS}  -fopenmp")
 97   endif()
 98   if("${MPILIB}" STREQUAL "!mpi-serial")
 99     set(LDFLAGS "${LDFLAGS}  -L$ENV{PNETCDF_PATH}/lib -lpnetcdf -L$ENV{HDF5_PATH}/lib -lhdf5_hl -lhdf5")
100   endif()
101 endif()

To:

 70 if("${COMPILER}" STREQUAL "gnu")
 71   set(LDFLAGS "${LDFLAGS} -L$ENV{NETCDF_C_PATH}/lib -lnetcdf -L$ENV{NETCDF_FORTRAN_PATH}/lib -lnetcdff -L$ENV{ESSL_PATH}/lib64 -lessl -L$ENV{NETLIB_LAPACK_PATH}/lib -llapack")
 72   set(CPPDEFS "${CPPDEFS}  -DFORTRANUNDERSCORE -DNO_R16 -DCPRGNU")
 73   if("${compile_threaded}" STREQUAL "TRUE")
 74     set(CFLAGS "${CFLAGS}  -fopenmp")
 75   endif()
 76   if("${MODEL}" STREQUAL "csm_share")
 77     set(CFLAGS "${CFLAGS}  -std=c99")
 78   endif()
 79   if("${compile_threaded}" STREQUAL "TRUE")
 80     set(FFLAGS "${FFLAGS}  -fopenmp")
 81   endif()
 82   if("${DEBUG}" STREQUAL "TRUE")
 83     set(FFLAGS "${FFLAGS}  -g -Wall -Og -fbacktrace -fcheck=bounds -ffpe-trap=invalid,zero,overflow")
 84     set(CFLAGS "${CFLAGS}  -g -Wall -Og -fbacktrace -fcheck=bounds -ffpe-trap=invalid,zero,overflow")
 85   endif()
 86   if("${DEBUG}" STREQUAL "FALSE")
 87     set(FFLAGS "${FFLAGS}  -O0")
 88     set(FFLAGS "${FFLAGS}  -O0")
 89     set(CFLAGS "${CFLAGS}  -O0")
 90     set(CFLAGS "${CFLAGS}  -O0")
 91   endif()
 92   if("${MODEL}" STREQUAL "cism")
 93     set(CMAKE_OPTS "${CMAKE_OPTS}  -D CISM_GNU=ON")
 94   endif()
 95   if("${compile_threaded}" STREQUAL "TRUE")
 96     set(LDFLAGS "${LDFLAGS}  -fopenmp")
 97   endif()
 98   if("${MPILIB}" STREQUAL "!mpi-serial")
 99     set(LDFLAGS "${LDFLAGS}  -L$ENV{PNETCDF_PATH}/lib -lpnetcdf -L$ENV{HDF5_PATH}/lib -lhdf5_hl -lhdf5")
100   endif()
101 endif()

Now cd ../sp1vfast3d_baseline and do the exact same thing.

Compile and run the -O0 code

cd ../sp1vfast2d_baseline; ./case.build; ./case.submit
cd ../sp1vfast3d_baseline; ./case.build; ./case.submit

Copy the -O0 baselines to the case directories

# Save each case's restart file from its scratch run directory into the case
# directory, tagged with the optimization level it was built with.
cd ../sp1vfast2d_baseline
cp /gpfs/alpine/scratch/$USER/$PROJ/e3sm/sp1vfast2d_baseline/run/sp1vfast2d_baseline.cam.r.0001-01-02-00000.nc \
   ./sp1vfast2d_baseline.cam.r.0001-01-02-00000.optO0.nc
cd ../sp1vfast3d_baseline
cp /gpfs/alpine/scratch/$USER/$PROJ/e3sm/sp1vfast3d_baseline/run/sp1vfast3d_baseline.cam.r.0001-01-02-00000.nc \
   ./sp1vfast3d_baseline.cam.r.0001-01-02-00000.optO0.nc

Change compiler flags to -O3

Now go into the 2d and 3d directories, and change those same flags to -O3 instead of -O0

Clean, compile and run the -O3 code

cd ../sp1vfast2d_baseline
./case.build --clean-all
./case.build
./case.submit
cd ../sp1vfast3d_baseline
./case.build --clean-all
./case.build
./case.submit

Copy the -O3 baselines to the case directory

# Save each case's restart file from its scratch run directory into the case
# directory, tagged with the optimization level it was built with.
cd ../sp1vfast2d_baseline
cp /gpfs/alpine/scratch/$USER/$PROJ/e3sm/sp1vfast2d_baseline/run/sp1vfast2d_baseline.cam.r.0001-01-02-00000.nc \
   ./sp1vfast2d_baseline.cam.r.0001-01-02-00000.optO3.nc
cd ../sp1vfast3d_baseline
cp /gpfs/alpine/scratch/$USER/$PROJ/e3sm/sp1vfast3d_baseline/run/sp1vfast3d_baseline.cam.r.0001-01-02-00000.nc \
   ./sp1vfast3d_baseline.cam.r.0001-01-02-00000.optO3.nc

Create a netcdf-capable python environment

If you do not already have a netcdf environment with the netcdf4 module, you'll need to create an anaconda environment with the following commands on Summit:

module load python/3.7.0-anaconda3-5.3.0
conda create -n netcdf python=3.7 openssl=1.1.1b numpy netcdf4 xarray
module unload python

You only need to create this once for all time, and from then on, you can load it with:

module load python/3.7.0-anaconda3-5.3.0
source activate netcdf

Note that you cannot have this loaded when you run ./case.* scripts in E3SM because it interferes with those python scripts and usually causes them to fail.

Verify your netcdf python environment, and verify that the -O0 and -O3 files are actually different

module load python/3.7.0-anaconda3-5.3.0
source activate netcdf
cd ../sp1vfast2d_baseline
python $E3SMHOME/cime/tools/nccmp/nccmp.py \
       ./sp1vfast2d_baseline.cam.r.0001-01-02-00000.optO0.nc \
       ./sp1vfast2d_baseline.cam.r.0001-01-02-00000.optO3.nc
cd ../sp1vfast3d_baseline
python $E3SMHOME/cime/tools/nccmp/nccmp.py \
       ./sp1vfast3d_baseline.cam.r.0001-01-02-00000.optO0.nc \
       ./sp1vfast3d_baseline.cam.r.0001-01-02-00000.optO3.nc
source deactivate
module rm python

Create the regression cases

The following script will create and setup the regression test cases for you. I call this script create_regressions.sh:

#!/bin/bash
# create_regressions.sh
# Creates and sets up the 2D and 3D regression cases. Must be run from the
# directory that contains ./create_newcase; the cases are created there.

E3SM_HOME=~/ACME-ECP   # not referenced below; kept for parity with the other scripts
COMPILER=gnu
MACH=summit-cpu
PES=84x1
RES=ne4_ne4
PROJ=stf006

# create_case NAME CRM_NX CRM_NY
#   NAME    case directory to create (an existing one is replaced, per
#           --handle-preexisting-dirs r)
#   CRM_NX  value passed to -crm_nx in CAM_CONFIG_OPTS
#   CRM_NY  value passed to -crm_ny in CAM_CONFIG_OPTS
# Exits the script if the case cannot be created or entered, so the
# xmlchange / user_nl_cam steps never run in the wrong directory.
# Returns to the starting directory when done.
create_case() {
  local case_name=$1
  local crm_nx=$2
  local crm_ny=$3

  ./create_newcase -compset FC5AV1C-H01A -case $case_name -compiler $COMPILER -mach $MACH -project $PROJ -pecount $PES -res $RES --handle-preexisting-dirs r || exit 1
  cd $case_name || exit 1

  ./xmlchange CAM_CONFIG_OPTS="-phys cam5 -use_SPCAM -crm_nx $crm_nx -crm_ny $crm_ny -crm_nx_rad 1 -crm_ny_rad 1 -crm_adv MPDATA -nlev 30 -crm_nz 28 -crm_dx 2000 -crm_dt 10 -microphys mg2 -SPCAM_microp_scheme sam1mom -cppdefs ' -DSP_DIR_NS -DSP_MCICA_RAD ' -rad rrtmg -chem none -pcols 16"
  ./xmlchange ATM_NCPL=96,STOP_N=1
  ./xmlchange CHARGE_ACCOUNT=$PROJ

  # Quoted delimiter: the namelist is written verbatim, with no shell expansion.
  cat > user_nl_cam << 'eof'
prescribed_aero_cycle_yr = 2000
prescribed_aero_file = 'mam3_1.9x2.5_L30_2000clim_c130319.nc'
prescribed_aero_datapath = '/gpfs/alpine/world-shared/csc190/e3sm/cesm/inputdata/atm/cam/chem/trop_mam/aero'
use_hetfrz_classnuc = .false.
prescribed_aero_type = 'CYCLICAL'
aerodep_flx_type = 'CYCLICAL'
aerodep_flx_datapath = '/gpfs/alpine/world-shared/csc190/e3sm/cesm/inputdata/atm/cam/chem/trop_mam/aero'
aerodep_flx_file = 'mam3_1.9x2.5_L30_2000clim_c130319.nc'
aerodep_flx_cycle_yr = 2000
srf_flux_avg = 1
use_crm_accel = .true.
crm_accel_factor = 2.
crm_accel_uv = .true.
eof

  ./case.setup

  cd ..
}

# 2D CRM: 16 x 1 columns
create_case sp1vfast2d_regression 16 1

# 3D CRM: 4 x 4 columns
create_case sp1vfast3d_regression 4 4

Change regression compiler flags to -O3

Once you run this script, you can then cd into the 2d and 3d regression case directories, and change Macros.cmake exactly like you did before to set the DEBUG=FALSE compiler flags to -O3 to make the regressions run faster.

Run the regressions, and check the answers

The following script will run the regressions and check the answer for you. You have the option to run them within an interactive batch job to ensure they run quickly, or you can submit them to the batch system because they have batch headers. I call this script run_regressions.sh

#!/bin/bash
#BSUB -P stf006
#BSUB -W 02:00
#BSUB -nnodes 1
#BSUB -J regression
#BSUB -o regdim23.%J
#BSUB -e regdim23.%J
#BSUB -alloc_flags smt2

# run_regressions.sh
# Optionally cleans, builds and runs the 2D and/or 3D regression cases, then
# passes the two baseline restart files (-O0 and -O3) and the regression
# restart file to nccmp3.py. Can be run inside an interactive batch job or
# submitted with bsub using the headers above.

date

# Set your netcdf-capable python env here
python_env=rrtmgp-env

source $MODULESHOME/init/bash
ulimit -s unlimited

# Run 2D or 3D or both
dim2=1
dim3=1

# Clean, build, and / or run the code
clean=0
build=1
submit=1

E3SM_HOME=~/ACME-ECP

# Directory that holds the <case>/run directories. The default is the
# original hard-coded location; set SCRATCH_ROOT in the environment to
# point at your own scratch area (e.g. /gpfs/alpine/scratch/$USER/<project>/e3sm).
SCRATCH_ROOT=${SCRATCH_ROOT:-/gpfs/alpine/scratch/imn/stf006/e3sm}

# run_regression LABEL TAG
#   LABEL  text used in progress messages ("2D" or "3D")
#   TAG    case-name infix ("2d" or "3d"); selects sp1vfast<TAG>_regression
#          and sp1vfast<TAG>_baseline under $E3SM_HOME/cime/scripts
# Honors the clean / build / submit switches set above. Exits the whole
# script if the case directory is missing or if the build or run fails.
run_regression() {
  local label=$1
  local tag=$2
  local scripts_dir=$E3SM_HOME/cime/scripts
  local CASE=sp1vfast${tag}_regression
  local baseline=sp1vfast${tag}_baseline

  # Always start from the scripts directory so each call is independent of
  # wherever the previous one left the shell.
  cd $scripts_dir || exit 1

  if [ ! -d "$CASE" ]; then
    echo "************* ERROR: ${label} CASE DOES NOT EXIST *************"
    # Stop here: continuing would run ./case.build in the wrong directory.
    exit 1
  fi
  cd $CASE || exit 1

  if [[ $clean -eq 1 ]]; then
    echo "************* CLEANING ${label} CASE *************"
    ./case.build --clean-all
  fi
  if [[ $build -eq 1 ]]; then
    echo "************* BUILDING ${label} CASE *************"
    ./case.build || exit 1
  fi
  if [[ $submit -eq 1 ]]; then
    echo "************* SUBMITTING ${label} CASE *************"
    # --no-batch: run in the current allocation instead of queueing a new job
    ./case.submit --no-batch || exit 1
    cp $SCRATCH_ROOT/$CASE/run/$CASE.cam.r.0001-01-02-00000.nc .
  fi

  echo "************* DIFF'ING ${label} *************"
  # The python environment is loaded only around the comparison and removed
  # again afterwards, so it is not active while the ./case.* scripts run.
  module add python/3.7.0-anaconda3-5.3.0
  source activate $python_env
  python $E3SM_HOME/cime/tools/nccmp/nccmp3.py \
         $scripts_dir/$baseline/$baseline.cam.r.0001-01-02-00000.optO0.nc \
         $scripts_dir/$baseline/$baseline.cam.r.0001-01-02-00000.optO3.nc \
         $scripts_dir/$CASE/$CASE.cam.r.0001-01-02-00000.nc
  source deactivate
  module rm python
  echo ""
}

if [[ $dim2 -eq 1 ]]; then
  run_regression 2D 2d
fi

if [[ $dim3 -eq 1 ]]; then
  run_regression 3D 3d
fi

date

The job output from this script will give you all of the diffs that are greater than the envelope. So from now on, all you need to do is run the run_regressions.sh script. I recommend not cleaning every time as it isn't usually necessary, and it increases the testing time.

You will have to run the run_regressions.sh script a couple of times due to bugs in the CIME infrastructure that improperly handle ./case.submit --no-batch (causing it to fail after the build phase in the submission phase), but after a couple of failures, the job will run correctly.