Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions .github/workflows/at2_gcc-cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ jobs:
build-type: ${{ fromJSON(needs.define_matrix.outputs.build_type) }}
fp-precision: ${{ fromJSON(needs.define_matrix.outputs.precision) }}
name: gcc-cuda / ${{ matrix.build-type }} - ${{ matrix.fp-precision }}
env:
# relative path to nvcc_wrapper from mam4xx root
haero_nvcw: "haero_src/ext/ekat/extern/kokkos/bin/nvcc_wrapper"
steps:
- name: Check out the repository
uses: actions/checkout@v4
Expand All @@ -79,7 +82,6 @@ jobs:
uses: actions/checkout@v4
with:
repository: eagles-project/haero
ref: 017fea932381777f48b2585f86d6ab48fe4b8d09
submodules: recursive
path: haero_src
- name: Show action trigger
Expand All @@ -100,49 +102,56 @@ jobs:
echo "H100 detected--setting Hopper90 architecture"
echo "Hopper=ON" >> $GITHUB_ENV
echo "CUDA_ARCH=90" >> $GITHUB_ENV
ARCH=90
echo "CUDA_DEVICE=HOPPER90" >> $GITHUB_ENV
;;
*"A100"*)
echo "A100 detected--setting Ampere80 architecture"
echo "Ampere=ON" >> $GITHUB_ENV
echo "CUDA_ARCH=80" >> $GITHUB_ENV
echo "CUDA_DEVICE=AMPERE80" >> $GITHUB_ENV
;;
*"V100"*)
echo "V100 detected--setting Volta70 architecture"
echo "Volta=ON" >> $GITHUB_ENV
echo "CUDA_ARCH=70" >> $GITHUB_ENV
echo "CUDA_DEVICE=VOLTA70" >> $GITHUB_ENV
;;
*)
echo "Unsupported GPU model: $gpu_model"
exit 1
;;
esac
- name: Set nvcc_wrapper Arch
run: |
nvcw="$(pwd)/${haero_nvcw}"
sed -i s/default_arch=\"sm_70\"/default_arch=\"sm_"$CUDA_ARCH"\"/g "${nvcw}"
echo "===================================="
grep -i "default_arch=" "${nvcw}"
- name: Building Haero (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
run: |
nvcw="$(pwd)/${haero_nvcw}"
cmake -S haero_src -B haero_build \
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DCMAKE_INSTALL_PREFIX="haero_install" \
-DCMAKE_C_COMPILER=gcc \
-DCMAKE_CXX_COMPILER=g++ \
-DCMAKE_CXX_COMPILER="${nvcw}" \
-DHAERO_ENABLE_MPI=OFF \
-DHAERO_ENABLE_GPU=ON \
-DHAERO_PRECISION=${{ matrix.fp-precision }}
-DHAERO_PRECISION=${{ matrix.fp-precision }} \
-DKokkos_ARCH_$CUDA_DEVICE=ON \
-DHAERO_DEVICE_ARCH=$CUDA_DEVICE
cd haero_build
make -j
make install
- name: Set nvcc_wrapper Arch
run: |
sed -i s/default_arch=\"sm_70\"/default_arch=\"sm_"$CUDA_ARCH"\"/g `pwd`/haero_install/bin/nvcc_wrapper
echo "===================================="
grep -i "default_arch=" `pwd`/haero_install/bin/nvcc_wrapper
- name: Configuring MAM4xx (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
run: |
nvcw="$(pwd)/${haero_nvcw}"
cmake -S . -B build \
-DCMAKE_CXX_COMPILER=`pwd`/haero_install/bin/nvcc_wrapper \
-DCMAKE_CXX_COMPILER="${nvcw}" \
-DCMAKE_C_COMPILER=gcc \
-DCMAKE_INSTALL_PREFIX=`pwd`/install \
-DCMAKE_INSTALL_PREFIX=$(pwd)/install \
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DMAM4XX_HAERO_DIR=`pwd`/haero_install \
-DMAM4XX_HAERO_DIR=$(pwd)/haero_install \
-DNUM_VERTICAL_LEVELS=72 \
-DENABLE_COVERAGE=OFF \
-DENABLE_SKYWALKER=ON \
Expand All @@ -151,7 +160,7 @@ jobs:
- name: Building MAM4xx (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
run: |
cd build
make
make -j
- name: Running tests (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
run: |
cd build
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/gh_gcc-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ jobs:
prec: ${{ inputs.precision || 'ALL' }}
run: |
case ${{ env.prec }} in
"Debug")
"single")
echo 'precision=["single"]' >> "$GITHUB_OUTPUT" ;;
"Release")
"double")
echo 'precision=["double"]' >> "$GITHUB_OUTPUT" ;;
"ALL")
echo 'precision=["single", "double"]' >> "$GITHUB_OUTPUT" ;;
Expand Down Expand Up @@ -108,7 +108,6 @@ jobs:
uses: actions/checkout@v3
with:
repository: eagles-project/haero
ref: 017fea932381777f48b2585f86d6ab48fe4b8d09
submodules: recursive
path: haero_src

Expand All @@ -130,9 +129,9 @@ jobs:
- name: Configuring MAM4xx (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
run: |
cmake -S . -B build \
-DCMAKE_INSTALL_PREFIX=`pwd`/install \
-DCMAKE_INSTALL_PREFIX=$(pwd)/install \
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DMAM4XX_HAERO_DIR=`pwd`/haero_install \
-DMAM4XX_HAERO_DIR=$(pwd)/haero_install \
-DNUM_VERTICAL_LEVELS=72 \
-DENABLE_COVERAGE=ON \
-G "Unix Makefiles"
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ set(CLANG_FORMAT_VERSION 14)
# Set compilers, linkers, and flags from Haero.
set(CMAKE_CXX_STANDARD ${HAERO_CXX_STANDARD})
set(CMAKE_CXX_COMPILER ${HAERO_CXX_COMPILER})
set(CMAKE_CXX_FLAGS ${HAERO_CXX_FLAGS})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HAERO_CXX_FLAGS}")

set(CMAKE_C_STANDARD ${HAERO_C_STANDARD})
set(CMAKE_C_COMPILER ${HAERO_C_COMPILER})
Expand Down
27 changes: 23 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,38 @@ of parameters. Check out the comments at the top of `build-haero.sh`.

You can build a CPU-capable version of HAERO with some defaults set by typing

```shell
./build-haero.sh <install-path>
```
./build-haero.sh <path>

The extended syntax to fully configure other types of build is

```shell
./build-haero.sh <install-path> <device> <precision> <build-type> [device-arch]
```

in which the options are:

```shell
device: {cpu, gpu}
precision: {single, double}
build-type: {Debug, Release}
```

where `<path>` is a directory to which HAERO will be installed. If you'd rather
install HAERO yourself, you can follow the instructions in the
and the semi-optional `device-arch` argument must correspond to
[those accepted by Kokkos](https://kokkos.org/kokkos-core-wiki/get-started/configuration-guide.html#architectures)
and is likely required for a properly-configured GPU build.
That is to say, the `device-arch` option **should** be set for GPU builds, and
building for GPU without that argument is unsupported.

If you'd rather install HAERO yourself, you can follow the instructions in the
[HAERO repository](https://github.com/eagles-project/haero). Make sure you run
all the steps, including `make install`.

If you're on a machine that requires modules to get access to compilers, etc,
use
```
source build-haero.sh <path>
source build-haero.sh <path> [...]
```
to make sure your environment is updated.

Expand Down
74 changes: 43 additions & 31 deletions build-haero.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,10 @@ PREFIX=$1
DEVICE=$2
PRECISION=$3
BUILD_TYPE=$4
# Turn off search for yaml libraries. EKAT will build yaml-cpp from submodules.
DEVICE_ARCH=$5
# Turn off search for yaml libraries. EKAT will build yaml-cpp from submodules.
SKIP_FIND_YAML_CPP=ON

# Default compilers (can be overridden by environment variables)
if [[ -z $CC ]]; then
CC=cc
fi
if [[ -z $CXX ]]; then
CXX=c++
fi

if [[ "$PREFIX" == "" ]]; then
echo "Haero installation prefix was not specified!"
echo "Usage: $0 <prefix> <device> <precision> <build_type>"
Expand Down Expand Up @@ -75,34 +68,51 @@ fi
echo "Cloning Haero repository into $(pwd)/.haero..."
git clone [email protected]:eagles-project/haero.git .haero || exit
cd .haero || exit
git checkout main
git submodule update --init --recursive || exit

# Are we on a special machine?
cd machines
echo $(pwd)
for MACHINE_FILE in $(ls)
do
MACHINE=${MACHINE_FILE/\.sh/}
echo $MACHINE
echo `hostname` | grep -q "$MACHINE"
host_match=$?
echo $SYSTEM_NAME | grep -q "$MACHINE"
sys_match=$?
if [ $host_match -eq 0 ] || [ $sys_match -eq 0 ]; then
echo "Found machine file $MACHINE_FILE. Setting up environment for $MACHINE..."
source ./$MACHINE.sh
fi
done

cd ../..

# Configure Haero with the given selections.
if [[ "$DEVICE" == "gpu" ]]; then
cd ..

# these are (at least close to) the standard compiler choices for gpu builds
# to use one, uncomment or replace in the if [] blocks below

# NVIDIA GPU + gcc
# NOTE: if CXX is set to nvcc_wrapper, then this must be the same path used
# in the `sed` command below
# This happens by default via the $nvcw variable
CXX="$(pwd)/.haero/ext/ekat/extern/kokkos/bin/nvcc_wrapper"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need build-haero to be independent of the GPU type. Perhaps we should pass a new input parameter for the GPU type.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah. I originally meant that to be commented out. However, in line with your following comment, I did a big rewrite of build-haero, and it now works for all three types via command-line args. It does, however, require an additional arg.

I won't say it's pretty, and it's getting to the point where it's feeling too complicated to be long-lived... but it does appear to be reasonably capable for the time being.

Tested on blake (NVIDIA H100), caraway (AMD MI200), and aurora (Intel Ponte Vecchio), and things are mostly good. I did, however, run into a few failing tests on AMD. Test names and output are below.

The following tests FAILED:
        512 - validate_chm_diags_ts_355 (Failed)
        544 - validate_gas_washout_merged (Failed)
        586 - validate_aer_rad_props_lw_ts_355 (Failed)
        588 - validate_aer_rad_props_sw_ts_355 (Failed)
Output from failing tests
Start testing: May 21 17:56 MDT
----------------------------------------------------------
512/646 Testing: validate_chm_diags_ts_355
512/646 Test: validate_chm_diags_ts_355
Command: "/projects/x86-64-zen-rocky8/utilities/python/3.10.12/gcc/8.5.0/base/jliu53k/bin/python3" "compare_mam4xx_mam4.py" "mam4xx_chm_diags_ts_355.py" "mam_chm_diags_ts_355.py" "True" "9e-2"
Directory: /home/mjschm/cara_mam4xx/build/src/validation/mo_chm_diags
"validate_chm_diags_ts_355" start time: May 21 17:56 MDT
Output:
----------------------------------------------------------
area
L1 5.304647879e+93
L2 5.304647879e+93
Linf 5.304647879e+93

L1 rel_error 1.0
L2 rel_error 1.0
Linf rel_error 1.0
df_nhx
L1 0.0
L2 0.0
Linf 0.0
df_noy
L1 0.0
L2 0.0
Linf 0.0
df_sox
L1 2.1127595761198383e-12
L2 2.1127595761198383e-12
Linf 2.1127595761198383e-12
drymass
L1 5.3344978817229e+97
L2 8.124504007622111e+96
Linf 2.087694766e+96

L1 rel_error 25.55209683235322
L2 rel_error 3.8916148758606943
Linf rel_error 1.0
mass
L1 5.3634051141167e+97
L2 8.167922012785719e+96
Linf 2.100738982e+96

L1 rel_error 25.531040077194607
L2 rel_error 3.888118458681374
Linf rel_error 1.0
mass_bc
L1 9.60847203014767e-10
L2 1.620904641972315e-10
Linf 4.939643042681813e-11
mass_dst
L1 5.645219727811883e-10
L2 9.138351106323364e-11
Linf 3.0207107116600906e-11
mass_mom
L1 2.642731209640951e-10
L2 4.486074748302841e-11
Linf 1.131394327747087e-11
mass_ncl
L1 1.26390509960511e-08
L2 2.1489251095492187e-09
Linf 5.552468649288639e-10

mass_pom
L1 6.112871615165164e-09
L2 9.78679018295419e-10
Linf 2.7110255655831923e-10

mass_so4
L1 2.722059208483874e-08
L2 3.873118373798847e-09
Linf 8.841201006732738e-10

mass_soa
L1 4.294473015494709e-08
L2 6.028376698333038e-09
Linf 1.0898451258843572e-09

mmr_nhx
L1 0.0
L2 0.0
Linf 0.0
mmr_noy
L1 0.0
L2 0.0
Linf 0.0
mmr_sox
L1 1.8649615594912017e-08
L2 3.95657473703572e-09
Linf 1.7420350123242822e-09

net_chem
L1 8.704525419491872e+98
L2 1.6843361180595725e+98
Linf 6.728531663e+97

L1 rel_error 12.936738437834519
L2 rel_error 2.5032744176886137
Linf rel_error 1.0
ozone_col
L1 188.36638731991184
L2 188.36638731991184
Linf 188.36638731991184

L1 rel_error 0.4220976935224353
L2 rel_error 0.4220976935224353
Linf rel_error 0.4220976935224353
ozone_strat
L1 194.8069847646648
L2 194.8069847646648
Linf 194.8069847646648

L1 rel_error 0.45500197248116503
L2 rel_error 0.45500197248116503
Linf rel_error 0.45500197248116503
ozone_trop
L1 22.84425401524706
L2 22.84425401524706
Linf 22.84425401524706

L1 rel_error 0.4819257021003174
L2 rel_error 0.4819257021003174
Linf rel_error 0.4819257021003174
vmr_brox
L1 0.0
L2 0.0
Linf 0.0
vmr_broy
L1 0.0
L2 0.0
Linf 0.0
vmr_clox
L1 0.0
L2 0.0
Linf 0.0
vmr_cloy
L1 0.0
L2 0.0
Linf 0.0
vmr_nox
L1 0.0
L2 0.0
Linf 0.0
vmr_noy
L1 0.0
L2 0.0
Linf 0.0
vmr_tcly
L1 0.0
L2 0.0
Linf 0.0
final pass array = [False  True  True  True False False  True  True  True  True  True  True
  True  True  True  True False False False False  True  True  True  True
  True  True  True]
Traceback (most recent call last):
  File "/home/mjschm/cara_mam4xx/build/src/validation/mo_chm_diags/compare_mam4xx_mam4.py", line 136, in <module>
    assert(np.all(pass_all_tests))
AssertionError
<end of output>
Test time =   0.40 sec
----------------------------------------------------------
Test Failed.
"validate_chm_diags_ts_355" end time: May 21 17:56 MDT
"validate_chm_diags_ts_355" time elapsed: 00:00:00
----------------------------------------------------------

544/646 Testing: validate_gas_washout_merged
544/646 Test: validate_gas_washout_merged
Command: "/projects/x86-64-zen-rocky8/utilities/python/3.10.12/gcc/8.5.0/base/jliu53k/bin/python3" "compare_mam4xx_mam4.py" "mam4xx_gas_washout_merged.py" "mam_gas_washout_merged.py" "True" "9e-8"
Directory: /home/mjschm/cara_mam4xx/build/src/validation/mo_sethet
"validate_gas_washout_merged" start time: May 21 17:56 MDT
Output:
----------------------------------------------------------
xgas
L1 8090334167.5109825
L2 566043812.1790568
Linf 91848861.27848816

L1 rel_error 0.3770747033609994
L2 rel_error 0.026382198577640813
Linf rel_error 0.004280896363924451
final pass array = [False]
Traceback (most recent call last):
  File "/home/mjschm/cara_mam4xx/build/src/validation/mo_sethet/compare_mam4xx_mam4.py", line 136, in <module>
    assert(np.all(pass_all_tests))
AssertionError
<end of output>
Test time =   0.26 sec
----------------------------------------------------------
Test Failed.
"validate_gas_washout_merged" end time: May 21 17:56 MDT
"validate_gas_washout_merged" time elapsed: 00:00:00
----------------------------------------------------------

586/646 Testing: validate_aer_rad_props_lw_ts_355
586/646 Test: validate_aer_rad_props_lw_ts_355
Command: "/projects/x86-64-zen-rocky8/utilities/python/3.10.12/gcc/8.5.0/base/jliu53k/bin/python3" "compare_mam4xx_mam4.py" "mam4xx_aer_rad_props_lw_ts_355.py" "mam_aer_rad_props_lw_ts_355.py" "True" "7e-11"
Directory: /home/mjschm/cara_mam4xx/build/src/validation/aerosol_optics
"validate_aer_rad_props_lw_ts_355" start time: May 21 17:56 MDT
Output:
----------------------------------------------------------
odap_aer
L1 8.149720146521368e-11
L2 1.2350049868414222e-11
Linf 5.907174489144795e-12


L1 rel_error 1.5365303214624192e-07
qqcw
L1 0.004725936932030295
L2 0.0029179972368972426
Linf 0.0028033703565597534

L1 rel_error 6.392993991358082e-11
L2 rel_error 3.947310146237891e-11
Linf rel_error 3.792249050885764e-11
final pass array = [False  True]
Traceback (most recent call last):
  File "/home/mjschm/cara_mam4xx/build/src/validation/aerosol_optics/compare_mam4xx_mam4.py", line 136, in <module>
    assert(np.all(pass_all_tests))
AssertionError
<end of output>
Test time =   0.32 sec
----------------------------------------------------------
Test Failed.
"validate_aer_rad_props_lw_ts_355" end time: May 21 17:56 MDT
"validate_aer_rad_props_lw_ts_355" time elapsed: 00:00:00
----------------------------------------------------------

588/646 Testing: validate_aer_rad_props_sw_ts_355
588/646 Test: validate_aer_rad_props_sw_ts_355
Command: "/projects/x86-64-zen-rocky8/utilities/python/3.10.12/gcc/8.5.0/base/jliu53k/bin/python3" "compare_mam4xx_mam4.py" "mam4xx_aer_rad_props_sw_ts_355.py" "mam_aer_rad_props_sw_ts_355.py" "True" "8e-11"
Directory: /home/mjschm/cara_mam4xx/build/src/validation/aerosol_optics
"validate_aer_rad_props_sw_ts_355" start time: May 21 17:56 MDT
Output:
----------------------------------------------------------
qqcw
L1 0.004725936932030295
L2 0.0029179972368972426
Linf 0.0028033703565597534

L1 rel_error 6.392993991358082e-11
L2 rel_error 3.947310146237891e-11
Linf rel_error 3.792249050885764e-11
tau
L1 5.680481426327253e-09
L2 8.136824862342645e-10
Linf 3.305621523036484e-10

L1 rel_error 4.1424822308460483e-07
L2 rel_error 5.933766854960738e-08
Linf rel_error 2.4106193460321693e-08
tau_w
L1 5.493229332899513e-09
L2 7.97048022772819e-10
Linf 3.2231628650791766e-10

L1 rel_error 5.9386263058373104e-09
L2 rel_error 8.616735381327773e-10
Linf rel_error 3.484500394680191e-10
tau_w_f
L1 1.8698060176701675e-09
L2 3.4550021959878085e-10
Linf 1.659684395124983e-10

L1 rel_error 2.5879668064638994e-09
L2 rel_error 4.782009959844717e-10
Linf rel_error 2.297141031314855e-10
tau_w_g
L1 3.2453379731259033e-09
L2 5.356132473482768e-10
Linf 2.3360460787991144e-10

L1 rel_error 3.8180446742657685e-09
L2 rel_error 6.301332321744433e-10
Linf rel_error 2.7482895044695463e-10
final pass array = [ True False False False False]
Traceback (most recent call last):
  File "/home/mjschm/cara_mam4xx/build/src/validation/aerosol_optics/compare_mam4xx_mam4.py", line 136, in <module>
    assert(np.all(pass_all_tests))
AssertionError
<end of output>
Test time =   0.39 sec
----------------------------------------------------------
Test Failed.
"validate_aer_rad_props_sw_ts_355" end time: May 21 17:56 MDT
"validate_aer_rad_props_sw_ts_355" time elapsed: 00:00:00
----------------------------------------------------------

End testing: May 21 17:56 MDT

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Late update--also some fails on aurora:

The following tests FAILED:
         19 - mode_averages (Failed)
         64 - validate_stand_modal_aero_calcsize_sub_update_ptend (Failed)
         66 - validate_stand_calcsize_aero_model_wetdep_ts_379 (Failed)
Output from tests failing on Aurora
Start testing: May 22 00:28 UTC
----------------------------------------------------------
19/646 Testing: mode_averages
19/646 Test: mode_averages
Command: "/usr/bin/sh" "-c" "/home/mjschm/mam4xx/build/bin/test-launcher -- ./mode_averages --use-colour no"
Directory: /home/mjschm/mam4xx/build/src/tests
"mode_averages" start time: May 22 00:28 UTC
Output:
----------------------------------------------------------
Calling initialize_kokkos
 ExecSpace name: SYCL
 ExecSpace initialized: yes
 active avx set: 
 compiler id: IntelLLVM
 FPE support is enabled, current FPE mask: 0 (NONE)
 #host threads: 1


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mode_averages is a Catch v2.13.8 host application.
Run with -? for options

-------------------------------------------------------------------------------
modal_averages
-------------------------------------------------------------------------------
/home/mjschm/mam4xx/src/tests/mode_averages_unit_tests.cpp:27
...............................................................................

/home/mjschm/mam4xx/src/tests/mode_averages_unit_tests.cpp:27: FAILED:
due to unexpected exception with message:
  The program was built for 1 devices
  Build program log for 'Intel(R) Data Center GPU Max 1550':
  Module <0x3d8fd10>:  Unresolved Symbol <nan>
  Module <0x3d8fd10>:  Unresolved Symbol <nan>

===============================================================================
test cases: 1 | 1 failed
assertions: 1 | 1 failed

EKAT is not managing resources.
RUN: OMP_PROC_BIND=spread OMP_PLACES=threads ./mode_averages --use-colour no
FROM: /home/mjschm/mam4xx/build/src/tests
<end of output>
Test time =   1.50 sec
----------------------------------------------------------
Test Failed.
"mode_averages" end time: May 22 00:28 UTC
"mode_averages" time elapsed: 00:00:01
----------------------------------------------------------

64/646 Testing: validate_stand_modal_aero_calcsize_sub_update_ptend
64/646 Test: validate_stand_modal_aero_calcsize_sub_update_ptend
Command: "/opt/aurora/24.347.0/spack/unified/0.9.2/install/linux-sles15-x86_64/gcc-13.3.0/python-venv-1.0-a4pusmc/bin/python3" "compare_mam4xx_mam4.py" "mam4xx_stand_modal_aero_calcsize_sub_update_ptend.py" "mam_stand_modal_aero_calcsize_sub_update_ptend.py" "True" "3e-5"
Directory: /home/mjschm/mam4xx/build/src/validation/calcsize
"validate_stand_modal_aero_calcsize_sub_update_ptend" start time: May 22 00:28 UTC
Output:
----------------------------------------------------------
dgnumdry_m
L1 4.758090000438264e-12
L2 6.120652745099863e-13
Linf 1.8500000014169075e-13
ptend_q
L1 0.0016484514219205918
L2 0.0010593508120839688
Linf 0.0007477462949000001
L1 rel_error 0.0003181953256389337
L2 rel_error 0.000204483111928284
Linf rel_error 0.00014433508481784857
qqcw
L1 0.0
L2 0.0
Linf 0.0
final pass array = [ True False  True]
Traceback (most recent call last):
  File "/home/mjschm/mam4xx/build/src/validation/calcsize/compare_mam4xx_mam4.py", line 136, in <module>
    assert(np.all(pass_all_tests))
AssertionError
<end of output>
Test time =   0.13 sec
----------------------------------------------------------
Test Failed.
"validate_stand_modal_aero_calcsize_sub_update_ptend" end time: May 22 00:28 UTC
"validate_stand_modal_aero_calcsize_sub_update_ptend" time elapsed: 00:00:00
----------------------------------------------------------

66/646 Testing: validate_stand_calcsize_aero_model_wetdep_ts_379
66/646 Test: validate_stand_calcsize_aero_model_wetdep_ts_379
Command: "/opt/aurora/24.347.0/spack/unified/0.9.2/install/linux-sles15-x86_64/gcc-13.3.0/python-venv-1.0-a4pusmc/bin/python3" "compare_mam4xx_mam4.py" "mam4xx_stand_calcsize_aero_model_wetdep_ts_379.py" "mam_stand_calcsize_aero_model_wetdep_ts_379.py" "True" "1.5e-3"
Directory: /home/mjschm/mam4xx/build/src/validation/calcsize
"validate_stand_calcsize_aero_model_wetdep_ts_379" start time: May 22 00:28 UTC
Output:
----------------------------------------------------------
dgnumdry_m
L1 4.383719999403568e-12
L2 5.946998775934814e-13
Linf 1.860000004629556e-13
ptend_q
L1 14.419584504071418
L2 10.195808596672368
Linf 7.209525398
L1 rel_error 0.19365119759473054
L2 rel_error 0.1369270067826688
Linf rel_error 0.09682201501840247
qqcw
L1 91388.94299998647
L2 25167.88959375477
Linf 9074.523000000045
L1 rel_error 0.0010769036541811079
L2 rel_error 0.0002965718978886232
Linf rel_error 0.0001069318306772845
final pass array = [ True False  True]
Traceback (most recent call last):
  File "/home/mjschm/mam4xx/build/src/validation/calcsize/compare_mam4xx_mam4.py", line 136, in <module>
    assert(np.all(pass_all_tests))
AssertionError
<end of output>
Test time =   0.11 sec
----------------------------------------------------------
Test Failed.
"validate_stand_calcsize_aero_model_wetdep_ts_379" end time: May 22 00:28 UTC
"validate_stand_calcsize_aero_model_wetdep_ts_379" time elapsed: 00:00:00
----------------------------------------------------------

End testing: May 22 00:28 UTC

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can take a look at the tests that are failing on Aurora. I wonder if we should merge this PR, @singhbalwinder @jeff-cohere. @mjschmidt271, can you create an issue for the failing tests on Aurora? For the tests on AMD, I have PR 442, which fixes a few tests for Frontier. For Caraway, I will need to build and run the tests on that machine.

nvcw=$CXX
CC=gcc

if [[ "$DEVICE" == "gpu" && ! -z "$nvcw" ]]; then
ENABLE_GPU=ON
# nvcw="$(pwd)/.haero/ext/ekat/extern/kokkos/bin/nvcc_wrapper"
CUDA_GEN=${DEVICE_ARCH:(-2)}
# FIXME: this assumes a default value in nvcc_wrapper
sed -i s/default_arch=\"sm_70\"/default_arch=\"sm_"$CUDA_GEN"\"/g "${nvcw}"
echo "===================================="
grep -i "default_arch=" "${nvcw}"
echo "===================================="
else
ENABLE_GPU=OFF
fi

# AMD GPU
# CXX=hipcc
# CC=amdclang

# Intel GPU
# CXX=icpx
# CC=icx

# Default compilers (can be overridden by environment variables)
if [[ -z $CC ]]; then
CC=cc
fi
if [[ -z $CXX ]]; then
CXX=c++
fi

echo "Configuring Haero with the given selections (WITHOUT MPI)..."
cmake -S ./.haero -B ./.haero/build \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
Expand All @@ -113,11 +123,13 @@ cmake -S ./.haero -B ./.haero/build \
-DHAERO_ENABLE_MPI=OFF \
-DHAERO_ENABLE_GPU=$ENABLE_GPU \
-DHAERO_PRECISION=$PRECISION \
-DKokkos_ARCH_$DEVICE_ARCH:BOOL=ON \
-DHAERO_DEVICE_ARCH=$DEVICE_ARCH \
|| exit

echo "Building and installing Haero in $PREFIX..."
cd .haero/build || exit
make -j8 install
make -j8 install || exit

cd ../../
echo "Haero has been installed in $PREFIX. Set HAERO_DIR to this directory in"
Expand Down
2 changes: 0 additions & 2 deletions setup
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ cmake \
-DMAM4XX_HAERO_DIR=\$HAERO_DIR \
-DNUM_VERTICAL_LEVELS=\$NUM_VERTICAL_LEVELS \
-DENABLE_SKYWALKER=ON \
-DCMAKE_CUDA_ARCHITECTURES=80 \
\$OPTIONS \
-G "\$GENERATOR" \
\$SOURCE_DIR
Expand All @@ -120,4 +119,3 @@ echo " 1. cd $1"
echo " 2. Edit config.sh"
echo " 3. ./config.sh"
echo " 4. Build using 'make -j'."

26 changes: 22 additions & 4 deletions src/mam4xx/conversions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,28 @@ KOKKOS_INLINE_FUNCTION Real saturation_mixing_ratio_hardy(Real T, Real P) {
/// the temperature. If not supplied,
/// @ref saturation_mixing_ratio_hardy is used.
/// @return relative humidity [1]
KOKKOS_INLINE_FUNCTION Real relative_humidity_from_vapor_mixing_ratio(
Real w, Real T, Real p,
Real (*wsat)(Real, Real) = saturation_mixing_ratio_hardy) {
const auto ws = wsat(T, p);
// KOKKOS_INLINE_FUNCTION Real relative_humidity_from_vapor_mixing_ratio(
// Real w, Real T, Real p,
// Real (*wsat)(Real, Real) = saturation_mixing_ratio_hardy) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also noted a compilation error on Aurora because of this function. However, we have compiled MAM4xx in EAMxx with SYCL, and this error did not show up. Thus, I wonder if we are only using `relative_humidity_from_vapor_mixing_ratio` in MAM4xx.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would make sense to me. However, unless we have reason for the flexibility to be available, I think we're ok without it.

@singhbalwinder - do you know if this calculation would ever need a different calculation for saturation mixing ratio?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's just keep this commented out until we find out that we need it.

// const auto ws = wsat(T, p);
// return w / ws;
// }

/// Computes relative humidity as the ratio of the water vapor mixing ratio to
/// the saturation mixing ratio at the given temperature and pressure. The
/// saturation value is always obtained from
/// @ref saturation_mixing_ratio_hardy; no alternative relationship can be
/// supplied by the caller.
///
/// @param [in] w water vapor mixing ratio [kg vapor / kg dry air]
/// @param [in] T temperature [K]
/// @param [in] p total pressure [Pa]
/// @return relative humidity [1]
KOKKOS_INLINE_FUNCTION
Real relative_humidity_from_vapor_mixing_ratio(Real w, Real T, Real p) {
  // The saturation mixing ratio at (T, p) is the divisor; it is not checked
  // for zero here.
  return w / saturation_mixing_ratio_hardy(T, p);
}

Expand Down