From 0f372fba5bfb00d5a9214af455bc3cd42823c4af Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Tue, 16 Sep 2025 17:24:30 -0600 Subject: [PATCH 1/6] EAMxx: fix sync of inputs/outputs in cld fraction when using python --- .../cld_fraction/eamxx_cld_fraction_process_interface.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp index 9c62a5158548..e195c186409b 100644 --- a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp +++ b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp @@ -113,6 +113,7 @@ void CldFraction::run_impl (const double /* dt */) // Sync input to host liq_cld_frac.sync_to_host(); + qi.sync_to_host(); double ice_threshold = m_params.get("ice_cloud_threshold"); double ice_4out_threshold = m_params.get("ice_cloud_for_analysis_threshold"); @@ -127,8 +128,6 @@ void CldFraction::run_impl (const double /* dt */) } // Sync outputs to dev - qi.sync_to_dev(); - liq_cld_frac.sync_to_dev(); ice_cld_frac.sync_to_dev(); tot_cld_frac.sync_to_dev(); ice_cld_frac_4out.sync_to_dev(); From 250761bc32d40a78c17191ee6b778e40cbb6cac5 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 17 Sep 2025 15:41:49 -0600 Subject: [PATCH 2/6] EAMxx: simplify interface to call a fcn from py module in atm processes --- .../eamxx_cld_fraction_process_interface.cpp | 23 +++++-------------- .../atmosphere_process_pyhelpers.hpp | 9 ++++++++ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp index e195c186409b..2d4285f71b1b 100644 --- a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp +++ 
b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp @@ -77,15 +77,7 @@ void CldFraction::initialize_impl (const RunType /* run_type */) add_postcondition_check(get_field_out("cldfrac_tot_for_analysis"),m_grid,0.0,1.0,false); #ifdef EAMXX_HAS_PYTHON if (has_py_module()) { - try { - py_module_call("init"); - } catch (const pybind11::error_already_set& e) { - std::cout << "[CldFraction::initialize_impl] Error! Something went wrong while calling the python module's function 'init'.\n" - " - module name: " + m_params.get("py_module_name") + "\n" - " - pybind11 error: " + std::string(e.what()) + "\n"; - throw e; - } - + py_module_call("init"); } #endif } @@ -118,14 +110,11 @@ void CldFraction::run_impl (const double /* dt */) double ice_threshold = m_params.get("ice_cloud_threshold"); double ice_4out_threshold = m_params.get("ice_cloud_for_analysis_threshold"); - try { - py_module_call("main",ice_threshold,ice_4out_threshold,py_qi,py_liq_cld_frac,py_ice_cld_frac,py_tot_cld_frac,py_ice_cld_frac_4out,py_tot_cld_frac_4out); - } catch (const pybind11::error_already_set& e) { - std::cout << "[CldFraction::run_impl] Error! 
Something went wrong while calling the python module's function 'main'.\n" - " - module name: " + m_params.get("py_module_name") + "\n" - " - pybind11 error: " + std::string(e.what()) + "\n"; - throw e; - } + py_module_call("main", + ice_threshold,ice_4out_threshold, + py_qi,py_liq_cld_frac, + py_ice_cld_frac,py_tot_cld_frac, + py_ice_cld_frac_4out,py_tot_cld_frac_4out); // Sync outputs to dev ice_cld_frac.sync_to_dev(); diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp b/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp index e824e2e6525c..231482404540 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp @@ -17,6 +17,15 @@ py_module_call (const std::string& name, const Args&... args) { const auto& py_module = std::any_cast(m_py_module); - py_module.attr(name.c_str())(args...); + // Call the python function exactly once, inside the try block, so a pybind11 failure is reported with its module and function name + try { + py_module.attr(name.c_str())(args...); + } catch (const pybind11::error_already_set& e) { + std::cout << "[" << this->name() << "] Error! 
Something went wrong while calling a python module function.\n" + " - module name: " + m_params.get("py_module_name") + "\n" + " - function name: " + name + "\n" + " - pybind11 error: " + std::string(e.what()) + "\n"; + throw; // rethrow the original exception object rather than a copy + } } inline const pybind11::array& AtmosphereProcess:: From 1643eec2ab2c139ca54e0d66fee2d2b1aacd08dc Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 17 Sep 2025 15:43:00 -0600 Subject: [PATCH 3/6] EAMxx: make cld_fraction py test work with both numpy and cupy --- .../physics/cld_fraction/cld_fraction_cupy.py | 46 +++++++++++++++++++ ...{cld_fraction.py => cld_fraction_numpy.py} | 0 .../eamxx_cld_fraction_process_interface.cpp | 46 ++++++++++++------- .../cld_fraction/CMakeLists.txt | 40 ++++++++++++++-- .../single-process/cld_fraction/input.yaml | 1 + 5 files changed, 112 insertions(+), 21 deletions(-) create mode 100644 components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py rename components/eamxx/src/physics/cld_fraction/{cld_fraction.py => cld_fraction_numpy.py} (100%) diff --git a/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py b/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py new file mode 100644 index 000000000000..5898f778a592 --- /dev/null +++ b/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py @@ -0,0 +1,46 @@ +import cupy as cp +import numpy as np + +# Any initialization step can be done here +# This method is called during CldFraction::initialize_impl +def init (): + pass + +######################################################### +def get_cu_array(np_arr): +######################################################### + shape = np_arr.shape + dtype = np_arr.dtype + ptr = np_arr.__array_interface__['data'][0] + strides = np_arr.strides + + # The exact size here does not really matter, as we are just creating an + # unmanaged mem block, of which we then simply grab the start address. 
+ # Still, use the correct size for code clarity + size = shape[0]*strides[0] + mem = cp.cuda.UnownedMemory(ptr=ptr,owner=None,size=size) + memptr = cp.cuda.MemoryPointer(mem, 0) + + return cp.ndarray(shape=shape,dtype=dtype,memptr=memptr,strides=strides) + +######################################################### +def main (ice_threshold, ice_4out_threshold, + qi, liq_cld_frac, + ice_cld_frac, tot_cld_frac, + ice_cld_frac_4out, tot_cld_frac_4out): +######################################################### + + cu_qi = get_cu_array(qi) + cu_liq_cld_frac = get_cu_array(liq_cld_frac) + cu_ice_cld_frac = get_cu_array(ice_cld_frac) + cu_tot_cld_frac = get_cu_array(tot_cld_frac) + cu_ice_cld_frac_4out = get_cu_array(ice_cld_frac_4out) + cu_tot_cld_frac_4out = get_cu_array(tot_cld_frac_4out) + + cu_ice_cld_frac[:] = 0 + cu_ice_cld_frac_4out[:] = 0 + cu_ice_cld_frac[cu_qi > ice_threshold] = 1 + cu_ice_cld_frac_4out[cu_qi > ice_4out_threshold] = 1 + + cp.maximum(cu_ice_cld_frac,cu_liq_cld_frac, out=cu_tot_cld_frac) + cp.maximum(cu_ice_cld_frac_4out,cu_liq_cld_frac,out=cu_tot_cld_frac_4out) diff --git a/components/eamxx/src/physics/cld_fraction/cld_fraction.py b/components/eamxx/src/physics/cld_fraction/cld_fraction_numpy.py similarity index 100% rename from components/eamxx/src/physics/cld_fraction/cld_fraction.py rename to components/eamxx/src/physics/cld_fraction/cld_fraction_numpy.py diff --git a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp index 2d4285f71b1b..3eded911a339 100644 --- a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp +++ b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp @@ -95,17 +95,30 @@ void CldFraction::run_impl (const double /* dt */) auto tot_cld_frac_4out = get_field_out("cldfrac_tot_for_analysis"); #ifdef EAMXX_HAS_PYTHON if (has_py_module()) { - // For 
now, we run Python code only on CPU - const auto& py_qi = get_py_field_host("qi"); - const auto& py_liq_cld_frac = get_py_field_host("cldfrac_liq"); - const auto& py_ice_cld_frac = get_py_field_host("cldfrac_ice"); - const auto& py_tot_cld_frac = get_py_field_host("cldfrac_tot"); - const auto& py_ice_cld_frac_4out = get_py_field_host("cldfrac_ice_for_analysis"); - const auto& py_tot_cld_frac_4out = get_py_field_host("cldfrac_tot_for_analysis"); - - // Sync input to host - liq_cld_frac.sync_to_host(); - qi.sync_to_host(); + pybind11::array py_qi, + py_liq_cld_frac, + py_ice_cld_frac, + py_tot_cld_frac, + py_ice_cld_frac_4out, + py_tot_cld_frac_4out; + + if (m_params.get("py_backend")=="device") { + py_qi = get_py_field_dev("qi"); + py_liq_cld_frac = get_py_field_dev("cldfrac_liq"); + py_ice_cld_frac = get_py_field_dev("cldfrac_ice"); + py_tot_cld_frac = get_py_field_dev("cldfrac_tot"); + py_ice_cld_frac_4out = get_py_field_dev("cldfrac_ice_for_analysis"); + py_tot_cld_frac_4out = get_py_field_dev("cldfrac_tot_for_analysis"); + } else { + qi.sync_to_host(); + liq_cld_frac.sync_to_host(); + py_qi = get_py_field_host("qi"); + py_liq_cld_frac = get_py_field_host("cldfrac_liq"); + py_ice_cld_frac = get_py_field_host("cldfrac_ice"); + py_tot_cld_frac = get_py_field_host("cldfrac_tot"); + py_ice_cld_frac_4out = get_py_field_host("cldfrac_ice_for_analysis"); + py_tot_cld_frac_4out = get_py_field_host("cldfrac_tot_for_analysis"); + } double ice_threshold = m_params.get("ice_cloud_threshold"); double ice_4out_threshold = m_params.get("ice_cloud_for_analysis_threshold"); @@ -116,11 +129,12 @@ void CldFraction::run_impl (const double /* dt */) py_ice_cld_frac,py_tot_cld_frac, py_ice_cld_frac_4out,py_tot_cld_frac_4out); - // Sync outputs to dev - ice_cld_frac.sync_to_dev(); - tot_cld_frac.sync_to_dev(); - ice_cld_frac_4out.sync_to_dev(); - tot_cld_frac_4out.sync_to_dev(); + if (m_params.get("py_backend")!="device") { // mirrors the input branch above: every non-device backend ran on host arrays, so sync outputs to dev + ice_cld_frac.sync_to_dev(); + tot_cld_frac.sync_to_dev(); + 
ice_cld_frac_4out.sync_to_dev(); + tot_cld_frac_4out.sync_to_dev(); + } } else #endif { diff --git a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt index 07755b892fdd..cf95c7271c2e 100644 --- a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt +++ b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt @@ -20,10 +20,14 @@ CreateADUnitTest(cld_fraction_standalone FIXTURES_SETUP cldfrac_cpp) if (EAMXX_ENABLE_PYTHON) + include (BuildCprnc) + BuildCprnc() + # Configure yaml files to run directory set (POSTFIX py) - set (PY_MODULE_NAME "cld_fraction") + set (PY_MODULE_NAME "cld_fraction_numpy") set (PY_MODULE_PATH ${SCREAM_SOURCE_DIR}/src/physics/cld_fraction) + set (PY_BACKEND "host") configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml ${CMAKE_CURRENT_BINARY_DIR}/input_py.yaml) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml @@ -35,10 +39,7 @@ if (EAMXX_ENABLE_PYTHON) LABELS cld_fraction physics FIXTURES_SETUP cldfrac_py) - # Finally, compare output of the two tests - include (BuildCprnc) - BuildCprnc() - + # Compare output of py and cpp tests set (SRC_FILE "cldfrac_standalone_output_cpp.INSTANT.nsteps_x1.np1.${RUN_T0}.nc") set (TGT_FILE "cldfrac_standalone_output_py.INSTANT.nsteps_x1.np1.${RUN_T0}.nc") set (TEST_NAME cldfrac_standalone_cpp_vs_py) @@ -49,6 +50,35 @@ if (EAMXX_ENABLE_PYTHON) LABELS "cldfrac;infrastructure" FIXTURES_REQUIRED "cldfrac_py;cldfrac_cpp") + if (Kokkos_ENABLE_CUDA) + # Also run with cupy instead of numpy + set (PY_MODULE_NAME "cld_fraction_cupy") + set (PY_BACKEND "device") + set (POSTFIX "cupy") + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml + ${CMAKE_CURRENT_BINARY_DIR}/input_cupy.yaml) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml + ${CMAKE_CURRENT_BINARY_DIR}/output_cupy.yaml) + + # Test the process with cupy impl + CreateUnitTestFromExec(cld_fraction_standalone_cupy cld_fraction_standalone + 
EXE_ARGS "--args -ifile=input_cupy.yaml" + LABELS cld_fraction physics + FIXTURES_SETUP cldfrac_cupy) + + # Compare output of cupy and cpp tests + set (SRC_FILE "cldfrac_standalone_output_cpp.INSTANT.nsteps_x1.np1.${RUN_T0}.nc") + set (TGT_FILE "cldfrac_standalone_output_cupy.INSTANT.nsteps_x1.np1.${RUN_T0}.nc") + set (TEST_NAME cldfrac_standalone_cpp_vs_cupy) + add_test (NAME ${TEST_NAME} + COMMAND cmake -P ${CMAKE_BINARY_DIR}/bin/CprncTest.cmake ${SRC_FILE} ${TGT_FILE} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + set_tests_properties(${TEST_NAME} PROPERTIES + LABELS "cldfrac;infrastructure" + FIXTURES_REQUIRED "cldfrac_cupy;cldfrac_cpp") + + endif() + # Run an ml emulator for cld-fraction set (PY_MODULE_NAME "cld_fraction_ml") set (PY_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/components/eamxx/tests/single-process/cld_fraction/input.yaml b/components/eamxx/tests/single-process/cld_fraction/input.yaml index aefc28142821..ca7987b2177c 100644 --- a/components/eamxx/tests/single-process/cld_fraction/input.yaml +++ b/components/eamxx/tests/single-process/cld_fraction/input.yaml @@ -15,6 +15,7 @@ eamxx: ice_cloud_for_analysis_threshold: 1e-5 py_module_name: ${PY_MODULE_NAME} py_module_path: ${PY_MODULE_PATH} + py_backend: ${PY_BACKEND} grids_manager: type: mesh_free From d0443f1c63dc7659bdddc537013c2af6cae9b715 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 17 Sep 2025 16:16:24 -0600 Subject: [PATCH 4/6] EAMxx: enable python tests on ghci-snl-cuda CI machine --- components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake b/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake index 83d1d20c6279..40d1756370e2 100644 --- a/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake +++ b/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake @@ -15,3 +15,7 @@ option(SCREAM_MPI_ON_DEVICE "Whether to use device pointers for MPI calls" OFF) 
# Currently, we have 2 GPUs/node on Blake, and we run a SINGLE build per node, so we can fit 2 ranks there set(SCREAM_TEST_MAX_RANKS 2 CACHE STRING "Upper limit on ranks for mpi tests") + +# Enable python tests +option (EAMXX_ENABLE_PYTHON "Whether to enable python interface from eamxx" ON) +set (Python_EXECUTABLE "/usr/bin/python3" CACHE STRING "") From 4b4a7e5f1ea0ddd8641b148d1731aaa8294f74b2 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Mon, 27 Oct 2025 10:06:35 -0600 Subject: [PATCH 5/6] EAMxx: fix pyfield test on GPU --- components/eamxx/src/share/field/tests/pyfield.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/components/eamxx/src/share/field/tests/pyfield.cpp b/components/eamxx/src/share/field/tests/pyfield.cpp index 9b9f93c8e0a6..63fca8945178 100644 --- a/components/eamxx/src/share/field/tests/pyfield.cpp +++ b/components/eamxx/src/share/field/tests/pyfield.cpp @@ -54,6 +54,7 @@ TEST_CASE("pyfield", "") { auto f_py = create_py_field(f1); py_mod.attr("set_iota")(f_py); + f1.sync_to_dev(); } REQUIRE (views_are_equal(f1,f2)); From 93b13243290e0b6e77bdc8bb551011a8d33859a8 Mon Sep 17 00:00:00 2001 From: Luca Bertagna Date: Wed, 12 Nov 2025 21:06:26 -0700 Subject: [PATCH 6/6] EAMxx: run cldfrac emulator only on CPU --- .../cld_fraction/CMakeLists.txt | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt index cf95c7271c2e..a9eeac782399 100644 --- a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt +++ b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt @@ -79,18 +79,20 @@ if (EAMXX_ENABLE_PYTHON) endif() - # Run an ml emulator for cld-fraction - set (PY_MODULE_NAME "cld_fraction_ml") - set (PY_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - set (POSTFIX pyml) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml - 
${CMAKE_CURRENT_BINARY_DIR}/input_pyml.yaml) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml - ${CMAKE_CURRENT_BINARY_DIR}/output_pyml.yaml) + if (NOT Kokkos_ENABLE_CUDA) + # Run an ml emulator for cld-fraction + set (PY_MODULE_NAME "cld_fraction_ml") + set (PY_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) + set (POSTFIX pyml) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml + ${CMAKE_CURRENT_BINARY_DIR}/input_pyml.yaml) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml + ${CMAKE_CURRENT_BINARY_DIR}/output_pyml.yaml) - # Test the process with python ml emulator - CreateUnitTestFromExec(cld_fraction_standalone_pyml cld_fraction_standalone - EXE_ARGS "--args -ifile=input_pyml.yaml" - LABELS cld_fraction physics - FIXTURES_SETUP cldfrac_pyml) + # Test the process with python ml emulator + CreateUnitTestFromExec(cld_fraction_standalone_pyml cld_fraction_standalone + EXE_ARGS "--args -ifile=input_pyml.yaml" + LABELS cld_fraction physics + FIXTURES_SETUP cldfrac_pyml) + endif() endif()