From 0f372fba5bfb00d5a9214af455bc3cd42823c4af Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Tue, 16 Sep 2025 17:24:30 -0600
Subject: [PATCH 1/6] EAMxx: fix sync of inputs/outputs in cld fraction when
 using python

---
 .../cld_fraction/eamxx_cld_fraction_process_interface.cpp      | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
index 9c62a5158548..e195c186409b 100644
--- a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
+++ b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
@@ -113,6 +113,7 @@ void CldFraction::run_impl (const double /* dt */)
 
     // Sync input to host
     liq_cld_frac.sync_to_host();
+    qi.sync_to_host();
 
     double ice_threshold      = m_params.get<double>("ice_cloud_threshold");
     double ice_4out_threshold = m_params.get<double>("ice_cloud_for_analysis_threshold");
@@ -127,8 +128,6 @@ void CldFraction::run_impl (const double /* dt */)
     }
 
     // Sync outputs to dev
-    qi.sync_to_dev();
-    liq_cld_frac.sync_to_dev();
     ice_cld_frac.sync_to_dev();
     tot_cld_frac.sync_to_dev();
     ice_cld_frac_4out.sync_to_dev();

From 250761bc32d40a78c17191ee6b778e40cbb6cac5 Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Wed, 17 Sep 2025 15:41:49 -0600
Subject: [PATCH 2/6] EAMxx: simplify interface to call a fcn from py module in
 atm processes

---
 .../eamxx_cld_fraction_process_interface.cpp  | 23 +++++--------------
 .../atmosphere_process_pyhelpers.hpp          |  9 ++++++++
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
index e195c186409b..2d4285f71b1b 100644
--- a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
+++ b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
@@ -77,15 +77,7 @@ void CldFraction::initialize_impl (const RunType /* run_type */)
   add_postcondition_check<Interval>(get_field_out("cldfrac_tot_for_analysis"),m_grid,0.0,1.0,false);
 #ifdef EAMXX_HAS_PYTHON
   if (has_py_module()) {
-    try {
-      py_module_call("init");
-    } catch (const pybind11::error_already_set& e) {
-      std::cout << "[CldFraction::initialize_impl] Error! Something went wrong while calling the python module's function 'init'.\n"
-                   " - module name: " + m_params.get<std::string>("py_module_name") + "\n"
-                   " - pybind11 error: " + std::string(e.what()) + "\n";
-      throw e;
-    }
-
+    py_module_call("init");
   }
 #endif
 }
@@ -118,14 +110,11 @@ void CldFraction::run_impl (const double /* dt */)
     double ice_threshold      = m_params.get<double>("ice_cloud_threshold");
     double ice_4out_threshold = m_params.get<double>("ice_cloud_for_analysis_threshold");
 
-    try {
-      py_module_call("main",ice_threshold,ice_4out_threshold,py_qi,py_liq_cld_frac,py_ice_cld_frac,py_tot_cld_frac,py_ice_cld_frac_4out,py_tot_cld_frac_4out);
-    } catch (const pybind11::error_already_set& e) {
-      std::cout << "[CldFraction::run_impl] Error! Something went wrong while calling the python module's function 'main'.\n"
-                   " - module name: " + m_params.get<std::string>("py_module_name") + "\n"
-                   " - pybind11 error: " + std::string(e.what()) + "\n";
-      throw e;
-    }
+    py_module_call("main",
+                   ice_threshold,ice_4out_threshold,
+                   py_qi,py_liq_cld_frac,
+                   py_ice_cld_frac,py_tot_cld_frac,
+                   py_ice_cld_frac_4out,py_tot_cld_frac_4out);
 
     // Sync outputs to dev
     ice_cld_frac.sync_to_dev();
diff --git a/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp b/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp
index e824e2e6525c..231482404540 100644
--- a/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp
+++ b/components/eamxx/src/share/atm_process/atmosphere_process_pyhelpers.hpp
@@ -17,6 +17,17 @@ py_module_call (const std::string& name, const Args&... args)
 {
   const auto& py_module = std::any_cast<const pybind11::module&>(m_py_module);
-  py_module.attr(name.c_str())(args...);
+  // Invoke the python function exactly once, and only inside the try block,
+  // so that any python-side failure is reported with process/module/function info.
+  try {
+    py_module.attr(name.c_str())(args...);
+  } catch (const pybind11::error_already_set& e) {
+    std::cout << "[" << this->name() << "] Error! Something went wrong while calling a python module function.\n"
+                 " - module name: " + m_params.get<std::string>("py_module_name") + "\n"
+                 " - function name: " + name + "\n"
+                 " - pybind11 error: " + std::string(e.what()) + "\n";
+    // Rethrow the exception being handled, rather than throwing a copy of it
+    throw;
+  }
 }
 
 inline const pybind11::array& AtmosphereProcess::

From 1643eec2ab2c139ca54e0d66fee2d2b1aacd08dc Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Wed, 17 Sep 2025 15:43:00 -0600
Subject: [PATCH 3/6] EAMxx: make cld_fraction py test work with both numpy and
 cupy

---
 .../physics/cld_fraction/cld_fraction_cupy.py | 46 +++++++++++++++++++
 ...{cld_fraction.py => cld_fraction_numpy.py} |  0
 .../eamxx_cld_fraction_process_interface.cpp  | 46 ++++++++++++-------
 .../cld_fraction/CMakeLists.txt               | 40 ++++++++++++++--
 .../single-process/cld_fraction/input.yaml    |  1 +
 5 files changed, 112 insertions(+), 21 deletions(-)
 create mode 100644 components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py
 rename components/eamxx/src/physics/cld_fraction/{cld_fraction.py => cld_fraction_numpy.py} (100%)

diff --git a/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py b/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py
new file mode 100644
index 000000000000..5898f778a592
--- /dev/null
+++ b/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py
@@ -0,0 +1,65 @@
+import cupy as cp
+import numpy as np
+
+# Any initialization step can be done here
+# This method is called during CldFraction::initialize_impl
+def init ():
+    pass
+
+#########################################################
+def get_cu_array(np_arr):
+#########################################################
+    """Return a cupy ndarray that aliases the buffer described by np_arr.
+
+    No data is copied: the result reuses np_arr's data address, shape, dtype
+    and strides.
+    NOTE(review): np_arr is presumably backed by device memory (the C++ caller
+    passes device fields when py_backend is "device") - confirm.
+    """
+    shape   = np_arr.shape
+    dtype   = np_arr.dtype
+    # Raw start address of the buffer (first entry of the 'data' tuple)
+    ptr     = np_arr.__array_interface__['data'][0]
+    strides = np_arr.strides
+
+    # The exact size here does not really matter, as we are just creating an
+    # unmanaged mem block, of which we then simply grab the start address.
+    # Still, use the correct size for code clarity
+    size = shape[0]*strides[0]
+    # owner=None: cupy keeps no object alive for this memory, so the caller
+    # must ensure the underlying buffer outlives the returned array
+    mem = cp.cuda.UnownedMemory(ptr=ptr,owner=None,size=size)
+    memptr = cp.cuda.MemoryPointer(mem, 0)
+
+    return cp.ndarray(shape=shape,dtype=dtype,memptr=memptr,strides=strides)
+
+#########################################################
+def main (ice_threshold, ice_4out_threshold,
+          qi, liq_cld_frac,
+          ice_cld_frac, tot_cld_frac,
+          ice_cld_frac_4out, tot_cld_frac_4out):
+#########################################################
+    """Fill the ice and total cloud-fraction arrays in place; returns nothing.
+
+    ice_cld_frac (resp. ice_cld_frac_4out) is set to 1 where qi exceeds
+    ice_threshold (resp. ice_4out_threshold) and to 0 elsewhere. Each total
+    fraction is the elementwise maximum of the matching ice fraction and
+    liq_cld_frac. All arrays are accessed through get_cu_array views.
+    """
+
+    cu_qi = get_cu_array(qi)
+    cu_liq_cld_frac = get_cu_array(liq_cld_frac)
+    cu_ice_cld_frac = get_cu_array(ice_cld_frac)
+    cu_tot_cld_frac = get_cu_array(tot_cld_frac)
+    cu_ice_cld_frac_4out = get_cu_array(ice_cld_frac_4out)
+    cu_tot_cld_frac_4out = get_cu_array(tot_cld_frac_4out)
+
+    # Reset both ice fractions, then flag the entries above each threshold
+    cu_ice_cld_frac[:] = 0
+    cu_ice_cld_frac_4out[:] = 0
+    cu_ice_cld_frac[cu_qi > ice_threshold] = 1
+    cu_ice_cld_frac_4out[cu_qi > ice_4out_threshold] = 1
+
+    # out= makes cupy write the result directly into the output arrays
+    cp.maximum(cu_ice_cld_frac,cu_liq_cld_frac, out=cu_tot_cld_frac)
+    cp.maximum(cu_ice_cld_frac_4out,cu_liq_cld_frac,out=cu_tot_cld_frac_4out)
diff --git a/components/eamxx/src/physics/cld_fraction/cld_fraction.py b/components/eamxx/src/physics/cld_fraction/cld_fraction_numpy.py
similarity index 100%
rename from components/eamxx/src/physics/cld_fraction/cld_fraction.py
rename to components/eamxx/src/physics/cld_fraction/cld_fraction_numpy.py
diff --git a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
index 2d4285f71b1b..3eded911a339 100644
--- a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
+++ b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
@@ -95,17 +95,30 @@ void CldFraction::run_impl (const double /* dt */)
   auto tot_cld_frac_4out = get_field_out("cldfrac_tot_for_analysis");
 #ifdef EAMXX_HAS_PYTHON
   if (has_py_module()) {
-    // For now, we run Python code only on CPU
-    const auto& py_qi                = get_py_field_host("qi");
-    const auto& py_liq_cld_frac      = get_py_field_host("cldfrac_liq");
-    const auto& py_ice_cld_frac      = get_py_field_host("cldfrac_ice");
-    const auto& py_tot_cld_frac      = get_py_field_host("cldfrac_tot");
-    const auto& py_ice_cld_frac_4out = get_py_field_host("cldfrac_ice_for_analysis");
-    const auto& py_tot_cld_frac_4out = get_py_field_host("cldfrac_tot_for_analysis");
-
-    // Sync input to host
-    liq_cld_frac.sync_to_host();
-    qi.sync_to_host();
+    pybind11::array py_qi,
+                    py_liq_cld_frac,
+                    py_ice_cld_frac,
+                    py_tot_cld_frac,
+                    py_ice_cld_frac_4out,
+                    py_tot_cld_frac_4out;
+
+    if (m_params.get<std::string>("py_backend")=="device") {
+      py_qi                = get_py_field_dev("qi");
+      py_liq_cld_frac      = get_py_field_dev("cldfrac_liq");
+      py_ice_cld_frac      = get_py_field_dev("cldfrac_ice");
+      py_tot_cld_frac      = get_py_field_dev("cldfrac_tot");
+      py_ice_cld_frac_4out = get_py_field_dev("cldfrac_ice_for_analysis");
+      py_tot_cld_frac_4out = get_py_field_dev("cldfrac_tot_for_analysis");
+    } else {
+      qi.sync_to_host();
+      liq_cld_frac.sync_to_host();
+      py_qi                = get_py_field_host("qi");
+      py_liq_cld_frac      = get_py_field_host("cldfrac_liq");
+      py_ice_cld_frac      = get_py_field_host("cldfrac_ice");
+      py_tot_cld_frac      = get_py_field_host("cldfrac_tot");
+      py_ice_cld_frac_4out = get_py_field_host("cldfrac_ice_for_analysis");
+      py_tot_cld_frac_4out = get_py_field_host("cldfrac_tot_for_analysis");
+    }
 
     double ice_threshold      = m_params.get<double>("ice_cloud_threshold");
     double ice_4out_threshold = m_params.get<double>("ice_cloud_for_analysis_threshold");
@@ -116,11 +129,14 @@ void CldFraction::run_impl (const double /* dt */)
                    py_ice_cld_frac,py_tot_cld_frac,
                    py_ice_cld_frac_4out,py_tot_cld_frac_4out);
 
-    // Sync outputs to dev
-    ice_cld_frac.sync_to_dev();
-    tot_cld_frac.sync_to_dev();
-    ice_cld_frac_4out.sync_to_dev();
-    tot_cld_frac_4out.sync_to_dev();
+    // Any backend other than "device" was run on the host views (see the input
+    // handling above), so the outputs must be copied back to device.
+    if (m_params.get<std::string>("py_backend")!="device") {
+      ice_cld_frac.sync_to_dev();
+      tot_cld_frac.sync_to_dev();
+      ice_cld_frac_4out.sync_to_dev();
+      tot_cld_frac_4out.sync_to_dev();
+    }
   } else
 #endif
   {
diff --git a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt
index 07755b892fdd..cf95c7271c2e 100644
--- a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt
+++ b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt
@@ -20,10 +20,14 @@ CreateADUnitTest(cld_fraction_standalone
   FIXTURES_SETUP cldfrac_cpp)
 
 if (EAMXX_ENABLE_PYTHON)
+  include (BuildCprnc)
+  BuildCprnc()
+
   # Configure yaml files to run directory
   set (POSTFIX py)
-  set (PY_MODULE_NAME "cld_fraction")
+  set (PY_MODULE_NAME "cld_fraction_numpy")
   set (PY_MODULE_PATH ${SCREAM_SOURCE_DIR}/src/physics/cld_fraction)
+  set (PY_BACKEND "host")
   configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
                  ${CMAKE_CURRENT_BINARY_DIR}/input_py.yaml)
   configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
@@ -35,10 +39,7 @@ if (EAMXX_ENABLE_PYTHON)
     LABELS cld_fraction physics
     FIXTURES_SETUP cldfrac_py)
 
-  # Finally, compare output of the two tests
-  include (BuildCprnc)
-  BuildCprnc()
-
+  # Compare output of py and cpp tests
   set (SRC_FILE "cldfrac_standalone_output_cpp.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
   set (TGT_FILE "cldfrac_standalone_output_py.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
   set (TEST_NAME cldfrac_standalone_cpp_vs_py)
@@ -49,6 +50,35 @@ if (EAMXX_ENABLE_PYTHON)
         LABELS "cldfrac;infrastructure"
         FIXTURES_REQUIRED "cldfrac_py;cldfrac_cpp")
 
+  if (Kokkos_ENABLE_CUDA)
+    # Also run with cupy instead of numpy
+    set (PY_MODULE_NAME "cld_fraction_cupy")
+    set (PY_BACKEND "device")
+    set (POSTFIX "cupy")
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
+                   ${CMAKE_CURRENT_BINARY_DIR}/input_cupy.yaml)
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
+                   ${CMAKE_CURRENT_BINARY_DIR}/output_cupy.yaml)
+
+    # Test the process with cupy impl
+    CreateUnitTestFromExec(cld_fraction_standalone_cupy cld_fraction_standalone
+      EXE_ARGS "--args -ifile=input_cupy.yaml"
+      LABELS cld_fraction physics
+      FIXTURES_SETUP cldfrac_cupy)
+
+    # Compare output of cupy and cpp tests
+    set (SRC_FILE "cldfrac_standalone_output_cpp.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
+    set (TGT_FILE "cldfrac_standalone_output_cupy.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
+    set (TEST_NAME cldfrac_standalone_cpp_vs_cupy)
+    add_test (NAME ${TEST_NAME}
+              COMMAND cmake -P ${CMAKE_BINARY_DIR}/bin/CprncTest.cmake ${SRC_FILE} ${TGT_FILE}
+              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+    set_tests_properties(${TEST_NAME} PROPERTIES
+          LABELS "cldfrac;infrastructure"
+          FIXTURES_REQUIRED "cldfrac_cupy;cldfrac_cpp")
+
+  endif()
+
   # Run an ml emulator for cld-fraction
   set (PY_MODULE_NAME "cld_fraction_ml")
   set (PY_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/components/eamxx/tests/single-process/cld_fraction/input.yaml b/components/eamxx/tests/single-process/cld_fraction/input.yaml
index aefc28142821..ca7987b2177c 100644
--- a/components/eamxx/tests/single-process/cld_fraction/input.yaml
+++ b/components/eamxx/tests/single-process/cld_fraction/input.yaml
@@ -15,6 +15,7 @@ eamxx:
     ice_cloud_for_analysis_threshold: 1e-5
     py_module_name: ${PY_MODULE_NAME}
     py_module_path: ${PY_MODULE_PATH}
+    py_backend: ${PY_BACKEND}
 
 grids_manager:
   type: mesh_free

From d0443f1c63dc7659bdddc537013c2af6cae9b715 Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Wed, 17 Sep 2025 16:16:24 -0600
Subject: [PATCH 4/6] EAMxx: enable python tests on ghci-snl-cuda CI machine

---
 components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake b/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake
index 83d1d20c6279..40d1756370e2 100644
--- a/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake
+++ b/components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake
@@ -15,3 +15,7 @@ option(SCREAM_MPI_ON_DEVICE "Whether to use device pointers for MPI calls" OFF)
 
 # Currently, we have 2 GPUs/node on Blake, and we run a SINGLE build per node, so we can fit 2 ranks there
 set(SCREAM_TEST_MAX_RANKS 2 CACHE STRING "Upper limit on ranks for mpi tests")
+
+# Enable python tests
+option (EAMXX_ENABLE_PYTHON "Whether to enable python interface from eamxx" ON)
+set (Python_EXECUTABLE "/usr/bin/python3" CACHE STRING "")

From 4b4a7e5f1ea0ddd8641b148d1731aaa8294f74b2 Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Mon, 27 Oct 2025 10:06:35 -0600
Subject: [PATCH 5/6] EAMxx: fix pyfield test on GPU

---
 components/eamxx/src/share/field/tests/pyfield.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/components/eamxx/src/share/field/tests/pyfield.cpp b/components/eamxx/src/share/field/tests/pyfield.cpp
index 9b9f93c8e0a6..63fca8945178 100644
--- a/components/eamxx/src/share/field/tests/pyfield.cpp
+++ b/components/eamxx/src/share/field/tests/pyfield.cpp
@@ -54,6 +54,7 @@ TEST_CASE("pyfield", "") {
 
     auto f_py = create_py_field<Host>(f1);
     py_mod.attr("set_iota")(f_py);
+    f1.sync_to_dev();
   }
   REQUIRE (views_are_equal(f1,f2));
 

From 93b13243290e0b6e77bdc8bb551011a8d33859a8 Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Wed, 12 Nov 2025 21:06:26 -0700
Subject: [PATCH 6/6] EAMxx: run cldfrac emulator only on CPU

---
 .../cld_fraction/CMakeLists.txt               | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt
index cf95c7271c2e..a9eeac782399 100644
--- a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt
+++ b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt
@@ -79,18 +79,20 @@ if (EAMXX_ENABLE_PYTHON)
 
   endif()
 
-  # Run an ml emulator for cld-fraction
-  set (PY_MODULE_NAME "cld_fraction_ml")
-  set (PY_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
-  set (POSTFIX pyml)
-  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
-                 ${CMAKE_CURRENT_BINARY_DIR}/input_pyml.yaml)
-  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
-                 ${CMAKE_CURRENT_BINARY_DIR}/output_pyml.yaml)
+  if (NOT Kokkos_ENABLE_CUDA)
+    # Run an ml emulator for cld-fraction
+    set (PY_MODULE_NAME "cld_fraction_ml")
+    set (PY_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
+    set (POSTFIX pyml)
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
+                   ${CMAKE_CURRENT_BINARY_DIR}/input_pyml.yaml)
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
+                   ${CMAKE_CURRENT_BINARY_DIR}/output_pyml.yaml)
 
-  # Test the process with python ml emulator
-  CreateUnitTestFromExec(cld_fraction_standalone_pyml cld_fraction_standalone
-    EXE_ARGS "--args -ifile=input_pyml.yaml"
-    LABELS cld_fraction physics
-    FIXTURES_SETUP cldfrac_pyml)
+    # Test the process with python ml emulator
+    CreateUnitTestFromExec(cld_fraction_standalone_pyml cld_fraction_standalone
+      EXE_ARGS "--args -ifile=input_pyml.yaml"
+      LABELS cld_fraction physics
+      FIXTURES_SETUP cldfrac_pyml)
+  endif()
 endif()