EAMxx: make cld_fraction py test work with both numpy and cupy

bartgol · bartgol · commit 21cd75b3012d · 2025-09-17T16:18:57.000-06:00
diff --git a/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py b/components/eamxx/src/physics/cld_fraction/cld_fraction_cupy.py
@@ -0,0 +1,46 @@
+import cupy as cp
+import numpy as np
+
+# Any initialization step can be done here
+# This method is called during CldFraction::initialize_impl
+def init():
+    """Initialization hook called during CldFraction::initialize_impl; nothing to do here."""
+
+#########################################################
+def get_cu_array(np_arr):
+#########################################################
+    """Return a cupy.ndarray that aliases (does not copy) the buffer described by np_arr."""
+    shape, dtype, strides = np_arr.shape, np_arr.dtype, np_arr.strides
+    # Start address from the array interface (presumably device memory - confirm with caller)
+    ptr = np_arr.__array_interface__['data'][0]
+
+    # Bytes spanned by the view: offset of the farthest element plus one item.
+    # Unlike shape[0]*strides[0], this is also defined for 0-d arrays (empty shape).
+    # As before, the exact size does not really matter: only the start address is used.
+    span = sum((n-1)*abs(s) for n,s in zip(shape,strides)) + dtype.itemsize
+    size = span if np_arr.size > 0 else 0
+    mem = cp.cuda.UnownedMemory(ptr=ptr,owner=None,size=size)
+
+    return cp.ndarray(shape=shape,dtype=dtype,memptr=cp.cuda.MemoryPointer(mem, 0),strides=strides)
+
+#########################################################
+def main (ice_threshold, ice_4out_threshold,
+          qi, liq_cld_frac,
+          ice_cld_frac, tot_cld_frac,
+          ice_cld_frac_4out, tot_cld_frac_4out):
+#########################################################
+    """Fill the ice and total cloud fractions (regular and 4out variants) in place.
+
+    ice = 1 where qi > threshold, else 0; tot = elementwise max(ice, liq).
+    Every array argument is wrapped with get_cu_array; nothing is returned.
+    """
+    cu_qi  = get_cu_array(qi)
+    cu_liq = get_cu_array(liq_cld_frac)
+
+    # Same recipe for both output pairs; only the threshold differs
+    for threshold, ice, tot in ((ice_threshold, ice_cld_frac, tot_cld_frac),
+                                (ice_4out_threshold, ice_cld_frac_4out, tot_cld_frac_4out)):
+        cu_ice, cu_tot = get_cu_array(ice), get_cu_array(tot)
+        cu_ice[:] = 0
+        cu_ice[cu_qi > threshold] = 1
+        cp.maximum(cu_ice, cu_liq, out=cu_tot)
diff --git a/components/eamxx/src/physics/cld_fraction/cld_fraction_numpy.py b/components/eamxx/src/physics/cld_fraction/cld_fraction_numpy.py
diff --git a/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp b/components/eamxx/src/physics/cld_fraction/eamxx_cld_fraction_process_interface.cpp
@@ -95,17 +95,30 @@ void CldFraction::run_impl (const double /* dt */)
   auto tot_cld_frac_4out = get_field_out("cldfrac_tot_for_analysis");
 #ifdef EAMXX_HAS_PYTHON
   if (has_py_module()) {
-    // For now, we run Python code only on CPU
-    const auto& py_qi                = get_py_field_host("qi");
-    const auto& py_liq_cld_frac      = get_py_field_host("cldfrac_liq");
-    const auto& py_ice_cld_frac      = get_py_field_host("cldfrac_ice");
-    const auto& py_tot_cld_frac      = get_py_field_host("cldfrac_tot");
-    const auto& py_ice_cld_frac_4out = get_py_field_host("cldfrac_ice_for_analysis");
-    const auto& py_tot_cld_frac_4out = get_py_field_host("cldfrac_tot_for_analysis");
-
-    // Sync input to host
-    liq_cld_frac.sync_to_host();
-    qi.sync_to_host();
+    pybind11::array py_qi,
+                    py_liq_cld_frac,
+                    py_ice_cld_frac,
+                    py_tot_cld_frac,
+                    py_ice_cld_frac_4out,
+                    py_tot_cld_frac_4out;
+
+    if (m_params.get<std::string>("py_backend")=="device") {
+      py_qi                = get_py_field_dev("qi");
+      py_liq_cld_frac      = get_py_field_dev("cldfrac_liq");
+      py_ice_cld_frac      = get_py_field_dev("cldfrac_ice");
+      py_tot_cld_frac      = get_py_field_dev("cldfrac_tot");
+      py_ice_cld_frac_4out = get_py_field_dev("cldfrac_ice_for_analysis");
+      py_tot_cld_frac_4out = get_py_field_dev("cldfrac_tot_for_analysis");
+    } else {
+      qi.sync_to_host();
+      liq_cld_frac.sync_to_host();
+      py_qi                = get_py_field_host("qi");
+      py_liq_cld_frac      = get_py_field_host("cldfrac_liq");
+      py_ice_cld_frac      = get_py_field_host("cldfrac_ice");
+      py_tot_cld_frac      = get_py_field_host("cldfrac_tot");
+      py_ice_cld_frac_4out = get_py_field_host("cldfrac_ice_for_analysis");
+      py_tot_cld_frac_4out = get_py_field_host("cldfrac_tot_for_analysis");
+    }
 
     double ice_threshold      = m_params.get<double>("ice_cloud_threshold");
     double ice_4out_threshold = m_params.get<double>("ice_cloud_for_analysis_threshold");
@@ -116,11 +129,12 @@ void CldFraction::run_impl (const double /* dt */)
                    py_ice_cld_frac,py_tot_cld_frac,
                    py_ice_cld_frac_4out,py_tot_cld_frac_4out);
 
-    // Sync outputs to dev
-    ice_cld_frac.sync_to_dev();
-    tot_cld_frac.sync_to_dev();
-    ice_cld_frac_4out.sync_to_dev();
-    tot_cld_frac_4out.sync_to_dev();
+    if (m_params.get<std::string>("py_backend")=="host") {
+      ice_cld_frac.sync_to_dev();
+      tot_cld_frac.sync_to_dev();
+      ice_cld_frac_4out.sync_to_dev();
+      tot_cld_frac_4out.sync_to_dev();
+    }
   } else
 #endif
   {
diff --git a/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt b/components/eamxx/tests/single-process/cld_fraction/CMakeLists.txt
@@ -20,10 +20,14 @@ CreateADUnitTest(cld_fraction_standalone
   FIXTURES_SETUP cldfrac_cpp)
 
 if (EAMXX_ENABLE_PYTHON)
+  include (BuildCprnc)
+  BuildCprnc()
+
   # Configure yaml files to run directory
   set (POSTFIX py)
-  set (PY_MODULE_NAME "cld_fraction")
+  set (PY_MODULE_NAME "cld_fraction_numpy")
   set (PY_MODULE_PATH ${SCREAM_SOURCE_DIR}/src/physics/cld_fraction)
+  set (PY_BACKEND "host")
   configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
                  ${CMAKE_CURRENT_BINARY_DIR}/input_py.yaml)
   configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
@@ -35,10 +39,7 @@ if (EAMXX_ENABLE_PYTHON)
     LABELS cld_fraction physics
     FIXTURES_SETUP cldfrac_py)
 
-  # Finally, compare output of the two tests
-  include (BuildCprnc)
-  BuildCprnc()
-
+  # Compare output of py and cpp tests
   set (SRC_FILE "cldfrac_standalone_output_cpp.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
   set (TGT_FILE "cldfrac_standalone_output_py.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
   set (TEST_NAME cldfrac_standalone_cpp_vs_py)
@@ -49,6 +50,35 @@ if (EAMXX_ENABLE_PYTHON)
         LABELS "cldfrac;infrastructure"
         FIXTURES_REQUIRED "cldfrac_py;cldfrac_cpp")
 
+  if (Kokkos_ENABLE_CUDA)
+    # Also run with cupy instead of numpy
+    set (PY_MODULE_NAME "cld_fraction_cupy")
+    set (PY_BACKEND "device")
+    set (POSTFIX "cupy")
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input.yaml
+                   ${CMAKE_CURRENT_BINARY_DIR}/input_cupy.yaml)
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/output.yaml
+                   ${CMAKE_CURRENT_BINARY_DIR}/output_cupy.yaml)
+
+    # Test the process with cupy impl
+    CreateUnitTestFromExec(cld_fraction_standalone_cupy cld_fraction_standalone
+      EXE_ARGS "--args -ifile=input_cupy.yaml"
+      LABELS cld_fraction physics
+      FIXTURES_SETUP cldfrac_cupy)
+
+    # Compare output of cupy and cpp tests
+    set (SRC_FILE "cldfrac_standalone_output_cpp.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
+    set (TGT_FILE "cldfrac_standalone_output_cupy.INSTANT.nsteps_x1.np1.${RUN_T0}.nc")
+    set (TEST_NAME cldfrac_standalone_cpp_vs_cupy)
+    add_test (NAME ${TEST_NAME}
+              COMMAND cmake -P ${CMAKE_BINARY_DIR}/bin/CprncTest.cmake ${SRC_FILE} ${TGT_FILE}
+              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+    set_tests_properties(${TEST_NAME} PROPERTIES
+          LABELS "cldfrac;infrastructure"
+          FIXTURES_REQUIRED "cldfrac_cupy;cldfrac_cpp")
+
+  endif()
+
   # Run an ml emulator for cld-fraction
   set (PY_MODULE_NAME "cld_fraction_ml")
   set (PY_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/components/eamxx/tests/single-process/cld_fraction/input.yaml b/components/eamxx/tests/single-process/cld_fraction/input.yaml
@@ -15,6 +15,7 @@ eamxx:
     ice_cloud_for_analysis_threshold: 1e-5
     py_module_name: ${PY_MODULE_NAME}
     py_module_path: ${PY_MODULE_PATH}
+    py_backend: ${PY_BACKEND}
 
 grids_manager:
   type: mesh_free