ESCOMP
diff --git a/‎.git-blame-ignore-revs‎
Lines changed: 3 additions & 0 deletions b/‎.git-blame-ignore-revs‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎cime_config/SystemTests/setparamfile.py‎
Lines changed: 95 additions & 0 deletions b/‎cime_config/SystemTests/setparamfile.py‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎cime_config/config_tests.xml‎
Lines changed: 10 additions & 0 deletions b/‎cime_config/config_tests.xml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎cime_config/testdefs/testlist_clm.xml‎
Lines changed: 10 additions & 0 deletions b/‎cime_config/testdefs/testlist_clm.xml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎doc/source/users_guide/using-clm-tools/index.rst‎
Lines changed: 1 addition & 0 deletions b/‎doc/source/users_guide/using-clm-tools/index.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/users_guide/using-clm-tools/paramfile-tools.md‎
Lines changed: 68 additions & 0 deletions b/‎doc/source/users_guide/using-clm-tools/paramfile-tools.md‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎python/ctsm/args_utils.py‎
Lines changed: 9 additions & 0 deletions b/‎python/ctsm/args_utils.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎python/ctsm/netcdf_utils.py‎
Lines changed: 141 additions & 0 deletions b/‎python/ctsm/netcdf_utils.py‎
Lines changed: 141 additions & 0 deletions
diff --git a/‎python/ctsm/param_utils/__init__.py‎ b/‎python/ctsm/param_utils/__init__.py‎
@@ -70,5 +70,8 @@ cdf40d265cc82775607a1bf25f5f527bacc97405
 4ad46f46de7dde753b4653c15f05326f55116b73
 75db098206b064b8b7b2a0604d3f0bf8fdb950cc
 84609494b54ea9732f64add43b2f1dd035632b4c
+7eb17f3ef0b9829fb55e0e3d7f02e157b0e41cfb
+62d7711506a0fb9a3ad138ceceffbac1b79a6caa
+49ad0f7ebe0b07459abc00a5c33c55a646f1e7e0
 ac03492012837799b7111607188acff9f739044a
 d858665d799690d73b56bcb961684382551193f4
@@ -0,0 +1,95 @@
+"""
+CTSM-specific test that first runs the set_paramfile tool and then ensures that CTSM does not fail
+using the just-generated parameter file
+"""
+
+import os
+import sys
+import logging
+import re
+from CIME.SystemTests.system_tests_common import SystemTestsCommon
+
+# In case we need to import set_paramfile later
+_CTSM_PYTHON = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, "python"
+)
+sys.path.insert(1, _CTSM_PYTHON)
+
+logger = logging.getLogger(__name__)
+
+
+class SETPARAMFILE(SystemTestsCommon):
+    """
+    System test that runs set_paramfile on the out-of-the-box parameter file and then builds
+    CTSM with user_nl_clm pointing at the modified copy.
+    """
+
+    def __init__(self, case):
+        """
+        Initialize an object interface to the SETPARAMFILE system test.
+
+        Generates the lnd namelist, reads the default paramfile path out of CaseDocs/lnd_in,
+        and assembles the set_paramfile command line that build_phase will run.
+
+        Raises:
+            RuntimeError: if no paramfile entry is found in lnd_in.
+        """
+        SystemTestsCommon.__init__(self, case)
+
+        # Create out-of-the-box lnd_in to obtain paramfile
+        case.create_namelists(component="lnd")
+
+        # Find the paramfile to modify
+        lnd_in_path = os.path.join(self._get_caseroot(), "CaseDocs", "lnd_in")
+        self._paramfile_in = None
+        with open(lnd_in_path, "r", encoding="utf-8") as lnd_in:
+            for line in lnd_in:
+                # Only the first single-quoted paramfile assignment is used
+                paramfile_in = re.match(r" *paramfile *= *'(.*)'", line)
+                if paramfile_in:
+                    self._paramfile_in = paramfile_in.group(1)
+                    break
+        if not self._paramfile_in:
+            raise RuntimeError(f"paramfile not found in {lnd_in_path}")
+
+        # Get the output file
+        self.paramfile_out = os.path.join(self._get_caseroot(), "paramfile.nc")
+
+        # Define set_paramfile command
+        self.set_paramfile_cmd = [
+            "set_paramfile",
+            "-i",
+            self._paramfile_in,
+            "-o",
+            self.paramfile_out,
+            # Change two parameters for one PFT
+            "-p",
+            "needleleaf_deciduous_boreal_tree",
+            "rswf_min=0.35",
+            "rswf_max=0.7",
+        ]
+
+    def build_phase(self, sharedlib_only=False, model_only=False):
+        """
+        Run set_paramfile and then build the model
+        """
+
+        # Run set_paramfile.
+        # build_phase gets called twice:
+        # - once with sharedlib_only = True and
+        # - once with model_only = True
+        # Because we only need set_paramfile run once, we only do it for the sharedlib_only call.
+        # We could also check for the existence of the set_paramfile outputs, but that might lead to
+        # a situation where the user expects set_paramfile to be called but it's not. Better to run
+        # unnecessarily (e.g., if you fixed some FORTRAN code and just need to rebuild).
+        if sharedlib_only:
+            self._run_set_paramfile()
+
+        # Do the build
+        self.build_indv(sharedlib_only=sharedlib_only, model_only=model_only)
+
+    def _run_set_paramfile(self):
+        """
+        Run set_paramfile and point CTSM at the parameter file it produces
+        """
+        # Import set_paramfile. Do it here rather than at top because otherwise the import will
+        # be attempted even during RUN phase.
+        # pylint: disable=wrong-import-position,import-outside-toplevel
+        from ctsm.param_utils.set_paramfile import main as set_paramfile
+
+        # The set_paramfile documentation notes that the output file must not already exist.
+        # Remove any copy left over from a previous build so that re-running this phase works.
+        if os.path.exists(self.paramfile_out):
+            os.remove(self.paramfile_out)
+
+        # main() is called without arguments, so the command line is handed over via sys.argv.
+        # Swap it in only for the duration of the call and always restore the original, so the
+        # rest of this process does not see set_paramfile's arguments.
+        original_argv = sys.argv
+        sys.argv = self.set_paramfile_cmd
+        try:
+            set_paramfile()
+        finally:
+            sys.argv = original_argv
+
+        # Append a paramfile setting to user_nl_clm so the new file is used
+        user_nl_clm_path = os.path.join(self._get_caseroot(), "user_nl_clm")
+        with open(user_nl_clm_path, "a", encoding="utf-8") as user_nl_clm:
+            user_nl_clm.write(f"paramfile = '{self.paramfile_out}'\n")
@@ -155,6 +155,16 @@ This defines various CTSM-specific system tests
     <HIST_N>$STOP_N</HIST_N>
   </test>
 
+  <test NAME="SETPARAMFILE">
+    <DESC>Modify a copy of the paramfile and run with it.</DESC>
+    <INFO_DBUG>1</INFO_DBUG>
+    <DOUT_S>FALSE</DOUT_S>
+    <CONTINUE_RUN>FALSE</CONTINUE_RUN>
+    <REST_OPTION>never</REST_OPTION>
+    <HIST_OPTION>$STOP_OPTION</HIST_OPTION>
+    <HIST_N>$STOP_N</HIST_N>
+  </test>
+
 <!--
 SSP    smoke CLM spinup test (only valid for CLM compsets with CLM45)
        do an initial spin test (setting CLM_ACCELERATED_SPINUP to on)
 
@@ -4495,5 +4495,15 @@
     </options>
   </test>
 
+  <test name="SETPARAMFILE_Ld5" grid="f10_f10_mg37" compset="I1850Clm60BgcCrujra" testmods="clm/default">
+    <machines>
+      <machine name="derecho" compiler="gnu" category="aux_clm"/>
+      <machine name="derecho" compiler="gnu" category="clm_pymods"/>
+    </machines>
+    <options>
+      <option name="wallclock">00:20:00</option>
+    </options>
+  </test>
+
 
 </testlist>
@@ -22,3 +22,4 @@ Using CLM tools
    creating-domain-files.rst
    observational-sites-datasets.rst
    cprnc.rst
+   paramfile-tools.md
@@ -0,0 +1,68 @@
+
+# Tools for working with parameter files
+
+This guide describes the features and usage of the `query_paramfile` and `set_paramfile` tools, located in `tools/param_utils/`. These utilities help users inspect and modify CLM parameter files.
+
+Note that you need to have the `ctsm_pylib` conda environment activated to use these tools. See Sect. :numref:`using-ctsm-pylib` for more information.
+
+## `query_paramfile`
+**Purpose:** Print the values of one or more parameters from a CTSM parameter file (NetCDF format).
+
+**Features:**
+- Print values for the specified parameters, or for all parameters if none are specified.
+- Optionally filter output by Plant Functional Types (PFTs) for PFT-specific parameters.
+
+For more information, run `tools/param_utils/query_paramfile --help`.
+
+
+### Example usage
+
+Print all variables in a parameter file:
+```bash
+tools/param_utils/query_paramfile -i paramfile.nc
+```
+
+Print specific variables:
+```bash
+tools/param_utils/query_paramfile -i paramfile.nc jmaxha jmaxhd
+```
+
+Print values for specific PFTs:
+```bash
+tools/param_utils/query_paramfile -i paramfile.nc -p needleleaf_evergreen_temperate_tree,c4_grass medlynintercept medlynslope
+```
+
+## `set_paramfile`
+**Purpose:** Change values of one or more parameters in a CTSM parameter file (NetCDF format).
+
+**Features:**
+- Modify parameter values for all or selected PFTs.
+- Optionally drop PFTs not specified.
+- Set parameter values to fill (missing) values using `nan`.
+- Handle files safely and check arguments for validity.
+
+Note that the output file must not already exist.
+
+For more information, run `tools/param_utils/set_paramfile --help`.
+
+### Example usage
+
+Change a scalar parameter:
+```bash
+tools/param_utils/set_paramfile -i paramfile.nc -o output.nc jmaxha=51000
+```
+
+Change a one-dimensional parameter (`mimics_fmet` has the `segment` dimension, length 4):
+```bash
+tools/param_utils/set_paramfile -i paramfile.nc -o output.nc mimics_fmet=0.1,0.2,0.3,0.4
+```
+
+Change a parameter for specific PFTs:
+```bash
+tools/param_utils/set_paramfile -i paramfile.nc -o output.nc -p needleleaf_evergreen_temperate_tree,c4_grass medlynintercept=99.9,100.1 medlynslope=2.99,1.99
+```
+
+Set a parameter to the fill value:
+```bash
+tools/param_utils/set_paramfile -i paramfile.nc -o output.nc -p needleleaf_evergreen_temperate_tree,c4_grass fleafcn=nan,nan
+```
@@ -46,3 +46,12 @@ def plon_type(plon):
             "ERROR: Longitude should be between 0 and 360 or -180 and 180."
         )
     return plon_float
+
+
+def comma_separated_list(value):
+    """
+    Argparse type helper: turn a comma-separated string into a list of whitespace-stripped items.
+
+    A value of None is passed through unchanged.
+    """
+    return None if value is None else list(map(str.strip, value.split(",")))
@@ -0,0 +1,141 @@
+"""
+Helper functions for working with netCDF files
+"""
+
+import numpy as np
+import xarray as xr
+from netCDF4 import Dataset  # pylint: disable=no-name-in-module
+
+
+def _is_dtype_nan_capable(ndarray: np.ndarray):
+    """
+    Given a numpy array, return True if it's capable of taking a NaN
+
+    Capability is probed by calling np.isnan() on the array; a TypeError means it is not.
+    """
+    try:
+        np.isnan(ndarray)
+        return True
+    except TypeError:
+        return False
+
+
+def _are_dicts_identical_nansequal(dict0: dict, dict1: dict, keys_to_ignore=None):
+    """
+    Compare two dictionaries, considering NaNs to be equal. Don't be strict here about types; if
+    they can be coerced to comparable types and then they match, return True.
+
+    Keys listed in keys_to_ignore are excluded from the comparison entirely: they may be present
+    in either, both, or neither dictionary without affecting the result. All remaining keys must
+    be present in both dictionaries with matching values.
+    """
+    if keys_to_ignore is None:
+        ignored = set()
+    elif isinstance(keys_to_ignore, str):
+        # A bare string is one key, not an iterable of single-character keys
+        ignored = {keys_to_ignore}
+    else:
+        ignored = set(keys_to_ignore)
+
+    # Compare the sets of keys that matter. Comparing raw lengths would wrongly fail when an
+    # ignored key exists in only one dictionary, and would miss an extra key in dict1.
+    if set(dict0) - ignored != set(dict1) - ignored:
+        return False
+
+    for key, value0 in dict0.items():
+        if key in ignored:
+            continue
+        value1 = dict1[key]
+
+        # Coerce to numpy arrays to simplify comparison code
+        value0 = np.array(value0)
+        value1 = np.array(value1)
+
+        # Compare, only asking to check equal NaNs if both are capable of taking NaN values
+        both_are_nan_capable = _is_dtype_nan_capable(value0) and _is_dtype_nan_capable(value1)
+        if not np.array_equal(value0, value1, equal_nan=both_are_nan_capable):
+            return False
+
+    return True
+
+
+def get_netcdf_format(file_path):
+    """
+    Open the netCDF file at file_path read-only and return its data_model attribute
+    """
+    with Dataset(file_path, "r") as dataset:
+        return dataset.data_model
+
+
+def _is_dataarray_metadata_identical(da0: xr.DataArray, da1: xr.DataArray, keys_to_ignore=None):
+    """
+    Return True if two DataArrays agree on dtype, encoding, attributes, name, and dims.
+
+    keys_to_ignore is applied to the encoding comparison only; attributes are always compared
+    in full.
+    """
+
+    # Data type first
+    if da0.dtype != da1.dtype:
+        return False
+
+    # Encoding, then attributes; the attribute comparison is skipped if the encodings differ
+    encodings_match = _are_dicts_identical_nansequal(
+        da0.encoding, da1.encoding, keys_to_ignore=keys_to_ignore
+    )
+    if not (encodings_match and _are_dicts_identical_nansequal(da0.attrs, da1.attrs)):
+        return False
+
+    # Finally, name and dims
+    return not (da0.name != da1.name or da0.dims != da1.dims)
+
+
+def _is_dataarray_data_identical(da0: xr.DataArray, da1: xr.DataArray):
+    """
+    Return True if two DataArrays agree on sizes, coordinates, values (NaNs in matching
+    positions count as equal), and underlying data container type.
+
+    Raises NotImplementedError if everything else matches but the underlying data is not a
+    numpy.ndarray.
+    """
+    # pylint: disable=too-many-return-statements
+
+    # Sizes
+    if da0.sizes != da1.sizes:
+        return False
+
+    # Coordinates: either both have some (and they must match) or neither does
+    has_coords0 = bool(da0.coords)
+    has_coords1 = bool(da1.coords)
+    if has_coords0 != has_coords1:
+        return False
+    if has_coords0 and not da0.coords.equals(da1.coords):
+        return False
+
+    # Values ("The array's data converted to numpy.ndarray")
+    values0 = da0.values
+    values1 = da1.values
+    if not np.array_equal(values0, values1):
+        # The strict comparison failed; retry treating NaNs as equal. NaN-incapable dtypes can
+        # raise TypeError from np.array_equal(..., equal_nan=True), which means "not equal".
+        try:
+            equal_with_nans = np.array_equal(values0, values1, equal_nan=True)
+        except TypeError:
+            return False
+        if not equal_with_nans:
+            return False
+
+    # Data ("The DataArray's data as an array. The underlying array type (e.g. dask, sparse,
+    # pint) is preserved.")
+    data0 = da0.data
+    da0_data_type = type(data0)
+    if not isinstance(da1.data, da0_data_type):
+        return False
+    if not isinstance(data0, np.ndarray):
+        raise NotImplementedError(f"Add support for comparing two objects of type {da0_data_type}")
+
+    return True
+
+
+def are_xr_dataarrays_identical(da0: xr.DataArray, da1: xr.DataArray, keys_to_ignore=None):
+    """
+    Comprehensively check whether two DataArrays are identical.
+
+    Runs the metadata check (passing keys_to_ignore through), then the data check, and finally
+    falls back to however xarray defines equality, in case the first two missed something.
+    Later checks are skipped as soon as one fails.
+    """
+    return (
+        _is_dataarray_metadata_identical(da0, da1, keys_to_ignore=keys_to_ignore)
+        and _is_dataarray_data_identical(da0, da1)
+        and da0.equals(da1)
+    )