EAMxx: improve RCS system test based on feedback

mahf708 · mahf708 · commit f2ba953ab9e3 · 2025-10-31T10:51:37.000-07:00
diff --git a/components/eamxx/cime_config/SystemTests/rcs.py b/components/eamxx/cime_config/SystemTests/rcs.py
@@ -15,16 +15,16 @@
 - rcs_stats.py: functions to conduct statistical testing
 """
 
-import os
 import glob
 import logging
 import sys
+from pathlib import Path
 
 import CIME.test_status
 import CIME.utils
 from CIME.status import append_testlog
 from CIME.SystemTests.system_tests_common import SystemTestsCommon
-from CIME.case.case_setup import case_setup
+from CIME.utils import expect
 
 logger = logging.getLogger(__name__)
 
@@ -56,37 +56,33 @@ def setup_phase(
             disable_git=disable_git,
         )
         self._case.flush()
-        # and again...?
-        case_setup(self._case, test_mode=False, reset=True)
 
         # get run directory
         run_dir = self._case.get_value("RUNDIR")
         # get n_inst
         n_inst = int(self._case.get_value("NINST_ATM"))
         # return early if n_inst <= 1
         # we really don't want people to run this test with n_inst=1
-        if n_inst <= 1:
-            msg = (
-                f"NINST_ATM = {n_inst}. This test requires NINST_ATM > 1. "
-                "Consider setting NINST_ATM > 1 in your env_run.xml "
-                "or use _C# specifier in test name for a multi-driver "
-                "multi-instance setup (producing # pelayout copies), "
-                "or _N# for a single-driver multi-instance setup "
-                "(dividing specified pelayout among # instances)."
-            )
-            raise ValueError(msg)
+        expect(
+            n_inst > 1,
+            f"NINST_ATM = {n_inst}. This test requires NINST_ATM > 1. "
+            "Consider setting NINST_ATM > 1 in your env_run.xml "
+            "or use _C# specifier in test name for a multi-driver "
+            "multi-instance setup (producing # pelayout copies), "
+            "or _N# for a single-driver multi-instance setup "
+            "(dividing specified pelayout among # instances)."
+        )
 
         # get rcs_perts functions
         # but first add the directory to sys.path if not already there
-        rcs_perts_path = os.path.join(
-            os.path.dirname(__file__), 'rcs_perts.py'
+        rcs_perts_path = Path(__file__).parent / 'rcs_perts.py'
+        expect(
+            rcs_perts_path.exists(),
+            f"Cannot find rcs_perts.py at {rcs_perts_path}"
         )
-        if not os.path.exists(rcs_perts_path):
-            raise ImportError(
-                f"Cannot find rcs_perts.py at {rcs_perts_path}"
-            )
-        if os.path.dirname(__file__) not in sys.path:
-            sys.path.insert(0, os.path.dirname(__file__))
+        script_dir = str(Path(__file__).parent)
+        if script_dir not in sys.path:
+            sys.path.insert(0, script_dir)
         # pylint: disable=import-outside-toplevel
         from rcs_perts import duplicate_yaml_file, update_yaml_file
 
@@ -98,11 +94,14 @@ def setup_phase(
         for i in range(1, n_inst + 1):
             yaml_file = f"{run_dir}/data/scream_input.yaml_{i:04d}"
             out_file = f"{run_dir}/data/monthly_average.yaml_{i:04d}"
-            if not os.path.isfile(yaml_file):
-                raise FileNotFoundError(
-                    f"File {yaml_file} does not exist.")
-            if not os.path.isfile(out_file):
-                raise FileNotFoundError(f"File {out_file} does not exist.")
+            expect(
+                Path(yaml_file).is_file(),
+                f"File {yaml_file} does not exist."
+            )
+            expect(
+                Path(out_file).is_file(),
+                f"File {out_file} does not exist."
+            )
             update_yaml_file(yaml_file, i, "pert")
             update_yaml_file(out_file, i, "out")
 
@@ -113,32 +112,29 @@ def _generate_baseline(self):
 
         with CIME.utils.SharedArea():
             # get the baseline and run directories
-            base_gen_dir = os.path.join(
-                self._case.get_value("BASELINE_ROOT"),
-                self._case.get_value("BASEGEN_CASE"),
-            )
-            run_dir = self._case.get_value("RUNDIR")
+            baseline_root = Path(self._case.get_value("BASELINE_ROOT"))
+            basegen_case = self._case.get_value("BASEGEN_CASE")
+            base_gen_dir = baseline_root / basegen_case
+            run_dir = Path(self._case.get_value("RUNDIR"))
 
             # Get all files that match the ensemble pattern
-            hists = glob.glob(
-                os.path.join(run_dir, self.ENSEMBLE_FILE_PATTERN)
-            )
-            hist_files = [os.path.basename(h) for h in hists]
+            hists = glob.glob(str(run_dir / self.ENSEMBLE_FILE_PATTERN))
+            hist_files = [Path(h).name for h in hists]
 
             for hist in hist_files:
-                src = os.path.join(run_dir, hist)
-                tgt = os.path.join(base_gen_dir, hist)
+                src = run_dir / hist
+                tgt = base_gen_dir / hist
                 # remove baselines if they exist
                 # this is safe because cime forces users to use -o
-                if os.path.exists(tgt):
-                    os.remove(tgt)
+                if tgt.exists():
+                    tgt.unlink()
 
                 # log and copy
                 logger.info(
                     "Copying ... \n \t %s \n ... to ... \n \t %s \n\n",
                     src, tgt
                 )
-                CIME.utils.safe_copy(src, tgt, preserve_meta=False)
+                CIME.utils.safe_copy(str(src), str(tgt), preserve_meta=False)
 
     def _compare_baseline(self):
         """compare phase implementation"""
@@ -159,31 +155,29 @@ def _compare_baseline(self):
 
             # get the run and baseline directories
             run_dir = self._case.get_value("RUNDIR")
-            base_dir = os.path.join(
-                self._case.get_value("BASELINE_ROOT"),
-                self._case.get_value("BASECMP_CASE"),
-            )
+            baseline_root = Path(self._case.get_value("BASELINE_ROOT"))
+            basecmp_case = self._case.get_value("BASECMP_CASE")
+            base_dir = baseline_root / basecmp_case
 
             # launch the statistics tests
             # first, import rcs_stats funcs from the other file
-            rcs_stats_path = os.path.join(
-                os.path.dirname(__file__), 'rcs_stats.py'
+            rcs_stats_path = Path(__file__).parent / 'rcs_stats.py'
+            expect(
+                rcs_stats_path.exists(),
+                f"Cannot find rcs_stats.py at {rcs_stats_path}"
             )
-            if not os.path.exists(rcs_stats_path):
-                raise ImportError(
-                    f"Cannot find rcs_stats.py at {rcs_stats_path}"
-                )
             # Add the directory to sys.path if not already there
-            if os.path.dirname(__file__) not in sys.path:
-                sys.path.insert(0, os.path.dirname(__file__))
+            script_dir = str(Path(__file__).parent)
+            if script_dir not in sys.path:
+                sys.path.insert(0, script_dir)
             # note be extra safe and import whole file
             # because we want to avoid import errors of needed pkgs
             # pylint: disable=import-outside-toplevel
             import rcs_stats as rcss
             # now, launch
             comments, new_ts = rcss.run_stats_comparison(
                 run_dir,
-                base_dir,
+                str(base_dir),
                 analysis_type="spatiotemporal",
                 test_type="ks",
                 alpha=0.01,
diff --git a/components/eamxx/cime_config/SystemTests/rcs_perts.py b/components/eamxx/cime_config/SystemTests/rcs_perts.py
@@ -1,5 +1,5 @@
 """
-Perturbation functions for EST system test.
+Perturbation functions for RCS system test.
 """
 
 import os
diff --git a/components/eamxx/cime_config/SystemTests/rcs_stats.py b/components/eamxx/cime_config/SystemTests/rcs_stats.py
@@ -1142,50 +1142,59 @@ def _prepare_variable_data(var):
     return var
 
 
-if __name__ == "__main__":
+###############################################################################
+def parse_command_line(args, description):
+###############################################################################
     import argparse
+    from pathlib import Path
 
     parser = argparse.ArgumentParser(
-        description="Statistical comparison of two ensemble simulations.",
+        usage="""\n{0} run_dir base_dir [options]
+OR
+{0} --help
+
+\033[1mEXAMPLES:\033[0m
+    \033[1;32m# Default: KS test with Bonferroni correction\033[0m
+    > {0} /path/to/run /path/to/baseline
+
+    \033[1;32m# Anderson-Darling with temporal analysis\033[0m
+    > {0} /path/to/run /path/to/baseline --test_type ad \\
+          --analysis_type temporal
+
+    \033[1;32m# Custom significance level with FDR correction\033[0m
+    > {0} /path/to/run /path/to/baseline --test_type ks \\
+          --alpha 0.001 --correction_method fdr
+
+    \033[1;32m# No multiple testing correction\033[0m
+    > {0} /path/to/run /path/to/baseline \\
+          --correction_method none --magnitude_threshold 0.01
+
+    \033[1;32m# Custom file patterns\033[0m
+    > {0} /path/to/run /path/to/baseline \\
+          --run_file_pattern "*.eam_????.h0.*.nc" \\
+          --base_file_pattern "*.scream_????.h.AVERAGE.*.nc"
+""".format(Path(args[0]).name),
+        description=description,
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
-Available Statistical Tests:
+\033[1mAVAILABLE STATISTICAL TESTS:\033[0m
 
-  DISTRIBUTION TESTS (compare entire distributions):
+  \033[1mDISTRIBUTION TESTS\033[0m (compare entire distributions):
     ks          Kolmogorov-Smirnov (recommended default)
     ad          Anderson-Darling (high sensitivity, especially tails)
     cvm         Cramér-von Mises (moderate-high sensitivity)
     epps        Epps-Singleton (location + scale)
     energy      Energy distance (powerful, any difference)
 
-  LOCATION TESTS (compare means/medians):
+  \033[1mLOCATION TESTS\033[0m (compare means/medians):
     mw          Mann-Whitney U (non-parametric median test)
     ttest       Welch's t-test (parametric mean test)
     brunner     Brunner-Munzel (robust alternative to t-test)
 
-  SCALE TESTS (compare variances/spread):
+  \033[1mSCALE TESTS\033[0m (compare variances/spread):
     levene      Levene's test (variance equality)
     ansari      Ansari-Bradley (non-parametric scale)
     mood        Mood's test (non-parametric dispersion)
-
-Examples:
-  # Default: KS test with spatiotemporal analysis and Bonferroni correction
-  %(prog)s /path/to/run /path/to/baseline
-
-  # Anderson-Darling with temporal analysis
-  %(prog)s /path/to/run /path/to/baseline --test_type ad --analysis_type temporal
-
-  # Custom significance level with FDR correction
-  %(prog)s /path/to/run /path/to/baseline --test_type ks --alpha 0.001 --correction_method fdr
-
-  # No multiple testing correction with magnitude threshold
-  %(prog)s /path/to/run /path/to/baseline \\
-      --correction_method none --magnitude_threshold 0.01
-
-  # Custom file patterns for different output formats
-  %(prog)s /path/to/run /path/to/baseline \\
-      --run_file_pattern "*.eam_????.h0.*.nc" \\
-      --base_file_pattern "*.scream_????.h.AVERAGE.*.nc"
 """,
     )
 
@@ -1268,20 +1277,26 @@ def _prepare_variable_data(var):
         "number, default: *.scream_????.h.AVERAGE.*.nc)",
     )
 
-    args = parser.parse_args()
+    return parser.parse_args(args[1:])
+
+
+###############################################################################
+def _main_func(description):
+###############################################################################
+    cli_args = parse_command_line(sys.argv, description)
 
     cli_comments, cli_status = run_stats_comparison(
-        args.run_dir,
-        args.base_dir,
-        analysis_type=args.analysis_type,
-        test_type=args.test_type,
-        alpha=args.alpha,
-        critical_fraction=args.critical_fraction,
-        correction_method=args.correction_method,
-        max_failed_vars=args.max_failed_vars,
-        magnitude_threshold=args.magnitude_threshold,
-        run_file_pattern=args.run_file_pattern,
-        base_file_pattern=args.base_file_pattern,
+        cli_args.run_dir,
+        cli_args.base_dir,
+        analysis_type=cli_args.analysis_type,
+        test_type=cli_args.test_type,
+        alpha=cli_args.alpha,
+        critical_fraction=cli_args.critical_fraction,
+        correction_method=cli_args.correction_method,
+        max_failed_vars=cli_args.max_failed_vars,
+        magnitude_threshold=cli_args.magnitude_threshold,
+        run_file_pattern=cli_args.run_file_pattern,
+        base_file_pattern=cli_args.base_file_pattern,
     )
 
     print("\n")
@@ -1295,3 +1310,9 @@ def _prepare_variable_data(var):
     print("=" * 70)
     print("\n")
     print(cli_comments)
+
+
+###############################################################################
+
+if (__name__ == "__main__"):
+    _main_func(__doc__)
diff --git a/components/eamxx/cime_config/config_tests.xml b/components/eamxx/cime_config/config_tests.xml
@@ -13,9 +13,9 @@ This defines any EAMxx specific CIME tests
     <CONTINUE_RUN>FALSE</CONTINUE_RUN>
     <STOP_OPTION>nmonths</STOP_OPTION>
     <STOP_N>12</STOP_N>
-    <REST_OPTION>$STOP_OPTION</REST_OPTION>
+    <REST_OPTION>never</REST_OPTION>
     <REST_N>$STOP_N</REST_N>
-    <HIST_OPTION>$STOP_OPTION</HIST_OPTION>
+    <HIST_OPTION>never</HIST_OPTION>
     <HIST_N>$STOP_N</HIST_N>
     <RESUBMIT>0</RESUBMIT>
   </test>
diff --git a/components/eamxx/docs/user/multi-instance-rcs.md b/components/eamxx/docs/user/multi-instance-rcs.md
@@ -122,7 +122,7 @@ These tests focus on differences in variability:
 - Best for: Non-parametric scale comparison
 - Assumption: Samples differ primarily in scale, not location
 
-###### `mood` - Mood's Test
+##### `mood` - Mood's Test
 
 - Sensitivity: Moderate
 - Best for: Non-parametric dispersion comparison