|
148 | 148 | <DESC>Perlmutter CPU-only nodes at NERSC. Phase2 only: Each node has 2 AMD EPYC 7713 64-Core (Milan) 512GB</DESC> |
149 | 149 | <NODENAME_REGEX>$ENV{NERSC_HOST}:perlmutter</NODENAME_REGEX> |
150 | 150 | <OS>Linux</OS> |
151 | | - <COMPILERS>gnu,nvidia,amdclang</COMPILERS> |
| 151 | + <COMPILERS>gnu,intel,nvidia,amdclang</COMPILERS> |
152 | 152 | <MPILIBS>mpich</MPILIBS> |
153 | 153 | <PROJECT>e3sm</PROJECT> |
154 | 154 | <SAVE_TIMING_DIR>/global/cfs/cdirs/e3sm</SAVE_TIMING_DIR> |
|
167 | 167 | <BATCH_SYSTEM>nersc_slurm</BATCH_SYSTEM> |
168 | 168 | <SUPPORTED_BY>e3sm</SUPPORTED_BY> |
169 | 169 | <MAX_TASKS_PER_NODE>256</MAX_TASKS_PER_NODE> |
170 | | - <MAX_MPITASKS_PER_NODE>64</MAX_MPITASKS_PER_NODE> |
| 170 | + <MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE> |
171 | 171 | <PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED> |
172 | 172 | <mpirun mpilib="default"> |
173 | 173 | <executable>srun</executable> |
|
194 | 194 | <command name="unload">cray-netcdf-hdf5parallel</command> |
195 | 195 | <command name="unload">cray-parallel-netcdf</command> |
196 | 196 | <command name="unload">PrgEnv-gnu</command> |
| 197 | + <command name="unload">PrgEnv-intel</command> |
197 | 198 | <command name="unload">PrgEnv-nvidia</command> |
198 | 199 | <command name="unload">PrgEnv-cray</command> |
199 | 200 | <command name="unload">PrgEnv-aocc</command> |
| 201 | + <command name="unload">intel</command> |
| 202 | + <command name="unload">intel-oneapi</command> |
200 | 203 | <command name="unload">cudatoolkit</command> |
201 | 204 | <command name="unload">craype-accel-nvidia80</command> |
202 | 205 | <command name="unload">craype-accel-host</command> |
|
208 | 211 | <modules compiler="gnu"> |
<!-- Module set applied when the gnu compiler is selected: the GNU programming environment, a pinned gcc, and a pinned cray-libsci. -->
209 | 212 | <command name="load">PrgEnv-gnu/8.3.3</command> |
210 | 213 | <command name="load">gcc/11.2.0</command> |
| 214 | + <command name="load">cray-libsci/23.02.1.1</command> |
| 215 | + </modules> |
| 216 | + |
| 217 | + <modules compiler="intel"> |
<!-- Module set applied when the intel compiler is selected: the Intel programming environment and a pinned intel compiler module. -->
<!-- NOTE(review): unlike the gnu, nvidia and amdclang module sets, no cray-libsci module is loaded here; presumably intentional for Intel, please confirm. -->
| 218 | + <command name="load">PrgEnv-intel/8.3.3</command> |
| 219 | + <command name="load">intel/2023.0.0</command> |
211 | 220 | </modules> |
212 | 221 |
|
213 | 222 | <modules compiler="nvidia"> |
<!-- Module set applied when the nvidia compiler is selected: the NVIDIA programming environment (no version pinned), nvidia 22.7, and a pinned cray-libsci. -->
214 | 223 | <command name="load">PrgEnv-nvidia</command> |
215 | 224 | <command name="load">nvidia/22.7</command> |
| 225 | + <command name="load">cray-libsci/23.02.1.1</command> |
216 | 226 | </modules> |
217 | 227 |
|
218 | 228 | <modules compiler="amdclang"> |
<!-- Module set applied when the amdclang compiler is selected: the AOCC programming environment (no version pinned), aocc 3.2.0, and a pinned cray-libsci. -->
219 | 229 | <command name="load">PrgEnv-aocc</command> |
220 | 230 | <command name="load">aocc/3.2.0</command> |
| 231 | + <command name="load">cray-libsci/23.02.1.1</command> |
221 | 232 | </modules> |
222 | 233 |
|
223 | 234 | <modules> |
224 | 235 | <command name="load">craype-accel-host</command> |
225 | | - <command name="load">cray-libsci/23.02.1.1</command> |
226 | 236 | <command name="load">craype/2.7.19</command> |
227 | 237 | <command name="load">cray-mpich/8.1.24</command> |
228 | 238 | <command name="load">cray-hdf5-parallel/1.12.2.3</command> |
|
420 | 430 | <BATCH_SYSTEM>nersc_slurm</BATCH_SYSTEM> |
421 | 431 | <SUPPORTED_BY>e3sm</SUPPORTED_BY> |
422 | 432 | <MAX_TASKS_PER_NODE>256</MAX_TASKS_PER_NODE> |
423 | | - <MAX_MPITASKS_PER_NODE>64</MAX_MPITASKS_PER_NODE> |
| 433 | + <MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE> |
424 | 434 | <PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED> |
425 | 435 | <mpirun mpilib="default"> |
426 | 436 | <executable>srun</executable> |
|
586 | 596 | </environment_variables> |
587 | 597 | </machine> |
588 | 598 |
|
| 599 | + <machine MACH="frontier"> |
<!-- Frontier (ORNL) machine entry: description and node-name matching, project and filesystem roots, slurm/srun launch settings, per-compiler module sets, and per-compiler environment variables. -->
| 600 | + <DESC>Frontier exascale supercomputer at ORNL. 9408 nodes, Node: 4 AMD MI250X GPUs (2 GCDs) ~ 8 GPUs, 512 GB HBM2E, AMD EPYC 64 cores, 512GB DDR4 </DESC> |
| 601 | + <NODENAME_REGEX>.*frontier.*</NODENAME_REGEX> |
| 602 | + <OS>CNL</OS> |
| 603 | + <COMPILERS>gnu,crayclang,amdclang,gnugpu,crayclanggpu,amdclanggpu</COMPILERS> |
| 604 | + <MPILIBS>mpich</MPILIBS> |
| 605 | + <PROJECT>cli115</PROJECT> |
| 606 | + <SAVE_TIMING_DIR>/lustre/orion/cli115/world-shared/frontier</SAVE_TIMING_DIR> |
| 607 | + <SAVE_TIMING_DIR_PROJECTS>.*</SAVE_TIMING_DIR_PROJECTS> |
| 608 | + <CIME_OUTPUT_ROOT>/lustre/orion/$PROJECT/proj-shared/$ENV{USER}/e3sm_scratch</CIME_OUTPUT_ROOT> |
| 609 | + <DIN_LOC_ROOT>/lustre/orion/cli115/world-shared/e3sm/inputdata</DIN_LOC_ROOT> |
| 610 | + <DIN_LOC_ROOT_CLMFORC>/lustre/orion/cli115/world-shared/e3sm/inputdata/atm/datm7</DIN_LOC_ROOT_CLMFORC> |
| 611 | + <DOUT_S_ROOT>$CIME_OUTPUT_ROOT/archive/$CASE</DOUT_S_ROOT> |
| 612 | + <BASELINE_ROOT>/lustre/orion/cli115/world-shared/e3sm/baselines/frontier/$COMPILER</BASELINE_ROOT> |
| 613 | + <CCSM_CPRNC>/lustre/orion/cli115/world-shared/e3sm/tools/cprnc/cprnc</CCSM_CPRNC> |
| 614 | + <GMAKE_J>8</GMAKE_J> |
| 615 | + <NTEST_PARALLEL_JOBS>1</NTEST_PARALLEL_JOBS> |
| 616 | + <BATCH_SYSTEM>slurm</BATCH_SYSTEM> |
| 617 | + <SUPPORTED_BY>e3sm</SUPPORTED_BY> |
| 618 | + <MAX_TASKS_PER_NODE>56</MAX_TASKS_PER_NODE> |
| 619 | + <MAX_MPITASKS_PER_NODE>56</MAX_MPITASKS_PER_NODE> |
<!-- The GPU compiler variants cap MPI ranks per node at 8; the NTASKS_PER_GPU expression further down divides this value by 8. -->
| 620 | + <MAX_MPITASKS_PER_NODE compiler="gnugpu">8</MAX_MPITASKS_PER_NODE> |
| 621 | + <MAX_MPITASKS_PER_NODE compiler="crayclanggpu">8</MAX_MPITASKS_PER_NODE> |
| 622 | + <MAX_MPITASKS_PER_NODE compiler="amdclanggpu">8</MAX_MPITASKS_PER_NODE> |
| 623 | + <PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED> |
<!-- Launcher: srun. The ntasks_per_gpu and gpu_bind arguments expand from NTASKS_PER_GPU and GPU_BIND_ARGS, which this entry sets to a single space by default and to real srun options only for compilers matching .*gpu.* -->
| 624 | + <mpirun mpilib="default"> |
| 625 | + <executable>srun</executable> |
| 626 | + <arguments> |
| 627 | + <arg name="num_tasks"> -l -K -n {{ total_tasks }} -N {{ num_nodes }} </arg> |
| 628 | + <arg name="thread_count">-c $ENV{OMP_NUM_THREADS}</arg> |
| 629 | + <arg name="ntasks_per_gpu">$ENV{NTASKS_PER_GPU}</arg> |
| 630 | + <arg name="gpu_bind">$ENV{GPU_BIND_ARGS}</arg> |
| 631 | + </arguments> |
| 632 | + </mpirun> |
| 633 | + <module_system type="module" allow_error="true"> |
| 634 | + <init_path lang="sh">/usr/share/lmod/lmod/init/sh</init_path> |
| 635 | + <init_path lang="csh">/usr/share/lmod/lmod/init/csh</init_path> |
| 636 | + <init_path lang="perl">/usr/share/lmod/lmod/init/perl</init_path> |
| 637 | + <init_path lang="python">/usr/share/lmod/lmod/init/env_modules_python.py</init_path> |
| 638 | + <cmd_path lang="perl">/usr/share/lmod/lmod/libexec/lmod perl</cmd_path> |
| 639 | + <cmd_path lang="sh">module</cmd_path> |
| 640 | + <cmd_path lang="csh">module</cmd_path> |
| 641 | + <cmd_path lang="python">/usr/share/lmod/lmod/libexec/lmod python</cmd_path> |
<!-- Each compiler family has a base module set selected with a ".*" pattern (reset, then switch programming environment and compiler) and an additional set for its gpu variant that loads the gfx90a accelerator target. Presumably the ".*" selectors also match the gpu variants; confirm against the consumer's matching rules. -->
| 642 | + <modules compiler="crayclang.*"> |
| 643 | + <command name="reset"></command> |
| 644 | + <command name="switch">PrgEnv-cray PrgEnv-cray/8.3.3</command> |
| 645 | + <command name="switch">cce cce/15.0.1</command> |
| 646 | + <!-- craype module to address tcmalloc runtime errors at startup --> |
| 647 | + <!-- tcmalloc.cc:647 Attempt to free invalid pointer --> |
| 648 | + <command name="switch">craype craype/2.7.20</command> |
| 649 | + </modules> |
| 650 | + <modules compiler="crayclanggpu"> |
| 651 | + <command name="load">craype-accel-amd-gfx90a</command> |
| 652 | + <command name="load">rocm/5.4.0</command> |
| 653 | + </modules> |
| 654 | + <modules compiler="amdclang.*"> |
| 655 | + <command name="reset"></command> |
| 656 | + <command name="switch">PrgEnv-cray PrgEnv-amd/8.3.3</command> |
| 657 | + <command name="switch">amd amd/5.4.0</command> |
| 658 | + </modules> |
| 659 | + <modules compiler="amdclanggpu"> |
| 660 | + <command name="load">craype-accel-amd-gfx90a</command> |
| 661 | + </modules> |
| 662 | + <modules compiler="gnu.*"> |
| 663 | + <command name="reset"></command> |
| 664 | + <command name="switch">PrgEnv-cray PrgEnv-gnu/8.3.3</command> |
| 665 | + <!-- TODO: gcc/12.2.0 is default, need to check --> |
| 666 | + <command name="switch">gcc gcc/11.2.0</command> |
| 667 | + </modules> |
| 668 | + <modules compiler="gnugpu"> |
| 669 | + <command name="load">craype-accel-amd-gfx90a</command> |
| 670 | + <command name="load">rocm/5.4.0</command> |
| 671 | + </modules> |
<!-- Modules loaded for every compiler: tooling (python, svn, git, cmake), zlib, and the parallel HDF5 / NetCDF / PnetCDF libraries. -->
| 672 | + <modules> |
| 673 | + <command name="load">cray-python/3.9.13.1</command> |
| 674 | + <command name="load">subversion/1.14.1</command> |
| 675 | + <command name="load">git/2.36.1</command> |
| 676 | + <command name="load">cmake/3.21.3</command> |
| 677 | + <command name="load">zlib/1.2.11</command> |
| 678 | + <command name="load">cray-hdf5-parallel/1.12.2.1</command> |
| 679 | + <command name="load">cray-netcdf-hdf5parallel/4.9.0.1</command> |
| 680 | + <command name="load">cray-parallel-netcdf/1.12.3.1</command> |
| 681 | + </modules> |
| 682 | + </module_system> |
| 683 | + <RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR> |
| 684 | + <EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT> |
| 685 | + <TEST_TPUT_TOLERANCE>0.1</TEST_TPUT_TOLERANCE> |
| 686 | + <TEST_MEMLEAK_TOLERANCE>0.25</TEST_MEMLEAK_TOLERANCE> |
| 687 | + <MAX_GB_OLD_TEST_DATA>0</MAX_GB_OLD_TEST_DATA> |
<!-- Defaults for all compilers. NTASKS_PER_GPU and GPU_BIND_ARGS are a single space here so the srun arguments that reference them expand to nothing meaningful on non-gpu builds. -->
| 688 | + <environment_variables> |
| 689 | + <env name="NETCDF_PATH">$ENV{NETCDF_DIR}</env> |
| 690 | + <env name="PNETCDF_PATH">$ENV{PNETCDF_DIR}</env> |
| 691 | + <env name="NTASKS_PER_GPU"> </env> |
| 692 | + <env name="GPU_BIND_ARGS"> </env> |
| 693 | + </environment_variables> |
<!-- NOTE(review): this selector is "amdclang" without the ".*" suffix used by the other amdclang selectors in this entry; confirm whether amdclanggpu is meant to receive this LD_LIBRARY_PATH prefix as well. -->
| 694 | + <environment_variables compiler="amdclang"> |
| 695 | + <env name="LD_LIBRARY_PATH">$ENV{CRAY_LIBSCI_DIR}/amd/4.0/x86_64/lib:$ENV{LD_LIBRARY_PATH}</env> |
| 696 | + </environment_variables> |
<!-- GPU builds: tasks per GPU is computed by querying MAX_MPITASKS_PER_NODE through xmlquery and dividing by 8 with bc; GPU binding is set to closest. -->
| 697 | + <environment_variables compiler=".*gpu.*"> |
| 698 | + <env name="NTASKS_PER_GPU">--ntasks-per-gpu=$SHELL{echo "`./xmlquery --value MAX_MPITASKS_PER_NODE`/8"|bc}</env> |
| 699 | + <env name="GPU_BIND_ARGS">--gpu-bind=closest</env> |
| 700 | + <env name="PNETCDF_HINTS">romio_cb_read=disable</env> |
| 701 | + <env name="MPICH_GPU_SUPPORT_ENABLED">0</env> |
| 702 | + </environment_variables> |
| 703 | + <environment_variables compiler=".*gpu.*" DEBUG="TRUE"> |
| 704 | + <env name="AMD_LOG_LEVEL">10</env> |
| 705 | + <env name="CRAY_ACC_DEBUG">3</env> |
| 706 | + </environment_variables> |
| 707 | + <environment_variables SMP_PRESENT="TRUE"> |
| 708 | + <env name="OMP_STACKSIZE">128M</env> |
| 709 | + <env name="OMP_PROC_BIND">spread</env> |
| 710 | + <env name="OMP_PLACES">threads</env> |
| 711 | + </environment_variables> |
<!-- Per-compiler ADIOS2 install locations. NOTE(review): the crayclang path names crayclang-14.0.2 while this entry switches to cce/15.0.1, and these paths live under project cli133 while the other roots in this entry use cli115; confirm the installs exist and are compatible. -->
| 712 | + <environment_variables compiler="gnu.*" mpilib="mpich"> |
| 713 | + <env name="ADIOS2_DIR">/lustre/orion/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/gcc-11.2.0</env> |
| 714 | + </environment_variables> |
| 715 | + <environment_variables compiler="crayclang.*" mpilib="mpich"> |
| 716 | + <env name="ADIOS2_DIR">/lustre/orion/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/crayclang-14.0.2</env> |
| 717 | + </environment_variables> |
| 718 | + <environment_variables compiler="amdclang.*" mpilib="mpich"> |
| 719 | + <env name="ADIOS2_DIR">/lustre/orion/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/amdclang-15.0.0</env> |
| 720 | + </environment_variables> |
| 721 | + </machine> |
| 722 | + |
589 | 723 | <machine MACH="crusher"> |
590 | 724 | <DESC>Crusher. NCCS moderate-security system that contains similar hardware and software as the upcoming Frontier system at ORNL. 192 AMD EPYC 7A53 64C nodes, 128 hwthreads, 512GB DDR4, 4 MI250X GPUs</DESC> |
591 | 725 | <NODENAME_REGEX>.*crusher.*</NODENAME_REGEX> |
592 | 726 | <OS>CNL</OS> |
593 | 727 | <COMPILERS>gnu,crayclang,amdclang,gnugpu,crayclanggpu,amdclanggpu</COMPILERS> |
594 | 728 | <MPILIBS>mpich</MPILIBS> |
595 | | - <PROJECT>cli133_crusher</PROJECT> |
596 | | - <CIME_OUTPUT_ROOT>/gpfs/alpine/cli133/proj-shared/$ENV{USER}/e3sm_scratch/crusher</CIME_OUTPUT_ROOT> |
597 | | - <DIN_LOC_ROOT>/gpfs/alpine/cli115/world-shared/e3sm/inputdata</DIN_LOC_ROOT> |
598 | | - <DIN_LOC_ROOT_CLMFORC>/gpfs/alpine/cli115/world-shared/e3sm/inputdata/atm/datm7</DIN_LOC_ROOT_CLMFORC> |
| 729 | + <PROJECT>cli115</PROJECT> |
| 730 | + <SAVE_TIMING_DIR>/lustre/orion/cli115/world-shared/crusher</SAVE_TIMING_DIR> |
| 731 | + <SAVE_TIMING_DIR_PROJECTS>.*</SAVE_TIMING_DIR_PROJECTS> |
| 732 | + <CIME_OUTPUT_ROOT>/lustre/orion/$PROJECT/proj-shared/$ENV{USER}/e3sm_scratch/crusher</CIME_OUTPUT_ROOT> |
| 733 | + <DIN_LOC_ROOT>/lustre/orion/cli115/world-shared/e3sm/inputdata</DIN_LOC_ROOT> |
| 734 | + <DIN_LOC_ROOT_CLMFORC>/lustre/orion/cli115/world-shared/e3sm/inputdata/atm/datm7</DIN_LOC_ROOT_CLMFORC> |
599 | 735 | <DOUT_S_ROOT>$CIME_OUTPUT_ROOT/archive/$CASE</DOUT_S_ROOT> |
600 | | - <BASELINE_ROOT>/gpfs/alpine/cli133/world-shared/e3sm/baselines/$COMPILER</BASELINE_ROOT> |
601 | | - <CCSM_CPRNC>/gpfs/alpine/cli133/world-shared/e3sm/tools/cprnc/cprnc</CCSM_CPRNC> |
| 736 | + <BASELINE_ROOT>/lustre/orion/cli115/world-shared/e3sm/baselines/crusher/$COMPILER</BASELINE_ROOT> |
| 737 | + <CCSM_CPRNC>/lustre/orion/cli115/world-shared/e3sm/tools/cprnc/cprnc</CCSM_CPRNC> |
602 | 738 | <GMAKE_J>8</GMAKE_J> |
603 | 739 | <NTEST_PARALLEL_JOBS>1</NTEST_PARALLEL_JOBS> |
604 | 740 | <BATCH_SYSTEM>slurm</BATCH_SYSTEM> |
|
630 | 766 | <modules compiler="crayclang.*"> |
631 | 767 | <command name="reset"></command> |
632 | 768 | <command name="switch">PrgEnv-cray PrgEnv-cray/8.3.3</command> |
633 | | - <command name="switch">cce cce/14.0.2</command> |
| 769 | + <command name="switch">cce cce/15.0.1</command> |
| 770 | + <!-- craype module to address tcmalloc runtime errors at startup --> |
| 771 | + <!-- tcmalloc.cc:647 Attempt to free invalid pointer --> |
| 772 | + <command name="switch">craype craype/2.7.20</command> |
634 | 773 | </modules> |
635 | 774 | <modules compiler="crayclanggpu"> |
636 | 775 | <command name="load">craype-accel-amd-gfx90a</command> |
|
654 | 793 | <command name="load">rocm/5.4.0</command> |
655 | 794 | </modules> |
656 | 795 | <modules> |
657 | | - <command name="load">cray-python/3.9.12.1</command> |
| 796 | + <command name="load">cray-python/3.9.13.1</command> |
658 | 797 | <command name="load">subversion/1.14.1</command> |
659 | 798 | <command name="load">git/2.36.1</command> |
660 | 799 | <command name="load">cmake/3.21.3</command> |
661 | | - <command name="load">cray-hdf5-parallel/1.12.1.1</command> |
662 | | - <command name="load">cray-netcdf-hdf5parallel/4.8.1.1</command> |
663 | | - <command name="load">cray-parallel-netcdf/1.12.1.7</command> |
| 800 | + <command name="load">zlib/1.2.11</command> |
| 801 | + <command name="load">cray-hdf5-parallel/1.12.2.1</command> |
| 802 | + <command name="load">cray-netcdf-hdf5parallel/4.9.0.1</command> |
| 803 | + <command name="load">cray-parallel-netcdf/1.12.3.1</command> |
664 | 804 | </modules> |
665 | 805 | </module_system> |
666 | 806 | <RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR> |
|
692 | 832 | <env name="OMP_PROC_BIND">spread</env> |
693 | 833 | <env name="OMP_PLACES">threads</env> |
694 | 834 | </environment_variables> |
<!-- Per-compiler ADIOS2 install locations for the mpich MPI library, one block per compiler family (gnu, crayclang, amdclang). -->
<!-- NOTE(review): the crayclang path names crayclang-14.0.2 while this machine's crayclang module set now switches to cce/15.0.1; confirm the install is compatible. -->
| 835 | + <environment_variables compiler="gnu.*" mpilib="mpich"> |
| 836 | + <env name="ADIOS2_DIR">/lustre/orion/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/gcc-11.2.0</env> |
| 837 | + </environment_variables> |
| 838 | + <environment_variables compiler="crayclang.*" mpilib="mpich"> |
| 839 | + <env name="ADIOS2_DIR">/lustre/orion/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/crayclang-14.0.2</env> |
| 840 | + </environment_variables> |
| 841 | + <environment_variables compiler="amdclang.*" mpilib="mpich"> |
| 842 | + <env name="ADIOS2_DIR">/lustre/orion/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/amdclang-15.0.0</env> |
| 843 | + </environment_variables> |
695 | 844 | </machine> |
696 | 845 |
|
697 | 846 |
|
|
791 | 940 | <env name="OMP_PROC_BIND">spread</env> |
792 | 941 | <env name="OMP_PLACES">threads</env> |
793 | 942 | </environment_variables> |
<!-- Sets the ADIOS2 install location for the crayclang-scream compiler with mpich. -->
<!-- NOTE(review): this path is under /gpfs/alpine, whereas the other newly added ADIOS2_DIR entries in this file use /lustre/orion; confirm the intended filesystem for this machine. -->
| 943 | + <environment_variables compiler="crayclang-scream" mpilib="mpich"> |
| 944 | + <env name="ADIOS2_DIR">/gpfs/alpine/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/crayclang-14.0.2</env> |
| 945 | + </environment_variables> |
794 | 946 | </machine> |
795 | 947 |
|
796 | 948 |
|
|
876 | 1028 | <env name="OMP_PROC_BIND">spread</env> |
877 | 1029 | <env name="OMP_PLACES">threads</env> |
878 | 1030 | </environment_variables> |
<!-- Sets the ADIOS2 install location for the crayclang-scream compiler with mpich. -->
<!-- NOTE(review): this path is under /gpfs/alpine, whereas the other newly added ADIOS2_DIR entries in this file use /lustre/orion; confirm the intended filesystem for this machine. -->
| 1031 | + <environment_variables compiler="crayclang-scream" mpilib="mpich"> |
| 1032 | + <env name="ADIOS2_DIR">/gpfs/alpine/cli133/world-shared/3rdparty/adios2/2.8.3.patch/cray-mpich-8.1.17/crayclang-14.0.2</env> |
| 1033 | + </environment_variables> |
879 | 1034 | </machine> |
880 | 1035 |
|
881 | 1036 |
|
|
4655 | 4810 | <DOUT_S_ROOT>$CIME_OUTPUT_ROOT/archive/$CASE</DOUT_S_ROOT> |
4656 | 4811 | <BASELINE_ROOT>/home/baselines/$COMPILER</BASELINE_ROOT> |
4657 | 4812 | <CCSM_CPRNC>/home/tools/cprnc/cprnc</CCSM_CPRNC> |
4658 | | - <GMAKE_J>24</GMAKE_J> |
| 4813 | + <GMAKE_J>20</GMAKE_J> |
4659 | 4814 | <TESTS>e3sm_developer</TESTS> |
4660 | 4815 | <NTEST_PARALLEL_JOBS>8</NTEST_PARALLEL_JOBS> |
4661 | 4816 | <BATCH_SYSTEM>slurm</BATCH_SYSTEM> |
|
0 commit comments