@@ -3623,103 +3623,111 @@ commented out until "*** No rule to make target '.../libadios2pio-nm-lib.a'" iss
36233623 <DESC >ALCF Aurora, 10624 nodes, 2x52c SPR, 6x2s PVC, 2x512GB DDR5, 2x64GB CPU-HBM, 6x128GB GPU-HBM, Slingshot 11, PBSPro</DESC >
36243624 <NODENAME_REGEX >aurora-uan-.*</NODENAME_REGEX >
36253625 <OS >LINUX</OS >
3626- <COMPILERS >oneapi-ifx ,oneapi-ifxgpu,gnu </COMPILERS >
3627- <MPILIBS >mpich</MPILIBS >
3628- <CHARGE_ACCOUNT >CSC249ADSE15_CNDA</ CHARGE_ACCOUNT >
3629- <SAVE_TIMING_DIR >/lus/flare/projects/CSC249ADSE15_CNDA/performance_archive </SAVE_TIMING_DIR >
3626+ <COMPILERS >oneapi-ifxgpu ,oneapi-ifx </COMPILERS >
3627+ <MPILIBS >mpich,mpich1024 </MPILIBS >
3628+ <PROJECT >E3SM_Dec</ PROJECT >
3629+ <SAVE_TIMING_DIR >/lus/flare/projects/E3SMinput/baselines </SAVE_TIMING_DIR >
36303630 <SAVE_TIMING_DIR_PROJECTS >.*</SAVE_TIMING_DIR_PROJECTS >
3631- <CIME_OUTPUT_ROOT >/lus/flare/projects/CSC249ADSE15_CNDA /$USER/scratch</CIME_OUTPUT_ROOT >
3632- <DIN_LOC_ROOT >/lus/flare/projects/CSC249ADSE15_CNDA/inputdata </DIN_LOC_ROOT >
3633- <DIN_LOC_ROOT_CLMFORC >/lus/flare/projects/CSC249ADSE15_CNDA/inputdata /atm/datm7</DIN_LOC_ROOT_CLMFORC >
3631+ <CIME_OUTPUT_ROOT >/lus/flare/projects/$PROJECT /$USER/scratch</CIME_OUTPUT_ROOT >
3632+ <DIN_LOC_ROOT >/lus/flare/projects/E3SMinput/data </DIN_LOC_ROOT >
3633+ <DIN_LOC_ROOT_CLMFORC >/lus/flare/projects/E3SMinput/data /atm/datm7</DIN_LOC_ROOT_CLMFORC >
36343634 <DOUT_S_ROOT >$CIME_OUTPUT_ROOT/archive/$CASE</DOUT_S_ROOT >
3635- <BASELINE_ROOT >/lus/flare/projects/CSC249ADSE15_CNDA /baselines/$COMPILER</BASELINE_ROOT >
3636- <CCSM_CPRNC >/lus/flare/projects/CSC249ADSE15_CNDA /tools/cprnc/cprnc</CCSM_CPRNC >
3635+ <BASELINE_ROOT >/lus/flare/projects/E3SMinput /baselines/$COMPILER</BASELINE_ROOT >
3636+ <CCSM_CPRNC >/lus/flare/projects/E3SMinput /tools/cprnc/cprnc</CCSM_CPRNC >
36373637 <GMAKE_J >16</GMAKE_J >
3638- <TESTS >e3sm_developer </TESTS >
3638+ <TESTS >e3sm_integration </TESTS >
36393639 <NTEST_PARALLEL_JOBS >4</NTEST_PARALLEL_JOBS >
36403640 <BATCH_SYSTEM >pbspro</BATCH_SYSTEM >
36413641 <SUPPORTED_BY >e3sm</SUPPORTED_BY >
3642- <MAX_TASKS_PER_NODE >208 </MAX_TASKS_PER_NODE >
3643- <MAX_TASKS_PER_NODE compiler =" oneapi-ifxgpu" >104 </MAX_TASKS_PER_NODE >
3644- <MAX_MPITASKS_PER_NODE >104 </MAX_MPITASKS_PER_NODE >
3645- <MAX_MPITASKS_PER_NODE compiler =" oneapi-ifxgpu" >48 </MAX_MPITASKS_PER_NODE >
3646- <PROJECT_REQUIRED >FALSE </PROJECT_REQUIRED >
3642+ <MAX_TASKS_PER_NODE >102 </MAX_TASKS_PER_NODE >
3643+ <MAX_TASKS_PER_NODE compiler =" oneapi-ifxgpu" >96 </MAX_TASKS_PER_NODE >
3644+ <MAX_MPITASKS_PER_NODE >102 </MAX_MPITASKS_PER_NODE >
3645+ <MAX_MPITASKS_PER_NODE compiler =" oneapi-ifxgpu" >12 </MAX_MPITASKS_PER_NODE >
3646+ <PROJECT_REQUIRED >TRUE </PROJECT_REQUIRED >
36473647 <mpirun mpilib =" default" >
3648- <executable >mpiexec</executable >
3649- <!-- executable>numactl -m 2-3 mpiexec</executable--><!-- for HBM runs-->
3650- <arguments >
3651- <arg name =" total_num_tasks" >-np {{ total_tasks }} --label</arg >
3652- <arg name =" ranks_per_node" >-ppn {{ tasks_per_node }}</arg >
3653- <arg name =" ranks_bind" >-envall </arg >
3654- <arg name =" threads_per_rank" >-d $ENV{OMP_NUM_THREADS}</arg >
3655- <arg name =" gpu_maps" >$ENV{GPU_TILE_COMPACT}</arg >
3656- </arguments >
3648+ <executable >mpiexec</executable >
3649+ <!-- executable>numactl -m 2-3 mpiexec</executable--><!-- for HBM runs-->
3650+ <arguments >
3651+ <arg name =" total_num_tasks" >-np {{ total_tasks }} --label</arg >
3652+ <arg name =" ranks_per_node" >-ppn {{ tasks_per_node }}</arg >
3653+ <arg name =" ranks_bind" >--cpu-bind $ENV{RANKS_BIND} </arg >
3654+ <arg name =" threads_per_rank" >-d $ENV{OMP_NUM_THREADS} $ENV{RLIMITS }</arg >
3655+ <arg name =" gpu_maps" >$ENV{GPU_TILE_COMPACT}</arg >
3656+ </arguments >
36573657 </mpirun >
36583658 <module_system type =" module" allow_error =" true" >
3659- <init_path lang =" sh" >/lus/flare/projects/CSC249ADSE15_CNDA/modules/lmod.sh</init_path >
3660- <init_path lang =" csh" >/soft/sunspot_migrate/soft/packaging/lmod/lmod/init/csh</init_path >
3661- <init_path lang =" python" >/soft/sunspot_migrate/soft/packaging/lmod/lmod/init/env_modules_python.py</init_path >
3662- <cmd_path lang =" sh" >module</cmd_path >
3663- <cmd_path lang =" csh" >module</cmd_path >
3664- <cmd_path lang =" python" >/soft/sunspot_migrate/soft/packaging/lmod/lmod/libexec/lmod python</cmd_path >
3665- <modules >
3666- <command name =" load" >cmake</command >
3667- </modules >
3668- <modules compiler =" !gnu" >
3669- <command name =" load" >oneapi/eng-compiler/2024.04.15.002</command >
3670- </modules >
3671- <modules compiler =" oneapi-ifxgpu" >
3672- <command name =" load" >kokkos/git.7ff87a5-omp-sycl</command >
3673- </modules >
3674- <modules compiler =" gnu" >
3675- <command name =" unload" >spack-pe-gcc cmake</command >
3676- <command name =" load" >gcc/10.3.0</command >
3659+ <init_path lang =" sh" >/usr/share/lmod/lmod/init/sh</init_path >
3660+ <init_path lang =" csh" >/usr/share/lmod/lmod/init/csh</init_path >
3661+ <init_path lang =" python" >/usr/share/lmod/lmod/init/env_modules_python.py</init_path >
3662+ <cmd_path lang =" sh" >module</cmd_path >
3663+ <cmd_path lang =" csh" >module</cmd_path >
3664+ <cmd_path lang =" python" >/usr/share/lmod/lmod/libexec/lmod python</cmd_path >
3665+ <modules >
3666+ <command name =" use" >/lus/flare/projects/E3SMinput/soft/modulefiles</command >
3667+ <command name =" load" >cmake/3.31.8</command >
3668+ <command name =" load" >oneapi/release/2025.2.0</command >
3669+ <command name =" load" >netcdf/4.9.3c-4.6.2f</command >
3670+ <command name =" load" >pnetcdf/1.14.0</command >
3671+ <command name =" load" >adios2/2.10.2</command >
36773672 </modules >
3678- </ module_system >
3679- < RUNDIR >$CIME_OUTPUT_ROOT/$CASE/run</ RUNDIR >
3680- < EXEROOT >$CIME_OUTPUT_ROOT/$CASE/bld</ EXEROOT >
3681- < environment_variables >
3682- < env name = " NETCDF_C_PATH " >/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002</ env >
3683- < env name = " NETCDF_FORTRAN_PATH " >/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002</ env >
3684- < env name = " PNETCDF_PATH " >/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002</ env >
3685- < env name = " LD_LIBRARY_PATH " >/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002/lib:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002/lib:$ENV{LD_LIBRARY_PATH}</ env >
3686- <env name =" PATH " >/lus/flare/projects/CSC249ADSE15_CNDA/software/pnetcdf/1.12.3/oneapi.eng.2024.04.15.002/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-fortran/4.6.1/oneapi.eng.2024.04.15.002/bin:/lus/flare/projects/CSC249ADSE15_CNDA/software/netcdf-c/4.9.2/oneapi.eng.2024.04.15.002/bin:$ENV{PATH} </env >
3687- </ environment_variables >
3688- < environment_variables DEBUG = " TRUE " >
3689- <env name =" HYDRA_TOPO_DEBUG " >1 </env >
3673+ < modules mpilib = " mpich1024 " >
3674+ < command name = " load " >mpich-config/collective-tuning/1024</ command >
3675+ </ modules >
3676+ </ module_system >
3677+ < RUNDIR >$CIME_OUTPUT_ROOT/$CASE/run</ RUNDIR >
3678+ < EXEROOT >$CIME_OUTPUT_ROOT/$CASE/bld</ EXEROOT >
3679+ < MAX_GB_OLD_TEST_DATA >0</ MAX_GB_OLD_TEST_DATA >
3680+ < environment_variables >
3681+ <env name =" MPIR_CVAR_CH4_OFI_EAGER_THRESHOLD " > </env >
3682+ < env name = " FI_CXI_DEFAULT_CQ_SIZE " >131072</ env >
3683+ < env name = " FI_CXI_CQ_FILL_PERCENT " >20</ env >
3684+ <env name =" RLIMITS " > </env >
36903685 </environment_variables >
36913686 <environment_variables compiler =" oneapi-ifxgpu" >
36923687 <env name =" ONEAPI_DEVICE_SELECTOR" >level_zero:gpu</env >
3693- <env name =" ONEAPI_MPICH_GPU" >NO_GPU</env >
3694- <env name =" MPIR_CVAR_ENABLE_GPU" >0</env >
3688+ <env name =" UR_L0_USE_DRIVER_INORDER_LISTS" >1</env ><!-- each UR_L0_* variable is set exactly once in this block -->
3689+ <env name =" UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS" >1</env >
3690+ <env name =" UR_L0_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE" >1</env >
3691+ <env name =" FI_MR_CACHE_MONITOR" >disabled</env >
3692+ <env name =" FI_CXI_OVFLOW_BUF_SIZE" >8388608</env >
3693+ <env name =" PALS_PING_PERIOD" >240</env >
3694+ <env name =" PALS_RPC_TIMEOUT" >240</env >
3695+ <env name =" SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE" >1</env >
3696+ <env name =" SYCL_PI_LEVEL_ZERO_DISABLE_USM_ALLOCATOR" >1</env >
3697+ <env name =" SYCL_PI_LEVEL_ZERO_USM_RESIDENT" >0x001</env >
3700+
3701+ <env name =" MPIR_CVAR_ENABLE_GPU" >1</env ><!-- 1 for oneapi-ifxgpu; the oneapi-ifx block sets this to 0 -->
36953702 <env name =" romio_cb_read" >disable</env >
36963703 <env name =" romio_cb_write" >disable</env >
3697- <env name =" SYCL_CACHE_PERSISTENT" >1</env >
36983704 <env name =" GATOR_INITIAL_MB" >4000MB</env >
36993705 <env name =" GATOR_DISABLE" >0</env >
3700- <env name =" GPU_TILE_COMPACT" >/soft/tools/mpi_wrapper_utils/gpu_tile_compact.sh</env >
3701- <env name =" FI_CXI_DEFAULT_CQ_SIZE" >131072</env >
3702- <env name =" FI_CXI_CQ_FILL_PERCENT" >20</env >
3703- <env name =" Kokkos_ROOT" >$ENV{KOKKOS_ROOT}</env >
3706+ <env name =" GPU_TILE_COMPACT" >/lus/flare/projects/E3SMinput/tools/mpi_wrapper_utils/gpu_tile_compact.sh</env >
3707+ <env name =" RANKS_BIND" >list:1-8:9-16:17-24:25-32:33-40:41-48:53-60:61-68:69-76:77-84:85-92:93-100 --gpu-bind list:0.0:0.1:1.0:1.1:2.0:2.1:3.0:3.1:4.0:4.1:5.0:5.1 --mem-bind list:0:0:0:0:0:0:1:1:1:1:1:1</env >
37043708 <env name =" ZES_ENABLE_SYSMAN" >1</env >
3705- <env name =" ZEX_NUMBER_OF_CCS" >0:4,1:4,2:4,3:4:4:4,5:4,6:4,7:4</env >
3709+ <!-- default is ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE: enable this to run 4 MPI/tile (i.e. 48 MPI/node)
3710+ <env name="ZEX_NUMBER_OF_CCS">0:4,1:4,2:4,3:4,4:4,5:4</env>-->
3711+ <!-- alternative with the FLAT hierarchy: <env name="ZE_FLAT_DEVICE_HIERARCHY">FLAT</env>
3712+ <env name="ZEX_NUMBER_OF_CCS">0:4,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4</env>-->
37063713 </environment_variables >
37073714 <environment_variables compiler =" oneapi-ifx" >
37083715 <env name =" LIBOMPTARGET_DEBUG" >0</env ><!-- default 0, max 5 -->
37093716 <env name =" OMP_TARGET_OFFLOAD" >DISABLED</env ><!-- default OMP_TARGET_OFFLOAD=MANDATORY-->
3710- <env name =" FI_CXI_DEFAULT_CQ_SIZE" >131072</env >
3711- <env name =" FI_CXI_CQ_FILL_PERCENT" >20</env >
37123717 <env name =" MPIR_CVAR_ENABLE_GPU" >0</env >
37133718 <env name =" GPU_TILE_COMPACT" > </env ><!-- left blank here; referenced by the mpirun gpu_maps argument as $ENV{GPU_TILE_COMPACT} -->
3719+ <env name =" RANKS_BIND" >core</env ><!-- referenced by the mpirun ranks_bind argument as the value of its cpu-bind option -->
37143720 </environment_variables >
3715- <environment_variables BUILD_THREADED =" TRUE" compiler = " !gnu " >
3716- <env name =" KMP_AFFINITY" >verbose, granularity=thread ,balanced</env >
3721+ <environment_variables BUILD_THREADED =" TRUE" >
3722+ <env name =" KMP_AFFINITY" >granularity=core ,balanced</env >
37173723 <env name =" OMP_STACKSIZE" >128M</env >
37183724 </environment_variables >
3719- <environment_variables BUILD_THREADED =" TRUE" compiler =" gnu" >
3720- <env name =" OMP_PLACES" >threads</env >
3721- <env name =" OMP_STACKSIZE" >128M</env >
3725+ <environment_variables DEBUG =" TRUE" >
3726+ <env name =" RLIMITS" >--rlimits CORE</env ><!-- referenced by the mpirun threads_per_rank argument as $ENV{RLIMITS}; the unconditional environment_variables block sets RLIMITS blank -->
37223727 </environment_variables >
3728+ <resource_limits DEBUG =" TRUE" >
3729+ <resource name =" RLIMIT_CORE" >-1</resource >
3730+ </resource_limits >
37233731 <resource_limits >
37243732 <resource name =" RLIMIT_STACK" >-1</resource >
37253733 </resource_limits >
0 commit comments