Tools/machines/greatlakes-umich/greatlakes_v100.sbatch (3 changes: 1 addition & 2 deletions)
@@ -26,8 +26,7 @@ INPUTS=inputs
# per node are 2x 2.4 GHz Intel Xeon Gold 6148
# note: the system seems to only expose cores (20 per socket),
# not hyperthreads (40 per socket)
-export SRUN_CPUS_PER_TASK=20
-export OMP_NUM_THREADS=${SRUN_CPUS_PER_TASK}
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}

# GPU-aware MPI optimizations
GPU_AWARE_MPI="amrex.use_gpu_aware_mpi=1"
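Not part of the diff above, but a quick way to confirm the new wiring: run a one-task step inside the allocation and check that SLURM_CPUS_PER_TASK (populated from the job's --cpus-per-task request) and OMP_NUM_THREADS agree. A minimal sketch, assuming it is placed in the job script after the export; on Great Lakes the expected value is presumably 20, matching the removed SRUN_CPUS_PER_TASK setting.

# sanity check (illustrative, not part of this PR): both values should match --cpus-per-task
srun --ntasks=1 bash -c 'echo "SLURM_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK} OMP_NUM_THREADS=${OMP_NUM_THREADS}"'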
Tools/machines/karolina-it4i/karolina_gpu.sbatch (7 changes: 3 additions & 4 deletions)
@@ -25,13 +25,12 @@
#SBATCH -o stdout_%j
#SBATCH -e stderr_%j

-# OpenMP threads per MPI rank
-export OMP_NUM_THREADS=16
-export SRUN_CPUS_PER_TASK=16

# set user rights to u=rwx;g=r-x;o=---
umask 0027

+# OpenMP threads per MPI rank
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}

# executable & inputs file or python interpreter & PICMI script here
EXE=./warpx.rz
INPUTS=./inputs_rz
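A possible hardening, not applied in this PR: give the export a default so the script still sets a sane thread count if SLURM_CPUS_PER_TASK is ever unset (for instance, when the script is adapted for a submission that does not request --cpus-per-task). The value 16 below simply mirrors the old hard-coded setting and is only illustrative.

# hypothetical fallback (not in this PR): default to 16 threads if Slurm does not set the variable
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-16}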
Tools/machines/lonestar6-tacc/lonestar6_a100.sbatch (3 changes: 2 additions & 1 deletion)
@@ -14,6 +14,7 @@
#SBATCH -q regular
#SBATCH -C gpu
#SBATCH --exclusive
+#SBATCH --cpus-per-task=32
#SBATCH --gpu-bind=none
#SBATCH --gpus-per-node=4
#SBATCH -o WarpX.o%j
@@ -27,7 +28,7 @@ INPUTS=inputs_small
export MPICH_OFI_NIC_POLICY=GPU

# threads for OpenMP and threaded compressors per MPI rank
-export SRUN_CPUS_PER_TASK=32
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}

# depends on https://github.com/ECP-WarpX/WarpX/issues/2009
#GPU_AWARE_MPI="amrex.the_arena_is_managed=0 amrex.use_gpu_aware_mpi=1"
Tools/machines/perlmutter-nersc/perlmutter_cpu.sbatch (5 changes: 3 additions & 2 deletions)
@@ -13,6 +13,8 @@
#SBATCH -A <proj>
#SBATCH -q regular
#SBATCH -C cpu
+# 8 cores per chiplet, 2x SMP
+#SBATCH --cpus-per-task=16
#SBATCH --ntasks-per-node=16
#SBATCH --exclusive
#SBATCH -o WarpX.o%j
@@ -30,10 +32,9 @@ INPUTS=inputs_small
# This will be our MPI rank assignment (2x8 is 16 ranks/node).

# threads for OpenMP and threaded compressors per MPI rank
-export SRUN_CPUS_PER_TASK=16 # 8 cores per chiplet, 2x SMP
export OMP_PLACES=threads
export OMP_PROC_BIND=spread
-export OMP_NUM_THREADS=${SRUN_CPUS_PER_TASK}
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}

srun --cpu-bind=cores \
${EXE} ${INPUTS} \
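As a bookkeeping note for the Perlmutter CPU script (not part of the diff), the new header and the script's own comments account for the whole node: 2 sockets x 8 chiplets give the 16 ranks per node, and 8 cores per chiplet with 2-way SMT give the 16 CPUs per task.

# per-node accounting implied by the settings above (illustrative only)
echo $(( 16 * 16 ))   # ranks/node x cpus-per-task = 256 hardware threads, the full node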
Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch (4 changes: 2 additions & 2 deletions)
@@ -17,6 +17,7 @@
# A100 80GB (256 nodes)
#S BATCH -C gpu&hbm80g
#SBATCH --exclusive
+#SBATCH --cpus-per-task=16
# ideally single:1, but NERSC cgroups issue
#SBATCH --gpu-bind=none
#SBATCH --ntasks-per-node=4
@@ -33,8 +34,7 @@ export MPICH_OFI_NIC_POLICY=GPU

# threads for OpenMP and threaded compressors per MPI rank
# note: 16 avoids hyperthreading (32 virtual cores, 16 physical)
-export SRUN_CPUS_PER_TASK=16
-export OMP_NUM_THREADS=${SRUN_CPUS_PER_TASK}
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}

# GPU-aware MPI optimizations
GPU_AWARE_MPI="amrex.use_gpu_aware_mpi=1"
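One more optional check, again not part of this change set: print each task's CPU affinity mask to confirm that the four ranks per node land on disjoint sets of cores. A sketch, assuming it is run from the job script next to the srun line that launches WarpX.

# illustrative check (not in this PR): show the CPU mask each rank is bound to
srun --cpu-bind=cores bash -c 'echo "rank ${SLURM_PROCID}: $(grep Cpus_allowed_list /proc/self/status)"'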
Tools/machines/tioga-llnl/tioga_mi300a.sbatch (4 changes: 2 additions & 2 deletions)
@@ -12,6 +12,7 @@
#SBATCH -J WarpX
#S BATCH -A <proj> # project name not needed yet
#SBATCH -p mi300a
+#SBATCH --cpus-per-task=16
#SBATCH --gpu-bind=none
#SBATCH --ntasks-per-node=4
#SBATCH --gpus-per-node=4
@@ -27,8 +28,7 @@ export MPICH_OFI_NIC_POLICY=GPU

# threads for OpenMP and threaded compressors per MPI rank
# note: 16 avoids hyperthreading (32 virtual cores, 16 physical)
-export SRUN_CPUS_PER_TASK=16
-export OMP_NUM_THREADS=${SRUN_CPUS_PER_TASK}
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}

# GPU-aware MPI optimizations
GPU_AWARE_MPI="amrex.use_gpu_aware_mpi=1"