srun ./select_gpu ${EXE_DIR}/TestGaussian 1024 1024 1024 pencils a2av no-reorder
rm -rf ./select_gpu
```

# Profiling on LUMI

## rocprof

An example batch script that runs OPAL-X under `rocprof` with HIP API tracing enabled:

```
#!/bin/bash -l
#
#SBATCH --job-name=opalx1
#SBATCH --error=opalx-%j.error
#SBATCH --output=opalx-2-%j.out
#SBATCH --time=00:05:00
#SBATCH --partition=standard-g
#SBATCH --nodes 1
#SBATCH --ntasks-per-core=1
#SBATCH -c 56 --threads-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --gpus-per-node=8
#SBATCH --account=project_465001705
#SBATCH --hint=nomultithread
#SBATCH --exclusive
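# Cores closest to each of the eight GCDs on a LUMI-G node (not used below; pass --cpu-bind=${CPU_BIND} to srun to apply it)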
CPU_BIND="map_cpu:49,57,17,25,1,9,33,41"
export MPICH_GPU_SUPPORT_ENABLED=1

ulimit -s unlimited
export EXE_DIR=/users/adelmann/sandbox/opalx/build/src/
module load cray-python/3.11.7
module use /appl/local/containers/test-modules
module load LUMI/24.03 partition/G cpeAMD rocm/6.1.3 buildtools/24.03

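# Wrapper that gives each MPI rank its own GPU, selected via the rank's node-local ID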
cat << EOF > select_gpu
#!/bin/bash
export HIP_VISIBLE_DEVICES=\$SLURM_LOCALID
exec \$*
EOF
chmod +x ./select_gpu
srun ./select_gpu rocprof --hip-trace ${EXE_DIR}/opalx input.in --info 5
rm -rf ./select_gpu
```
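
The `--hip-trace` option records the HIP API calls and kernel launches of the run. A minimal sketch of locating and viewing the trace afterwards, assuming rocprof's default `results` output prefix (use `-o` to choose another name):

```
# Trace and statistics files are written to the directory the job ran in
ls results.*        # e.g. results.json, results.stats.csv, results.hip_stats.csv
# results.json is a Chrome-trace file; open it in chrome://tracing or at https://ui.perfetto.dev
```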

## omniperf

An example batch script that collects a roofline profile with `omniperf` (`--roof-only`):

```
#!/bin/bash -l
#
#SBATCH --job-name=opalx1
#SBATCH --error=opalx-%j.error
#SBATCH --output=opalx-2-%j.out
#SBATCH --time=00:05:00
#SBATCH --partition=standard-g
#SBATCH --nodes 1
#SBATCH --ntasks-per-core=1
#SBATCH -c 56 --threads-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --gpus-per-node=8
#SBATCH --account=project_465001705
#SBATCH --hint=nomultithread
#SBATCH --exclusive
CPU_BIND="map_cpu:49,57,17,25,1,9,33,41"
export MPICH_GPU_SUPPORT_ENABLED=1

ulimit -s unlimited
export EXE_DIR=/users/adelmann/sandbox/opalx/build/src/
module load cray-python/3.11.7
module use /appl/local/containers/test-modules
module load LUMI/24.03 partition/G cpeAMD rocm/6.1.3 buildtools/24.03
module load omniperf
cat << EOF > select_gpu
#!/bin/bash
#export ROCR_VISIBLE_DEVICES=\$SLURM_LOCALID
export HIP_VISIBLE_DEVICES=\$SLURM_LOCALID
exec \$*
EOF
chmod +x ./select_gpu
srun ./select_gpu omniperf profile --name opalx --roof-only --kernel-names -- ${EXE_DIR}/opalx input.in --info 5
rm -rf ./select_gpu
```
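
`omniperf profile` stores its output under a `workloads/<name>/` directory created where the job was launched; in `--roof-only` mode the roofline plots are written there as PDF files. A minimal sketch of inspecting the results, assuming the MI250X architecture subdirectory is named `MI200` (the exact name can differ between omniperf versions), and noting that `omniperf analyze` is most useful on a profile collected without `--roof-only`:

```
# Roofline PDFs and collected counter data end up under workloads/<name>/<arch>/
ls workloads/opalx/*/
# Text report of the collected metrics (for full profiles, i.e. without --roof-only)
omniperf analyze -p workloads/opalx/MI200
```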

# Profiling IPPL MPI calls

You can use the mpiP tool (https://github.com/LLNL/mpiP) to get statistics about the MPI calls in IPPL.