Commit fa89f2f

Add profiling section for LUMI
Added profiling instructions for LUMI using rocprof and omniperf.
1 parent 1013cd4 commit fa89f2f

README.md

Lines changed: 80 additions & 0 deletions
# Profiling on LUMI

## rocprof

The following batch script runs the `opalx` executable on a LUMI-G node under `rocprof` with HIP API tracing enabled (`--hip-trace`). A small `select_gpu` wrapper gives each MPI rank its own GPU:

```
#!/bin/bash -l
#
#SBATCH --job-name=opalx1
#SBATCH --error=opalx-%j.error
#SBATCH --output=opalx-2-%j.out
#SBATCH --time=00:05:00
#SBATCH --partition=standard-g
#SBATCH --nodes=1
#SBATCH --ntasks-per-core=1
#SBATCH -c 56 --threads-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --gpus-per-node=8
#SBATCH --account=project_465001705
#SBATCH --hint=nomultithread
#SBATCH --exclusive

# CPU cores closest to the eight GCDs on a LUMI-G node
CPU_BIND="map_cpu:49,57,17,25,1,9,33,41"
# enable GPU-aware MPI in Cray MPICH
export MPICH_GPU_SUPPORT_ENABLED=1

ulimit -s unlimited
export EXE_DIR=/users/adelmann/sandbox/opalx/build/src/
module load cray-python/3.11.7
module use /appl/local/containers/test-modules
module load LUMI/24.03 partition/G cpeAMD rocm/6.1.3 buildtools/24.03

# wrapper that maps each MPI rank to its own GPU
cat << EOF > select_gpu
#!/bin/bash
export HIP_VISIBLE_DEVICES=\$SLURM_LOCALID
exec \$*
EOF
chmod +x ./select_gpu
srun --cpu-bind=${CPU_BIND} ./select_gpu rocprof --hip-trace ${EXE_DIR}/opalx input.in --info 5
rm -rf ./select_gpu
```
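
With `--hip-trace`, `rocprof` writes a CSV summary of the HIP API calls plus a JSON timeline. A minimal post-run sketch, assuming the default `results` output basename (exact file names vary between ROCm releases; pass `-o` to choose your own):

```
# Assumes rocprof's default output basename "results" (varies by ROCm version).
column -s, -t results.hip_stats.csv | less   # per-call HIP API statistics
# results.json is a Chrome-trace file: load it at https://ui.perfetto.dev
# or in chrome://tracing to browse the HIP and kernel timeline.
```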

## omniperf

The same job layout works for `omniperf`; this script additionally loads the `omniperf` module and collects a roofline-only profile (`--roof-only`) with kernel names recorded:

```
#!/bin/bash -l
#
#SBATCH --job-name=opalx1
#SBATCH --error=opalx-%j.error
#SBATCH --output=opalx-2-%j.out
#SBATCH --time=00:05:00
#SBATCH --partition=standard-g
#SBATCH --nodes=1
#SBATCH --ntasks-per-core=1
#SBATCH -c 56 --threads-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --gpus-per-node=8
#SBATCH --account=project_465001705
#SBATCH --hint=nomultithread
#SBATCH --exclusive

CPU_BIND="map_cpu:49,57,17,25,1,9,33,41"
export MPICH_GPU_SUPPORT_ENABLED=1

ulimit -s unlimited
export EXE_DIR=/users/adelmann/sandbox/opalx/build/src/
module load cray-python/3.11.7
module use /appl/local/containers/test-modules
module load LUMI/24.03 partition/G cpeAMD rocm/6.1.3 buildtools/24.03
module load omniperf

# wrapper that maps each MPI rank to its own GPU
cat << EOF > select_gpu
#!/bin/bash
#export ROCR_VISIBLE_DEVICES=\$SLURM_LOCALID
export HIP_VISIBLE_DEVICES=\$SLURM_LOCALID
exec \$*
EOF
chmod +x ./select_gpu
srun --cpu-bind=${CPU_BIND} ./select_gpu omniperf profile --name opalx --roof-only --kernel-names -- ${EXE_DIR}/opalx input.in --info 5
rm -rf ./select_gpu
```
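
`omniperf` stores its output under a `workloads/<name>/<arch>/` directory created in the working directory; with `--roof-only` the roofline plots are written there directly. A sketch of how to inspect the results, assuming the profile above and an MI200-class architecture directory (the exact path depends on the omniperf version):

```
# Assumes the profile created above landed in ./workloads/opalx/MI200/
# (directory layout differs slightly between omniperf versions).
ls workloads/opalx/MI200/                 # roofline PDFs from --roof-only
omniperf analyze --path workloads/opalx/MI200/
```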

# Profiling IPPL MPI calls

You can use the mpiP tool (https://github.com/LLNL/mpiP) to get statistics about the MPI calls in IPPL.
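
A minimal sketch of the preload approach, assuming mpiP was built as a shared library (the install path and report name are illustrative; mpiP can also be linked in at build time):

```
# Assumption: libmpiP.so was built under $HOME/mpiP (path is illustrative).
srun --export=ALL,LD_PRELOAD=$HOME/mpiP/lib/libmpiP.so -n 8 ./my_ippl_app
# At MPI_Finalize, mpiP writes a text report like ./my_ippl_app.8.<pid>.1.mpiP
```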
