diff --git a/.github/workflows/darshan_ldms_test_ci.yml b/.github/workflows/darshan_ldms_test_ci.yml index 08b0540a3..c7ff1500f 100644 --- a/.github/workflows/darshan_ldms_test_ci.yml +++ b/.github/workflows/darshan_ldms_test_ci.yml @@ -30,6 +30,7 @@ jobs: sudo apt-get update -y sudo apt-get install openmpi-bin libopenmpi-dev sudo apt-get install libjansson-dev + sudo apt-get install python3-docutils - name: Clone LDMS uses: actions/checkout@v3 with: diff --git a/.github/workflows/end_to_end_regression_aurora.yml b/.github/workflows/end_to_end_regression_aurora.yml new file mode 100644 index 000000000..d731b2f96 --- /dev/null +++ b/.github/workflows/end_to_end_regression_aurora.yml @@ -0,0 +1,25 @@ +# Triggers the ALCF GitLab CI pipeline (project 174) on pushes to main that touch darshan-runtime, or on manual dispatch. +name: End-to-end Testing (regression) Aurora + +on: + push: + branches: + - main + paths: + - darshan-runtime/** + - .github/workflows/end_to_end_regression_aurora.yml + workflow_dispatch: + +jobs: + trigger_alcf_aurora_regression_ci: + runs-on: ubuntu-latest + steps: + # github.ref_name is the short branch/tag name of the triggering ref; read it from the + # github context instead of overriding the reserved GITHUB_REF_NAME variable via GITHUB_ENV. + - uses: eic/trigger-gitlab-ci@v3 + with: + url: https://gitlab-ci.alcf.anl.gov + project_id: 174 + token: ${{ secrets.ALCF_GITLAB_CI_TOKEN }} + variables: | + GITHUB_REF_NAME="${{ github.ref_name }}" diff --git a/darshan-test/regression/README.CRAY.txt b/darshan-test/regression/README.CRAY.txt index 250099619..f266f2278 100644 --- a/darshan-test/regression/README.CRAY.txt +++ b/darshan-test/regression/README.CRAY.txt @@ -11,17 +11,17 @@ To run regression tests: examples: # darshan runtime - ../configure --with-mem-align=64 --with-log-path=/projects/radix-io/snyder/darshan-logs --prefix=/home/snyder/working/darshan/install-theta --with-jobid-env=COBALT_JOBID --disable-cuserid --host=x86_64 CC=cc + ../configure --with-mem-align=64 --with-log-path=/projects/radix-io/snyder/darshan-logs --prefix=/home/snyder/working/darshan/install-polaris --with-jobid-env=PBS_JOBID
--disable-cuserid --host=x86_64 CC=cc make install # darshan util - ../configure --prefix=/home/snyder/working/darshan/install-theta + ../configure --prefix=/home/snyder/working/darshan/install-polaris make install - start a screen session by running "screen" note: this is suggested because the tests may take a while to complete depending on scheduler availability -- run regression tests using the corresponding system configuration (e.g., cray-module-alcf-theta for ALCF Theta) - ./run-all.sh /home/snyder/working/darshan/install-theta /projects/radix-io/snyder/darshan-test cray-module-alcf-theta +- run regression tests using the corresponding system configuration (e.g., alcf-polaris-cray-module for ALCF Polaris) + ./run-all.sh /home/snyder/working/darshan/install-polaris /projects/radix-io/snyder/darshan-test alcf-polaris-cray-module diff --git a/darshan-test/regression/README.txt b/darshan-test/regression/README.txt index e85821e26..530b9c9c1 100644 --- a/darshan-test/regression/README.txt +++ b/darshan-test/regression/README.txt @@ -16,15 +16,15 @@ The master script must be executed with three arguments: profiling configuration hooks on a standard workstation) - workstation-ld-preload (for dynamic instrumentation via LD_PRELOAD on a standard workstation) - - cray-module-alcf-theta (for instrumentation using a Darshan - Cray module on the Theta system @ ALCF only) - - cray-module-alcf-polaris (for instrumentation using a Darshan + - alcf-aurora-ld-preload (for instrumentation using LD_PRELOAD on the + Aurora system @ ALCF only) + - alcf-polaris-cray-module (for instrumentation using a Darshan Cray module on the Polaris system @ ALCF only) - - cray-module-nersc-perlmutter (for instrumentation using a Darshan + - nersc-perlmutter-cray-module (for instrumentation using a Darshan Cray module on the Perlmutter system @ NERSC only) - - cray-module-olcf-frontier (for instrumentation using a Darshan + - olcf-frontier-cray-module (for instrumentation using a Darshan Cray 
module on the Frontier system @ OLCF only) - - cray-module-olcf-crusher (for instrumentation using a Darshan + - olcf-crusher-cray-module (for instrumentation using a Darshan Cray module on the Crusher system @ OLCF only) The platform type should map to a subdirectory containing scripts diff --git a/darshan-test/regression/alcf-aurora-ld-preload/env.sh b/darshan-test/regression/alcf-aurora-ld-preload/env.sh new file mode 100755 index 000000000..79cb8774b --- /dev/null +++ b/darshan-test/regression/alcf-aurora-ld-preload/env.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# General notes +####################### + +# Script to set up the environment for tests on this platform. Must export +# the following environment variables: +# +# DARSHAN_CC: command to compile C programs +# DARSHAN_CXX: command to compile C++ programs +# DARSHAN_F90: command to compile Fortran90 programs +# DARSHAN_F77: command to compile Fortran77 programs +# DARSHAN_RUNJOB: command to execute a job and wait for its completion + +# This script may load optional modules (as in a Cray PE), set LD_PRELOAD +# variables (as in a dynamically linked environment), or generate mpicc +# wrappers (as in a statically linked environment). + +# Notes specific to this platform (alcf-aurora-ld-preload) +######################## +# Use default compilers on Aurora and ultimately use LD_PRELOAD +# (in the pbs-submit script) to instrument binaries. 
+# +# RUNJOB is responsible for submitting a PBS job, waiting for its +# completion, and checking its return status + +export DARSHAN_CC=mpicc +export DARSHAN_CXX=mpicxx +export DARSHAN_F77=mpifort +export DARSHAN_F90=mpifort + +export DARSHAN_RUNJOB=$DARSHAN_TESTDIR/$DARSHAN_PLATFORM/runjob.sh diff --git a/darshan-test/regression/alcf-aurora-ld-preload/pbs-submit.sh b/darshan-test/regression/alcf-aurora-ld-preload/pbs-submit.sh new file mode 100755 index 000000000..03058ee56 --- /dev/null +++ b/darshan-test/regression/alcf-aurora-ld-preload/pbs-submit.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +nprocs=$DARSHAN_DEFAULT_NPROCS +nnodes=`wc -l < $PBS_NODEFILE` +ppn=$((nprocs / nnodes)) + +mpiexec -n $nprocs --ppn $ppn --env LD_PRELOAD=$DARSHAN_RUNTIME_PATH/lib/libdarshan.so $DARSHAN_SCRIPT_ARGS +EXIT_STATUS=$? + +exit $EXIT_STATUS diff --git a/darshan-test/regression/alcf-aurora-ld-preload/runjob.sh b/darshan-test/regression/alcf-aurora-ld-preload/runjob.sh new file mode 100755 index 000000000..c642062d4 --- /dev/null +++ b/darshan-test/regression/alcf-aurora-ld-preload/runjob.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# Submit pbs-submit.sh as a PBS job running the command given in "$@", poll qstat until it +# fails or prints nothing, then exit 0 only if the job's recorded Exit_status is exactly 0. + +PROJ=radix-io + +# can't pass args to scripts with PBS, so we assign to an env +# var and reference that in the submit script +export DARSHAN_SCRIPT_ARGS="$@" + +# set list of env vars to pass through to PBS job +ENV_VAR_LIST="DARSHAN_LOGFILE,DARSHAN_DEFAULT_NPROCS,DARSHAN_SCRIPT_ARGS,DARSHAN_RUNTIME_PATH" +if [ -n "${DXT_ENABLE_IO_TRACE+defined}" ]; then + ENV_VAR_LIST="$ENV_VAR_LIST,DXT_ENABLE_IO_TRACE" +fi + +# submit job and wait for it to return +jobid=`qsub -A $PROJ -q debug -l select=1,walltime=0:10:00,filesystems=home:flare,daos=default -v $ENV_VAR_LIST -o $DARSHAN_TMP/$$-tmp.out -e $DARSHAN_TMP/$$-tmp.err $DARSHAN_TESTDIR/$DARSHAN_PLATFORM/pbs-submit.sh` + +if [ $?
-ne 0 ]; then + echo "Error: failed to qsub $@" + exit 1 +fi + +output="foo" +rc=0 + +# loop as long as qstat succeeds and shows information about job +while [ -n "$output" -a "$rc" -eq 0 ]; do + sleep 5 + output=`qstat $jobid` + rc=$? +done + +# extract final job exit code using qstat +job_exit=`qstat -f -x $jobid | grep Exit_status | tr -d '[:blank:]' | cut -d= -f2` +# compare as a quoted string: an empty or non-numeric Exit_status would make an unquoted +# numeric test error out and fall through to "exit 0", reporting a lost job as a pass +if [ "$job_exit" != "0" ]; then + exit 1 +else + exit 0 +fi diff --git a/darshan-test/regression/cray-module-alcf-polaris/env.sh b/darshan-test/regression/alcf-polaris-cray-module/env.sh similarity index 95% rename from darshan-test/regression/cray-module-alcf-polaris/env.sh rename to darshan-test/regression/alcf-polaris-cray-module/env.sh index eca6e3d81..38793f69a 100755 --- a/darshan-test/regression/cray-module-alcf-polaris/env.sh +++ b/darshan-test/regression/alcf-polaris-cray-module/env.sh @@ -16,7 +16,7 @@ # variables (as in a dynamically linked environment), or generate mpicc # wrappers (as in a statically linked environment).
-# Notes specific to this platform (cray-module-alcf-polaris) +# Notes specific to this platform (alcf-polaris-cray-module) ######################## # Use Cray's default compiler wrappers and load the module associated with # this darshan install diff --git a/darshan-test/regression/cray-module-alcf-polaris/pbs-submit.sh b/darshan-test/regression/alcf-polaris-cray-module/pbs-submit.sh similarity index 100% rename from darshan-test/regression/cray-module-alcf-polaris/pbs-submit.sh rename to darshan-test/regression/alcf-polaris-cray-module/pbs-submit.sh diff --git a/darshan-test/regression/cray-module-alcf-polaris/runjob.sh b/darshan-test/regression/alcf-polaris-cray-module/runjob.sh similarity index 100% rename from darshan-test/regression/cray-module-alcf-polaris/runjob.sh rename to darshan-test/regression/alcf-polaris-cray-module/runjob.sh diff --git a/darshan-test/regression/cray-module-nersc-perlmutter/env.sh b/darshan-test/regression/nersc-perlmutter-cray-module/env.sh similarity index 94% rename from darshan-test/regression/cray-module-nersc-perlmutter/env.sh rename to darshan-test/regression/nersc-perlmutter-cray-module/env.sh index eea613892..0b41490e6 100755 --- a/darshan-test/regression/cray-module-nersc-perlmutter/env.sh +++ b/darshan-test/regression/nersc-perlmutter-cray-module/env.sh @@ -16,7 +16,7 @@ # variables (as in a dynamically linked environment), or generate mpicc # wrappers (as in a statically linked environment). 
-# Notes specific to this platform (cray-module-nersc-perlmutter) +# Notes specific to this platform (nersc-perlmutter-cray-module) ######################## # Use Cray's default compiler wrappers and load the module associated with # this darshan install diff --git a/darshan-test/regression/cray-module-nersc-perlmutter/runjob.sh b/darshan-test/regression/nersc-perlmutter-cray-module/runjob.sh similarity index 100% rename from darshan-test/regression/cray-module-nersc-perlmutter/runjob.sh rename to darshan-test/regression/nersc-perlmutter-cray-module/runjob.sh diff --git a/darshan-test/regression/cray-module-nersc-perlmutter/slurm-submit.sl b/darshan-test/regression/nersc-perlmutter-cray-module/slurm-submit.sl similarity index 100% rename from darshan-test/regression/cray-module-nersc-perlmutter/slurm-submit.sl rename to darshan-test/regression/nersc-perlmutter-cray-module/slurm-submit.sl diff --git a/darshan-test/regression/cray-module-olcf-crusher/env.sh b/darshan-test/regression/olcf-crusher-cray-module/env.sh similarity index 95% rename from darshan-test/regression/cray-module-olcf-crusher/env.sh rename to darshan-test/regression/olcf-crusher-cray-module/env.sh index 88e89dc43..a7ecaad96 100755 --- a/darshan-test/regression/cray-module-olcf-crusher/env.sh +++ b/darshan-test/regression/olcf-crusher-cray-module/env.sh @@ -16,7 +16,7 @@ # variables (as in a dynamically linked environment), or generate mpicc # wrappers (as in a statically linked environment). 
-# Notes specific to this platform (cray-module-olcf-crusher) +# Notes specific to this platform (olcf-crusher-cray-module) ######################## # Use Cray's default compiler wrappers and LD_PRELOAD the darshan library # associated with this install diff --git a/darshan-test/regression/cray-module-olcf-crusher/runjob.sh b/darshan-test/regression/olcf-crusher-cray-module/runjob.sh similarity index 100% rename from darshan-test/regression/cray-module-olcf-crusher/runjob.sh rename to darshan-test/regression/olcf-crusher-cray-module/runjob.sh diff --git a/darshan-test/regression/cray-module-olcf-crusher/slurm-submit.sl b/darshan-test/regression/olcf-crusher-cray-module/slurm-submit.sl similarity index 100% rename from darshan-test/regression/cray-module-olcf-crusher/slurm-submit.sl rename to darshan-test/regression/olcf-crusher-cray-module/slurm-submit.sl diff --git a/darshan-test/regression/cray-module-olcf-frontier/env.sh b/darshan-test/regression/olcf-frontier-cray-module/env.sh similarity index 95% rename from darshan-test/regression/cray-module-olcf-frontier/env.sh rename to darshan-test/regression/olcf-frontier-cray-module/env.sh index 45c327eaf..4bb03731b 100755 --- a/darshan-test/regression/cray-module-olcf-frontier/env.sh +++ b/darshan-test/regression/olcf-frontier-cray-module/env.sh @@ -16,7 +16,7 @@ # variables (as in a dynamically linked environment), or generate mpicc # wrappers (as in a statically linked environment). 
-# Notes specific to this platform (cray-module-olcf-fontier) +# Notes specific to this platform (olcf-frontier-cray-module) ######################## # Use Cray's default compiler wrappers and load the module associated with # this darshan install diff --git a/darshan-test/regression/cray-module-olcf-frontier/runjob.sh b/darshan-test/regression/olcf-frontier-cray-module/runjob.sh similarity index 100% rename from darshan-test/regression/cray-module-olcf-frontier/runjob.sh rename to darshan-test/regression/olcf-frontier-cray-module/runjob.sh diff --git a/darshan-test/regression/cray-module-olcf-frontier/slurm-submit.sl b/darshan-test/regression/olcf-frontier-cray-module/slurm-submit.sl similarity index 100% rename from darshan-test/regression/cray-module-olcf-frontier/slurm-submit.sl rename to darshan-test/regression/olcf-frontier-cray-module/slurm-submit.sl