Skip to content

Commit e424328

Browse files
author
Phil Carns
authored
Merge pull request #1071 from wkliao/check_opmi_506
Set default MPI-IO hint cb_nodes to 1 when using OpenMPI 5.0.5 and priors
2 parents 294c650 + a7ebe51 commit e424328

File tree

2 files changed

+80
-19
lines changed

2 files changed

+80
-19
lines changed

darshan-runtime/configure.ac

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -598,14 +598,6 @@ if test "x$enable_darshan_runtime" = xyes ; then
598598
AC_MSG_ERROR(must provide --with-jobid-env=<name> argument to configure.)
599599
fi
600600

601-
__DARSHAN_LOG_HINTS_DEFAULT="romio_no_indep_rw=true;cb_nodes=4"
602-
if test "x$GOT_LOG_HINTS" != xyes ; then
603-
dnl use default hints
604-
AC_DEFINE_UNQUOTED([__DARSHAN_LOG_HINTS], "$__DARSHAN_LOG_HINTS_DEFAULT",
605-
[Comma-separated list of MPI-IO hints for log file write])
606-
__DARSHAN_LOG_HINTS=$__DARSHAN_LOG_HINTS_DEFAULT
607-
fi
608-
609601
# checks to see how we can print 64 bit values on this architecture
610602
gt_INTTYPES_PRI
611603
if test "x$PRI_MACROS_BROKEN" = x1; then
@@ -694,6 +686,48 @@ if test "x$enable_darshan_runtime" = xyes ; then
694686

695687
if test "x$is_ompi" = x1 ; then
696688
AC_DEFINE(HAVE_OPEN_MPI, 1, [Define if OpenMPI is being used])
689+
690+
dnl Check if version is 5.0.6 or later
691+
AC_MSG_CHECKING([Check if OpenMPI version is 5.0.6 or later])
692+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <mpi.h>
693+
#if (OMPI_MAJOR_VERSION*1000000 + OMPI_MINOR_VERSION*1000 + OMPI_RELEASE_VERSION < 5000006)
694+
choke me
695+
#endif
696+
]])], [ompi_ge_5_0_6=1], [ompi_ge_5_0_6=0])
697+
AC_MSG_RESULT([$ompi_ge_5_0_6])
698+
AC_DEFINE_UNQUOTED(OMPI_GE_5_0_6, [$ompi_ge_5_0_6],
699+
[Define if OpenMPI version is 5.0.6 or later])
700+
fi
701+
702+
if test "x$GOT_LOG_HINTS" != xyes ; then
703+
# set default hints
704+
__DARSHAN_LOG_HINTS_DEFAULT="romio_no_indep_rw=true;cb_nodes=4"
705+
706+
if test "x$is_ompi" = x1 ; then
707+
# OpenMPI 5.0.5 and earlier contain a bug that can corrupt the
708+
# Darshan log files. The bug is related to file locking protocols
709+
# implemented when data sieving is enabled. Setting hint cb_nodes
710+
# to 1 can avoid the bug, but may make writing log files slow.
711+
# For more information, see PR 1070,
712+
# https://github.com/darshan-hpc/darshan/pull/1070
713+
#
714+
# However, OpenMPI 5.0.6 and later can cause file writes to be serialized.
715+
# See https://github.com/open-mpi/ompi/issues/13376
716+
# To minimize the performance impact of writing the log file, we set
717+
# hint cb_nodes to 1.
718+
#
719+
# Note that hint romio_no_indep_rw is a ROMIO (MPICH) hint; it is not
720+
# implemented in OpenMPI.
721+
#
722+
__DARSHAN_LOG_HINTS_DEFAULT="cb_nodes=1"
723+
724+
# if test "x$ompi_ge_5_0_6" = x0 ; then
725+
# __DARSHAN_LOG_HINTS_DEFAULT="romio_no_indep_rw=true;cb_nodes=1"
726+
# else
727+
# __DARSHAN_LOG_HINTS_DEFAULT="romio_no_indep_rw=true;cb_nodes=4"
728+
# fi
729+
fi
730+
__DARSHAN_LOG_HINTS=$__DARSHAN_LOG_HINTS_DEFAULT
697731
fi
698732

699733
# determine if the MPI library includes MPI-IO functions or not
@@ -889,6 +923,10 @@ else
889923
is_ompi=0
890924
fi
891925

926+
# __DARSHAN_LOG_HINTS is used by both MPI and non-MPI configurations
927+
AC_DEFINE_UNQUOTED([__DARSHAN_LOG_HINTS], "$__DARSHAN_LOG_HINTS",
928+
[Comma-separated list of MPI-IO hints for log file write])
929+
892930
AC_SUBST(ENABLE_LD_PRELOAD, ["$enable_ld_preload"])
893931
AC_SUBST(ENABLE_CUSERID, ["$enable_cuserid"])
894932
AC_SUBST(ENABLE_GROUP_READABLE_LOGS, ["$enable_group_readable_logs"])
@@ -923,6 +961,7 @@ AC_SUBST(PNETCDF_PATH, ["$with_pnetcdf"])
923961
AC_SUBST(DAOS_PATH, ["$with_daos"])
924962
AC_SUBST(LDMS_PATH, ["$LDMS_HOME"])
925963
AC_SUBST(HAVE_OPEN_MPI, ["$is_ompi"])
964+
AC_SUBST(OMPI_GE_5_0_6, ["$ompi_ge_5_0_6"])
926965

927966
AM_CONDITIONAL(ENABLE_MMAP_LOGS, [test "x$enable_mmap_logs" = xyes])
928967
AM_CONDITIONAL(ENABLE_LDPRELOAD, [test "x$enable_ld_preload" = xyes])

darshan-runtime/test/tst_runs.sh

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,11 @@
11
#!/bin/bash
22

3+
# Note: this script is run during "make check"; "make install" must be
4+
# run before "make check".
5+
36
# Exit immediately if a command exits with a non-zero status.
47
set -e
58

6-
TODAY_DATE_PATH=`date "+%Y/%-m/%-d"`
7-
TST_DARSHAN_LOG_PATH="${TST_DARSHAN_LOG_PATH}/${TODAY_DATE_PATH}"
8-
mkdir -p ${TST_DARSHAN_LOG_PATH}
9-
10-
# check what file system is used
11-
echo "df -T ${TST_DARSHAN_LOG_PATH}"
12-
df -T ${TST_DARSHAN_LOG_PATH}
13-
14-
echo "findmnt -n -o FSTYPE --target ${TST_DARSHAN_LOG_PATH}"
15-
findmnt -n -o FSTYPE --target ${TST_DARSHAN_LOG_PATH}
16-
179
if test "x$USERNAME_ENV" = xno ; then
1810
USERNAME_ENV=$USER
1911
fi
@@ -31,6 +23,36 @@ else
3123
DARSHAN_CONFIG=../../darshan-util/darshan-config
3224
fi
3325
echo "DARSHAN_CONFIG=$DARSHAN_CONFIG"
26+
echo ""
27+
28+
TODAY_DATE_PATH=`date "+%Y/%-m/%-d"`
29+
LOG_PATH_ENV=`$DARSHAN_CONFIG --log-path-by-env`
30+
if test "x${LOG_PATH_ENV}" != x ; then
31+
if test "x${!LOG_PATH_ENV}" = x ; then
32+
echo ""
33+
echo "Warning: ---------------------------------------------------------"
34+
echo " Darshan was configure with --log-path-by-env set to $LOG_PATH_ENV"
35+
echo " but this run-time environment variable is currently not set !"
36+
echo " Darshan now uses the path set in configure option of --log-path :"
37+
echo " $TST_DARSHAN_LOG_PATH"
38+
echo "------------------------------------------------------------------"
39+
echo ""
40+
TST_DARSHAN_LOG_PATH="${TST_DARSHAN_LOG_PATH}/${TODAY_DATE_PATH}"
41+
else
42+
TST_DARSHAN_LOG_PATH="${!LOG_PATH_ENV}"
43+
fi
44+
else
45+
TST_DARSHAN_LOG_PATH="${TST_DARSHAN_LOG_PATH}/${TODAY_DATE_PATH}"
46+
fi
47+
echo "TST_DARSHAN_LOG_PATH=$TST_DARSHAN_LOG_PATH"
48+
mkdir -p ${TST_DARSHAN_LOG_PATH}
49+
50+
# check what file system is used
51+
echo "df -T ${TST_DARSHAN_LOG_PATH}"
52+
df -T ${TST_DARSHAN_LOG_PATH}
53+
54+
echo "findmnt -n -o FSTYPE --target ${TST_DARSHAN_LOG_PATH}"
55+
findmnt -n -o FSTYPE --target ${TST_DARSHAN_LOG_PATH}
3456

3557
$DARSHAN_CONFIG --all
3658

0 commit comments

Comments
 (0)