diff --git a/benchmarks/C/parallel_run.sh b/benchmarks/C/parallel_run.sh index 8f2593030..3ab232b8c 100755 --- a/benchmarks/C/parallel_run.sh +++ b/benchmarks/C/parallel_run.sh @@ -32,6 +32,8 @@ unset PNETCDF_HINTS fixed_length=23 +TEST_MPIIO_MODES="0 1" + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do @@ -44,7 +46,7 @@ for i in ${check_PROGRAMS} ; do fi OUT_PREFIX="${TESTOUTDIR}/$i" - for mpiio_mode in 0 1 ; do + for mpiio_mode in ${TEST_MPIIO_MODES} ; do if test "$mpiio_mode" = 1 ; then USEMPIO_HINTS="nc_pncio=disable" DRIVER_OUT_FILE="${OUT_PREFIX}.mpio" @@ -128,12 +130,14 @@ for i in ${check_PROGRAMS} ; do done # mpiio_mode DIFF_OPT="-q" - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc + if test "x$TEST_MPIIO_MODES" = "x0 1" ; then + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc + fi + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc done # safe_modes rm -f ${OUTDIR}/$i*nc* diff --git a/benchmarks/FLASH-IO/parallel_run.sh b/benchmarks/FLASH-IO/parallel_run.sh index d642ff000..1a705ee86 100755 --- a/benchmarks/FLASH-IO/parallel_run.sh +++ b/benchmarks/FLASH-IO/parallel_run.sh @@ -34,6 +34,8 @@ FILE_EXTS="ncmpi_chk_0000 ncmpi_plt_cnt_0000 ncmpi_plt_crn_0000" 
fixed_length=23 +TEST_MPIIO_MODES="0 1" + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do @@ -46,7 +48,7 @@ for i in ${check_PROGRAMS} ; do fi OUT_PREFIX="${TESTOUTDIR}/$i" - for mpiio_mode in 0 1 ; do + for mpiio_mode in ${TEST_MPIIO_MODES} ; do if test "$mpiio_mode" = 1 ; then USEMPIO_HINTS="nc_pncio=disable" DRIVER_OUT_FILE="${OUT_PREFIX}.mpio" @@ -134,12 +136,14 @@ for i in ${check_PROGRAMS} ; do done # mpiio_mode for ext in $FILE_EXTS ; do - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.mpio.ina.$ext.nc ---" - $MPIRUN $NCMPIDIFF -q $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.mpio.ina.$ext.nc - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.pncio.$ext.nc ---" - $MPIRUN $NCMPIDIFF -q $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.pncio.$ext.nc - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.pncio.ina.$ext.nc ---" - $MPIRUN $NCMPIDIFF -q $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.pncio.ina.$ext.nc + if test "x$TEST_MPIIO_MODES" = "x0 1" ; then + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.mpio.ina.$ext.nc ---" + $MPIRUN $NCMPIDIFF -q $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.mpio.ina.$ext.nc + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.pncio.$ext.nc ---" + $MPIRUN $NCMPIDIFF -q $OUT_PREFIX.mpio.$ext.nc $OUT_PREFIX.pncio.$ext.nc + fi + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.pncio.$ext.nc $OUT_PREFIX.pncio.ina.$ext.nc ---" + $MPIRUN $NCMPIDIFF -q $OUT_PREFIX.pncio.$ext.nc $OUT_PREFIX.pncio.ina.$ext.nc done # ext done # safe_modes diff --git a/benchmarks/WRF-IO/parallel_run.sh b/benchmarks/WRF-IO/parallel_run.sh index 5f5f0a1ca..c532411c4 100755 --- a/benchmarks/WRF-IO/parallel_run.sh +++ b/benchmarks/WRF-IO/parallel_run.sh @@ -32,6 +32,8 @@ unset PNETCDF_HINTS fixed_length=23 +TEST_MPIIO_MODES="0 1" + for i in ${check_PROGRAMS} ; do for j in ${safe_modes} ; do @@ -44,7 +46,7 @@ for i in ${check_PROGRAMS} ; do fi OUT_PREFIX="${TESTOUTDIR}/$i" - for 
mpiio_mode in 0 1 ; do + for mpiio_mode in ${TEST_MPIIO_MODES} ; do if test "$mpiio_mode" = 1 ; then USEMPIO_HINTS="nc_pncio=disable" DRIVER_OUT_FILE="${OUT_PREFIX}.mpio" @@ -124,12 +126,14 @@ for i in ${check_PROGRAMS} ; do done # mpiio_mode DIFF_OPT="-q" - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc - # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc + if test "x$TEST_MPIIO_MODES" = "x0 1" ; then + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc + fi + # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc done # safe_modes rm -f ${OUTDIR}/$i*nc* diff --git a/examples/parallel_run.sh b/examples/parallel_run.sh index f9771f743..53a773de5 100755 --- a/examples/parallel_run.sh +++ b/examples/parallel_run.sh @@ -194,16 +194,16 @@ for i in ${check_PROGRAMS} ; do $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.mpio.ina.nc.$j # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.nc.$j ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.nc.$j - # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.ina.nc.$j ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.ina.nc.$j + # echo "--- ncmpidiff $OUT_PREFIX.pncio.nc.$j $OUT_PREFIX.pncio.ina.nc.$j ---" + $MPIRUN 
$NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.nc.$j $OUT_PREFIX.pncio.ina.nc.$j done else # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc - # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc + # echo "--- ncmpidiff $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc fi done # safe_modes diff --git a/src/dispatchers/error_codes.c b/src/dispatchers/error_codes.c index e74a20d48..b78c447d4 100644 --- a/src/dispatchers/error_codes.c +++ b/src/dispatchers/error_codes.c @@ -289,6 +289,8 @@ ncmpi_strerror(int err) return "Variable fill value is inconsistent among processes."; case NC_EMULTIDEFINE_CMODE: return "File create mode is inconsistent among processes."; + case NC_EMULTIDEFINE_HINTS: + return "I/O hints are not consistent among processes."; case NC_EBADLOG: return "Unrecognized burst buffering log file format."; case NC_EFLUSHED: @@ -747,6 +749,8 @@ ncmpi_strerrno(int err) case (NC_EMULTIDEFINE_VAR_FILL_MODE): return "NC_EMULTIDEFINE_VAR_FILL_MODE"; case (NC_EMULTIDEFINE_VAR_FILL_VALUE): return "NC_EMULTIDEFINE_VAR_FILL_VALUE"; case (NC_EMULTIDEFINE_CMODE): return "NC_EMULTIDEFINE_CMODE"; + case (NC_EMULTIDEFINE_HINTS): return "NC_EMULTIDEFINE_HINTS"; + default: sprintf(unknown_str,"Unknown code %d",err); } diff --git a/src/drivers/pncio/pncio.h b/src/drivers/pncio/pncio.h index 035febd79..9c4e4530b 100644 --- a/src/drivers/pncio/pncio.h +++ b/src/drivers/pncio/pncio.h @@ -101,33 +101,32 @@ #define PNCIO_TYPE_OVERLAP 0x00000002 /* if contains overlapping regions */ #define PNCIO_TYPE_NEGATIVE 0x00000004 /* if one of displacements is negative */ -enum { - 
PNCIO_HINT_AUTO = 0, - PNCIO_HINT_ENABLE = 1, - PNCIO_HINT_DISABLE = 2 -}; +#define PNCIO_HINT_AUTO -1 +#define PNCIO_HINT_DISABLE 0 +#define PNCIO_HINT_ENABLE 1 typedef struct { int striping_factor; int striping_unit; - int cb_read; - int cb_write; + int start_iodevice; int cb_nodes; int cb_buffer_size; - int ds_read; - int ds_write; - int no_indep_rw; int ind_rd_buffer_size; int ind_wr_buffer_size; - int start_iodevice; + + int romio_cb_read; + int romio_cb_write; + int romio_ds_read; + int romio_ds_write; + int romio_no_indep_rw; + + /* Hints for Lustre file system */ + int lustre_overstriping_ratio; + + /* Hints set by PnetCDF internally */ + int lustre_num_osts; int *ranklist; - union { - struct { - int num_osts; - int overstriping_ratio; - } lustre; - } fs_hints; } PNCIO_Hints; typedef struct { diff --git a/src/drivers/pncio/pncio_close.c b/src/drivers/pncio/pncio_close.c index 4ecc09cc6..c62240f74 100644 --- a/src/drivers/pncio/pncio_close.c +++ b/src/drivers/pncio/pncio_close.c @@ -22,9 +22,11 @@ int PNCIO_File_close(PNCIO_File *fh) { int err = NC_NOERR; - err = close(fh->fd_sys); - if (err != 0) - err = ncmpii_error_posix2nc("close"); + if (fh->is_open) { + err = close(fh->fd_sys); + if (err != 0) + err = ncmpii_error_posix2nc("close"); + } if (fh->hints->ranklist != NULL) NCI_Free(fh->hints->ranklist); diff --git a/src/drivers/pncio/pncio_hints.c b/src/drivers/pncio/pncio_hints.c index 0e7db29a3..4b20016da 100644 --- a/src/drivers/pncio/pncio_hints.c +++ b/src/drivers/pncio/pncio_hints.c @@ -19,175 +19,113 @@ #include #include "pncio.h" -/*----< PNCIO_File_get_info() >-----------------------------------------------*/ -int PNCIO_File_get_info(PNCIO_File *fd, - MPI_Info *info_used) -{ - int err; - - err = MPI_Info_dup(fd->info, info_used); - if (err == MPI_SUCCESS) - err = NC_NOERR; - else - err = ncmpii_error_mpi2nc(err, "MPI_Info_dup"); - - return err; +#define GET_INFO_INT(key) { \ + MPI_Info_get(users_info, #key, MPI_MAX_INFO_VAL, value, &flag); \ + if 
(flag) { \ + MPI_Info_set(fd->info, #key, value); \ + fd->hints->key = atoi(value); \ + } \ } -/*----< Info_check_and_install_int() >---------------------------------------*/ -static -int Info_check_and_install_int(PNCIO_File *fd, - MPI_Info info, - const char *key, - int *local_cache) -{ - int intval, tmp_val, flag, ret = 0; - char value[MPI_MAX_INFO_VAL + 1]; +#define GET_INFO_STR(key) { \ + MPI_Info_get(users_info, #key, MPI_MAX_INFO_VAL, value, &flag); \ + if (flag) { \ + MPI_Info_set(fd->info, #key, value); \ + if (!strcasecmp(value, "true")) \ + fd->hints->key = PNCIO_HINT_ENABLE; \ + else if (!strcasecmp(value, "false")) \ + fd->hints->key = PNCIO_HINT_DISABLE; \ + else if (!strcasecmp(value, "automatic")) \ + fd->hints->key = PNCIO_HINT_AUTO; \ + else if (!strcasecmp(value, "enable")) \ + fd->hints->key = PNCIO_HINT_ENABLE; \ + else if (!strcasecmp(value, "disable")) \ + fd->hints->key = PNCIO_HINT_DISABLE; \ + } \ +} - MPI_Info_get(info, key, MPI_MAX_INFO_VAL, value, &flag); - if (flag) { - intval = atoi(value); - tmp_val = intval; - - MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); - /* --BEGIN ERROR HANDLING-- */ - if (tmp_val != intval) { - ret = ncmpii_error_mpi2nc(MPI_ERR_NOT_SAME, __func__); - goto fn_exit; - } - /* --END ERROR HANDLING-- */ - - MPI_Info_set(fd->info, key, value); - /* some file systems do not cache hints in the fd struct */ - if (local_cache != NULL) - *local_cache = intval; - } -fn_exit: - return ret; +#ifdef PNETCDF_DEBUG +#define CHECK_HINT(hint) { \ + if (fd->hints->hint != root_hints->hint) { \ + char int_str[16]; \ + /* args in format order: hint name, local value, this rank, root's value; 'rank' must be in scope where the macro is expanded */ \ + fprintf(stderr, "Error: inconsistent I/O hint %s (%d at rank %d, %d at root)\n", \ + #hint, fd->hints->hint, rank, root_hints->hint); \ + /* overwrite local's hint with root's */ \ + snprintf(int_str, 16, "%d", root_hints->hint); \ + MPI_Info_set(fd->info, #hint, int_str); \ + err = NC_EMULTIDEFINE_HINTS; \ + } \ } +#else +#define CHECK_HINT(hint) { \ + if (fd->hints->hint != root_hints->hint) { \ + /* overwrite local's
hint with root's */ \ + char int_str[16]; \ + snprintf(int_str, 16, "%d", root_hints->hint); \ + MPI_Info_set(fd->info, #hint, int_str); \ + err = NC_EMULTIDEFINE_HINTS; \ + } \ +} +#endif -/*----< Info_check_and_install_enabled() >-----------------------------------*/ +/*----< hint_consistency_check() >-------------------------------------------*/ static -int Info_check_and_install_enabled(PNCIO_File *fd, - MPI_Info info, - const char *key, - int *local_cache) +int hint_consistency_check(PNCIO_File *fd) { - int tmp_val, flag, ret = 0; - char value[MPI_MAX_INFO_VAL + 1]; + int err, rank; - MPI_Info_get(info, key, MPI_MAX_INFO_VAL, value, &flag); - if (flag) { - if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) { - MPI_Info_set(fd->info, key, value); - *local_cache = PNCIO_HINT_ENABLE; - } else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) { - MPI_Info_set(fd->info, key, value); - *local_cache = PNCIO_HINT_DISABLE; - } else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) { - MPI_Info_set(fd->info, key, value); - *local_cache = PNCIO_HINT_AUTO; - /* treat the user-provided string like "enabled": either it is a - * hint ROMIO knows about and can support it, or ROMIO will not - * return the hint at all in the MPI_File_get_info info object - */ - } else if (!strcmp(value, "requested") || !strcmp(value, "REQUESTED")) { - MPI_Info_set(fd->info, key, "enable"); - *local_cache = PNCIO_HINT_ENABLE; - } - - tmp_val = *local_cache; - - MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); - /* --BEGIN ERROR HANDLING-- */ - if (tmp_val != *local_cache) { - ret = ncmpii_error_mpi2nc(MPI_ERR_NOT_SAME, __func__); - goto fn_exit; - } - /* --END ERROR HANDLING-- */ - } -fn_exit: - return ret; -} + MPI_Comm_rank(fd->comm, &rank); -/*----< Info_check_and_install_true() >--------------------------------------*/ -static -int Info_check_and_install_true(PNCIO_File *fd, - MPI_Info info, - const char *key, - int *local_cache) -{ - int flag, tmp_val, ret = 0; - 
char value[MPI_MAX_INFO_VAL + 1]; + err = NC_NOERR; - MPI_Info_get(info, key, MPI_MAX_INFO_VAL, value, &flag); - if (flag) { - if (!strcmp(value, "true") || !strcmp(value, "TRUE")) { - MPI_Info_set(fd->info, key, value); - *local_cache = 1; - } else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) { - MPI_Info_set(fd->info, key, value); - *local_cache = 0; - } - tmp_val = *local_cache; - - MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); - /* --BEGIN ERROR HANDLING-- */ - if (tmp_val != *local_cache) { - ret = ncmpii_error_mpi2nc(MPI_ERR_NOT_SAME, __func__); - goto fn_exit; - } - /* --END ERROR HANDLING-- */ + if (rank == 0) + /* broadcast root's hints */ + MPI_Bcast(fd->hints, sizeof(PNCIO_Hints), MPI_BYTE, 0, fd->comm); + else { + PNCIO_Hints *root_hints; + root_hints = (PNCIO_Hints*) NCI_Malloc(sizeof(PNCIO_Hints)); + + /* broadcast root's hints */ + MPI_Bcast(root_hints, sizeof(PNCIO_Hints), MPI_BYTE, 0, fd->comm); + + /* check hints individually against root's */ + CHECK_HINT(striping_factor); + CHECK_HINT(striping_unit); + CHECK_HINT(start_iodevice); + CHECK_HINT(cb_nodes); + CHECK_HINT(cb_buffer_size); + CHECK_HINT(ind_rd_buffer_size); + CHECK_HINT(ind_wr_buffer_size); + + CHECK_HINT(romio_cb_read); + CHECK_HINT(romio_cb_write); + CHECK_HINT(romio_ds_read); + CHECK_HINT(romio_ds_write); + CHECK_HINT(romio_no_indep_rw); + + CHECK_HINT(lustre_overstriping_ratio); + + NCI_Free(root_hints); } -fn_exit: - return ret; -} -#if 0 -/*----< Info_check_and_install_str() >---------------------------------------*/ -static -int Info_check_and_install_str(PNCIO_File *fd, - MPI_Info info, - const char *key, - char **local_cache) -{ - int flag, ret = 0; - size_t len; - char value[MPI_MAX_INFO_VAL + 1]; + /* All NetCDF error codes are negative */ + MPI_Allreduce(MPI_IN_PLACE, &err, 1, MPI_INT, MPI_MIN, fd->comm); - MPI_Info_get(info, key, MPI_MAX_INFO_VAL, value, &flag); - if (flag) { - MPI_Info_set(fd->info, key, value); - len = (strlen(value) + 1) * sizeof(char); -
*local_cache = NCI_Malloc(len); - if (*local_cache == NULL) { - ret = NC_ENOMEM; - goto fn_exit; - } - strncpy(*local_cache, value, len); - } -fn_exit: - return ret; + return err; } -#endif /*----< PNCIO_File_SetInfo() >------------------------------------------------*/ /* For PnetCDF, a file info object can only be passed to PnetCDF at file create * or open call, i.e. I/O hints cannot be changed after file create/open. * - * When users_info == MPI_INFO_NULL, this subroutine is an independent call. - * When users_info != MPI_INFO_NULL, this subroutine is a collective call, - * because it calls Info_check_and_install_xxx(), which checks the consistency - * of all hints values set in user's info object. - * - * TODO: instead of sync each hint, a better implementation is to have root - * bcast all hints and let each process checks inconsistency locally. + * This subroutine is a collective call, because it checks consistency of all + * hints among all processes. */ int PNCIO_File_SetInfo(PNCIO_File *fd, MPI_Info users_info) { - int nprocs; + int err=NC_NOERR, flag, nprocs; char value[MPI_MAX_INFO_VAL + 1]; if (users_info == MPI_INFO_NULL) @@ -206,16 +144,16 @@ PNCIO_File_SetInfo(PNCIO_File *fd, * collective buffering */ MPI_Info_set(fd->info, "romio_cb_read", "automatic"); - fd->hints->cb_read = PNCIO_HINT_AUTO; + fd->hints->romio_cb_read = PNCIO_HINT_AUTO; MPI_Info_set(fd->info, "romio_cb_write", "automatic"); - fd->hints->cb_write = PNCIO_HINT_AUTO; + fd->hints->romio_cb_write = PNCIO_HINT_AUTO; /* cb_nodes may be set later right after file open call */ fd->hints->cb_nodes = 0; /* hint indicating that no indep. 
I/O will be performed on this file */ MPI_Info_set(fd->info, "romio_no_indep_rw", "false"); - fd->hints->no_indep_rw = 0; + fd->hints->romio_no_indep_rw = 0; /* buffer size for data sieving in independent reads */ MPI_Info_set(fd->info, "ind_rd_buffer_size", PNCIO_IND_RD_BUFFER_SIZE_DFLT); @@ -229,9 +167,9 @@ PNCIO_File_SetInfo(PNCIO_File *fd, * sieving */ MPI_Info_set(fd->info, "romio_ds_read", "automatic"); - fd->hints->ds_read = PNCIO_HINT_AUTO; + fd->hints->romio_ds_read = PNCIO_HINT_AUTO; MPI_Info_set(fd->info, "romio_ds_write", "automatic"); - fd->hints->ds_write = PNCIO_HINT_AUTO; + fd->hints->romio_ds_write = PNCIO_HINT_AUTO; /* File striping parameters will be retrieved from the file system set, * once the file is opened. These parameters can also be customized by @@ -242,51 +180,43 @@ PNCIO_File_SetInfo(PNCIO_File *fd, fd->hints->striping_factor = 0; fd->hints->start_iodevice = -1; /* Lustre overstriping ratio. 0 or 1 means disabled */ - fd->hints->fs_hints.lustre.overstriping_ratio = 1; + fd->hints->lustre_overstriping_ratio = 1; /* add in user's info --------------------------------------------------*/ - Info_check_and_install_int(fd, users_info, "cb_buffer_size", - &fd->hints->cb_buffer_size); + + /* size of internal buffer to be used in collective reads and writes */ + GET_INFO_INT(cb_buffer_size); /* enable/disable collective buffering */ - Info_check_and_install_enabled(fd, users_info, "romio_cb_read", - &fd->hints->cb_read); - if (fd->hints->cb_read == PNCIO_HINT_DISABLE) { - /* romio_cb_read overrides no_indep_rw */ + GET_INFO_STR(romio_cb_read); + if (fd->hints->romio_cb_read == PNCIO_HINT_DISABLE) { + /* romio_cb_read overrides romio_no_indep_rw */ MPI_Info_set(fd->info, "romio_no_indep_rw", "false"); - fd->hints->no_indep_rw = PNCIO_HINT_DISABLE; + fd->hints->romio_no_indep_rw = PNCIO_HINT_DISABLE; } - Info_check_and_install_enabled(fd, users_info, "romio_cb_write", - &fd->hints->cb_write); - if (fd->hints->cb_write == PNCIO_HINT_DISABLE) { - 
/* romio_cb_write overrides no_indep_rw */ + GET_INFO_STR(romio_cb_write); + if (fd->hints->romio_cb_write == PNCIO_HINT_DISABLE) { + /* romio_cb_write overrides romio_no_indep_rw */ MPI_Info_set(fd->info, "romio_no_indep_rw", "false"); - fd->hints->no_indep_rw = PNCIO_HINT_DISABLE; + fd->hints->romio_no_indep_rw = PNCIO_HINT_DISABLE; } /* user intends to call collective I/O APIs only */ - Info_check_and_install_true(fd, users_info, "romio_no_indep_rw", - &fd->hints->no_indep_rw); - if (fd->hints->no_indep_rw == 1) { - /* if 'no_indep_rw' set, also hint that we will do - * collective buffering: if we aren't doing independent io, - * then we have to do collective */ + GET_INFO_STR(romio_no_indep_rw); + if (fd->hints->romio_no_indep_rw == PNCIO_HINT_ENABLE) { MPI_Info_set(fd->info, "romio_cb_write", "enable"); MPI_Info_set(fd->info, "romio_cb_read", "enable"); - fd->hints->cb_read = PNCIO_HINT_ENABLE; - fd->hints->cb_write = PNCIO_HINT_ENABLE; + fd->hints->romio_cb_read = PNCIO_HINT_ENABLE; + fd->hints->romio_cb_write = PNCIO_HINT_ENABLE; } /* enable/disable data sieving */ - Info_check_and_install_enabled(fd, users_info, "romio_ds_read", - &fd->hints->ds_read); - Info_check_and_install_enabled(fd, users_info, "romio_ds_write", - &fd->hints->ds_write); + GET_INFO_STR(romio_ds_read); + GET_INFO_STR(romio_ds_write); /* number of I/O aggregators */ - Info_check_and_install_int(fd, users_info, "cb_nodes", - &fd->hints->cb_nodes); + GET_INFO_INT(cb_nodes); /* check ill value */ if (fd->hints->cb_nodes > 0 && fd->hints->cb_nodes <= nprocs) { snprintf(value, MPI_MAX_INFO_VAL + 1, "%d", fd->hints->cb_nodes); @@ -297,30 +227,40 @@ PNCIO_File_SetInfo(PNCIO_File *fd, MPI_Info_set(fd->info, "cb_nodes", "0"); } - Info_check_and_install_int(fd, users_info, "ind_wr_buffer_size", - &fd->hints->ind_wr_buffer_size); - Info_check_and_install_int(fd, users_info, "ind_rd_buffer_size", - &fd->hints->ind_rd_buffer_size); + GET_INFO_INT(ind_wr_buffer_size); + 
GET_INFO_INT(ind_rd_buffer_size); /* file striping configuration */ - Info_check_and_install_int(fd, users_info, "striping_unit", - &fd->hints->striping_unit); - - Info_check_and_install_int(fd, users_info, "striping_factor", - &fd->hints->striping_factor); - - Info_check_and_install_int(fd, users_info, "start_iodevice", - &fd->hints->start_iodevice); + GET_INFO_INT(striping_unit); + GET_INFO_INT(striping_factor); + GET_INFO_INT(start_iodevice); /* Lustre overstriping ratio. 0 or 1 means disabled */ - Info_check_and_install_int(fd, users_info, "lustre_overstriping_ratio", - &fd->hints->fs_hints.lustre.overstriping_ratio); + GET_INFO_INT(lustre_overstriping_ratio); + + /* Check hint consistency among all processes */ + err = hint_consistency_check(fd); /* PnetCDF ignores the following hints. * cb_config_list * deferred_open */ - return NC_NOERR; + return err; +} + +/*----< PNCIO_File_get_info() >-----------------------------------------------*/ +int PNCIO_File_get_info(PNCIO_File *fd, + MPI_Info *info_used) +{ + int err; + + err = MPI_Info_dup(fd->info, info_used); + if (err == MPI_SUCCESS) + err = NC_NOERR; + else + err = ncmpii_error_mpi2nc(err, "MPI_Info_dup"); + + return err; } diff --git a/src/drivers/pncio/pncio_lustre_open.c b/src/drivers/pncio/pncio_lustre_open.c index d9a1692a6..0cd348727 100644 --- a/src/drivers/pncio/pncio_lustre_open.c +++ b/src/drivers/pncio/pncio_lustre_open.c @@ -826,7 +826,7 @@ assert(mpi_io_mode & MPI_MODE_CREATE); str_unit = fd->hints->striping_unit; str_factor = fd->hints->striping_factor; start_iodev = fd->hints->start_iodevice; - overstriping_ratio = fd->hints->fs_hints.lustre.overstriping_ratio; + overstriping_ratio = fd->hints->lustre_overstriping_ratio; /* obtain the total number of OSTs available */ total_num_OSTs = get_total_avail_osts(fd->filename); @@ -847,7 +847,7 @@ assert(mpi_io_mode & MPI_MODE_CREATE); * fd->hints->striping_factor = 0; * fd->hints->striping_unit = 0; * fd->hints->start_iodevice = -1; - * 
fd->hints->fs_hints.lustre.overstriping_ratio = 1; + * fd->hints->lustre_overstriping_ratio = 1; */ /* In many cases, the Lustre striping configuration of the file to be @@ -973,6 +973,7 @@ assert(mpi_io_mode & MPI_MODE_CREATE); err = ncmpii_error_posix2nc("Lustre set striping"); goto err_out; } + fd->is_open = 1; /* Obtain Lustre file striping parameters actually set. */ numOSTs = get_striping(fd->fd_sys, fd->filename, &pattern, @@ -993,6 +994,7 @@ assert(mpi_io_mode & MPI_MODE_CREATE); err = ncmpii_error_posix2nc("open"); goto err_out; } + fd->is_open = 1; char *env_str = getenv("MIMIC_STRIPE_SIZE"); if (env_str != NULL) @@ -1017,8 +1019,8 @@ assert(mpi_io_mode & MPI_MODE_CREATE); fd->hints->striping_factor = stripin_info[1]; fd->hints->start_iodevice = stripin_info[2]; if (fd->file_system == PNCIO_LUSTRE) { - fd->hints->fs_hints.lustre.num_osts = stripin_info[3]; - fd->hints->fs_hints.lustre.overstriping_ratio = stripin_info[1] / stripin_info[3]; + fd->hints->lustre_num_osts = stripin_info[3]; + fd->hints->lustre_overstriping_ratio = stripin_info[1] / stripin_info[3]; } if (rank > 0) { /* non-root processes */ @@ -1028,6 +1030,7 @@ assert(mpi_io_mode & MPI_MODE_CREATE); __FILE__,__LINE__, rank, fd->filename, strerror(errno)); return ncmpii_error_posix2nc("ioctl"); } + fd->is_open = 1; } /* construct cb_nodes rank list */ @@ -1035,10 +1038,10 @@ assert(mpi_io_mode & MPI_MODE_CREATE); MPI_Info_set(fd->info, "romio_filesystem_type", "LUSTRE:"); - snprintf(int_str, 16, "%d", fd->hints->fs_hints.lustre.num_osts); + snprintf(int_str, 16, "%d", fd->hints->lustre_num_osts); MPI_Info_set(fd->info, "lustre_num_osts", int_str); - snprintf(int_str, 16, "%d", fd->hints->fs_hints.lustre.overstriping_ratio); + snprintf(int_str, 16, "%d", fd->hints->lustre_overstriping_ratio); MPI_Info_set(fd->info, "lustre_overstriping_ratio", int_str); return err; @@ -1080,6 +1083,7 @@ static int wkl=0; if (wkl == 0 && rank == 0) { printf("\nxxxx %s at %d: %s ---- err = 
ncmpii_error_posix2nc("open"); goto err_out; } + fd->is_open = 1; /* Only root obtains the striping information and bcast to all other * processes. @@ -1119,18 +1123,18 @@ static int wkl=0; if (wkl == 0 && rank == 0) { printf("\nxxxx %s at %d: %s ---- fd->hints->striping_unit = stripin_info[0]; fd->hints->striping_factor = stripin_info[1]; fd->hints->start_iodevice = stripin_info[2]; - fd->hints->fs_hints.lustre.num_osts = stripin_info[3]; - fd->hints->fs_hints.lustre.overstriping_ratio = stripin_info[1] / stripin_info[3]; + fd->hints->lustre_num_osts = stripin_info[3]; + fd->hints->lustre_overstriping_ratio = stripin_info[1] / stripin_info[3]; /* construct cb_nodes rank list */ Lustre_set_cb_node_list(fd); MPI_Info_set(fd->info, "romio_filesystem_type", "LUSTRE:"); - snprintf(int_str, 16, "%d", fd->hints->fs_hints.lustre.num_osts); + snprintf(int_str, 16, "%d", fd->hints->lustre_num_osts); MPI_Info_set(fd->info, "lustre_num_osts", int_str); - snprintf(int_str, 16, "%d", fd->hints->fs_hints.lustre.overstriping_ratio); + snprintf(int_str, 16, "%d", fd->hints->lustre_overstriping_ratio); MPI_Info_set(fd->info, "lustre_overstriping_ratio", int_str); return err; diff --git a/src/drivers/pncio/pncio_lustre_wrcoll.c b/src/drivers/pncio/pncio_lustre_wrcoll.c index 03b0a59e9..40192f186 100644 --- a/src/drivers/pncio/pncio_lustre_wrcoll.c +++ b/src/drivers/pncio/pncio_lustre_wrcoll.c @@ -638,7 +638,7 @@ double curT = MPI_Wtime(); if (buf_view.count > 1) buf_view.rem = buf_view.len[0]; - if (fd->hints->cb_write == PNCIO_HINT_DISABLE) { + if (fd->hints->romio_cb_write == PNCIO_HINT_DISABLE) { /* collective write is explicitly disabled by user */ do_collect = 0; } @@ -700,14 +700,14 @@ double curT = MPI_Wtime(); NCI_Free(st_end_all); // if (myrank==0) printf("%s %d: do_collect=%d is_interleaved=%d buf_view size=%lld count=%lld is_contig=%d start_offset=%lld end_offset=%lld\n",__func__,__LINE__, do_collect,is_interleaved,buf_view.size,buf_view.count,buf_view.is_contig, 
start_offset,end_offset); - if (fd->hints->cb_write == PNCIO_HINT_ENABLE) { + if (fd->hints->romio_cb_write == PNCIO_HINT_ENABLE) { /* explicitly enabled by user */ do_collect = 1; } - else if (fd->hints->cb_write == PNCIO_HINT_AUTO) { + else if (fd->hints->romio_cb_write == PNCIO_HINT_AUTO) { // if (myrank==0) printf("%s %d: large_indv_req=%d cb_nodes=%d striping_factor=%d\n",__func__,__LINE__, large_indv_req,fd->hints->cb_nodes , fd->hints->striping_factor); /* Check if collective write is actually necessary, only when - * cb_write hint is set to PNCIO_HINT_AUTO. + * romio_cb_write hint is set to PNCIO_HINT_AUTO. * * Two typical access patterns can benefit from collective write. * 1) access file regions of all processes are interleaved, and @@ -811,7 +811,7 @@ double curT = MPI_Wtime(); */ LUSTRE_Calc_my_req(fd, buf_view.is_contig, &my_req, buf_idx); - if (fd->hints->ds_write != PNCIO_HINT_DISABLE) { + if (fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) { /* When data sieving is considered, below check the current file size * first. 
If the aggregate access region of this collective write is * beyond the current file size, then we can safely skip the read of @@ -1952,12 +1952,12 @@ int Exchange_data_recv( #endif srt_off_len->off[0] = others_req[j].offsets[start_pos[j]]; srt_off_len->len[0] = others_req[j].lens[start_pos[j]]; - } else if (fd->hints->ds_write == PNCIO_HINT_ENABLE) { + } else if (fd->hints->romio_ds_write == PNCIO_HINT_ENABLE) { /* skip building of srt_off_len and proceed to read-modify-write */ build_srt_off_len = 0; /* assuming there are holes */ hole = 1; - } else if (fd->hints->ds_write == PNCIO_HINT_AUTO) { + } else if (fd->hints->romio_ds_write == PNCIO_HINT_AUTO) { if (DO_HEAP_MERGE(nprocs_recv, srt_off_len->num)) { /* When the number of sorted offset-length lists or the total * number of offset-length pairs are too large, the heap-merge sort @@ -1983,7 +1983,7 @@ int Exchange_data_recv( fd->write_counter[6] = MAX(fd->write_counter[6], nprocs_recv); } #endif - } else { /* if (fd->hints->ds_write == PNCIO_HINT_DISABLE) */ + } else { /* if (fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) */ /* User explicitly disable data sieving to skip read-modify-write. * Whether or not there is a hole is not important. 
However, * srt_off_len must be constructed to merge all others_req[] into a @@ -2027,11 +2027,11 @@ int Exchange_data_recv( hole = (srt_off_len->num > 1); } -// printf("%s at %d: ds_write=%s build_srt_off_len=%d hole=%d skip_read=%d srt_off_len->num=%lld\n",__func__,__LINE__, (fd->hints->ds_write == PNCIO_HINT_ENABLE)?"ENABLE": (fd->hints->ds_write == PNCIO_HINT_DISABLE)?"DISABLE":"AUTO", build_srt_off_len,hole,fd->skip_read,srt_off_len->num); -// printf("%s at %d: ds_write=%s build_srt_off_len=%d hole=%d nprocs_recv=%d(PNCIO_DS_WR_NAGGRS_LB=%d) numx=%lld(PNCIO_DS_WR_NPAIRS_LB=%d)\n",__func__,__LINE__, (fd->hints->ds_write == PNCIO_HINT_ENABLE)?"ENABLE": (fd->hints->ds_write == PNCIO_HINT_DISABLE)?"DISABLE":"AUTO", build_srt_off_len,hole,nprocs_recv,PNCIO_DS_WR_NAGGRS_LB,numx,PNCIO_DS_WR_NPAIRS_LB); +// printf("%s at %d: romio_ds_write=%s build_srt_off_len=%d hole=%d skip_read=%d srt_off_len->num=%lld\n",__func__,__LINE__, (fd->hints->romio_ds_write == PNCIO_HINT_ENABLE)?"ENABLE": (fd->hints->romio_ds_write == PNCIO_HINT_DISABLE)?"DISABLE":"AUTO", build_srt_off_len,hole,fd->skip_read,srt_off_len->num); +// printf("%s at %d: romio_ds_write=%s build_srt_off_len=%d hole=%d nprocs_recv=%d(PNCIO_DS_WR_NAGGRS_LB=%d) numx=%lld(PNCIO_DS_WR_NPAIRS_LB=%d)\n",__func__,__LINE__, (fd->hints->romio_ds_write == PNCIO_HINT_ENABLE)?"ENABLE": (fd->hints->romio_ds_write == PNCIO_HINT_DISABLE)?"DISABLE":"AUTO", build_srt_off_len,hole,nprocs_recv,PNCIO_DS_WR_NAGGRS_LB,numx,PNCIO_DS_WR_NPAIRS_LB); /* data sieving */ - if (fd->hints->ds_write != PNCIO_HINT_DISABLE && hole) { + if (fd->hints->romio_ds_write != PNCIO_HINT_DISABLE && hole) { if (fd->skip_read) memset(write_buf, 0, range_size); else { diff --git a/src/drivers/pncio/pncio_lustre_wrstr.c b/src/drivers/pncio/pncio_lustre_wrstr.c index 341fab400..fbd3c75d1 100644 --- a/src/drivers/pncio/pncio_lustre_wrstr.c +++ b/src/drivers/pncio/pncio_lustre_wrstr.c @@ -15,7 +15,7 @@ if (writebuf_len) { \ w_len = PNCIO_WriteContig(fd, 
writebuf, writebuf_len, \ writebuf_off); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (w_len < 0) { \ NCI_Free(writebuf); \ @@ -29,7 +29,7 @@ writebuf_len = MIN(end_offset - writebuf_off + 1, \ (writebuf_off / stripe_size + 1) * stripe_size \ - writebuf_off); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ r_len = PNCIO_ReadContig(fd, writebuf, writebuf_len, writebuf_off); \ if (r_len < 0) { \ @@ -42,7 +42,7 @@ write_sz); \ while (write_sz != req_len) { \ w_len = PNCIO_WriteContig(fd, writebuf, writebuf_len, writebuf_off); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (w_len < 0) { \ NCI_Free(writebuf); \ @@ -56,7 +56,7 @@ writebuf_len = MIN(end_offset - writebuf_off + 1, \ (writebuf_off / stripe_size + 1) * stripe_size \ - writebuf_off); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ r_len = PNCIO_ReadContig(fd, writebuf, writebuf_len, writebuf_off); \ if (r_len < 0) { \ @@ -127,7 +127,7 @@ MPI_Offset PNCIO_LUSTRE_WriteStrided(PNCIO_File *fd, // printf("%s at %d:\n",__func__,__LINE__); - if (fd->hints->ds_write == PNCIO_HINT_DISABLE) { + if (fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) { /* if user has disabled data sieving on writes, use naive * approach instead. 
*/ @@ -162,7 +162,7 @@ if (fd->flat_file.count > 0) assert(offset == 0); /* not whole file visible */ /* if atomicity is true or data sieving is not disable, lock the region * to be accessed */ - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize); for (i = 0; i < buf_view.count; i++) { @@ -176,7 +176,7 @@ if (fd->flat_file.count > 0) assert(offset == 0); /* not whole file visible */ /* write the buffer out the last round */ w_len = PNCIO_WriteContig(fd, writebuf, writebuf_len, writebuf_off); - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_UNLOCK(fd, start_off, SEEK_SET, bufsize); NCI_Free(writebuf); @@ -223,13 +223,13 @@ assert(disp == 0); BUFFERED_WRITE_WITHOUT_READ; /* write the buffer out the last round */ - if (fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); w_len = PNCIO_WriteContig(fd, writebuf, writebuf_len, writebuf_off); if (w_len > 0) total_w_len += w_len; - if (fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); NCI_Free(writebuf); @@ -255,7 +255,7 @@ assert(j < fd->flat_file.count); /* if atomicity is true or data sieving is not disable, lock the region * to be accessed */ - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); writebuf_off = 0; @@ -348,12 +348,12 @@ assert(k < buf_view.count); /* write the buffer out the last round */ if (writebuf_len) { w_len = PNCIO_WriteContig(fd, writebuf, writebuf_len, writebuf_off); - if (!fd->atomicity && fd->hints->ds_write == 
PNCIO_HINT_DISABLE) + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) PNCIO_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); if (w_len < 0) return w_len; total_w_len += w_len; } - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_UNLOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); NCI_Free(writebuf); diff --git a/src/drivers/pncio/pncio_open.c b/src/drivers/pncio/pncio_open.c index 38981b2c8..9c279b0ce 100644 --- a/src/drivers/pncio/pncio_open.c +++ b/src/drivers/pncio/pncio_open.c @@ -139,6 +139,7 @@ if (rank == 0) { printf("\nxxxx %s at %d: ---- %s\n",__func__,__LINE__,fd->filen err = ncmpii_error_posix2nc("open"); goto err_out; } + fd->is_open = 1; err_out: MPI_Bcast(stripin_info, 4, MPI_INT, 0, fd->comm); @@ -154,6 +155,7 @@ if (rank == 0) { printf("\nxxxx %s at %d: ---- %s\n",__func__,__LINE__,fd->filen __func__,__LINE__, rank, fd->filename, strerror(errno)); return ncmpii_error_posix2nc("ioctl"); } + fd->is_open = 1; } /* construct cb_nodes rank list */ @@ -196,6 +198,7 @@ if (rank == 0) { printf("\nxxxx %s at %d: ---- %s\n",__func__,__LINE__,fd->filen err = ncmpii_error_posix2nc("open"); goto err_out; } + fd->is_open = 1; /* Only root obtains the striping information and bcast to all other * processes. @@ -233,7 +236,7 @@ int PNCIO_File_open(MPI_Comm comm, * called to check the file system type. */ char value[MPI_MAX_INFO_VAL + 1], int_str[16]; - int i, err, min_err; + int i, err, min_err, status=NC_NOERR; fd->comm = comm; fd->filename = filename; /* without file system type name prefix */ @@ -256,9 +259,13 @@ int PNCIO_File_open(MPI_Comm comm, else MPI_Info_dup(info, &fd->info); - err = PNCIO_File_SetInfo(fd, fd->info); - if (err != NC_NOERR) - return err; + status = PNCIO_File_SetInfo(fd, fd->info); + if (status != NC_NOERR && status != NC_EMULTIDEFINE_HINTS) { + /* Inconsistent I/O hints is not a fatal error. 
+ * In PNCIO_File_SetInfo(), root's hints overwrite local's. + */ + goto err_out; + } #if defined(PNETCDF_PROFILING) && (PNETCDF_PROFILING == 1) for (i=0; ifile_system != PNCIO_FSTYPE_MPIIO); + /* TODO: When hint romio_no_indep_rw hint is set to true, only aggregators open + * the file. + * Note because fd->is_agg is set at the end of create/open call. + */ if (fd->file_system == PNCIO_LUSTRE) { if (amode & MPI_MODE_CREATE) err = PNCIO_Lustre_create(fd, amode); @@ -281,11 +292,10 @@ int PNCIO_File_open(MPI_Comm comm, else err = GEN_open(fd); } - if (err != NC_NOERR) goto err_out; - - /* TODO: when hint no_indep_rw hint is set to true, only aggregators open - * the file */ - fd->is_open = 1; + if (err != NC_NOERR) { /* fatal error */ + status = err; + goto err_out; + } /* set file striping hints */ snprintf(int_str, 16, "%d", fd->hints->striping_unit); @@ -323,15 +333,17 @@ int PNCIO_File_open(MPI_Comm comm, /* collective buffer is used only by I/O aggregators only */ if (fd->is_agg) { fd->io_buf = NCI_Calloc(1, fd->hints->cb_buffer_size); - if (fd->io_buf == NULL) - return NC_ENOMEM; + if (fd->io_buf == NULL) /* fatal error */ + status = NC_ENOMEM; } err_out: - MPI_Allreduce(&err, &min_err, 1, MPI_INT, MPI_MIN, comm); + MPI_Allreduce(&status, &min_err, 1, MPI_INT, MPI_MIN, comm); /* All NC errors are < 0 */ - if (min_err < 0) { - if (err == 0) /* close file if opened successfully */ + + if (min_err != NC_NOERR) { + if (status == NC_NOERR && fd->is_open) + /* close file if opened successfully */ close(fd->fd_sys); NCI_Free(fd->hints); if (fd->info != MPI_INFO_NULL) @@ -339,6 +351,6 @@ int PNCIO_File_open(MPI_Comm comm, if (fd->io_buf != NULL) NCI_Free(fd->io_buf); } - return err; + return status; } diff --git a/src/drivers/pncio/pncio_read_coll.c b/src/drivers/pncio/pncio_read_coll.c index 78af29b48..10ab05933 100644 --- a/src/drivers/pncio/pncio_read_coll.c +++ b/src/drivers/pncio/pncio_read_coll.c @@ -83,8 +83,8 @@ double curT = MPI_Wtime(); /* number of 
aggregators, cb_nodes, is stored in the hints */ nprocs_for_coll = fd->hints->cb_nodes; - /* only check for interleaving if cb_read isn't disabled */ - if (fd->hints->cb_read != PNCIO_HINT_DISABLE) { + /* only check for interleaving if romio_cb_read isn't disabled */ + if (fd->hints->romio_cb_read != PNCIO_HINT_DISABLE) { /* For this process's request, calculate the file start and end * offsets. Note: end_offset points to the last byte-offset that will * be accessed, e.g., if start_offset=0 and 100 bytes to be read, @@ -124,8 +124,8 @@ double curT = MPI_Wtime(); interleave_count++; } - if (fd->hints->cb_read == PNCIO_HINT_DISABLE - || (!interleave_count && (fd->hints->cb_read == PNCIO_HINT_AUTO))) { + if (fd->hints->romio_cb_read == PNCIO_HINT_DISABLE + || (!interleave_count && (fd->hints->romio_cb_read == PNCIO_HINT_AUTO))) { /* switch to independent read */ if (st_offsets != NULL) NCI_Free(st_offsets); diff --git a/src/drivers/pncio/pncio_read_str.c b/src/drivers/pncio/pncio_read_str.c index ae554c2fe..efbfe1a49 100644 --- a/src/drivers/pncio/pncio_read_str.c +++ b/src/drivers/pncio/pncio_read_str.c @@ -55,7 +55,7 @@ MPI_Offset PNCIO_GEN_ReadStrided(PNCIO_File *fd, // printf("%s at %d:\n",__func__,__LINE__); - if (fd->hints->ds_read == PNCIO_HINT_DISABLE) { + if (fd->hints->romio_ds_read == PNCIO_HINT_DISABLE) { /* if user has disabled data sieving on reads, use naive * approach instead. 
*/ diff --git a/src/drivers/pncio/pncio_write_coll.c b/src/drivers/pncio/pncio_write_coll.c index d6126c5ce..f00ef3bf9 100644 --- a/src/drivers/pncio/pncio_write_coll.c +++ b/src/drivers/pncio/pncio_write_coll.c @@ -83,8 +83,8 @@ double curT = MPI_Wtime(); */ nprocs_for_coll = fd->hints->cb_nodes; - /* only check for interleaving if cb_write isn't disabled */ - if (fd->hints->cb_write != PNCIO_HINT_DISABLE) { + /* only check for interleaving if romio_cb_write isn't disabled */ + if (fd->hints->romio_cb_write != PNCIO_HINT_DISABLE) { /* For this process's request, calculate the file start and end * offsets. Note: end_offset points to the last byte-offset that will * be accessed, e.g., if start_offset=0 and 100 bytes to be read, @@ -127,11 +127,11 @@ double curT = MPI_Wtime(); interleave_count++; } - if (fd->hints->cb_write == PNCIO_HINT_DISABLE || - (!interleave_count && (fd->hints->cb_write == PNCIO_HINT_AUTO))) { + if (fd->hints->romio_cb_write == PNCIO_HINT_DISABLE || + (!interleave_count && (fd->hints->romio_cb_write == PNCIO_HINT_AUTO))) { /* use independent accesses */ - if (fd->hints->cb_write != PNCIO_HINT_DISABLE) + if (fd->hints->romio_cb_write != PNCIO_HINT_DISABLE) NCI_Free(st_offsets); if (buf_view.size == 0) return 0; diff --git a/src/drivers/pncio/pncio_write_str.c b/src/drivers/pncio/pncio_write_str.c index cb4ac25e8..6cc1f555f 100644 --- a/src/drivers/pncio/pncio_write_str.c +++ b/src/drivers/pncio/pncio_write_str.c @@ -14,14 +14,14 @@ if (writebuf_len) { \ w_len = PNCIO_WriteContig(fd, writebuf, writebuf_len, \ writebuf_off); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (w_len < 0) goto fn_exit; \ total_w_len += w_len; \ } \ writebuf_off = req_off; \ writebuf_len = MIN(max_bufsize,end_offset-writebuf_off+1); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if 
(!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ r_len = PNCIO_ReadContig(fd, writebuf, writebuf_len, writebuf_off); \ if (r_len < 0) goto fn_exit; \ @@ -30,7 +30,7 @@ memcpy(writebuf+req_off-writebuf_off, (char*)buf +userbuf_off, write_sz); \ while (write_sz != req_len) { \ w_len = PNCIO_WriteContig(fd, writebuf, writebuf_len, writebuf_off); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ if (w_len < 0) goto fn_exit; \ total_w_len += w_len; \ @@ -38,7 +38,7 @@ userbuf_off += write_sz; \ writebuf_off += writebuf_len; \ writebuf_len = MIN(max_bufsize,end_offset-writebuf_off+1); \ - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) \ + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) \ PNCIO_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \ r_len = PNCIO_ReadContig(fd, writebuf, writebuf_len, writebuf_off); \ if (r_len < 0) goto fn_exit; \ @@ -70,7 +70,7 @@ MPI_Offset PNCIO_GEN_WriteStrided(PNCIO_File *fd, */ assert(!(buf_view.is_contig && fd->flat_file.is_contig)); - if (fd->hints->ds_write == PNCIO_HINT_DISABLE) { + if (fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) { /* If user has disabled data sieving on reads, use naive approach * instead. 
*/ @@ -104,7 +104,7 @@ assert(fd->disp == 0); /* if atomicity is true or data sieving is not disable, lock the region * to be accessed */ - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); for (i = 0; i < buf_view.count; i++) { @@ -150,7 +150,7 @@ assert(fd->disp == 0); else w_len = 0; - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_UNLOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); if (w_len < 0) @@ -218,7 +218,7 @@ assert(offset == abs_off_in_filetype); /* if atomicity is true or data sieving is not disable, lock the region * to be accessed */ - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); writebuf_off = 0; @@ -311,12 +311,12 @@ assert(k < buf_view.count); /* write the buffer out finally */ if (writebuf_len) { w_len = PNCIO_WriteContig(fd, writebuf, writebuf_len, writebuf_off); - if (!fd->atomicity && fd->hints->ds_write == PNCIO_HINT_DISABLE) + if (!fd->atomicity && fd->hints->romio_ds_write == PNCIO_HINT_DISABLE) PNCIO_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); if (w_len < 0) goto fn_exit; total_w_len += w_len; } - if (fd->atomicity || fd->hints->ds_write != PNCIO_HINT_DISABLE) + if (fd->atomicity || fd->hints->romio_ds_write != PNCIO_HINT_DISABLE) PNCIO_UNLOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); } diff --git a/src/include/pnetcdf.h.in b/src/include/pnetcdf.h.in index df7e9f66a..6ce7499c6 100644 --- a/src/include/pnetcdf.h.in +++ b/src/include/pnetcdf.h.in @@ -686,9 +686,10 @@ by the desired type. 
*/ #define NC_EMULTIDEFINE_VAR_FILL_MODE (-271) /**< inconsistent variable fill mode */ #define NC_EMULTIDEFINE_VAR_FILL_VALUE (-272) /**< inconsistent variable fill value */ #define NC_EMULTIDEFINE_CMODE (-273) /**< inconsistent file create modes among processes */ +#define NC_EMULTIDEFINE_HINTS (-274) /**< inconsistent I/O hints among processes */ #define NC_EMULTIDEFINE_FIRST NC_EMULTIDEFINE -#define NC_EMULTIDEFINE_LAST NC_EMULTIDEFINE_CMODE +#define NC_EMULTIDEFINE_LAST NC_EMULTIDEFINE_HINTS /* backward compatible with PnetCDF 1.3.1 and earlier */ #define NC_ECMODE NC_EMULTIDEFINE_OMODE diff --git a/test/parallel_run.sh b/test/parallel_run.sh index 694cea504..295ea8ca7 100755 --- a/test/parallel_run.sh +++ b/test/parallel_run.sh @@ -258,8 +258,8 @@ for i in ${check_PROGRAMS} ; do $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc$j $OUT_PREFIX.mpio.ina.nc$j # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc$j $OUT_PREFIX.pncio.nc$j ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc$j $OUT_PREFIX.pncio.nc$j - # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc$j $OUT_PREFIX.pncio.ina.nc$j ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc$j $OUT_PREFIX.pncio.ina.nc$j + # echo "--- ncmpidiff $OUT_PREFIX.pncio.nc$j $OUT_PREFIX.pncio.ina.nc$j ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.nc$j $OUT_PREFIX.pncio.ina.nc$j done elif test "$i" = tst_pthread ; then for j in `seq 0 ${NTHREADS}` ; do @@ -267,8 +267,8 @@ for i in ${check_PROGRAMS} ; do $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.mpio.ina.nc.$j # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.nc.$j ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.nc.$j - # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.ina.nc.$j ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc.$j $OUT_PREFIX.pncio.ina.nc.$j + # echo "--- ncmpidiff $OUT_PREFIX.pncio.nc.$j $OUT_PREFIX.pncio.ina.nc.$j ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.nc.$j 
$OUT_PREFIX.pncio.ina.nc.$j done elif test "$i" = mcoll_perf ; then for j in `seq 0 9` ; do @@ -277,16 +277,16 @@ for i in ${check_PROGRAMS} ; do $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.$ext $OUT_PREFIX.mpio.ina.$ext # echo "--- ncmpidiff $OUT_PREFIX.mpio.$ext $OUT_PREFIX.pncio.$ext ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.$ext $OUT_PREFIX.pncio.$ext - # echo "--- ncmpidiff $OUT_PREFIX.mpio.$ext $OUT_PREFIX.pncio.ina.$ext ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.$ext $OUT_PREFIX.pncio.ina.$ext + # echo "--- ncmpidiff $OUT_PREFIX.pncio.$ext $OUT_PREFIX.pncio.ina.$ext ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.$ext $OUT_PREFIX.pncio.ina.$ext done else # echo "${LINENO}: --- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.mpio.ina.nc # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc ---" $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.nc - # echo "--- ncmpidiff $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc ---" - $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.mpio.nc $OUT_PREFIX.pncio.ina.nc + # echo "--- ncmpidiff $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc ---" + $MPIRUN $NCMPIDIFF $DIFF_OPT $OUT_PREFIX.pncio.nc $OUT_PREFIX.pncio.ina.nc fi done # safe_modes