Skip to content

Incorrect data returned by pio_get_var for ADIOS type when IO root task rank is non-zero #574

@dqwu

Description

@dqwu

Within a union communicator, IO tasks constitute a subset of compute
tasks, wherein the rank of the IO root task may not necessarily be 0.
For instance, in a scenario involving 16 compute tasks, IO tasks could
be designated as {1, 5, 9, 13}.

Under these circumstances, pio_get_var retrieves inaccurate data
specifically with ADIOS type (although it functions correctly with
PnetCDF type).

[Steps to reproduce]

First, check out the latest scorpio master branch.

git clone https://github.com/E3SM-Project/scorpio.git
cd scorpio

Next, replace the content of examples/c/example1.c with the test code shown below.

#include <stdio.h>
#include <mpi.h>
#include <pio.h>
#ifdef TIMING
#include <gptl.h>
#endif

#define NUM_NETCDF_FLAVORS 2

/*
 * Reproducer for incorrect PIOc_get_var_int results with the ADIOS I/O type
 * when the IO root task is not rank 0.
 *
 * For every I/O type enabled at build time (_PNETCDF, _ADIOS2) the program
 * creates a file, defines a scalar int variable, writes put_var_val to it,
 * closes the file, reopens it, reads the variable back, and prints a message
 * on each rank whose read-back value differs from what was written.
 *
 * Must be launched with exactly 16 MPI tasks; otherwise rank 0 prints a
 * notice and the program returns -1.
 *
 * Returns 0 after the loop completes (a data mismatch is only reported on
 * stdout, it does not change the exit code).
 */
int main(int argc, char* argv[])
{
    /* IO decomposition passed to PIOc_Init_Intracomm. A non-zero start is
     * the condition under test: with 16 tasks the issue describes the
     * resulting IO tasks as {1, 5, 9, 13}. */
    const int niotasks = 4;
    const int ioproc_stride = 4;
    const int ioproc_start = 1;

    /* Value written to the scalar variable and expected on read-back. */
    const int put_var_val = 42;

    int rank;
    int ntasks;
    int format[NUM_NETCDF_FLAVORS];
    int iosysid;
    char filename[PIO_MAX_NAME];
    int num_flavors = 0;

#ifdef TIMING
    GPTLinitialize();
#endif

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);

    if (ntasks != 16)
    {
        /* Only one rank reports, to avoid 16 copies of the same message. */
        if (rank == 0)
        {
            printf("This test must be run with exactly 16 MPI tasks!\n");
        }

        MPI_Finalize();

#ifdef TIMING
        GPTLfinalize();
#endif

        return -1;
    }

    PIOc_Init_Intracomm(MPI_COMM_WORLD, niotasks, ioproc_stride, ioproc_start, PIO_REARR_BOX, &iosysid);

    /* Collect the I/O types this build supports; PnetCDF (if present) is
     * index 0 and ADIOS follows it. */
#ifdef _PNETCDF
    format[num_flavors++] = PIO_IOTYPE_PNETCDF;
#endif
#ifdef _ADIOS2
    format[num_flavors++] = PIO_IOTYPE_ADIOS;
#endif

    for (int fmt = 0; fmt < num_flavors; fmt++)
    {
        int ncid = -1;
        int varid = -1;
        int get_var_val = -1;

        /* Bounded formatting: never writes past the end of filename. */
        snprintf(filename, sizeof filename, "test_get_var_%d.nc", fmt);

        /* Write phase: create the file, define a 0-dimensional int
         * variable, and store put_var_val in it. */
        PIOc_createfile(iosysid, &ncid, &(format[fmt]), filename, PIO_CLOBBER);
        PIOc_def_var(ncid, "dummy_scalar_var", PIO_INT, 0, NULL, &varid);
        PIOc_enddef(ncid);
        PIOc_put_var_int(ncid, varid, &put_var_val);
        PIOc_closefile(ncid);

        /* Read phase: reset the ids to -1 so that stale values from the
         * write phase are not reused if open/inq do not set them. */
        ncid = -1;
        PIOc_openfile(iosysid, &ncid, &(format[fmt]), filename, PIO_NOWRITE);

        varid = -1;
        PIOc_inq_varid(ncid, "dummy_scalar_var", &varid);

        /* get_var_val starts at -1, so "actual: -1" in the output means the
         * call left the caller's buffer untouched. */
        PIOc_get_var_int(ncid, varid, &get_var_val);

        if (get_var_val != put_var_val)
        {
            printf("fmt = %d, rank = %d, pio_get_var returned wrong data, expected: %d actual: %d\n",
                   fmt, rank, put_var_val, get_var_val);
        }

        PIOc_closefile(ncid);
    }

    PIOc_finalize(iosysid);

    MPI_Finalize();

#ifdef TIMING
    GPTLfinalize();
#endif

    return 0;
}

Then, build scorpio and run example1.

mkdir build
cd build

ADIOS2_DIR=/path/to/adios2/installation \
CC=mpicc CXX=mpicxx FC=mpifort cmake -Wno-dev \
-DWITH_ADIOS2=ON \
-DWITH_NETCDF=OFF \
-DPnetCDF_PATH=/path/to/pnetcdf/installation \
-DPIO_USE_MALLOC=ON \
-DPIO_ENABLE_EXAMPLES=ON \
..

make

cd examples/c

mpiexec -n 16 ./example1

Output is shown below.

fmt = 1, rank = 0, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 1, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 2, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 3, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 4, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 5, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 6, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 7, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 8, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 9, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 10, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 11, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 12, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 13, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 14, pio_get_var returned wrong data, expected: 42 actual: -1
fmt = 1, rank = 15, pio_get_var returned wrong data, expected: 42 actual: -1

Metadata

Metadata

Assignees

Labels

ADIOS (All ADIOS related issues/enhancements), bug

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions