Skip to content

Commit c695d41

Browse files
committed
add MPI-IO large-count APIs
1 parent b577aa8 commit c695d41

File tree

3 files changed

+147
-143
lines changed

3 files changed

+147
-143
lines changed

configure.ac

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,11 @@ AC_CHECK_FUNCS([MPI_Type_create_subarray_c \
13391339
MPI_Bcast_c \
13401340
MPI_Get_count_c \
13411341
MPI_Pack_c \
1342-
MPI_Unpack_c], [], [have_mpi_large_count_apis=no])
1342+
MPI_Unpack_c \
1343+
MPI_File_read_at_c \
1344+
MPI_File_read_at_all_c \
1345+
MPI_File_write_at_c \
1346+
MPI_File_write_at_all_c], [], [have_mpi_large_count_apis=no])
13431347
# If one of the above APIs is not available, have_mpi_large_count_apis will be
13441348
# set to no
13451349
UD_MSG_DEBUG([have_mpi_large_count_apis=$have_mpi_large_count_apis])

src/drivers/ncmpio/ncmpio_file_io.c

Lines changed: 82 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ ncmpio_read_write(NC *ncp,
2828
void *buf,
2929
int buftype_is_contig)
3030
{
31+
char *mpi_name;
3132
int status=NC_NOERR, err=NC_NOERR, mpireturn;
3233
MPI_Status mpistatus;
3334
MPI_File fh;
@@ -37,16 +38,19 @@ ncmpio_read_write(NC *ncp,
3738
MPI_Count btype_size;
3839
/* MPI_Type_size_c is introduced in MPI 4.0 */
3940
mpireturn = MPI_Type_size_c(buf_type, &btype_size);
41+
mpi_name = "MPI_Type_size_c";
4042
#elif defined(HAVE_MPI_TYPE_SIZE_X)
4143
MPI_Count btype_size;
4244
/* MPI_Type_size_x is introduced in MPI 3.0 */
4345
mpireturn = MPI_Type_size_x(buf_type, &btype_size);
46+
mpi_name = "MPI_Type_size_x";
4447
#else
4548
int btype_size;
4649
mpireturn = MPI_Type_size(buf_type, &btype_size);
50+
mpi_name = "MPI_Type_size";
4751
#endif
4852
if (mpireturn != MPI_SUCCESS) {
49-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_size");
53+
err = ncmpii_error_mpi2nc(mpireturn, mpi_name);
5054
/* return the first encountered error if there is any */
5155
err = (err == NC_EFILE) ? NC_EREAD : err;
5256
}
@@ -85,22 +89,14 @@ ncmpio_read_write(NC *ncp,
8589

8690
if (rw_flag == NC_REQ_RD) {
8791
void *xbuf=buf;
88-
int xlen=(int)buf_count;
8992
MPI_Datatype xbuf_type=buf_type;
9093

91-
if (buf_count > NC_MAX_INT) {
9294
#ifdef HAVE_MPI_LARGE_COUNT
93-
mpireturn = MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type);
94-
if (mpireturn != MPI_SUCCESS) {
95-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_contiguous_c");
96-
if (coll_indep == NC_REQ_COLL)
97-
DEBUG_ASSIGN_ERROR(status, err)
98-
else
99-
DEBUG_RETURN_ERROR(err)
100-
}
101-
MPI_Type_commit(&xbuf_type);
102-
xlen = 1;
95+
MPI_Count xlen = (MPI_Count)buf_count;
10396
#else
97+
int xlen = (int)buf_count;
98+
99+
if (buf_count > NC_MAX_INT) {
104100
if (coll_indep == NC_REQ_COLL) {
105101
#ifdef PNETCDF_DEBUG
106102
fprintf(stderr,"%d: %s line %d: NC_EINTOVERFLOW buf_count=%lld\n",
@@ -112,17 +108,21 @@ ncmpio_read_write(NC *ncp,
112108
}
113109
else
114110
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
115-
#endif
116111
}
117-
else if (buf_count > 0 && !buftype_is_contig &&
118-
req_size <= ncp->ibuf_size) {
112+
#endif
113+
114+
if (xlen > 0 && !buftype_is_contig && req_size <= ncp->ibuf_size) {
119115
/* if read buffer is noncontiguous and size is <= ncp->ibuf_size,
120116
* allocate a temporary buffer and use it to read, as some MPI implementations,
121117
* e.g. Cray on KNL, can be significantly slow when read buffer is
122118
* noncontiguous.
123119
*/
120+
#ifdef HAVE_MPI_LARGE_COUNT
121+
xbuf_type = MPI_BYTE;
122+
xlen = (MPI_Count)req_size;
123+
#else
124124
if (req_size > NC_MAX_INT) {
125-
mpireturn = MPI_Type_contiguous((int)buf_count, buf_type, &xbuf_type);
125+
mpireturn = MPI_Type_contiguous(xlen, buf_type, &xbuf_type);
126126
if (mpireturn != MPI_SUCCESS) {
127127
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_contiguous");
128128
if (coll_indep == NC_REQ_COLL)
@@ -137,33 +137,32 @@ ncmpio_read_write(NC *ncp,
137137
xbuf_type = MPI_BYTE;
138138
xlen = (int)req_size;
139139
}
140+
#endif
140141
xbuf = NCI_Malloc((size_t)req_size);
141142
}
142143

143144
if (ncp->nprocs > 1 && coll_indep == NC_REQ_COLL) {
144-
TRACE_IO(MPI_File_read_at_all)(fh, offset, xbuf, xlen, xbuf_type,
145-
&mpistatus);
146-
if (mpireturn != MPI_SUCCESS) {
147-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_File_read_at_all");
148-
/* return the first encountered error if there is any */
149-
if (status == NC_NOERR) {
150-
err = (err == NC_EFILE) ? NC_EREAD : err;
151-
DEBUG_ASSIGN_ERROR(status, err)
152-
}
153-
}
145+
#ifdef HAVE_MPI_LARGE_COUNT
146+
TRACE_IO(MPI_File_read_at_all_c, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
147+
#else
148+
TRACE_IO(MPI_File_read_at_all, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
149+
#endif
154150
} else {
155-
TRACE_IO(MPI_File_read_at)(fh, offset, xbuf, xlen, xbuf_type,
156-
&mpistatus);
157-
if (mpireturn != MPI_SUCCESS) {
158-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_File_read_at");
159-
/* return the first encountered error if there is any */
160-
if (status == NC_NOERR) {
161-
err = (err == NC_EFILE) ? NC_EREAD : err;
162-
DEBUG_RETURN_ERROR(err)
163-
}
151+
#ifdef HAVE_MPI_LARGE_COUNT
152+
TRACE_IO(MPI_File_read_at_c, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
153+
#else
154+
TRACE_IO(MPI_File_read_at, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
155+
#endif
156+
}
157+
if (mpireturn != MPI_SUCCESS) {
158+
err = ncmpii_error_mpi2nc(mpireturn, mpi_name);
159+
/* return the first encountered error if there is any */
160+
if (status == NC_NOERR) {
161+
err = (err == NC_EFILE) ? NC_EREAD : err;
162+
DEBUG_ASSIGN_ERROR(status, err)
164163
}
165164
}
166-
if (mpireturn == MPI_SUCCESS) {
165+
else {
167166
/* update the number of bytes read since file open */
168167
#ifdef HAVE_MPI_GET_COUNT_C
169168
MPI_Count get_size;
@@ -193,13 +192,15 @@ ncmpio_read_write(NC *ncp,
193192
MPI_Count pos=0;
194193
mpireturn = MPI_Unpack_c(xbuf, xlen, &pos, buf, (MPI_Count)buf_count,
195194
buf_type, MPI_COMM_SELF);
195+
mpi_name = "MPI_Unpack_c";
196196
#else
197197
int pos=0;
198198
mpireturn = MPI_Unpack(xbuf, xlen, &pos, buf, (int)buf_count,
199199
buf_type, MPI_COMM_SELF);
200+
mpi_name = "MPI_Unpack";
200201
#endif
201202
if (mpireturn != MPI_SUCCESS) {
202-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Unpack");
203+
err = ncmpii_error_mpi2nc(mpireturn, mpi_name);
203204
if (coll_indep == NC_REQ_COLL)
204205
DEBUG_ASSIGN_ERROR(status, err)
205206
else
@@ -211,22 +212,13 @@ ncmpio_read_write(NC *ncp,
211212
MPI_Type_free(&xbuf_type);
212213
} else { /* NC_REQ_WR */
213214
void *xbuf=buf;
214-
int xlen=(int)buf_count;
215215
MPI_Datatype xbuf_type=buf_type;
216216

217-
if (buf_count > NC_MAX_INT) {
218217
#ifdef HAVE_MPI_LARGE_COUNT
219-
mpireturn = MPI_Type_contiguous_c((MPI_Count)buf_count, buf_type, &xbuf_type);
220-
if (mpireturn != MPI_SUCCESS) {
221-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_contiguous_c");
222-
if (coll_indep == NC_REQ_COLL)
223-
DEBUG_ASSIGN_ERROR(status, err)
224-
else
225-
DEBUG_RETURN_ERROR(err)
226-
}
227-
MPI_Type_commit(&xbuf_type);
228-
xlen = 1;
218+
MPI_Count xlen = (MPI_Count)buf_count;
229219
#else
220+
int xlen = (int)buf_count;
221+
if (buf_count > NC_MAX_INT) {
230222
if (coll_indep == NC_REQ_COLL) {
231223
#ifdef PNETCDF_DEBUG
232224
fprintf(stderr,"%d: %s line %d: NC_EINTOVERFLOW buf_count=%lld\n",
@@ -238,87 +230,71 @@ ncmpio_read_write(NC *ncp,
238230
}
239231
else
240232
DEBUG_RETURN_ERROR(NC_EINTOVERFLOW)
241-
#endif
242233
}
243-
else if (buf_count > 0 && !buftype_is_contig &&
244-
req_size <= ncp->ibuf_size) {
234+
#endif
235+
236+
if (xlen > 0 && !buftype_is_contig && req_size <= ncp->ibuf_size) {
245237
/* if write buffer is noncontiguous and size is <= ncp->ibuf_size,
246238
* allocate a temporary buffer and use it to write, as some MPI implementations,
247239
* e.g. Cray on KNL, can be significantly slow when write buffer is
248240
* noncontiguous.
249241
*/
250-
if (req_size > NC_MAX_INT) {
251242
#ifdef HAVE_MPI_LARGE_COUNT
252-
MPI_Count pos=0;
253-
xbuf = NCI_Malloc(req_size);
254-
mpireturn = MPI_Pack_c(buf, (MPI_Count)buf_count, buf_type, xbuf,
255-
(MPI_Count)req_size, &pos, MPI_COMM_SELF);
256-
if (mpireturn != MPI_SUCCESS) {
257-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Pack_c");
258-
if (coll_indep == NC_REQ_COLL)
259-
DEBUG_ASSIGN_ERROR(status, err)
260-
else
261-
DEBUG_RETURN_ERROR(err)
262-
}
263-
mpireturn = MPI_Type_contiguous_c((MPI_Count)req_size, MPI_BYTE, &xbuf_type);
264-
if (mpireturn != MPI_SUCCESS) {
265-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Type_contiguous_c");
266-
if (coll_indep == NC_REQ_COLL) {
267-
DEBUG_ASSIGN_ERROR(status, err)
268-
xlen = 0;
269-
}
270-
else
271-
DEBUG_RETURN_ERROR(err)
272-
}
273-
MPI_Type_commit(&xbuf_type);
274-
xlen = 1;
243+
MPI_Count pos=0;
244+
xbuf_type = MPI_BYTE;
245+
xlen = (MPI_Count)req_size;
246+
xbuf = NCI_Malloc(req_size);
247+
mpireturn = MPI_Pack_c(buf, (MPI_Count)buf_count, buf_type, xbuf,
248+
(MPI_Count)req_size, &pos, MPI_COMM_SELF);
249+
mpi_name = "MPI_Pack_c";
275250
#else
251+
if (req_size > NC_MAX_INT) {
276252
/* skip packing write data into a temp buffer */
277253
xlen = (int)buf_count;
278254
xbuf_type = buf_type;
279-
#endif
255+
mpireturn = MPI_SUCCESS;
280256
}
281257
else {
282258
int pos=0;
259+
xbuf_type = MPI_BYTE;
283260
xlen = (int)req_size;
284261
xbuf = NCI_Malloc(xlen);
285262
mpireturn = MPI_Pack(buf, (int)buf_count, buf_type, xbuf,
286263
xlen, &pos, MPI_COMM_SELF);
287-
if (mpireturn != MPI_SUCCESS) {
288-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_Pack");
289-
if (coll_indep == NC_REQ_COLL)
290-
DEBUG_ASSIGN_ERROR(status, err)
291-
else
292-
DEBUG_RETURN_ERROR(err)
293-
}
294-
xbuf_type = MPI_BYTE;
264+
mpi_name = "MPI_Pack";
295265
}
296-
}
297-
298-
if (ncp->nprocs > 1 && coll_indep == NC_REQ_COLL) {
299-
TRACE_IO(MPI_File_write_at_all)(fh, offset, xbuf, xlen, xbuf_type,
300-
&mpistatus);
266+
#endif
301267
if (mpireturn != MPI_SUCCESS) {
302-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_File_write_at_all");
303-
/* return the first encountered error if there is any */
304-
if (status == NC_NOERR) {
305-
err = (err == NC_EFILE) ? NC_EWRITE : err;
268+
err = ncmpii_error_mpi2nc(mpireturn, mpi_name);
269+
if (coll_indep == NC_REQ_COLL)
306270
DEBUG_ASSIGN_ERROR(status, err)
307-
}
271+
else
272+
DEBUG_RETURN_ERROR(err)
308273
}
274+
}
275+
276+
if (ncp->nprocs > 1 && coll_indep == NC_REQ_COLL) {
277+
#ifdef HAVE_MPI_LARGE_COUNT
278+
TRACE_IO(MPI_File_write_at_all_c, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
279+
#else
280+
TRACE_IO(MPI_File_write_at_all, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
281+
#endif
309282
} else {
310-
TRACE_IO(MPI_File_write_at)(fh, offset, xbuf, xlen, xbuf_type,
311-
&mpistatus);
312-
if (mpireturn != MPI_SUCCESS) {
313-
err = ncmpii_error_mpi2nc(mpireturn, "MPI_File_write_at");
314-
/* return the first encountered error if there is any */
315-
if (status == NC_NOERR) {
316-
err = (err == NC_EFILE) ? NC_EWRITE : err;
317-
DEBUG_RETURN_ERROR(err)
318-
}
283+
#ifdef HAVE_MPI_LARGE_COUNT
284+
TRACE_IO(MPI_File_write_at_c, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
285+
#else
286+
TRACE_IO(MPI_File_write_at, (fh, offset, xbuf, xlen, xbuf_type, &mpistatus));
287+
#endif
288+
}
289+
if (mpireturn != MPI_SUCCESS) {
290+
err = ncmpii_error_mpi2nc(mpireturn, mpi_name);
291+
/* return the first encountered error if there is any */
292+
if (status == NC_NOERR) {
293+
err = (err == NC_EFILE) ? NC_EWRITE : err;
294+
DEBUG_ASSIGN_ERROR(status, err)
319295
}
320296
}
321-
if (mpireturn == MPI_SUCCESS) {
297+
else {
322298
/* update the number of bytes written since file open */
323299
#ifdef HAVE_MPI_GET_COUNT_C
324300
MPI_Count put_size;

0 commit comments

Comments
 (0)