1414#include < stdlib.h>
1515#include < string.h>
1616#include < libgen.h> /* basename() */
17+ #include < limits.h> /* LONG_MAX */
1718#include < assert.h>
1819
1920#include < mpi.h>
2021#include < e3sm_io.h>
2122
23+ typedef struct {
24+ MPI_Offset nreqs;
25+ MPI_Offset amount;
26+ MPI_Offset sum_decomp_varlen;
27+ double timing[7 ];
28+ } perf_metrics;
29+
2230#ifdef ENABLE_PNC
2331/* ----< pnetcdf_check_mem_usage() >------------------------------------------*/
2432/* check PnetCDF library internal memory usage */
@@ -62,12 +70,11 @@ int print_timing_WR(e3sm_io_config *cfg,
6270 e3sm_io_decom *decom,
6371 case_meta *cmeta)
6472{
65- int i, ndecomp;
66- MPI_Offset off_msg[3 ], sum_off[3 ];
67- MPI_Offset sum_nreqs, sum_amount_WR, max_nreqs, min_nreqs, max_wr, min_wr;
73+ int i, j, ndecomp;
6874 MPI_Offset vlen, sum_decomp_varlen;
69- double wTime;
7075 MPI_Comm comm=cfg->io_comm ;
76+ double ioTime;
77+ perf_metrics metrics, *all_metrics;
7178
7279 ndecomp = decom->num_decomp ;
7380
@@ -90,31 +97,54 @@ int print_timing_WR(e3sm_io_config *cfg,
9097 * total size.
9198 */
9299
93- off_msg[0 ] = cmeta->my_nreqs ;
94- off_msg[1 ] = cmeta->amount_WR ;
95- off_msg[2 ] = sum_decomp_varlen;
96- MPI_Reduce (off_msg, sum_off, 3 , MPI_OFFSET, MPI_SUM, 0 , comm);
97- sum_nreqs = sum_off[0 ];
98- sum_amount_WR = sum_off[1 ];
99- sum_decomp_varlen = sum_off[2 ];
100-
101- MPI_Reduce (&cmeta->my_nreqs , &max_nreqs, 1 , MPI_OFFSET, MPI_MAX, 0 , comm);
102- MPI_Reduce (&cmeta->my_nreqs , &min_nreqs, 1 , MPI_OFFSET, MPI_MIN, 0 , comm);
103- MPI_Reduce (&cmeta->amount_WR , &max_wr, 1 , MPI_OFFSET, MPI_MAX, 0 , comm);
104- MPI_Reduce (&cmeta->amount_WR , &min_wr, 1 , MPI_OFFSET, MPI_MIN, 0 , comm);
105-
106- double dbl_tmp[7 ], max_dbl[7 ], min_dbl[7 ];
107- dbl_tmp[0 ] = cmeta->pre_time ;
108- dbl_tmp[1 ] = cmeta->open_time ;
109- dbl_tmp[2 ] = cmeta->def_time ;
110- dbl_tmp[3 ] = cmeta->post_time ;
111- dbl_tmp[4 ] = cmeta->flush_time ;
112- dbl_tmp[5 ] = cmeta->close_time ;
113- dbl_tmp[6 ] = cmeta->end2end_time ;
114- MPI_Reduce (dbl_tmp, max_dbl, 7 , MPI_DOUBLE, MPI_MAX, 0 , comm);
115- MPI_Reduce (dbl_tmp, min_dbl, 7 , MPI_DOUBLE, MPI_MIN, 0 , comm);
100+ metrics.nreqs = cmeta->my_nreqs ;
101+ metrics.amount = cmeta->amount_WR ;
102+ metrics.sum_decomp_varlen = sum_decomp_varlen;
103+ metrics.timing [0 ] = cmeta->pre_time ;
104+ metrics.timing [1 ] = cmeta->open_time ;
105+ metrics.timing [2 ] = cmeta->def_time ;
106+ metrics.timing [3 ] = cmeta->post_time ;
107+ metrics.timing [4 ] = cmeta->flush_time ;
108+ metrics.timing [5 ] = cmeta->close_time ;
109+ metrics.timing [6 ] = cmeta->end2end_time ;
110+
111+ if (cfg->rank == 0 )
112+ all_metrics = (perf_metrics*) malloc (sizeof (perf_metrics) * cfg->np );
113+
114+ MPI_Gather (&metrics, sizeof (perf_metrics), MPI_BYTE,
115+ all_metrics, sizeof (perf_metrics), MPI_BYTE, 0 , comm);
116116
117117 if (cfg->rank == 0 ) {
118+ MPI_Offset sum_nreqs, sum_amount, max_nreqs, min_nreqs, max_amnt, min_amnt;
119+ double max_t [7 ], min_t [7 ];
120+
121+ sum_nreqs = 0 ;
122+ sum_amount = 0 ;
123+ sum_decomp_varlen = 0 ;
124+ max_nreqs = 0 ;
125+ max_amnt = 0 ;
126+ min_nreqs = LONG_MAX;
127+ min_amnt = LONG_MAX;
128+ for (j=0 ; j<7 ; j++) {
129+ max_t [j] = 0 ;
130+ min_t [j] = (double )LONG_MAX;
131+ }
132+
133+ for (i=0 ; i<cfg->np ; i++) {
134+ sum_nreqs += all_metrics[i].nreqs ;
135+ sum_amount += all_metrics[i].amount ;
136+ sum_decomp_varlen += all_metrics[i].sum_decomp_varlen ;
137+ max_nreqs = MAX (max_nreqs, all_metrics[i].nreqs );
138+ max_amnt = MAX (max_amnt, all_metrics[i].amount );
139+ min_nreqs = MIN (min_nreqs, all_metrics[i].nreqs );
140+ min_amnt = MIN (min_amnt, all_metrics[i].amount );
141+ for (j=0 ; j<7 ; j++) {
142+ max_t [j] = MAX (max_t [j], all_metrics[i].timing [j]);
143+ min_t [j] = MIN (min_t [j], all_metrics[i].timing [j]);
144+ }
145+ }
146+ free (all_metrics);
147+
118148 int nvars_noD = cmeta->nvars ;
119149 for (i=0 ; i<ndecomp; i++) nvars_noD -= cmeta->nvars_D [i];
120150
@@ -187,13 +217,13 @@ int print_timing_WR(e3sm_io_config *cfg,
187217 }
188218
189219 if (cfg->strategy == canonical && (cfg->api == netcdf4 || cfg->api == hdf5))
190- wTime = max_dbl [3 ];
220+ ioTime = max_t [3 ];
191221 else if (cfg->strategy == log && cfg->api == netcdf4)
192- wTime = max_dbl [5 ];
222+ ioTime = max_t [5 ];
193223 else if (cfg->api == pnetcdf || cfg->strategy == log || cfg->api == hdf5_md)
194- wTime = max_dbl [4 ];
224+ ioTime = max_t [4 ];
195225 else /* write happens at file close for hdf5 blob and adios blob */
196- wTime = max_dbl [5 ];
226+ ioTime = max_t [5 ];
197227
198228 if (cfg->strategy == log && (cfg->api == hdf5_log || cfg->api == netcdf4)) {
199229 if (cfg->num_subfiles != 0 ) {
@@ -247,24 +277,24 @@ int print_timing_WR(e3sm_io_config *cfg,
247277 printf (" No. I/O flush calls = %6d\n " , cmeta->num_flushes );
248278 printf (" -----------------------------------------------------------\n " );
249279 printf (" Indiv write amount MAX = %.2f MiB = %.2f GiB\n " ,
250- (double )max_wr / 1048576 , (double )max_wr / 1073741824 );
280+ (double )max_amnt / 1048576 , (double )max_amnt / 1073741824 );
251281 printf (" Indiv write amount MIN = %.2f MiB = %.2f GiB\n " ,
252- (double )min_wr / 1048576 , (double )min_wr / 1073741824 );
282+ (double )min_amnt / 1048576 , (double )min_amnt / 1073741824 );
253283 printf (" Total write amount = %.2f MiB = %.2f GiB\n " ,
254- (double )sum_amount_WR / 1048576 , (double )sum_amount_WR / 1073741824 );
284+ (double )sum_amount / 1048576 , (double )sum_amount / 1073741824 );
255285 printf (" -----------------------------------------------------------\n " );
256- printf (" Time of I/O preparing min/max = %8.4f / %8.4f\n " , min_dbl [0 ], max_dbl [0 ]);
257- printf (" Time of file open/create min/max = %8.4f / %8.4f\n " , min_dbl [1 ], max_dbl [1 ]);
258- printf (" Time of define variables min/max = %8.4f / %8.4f\n " , min_dbl [2 ], max_dbl [2 ]);
259- printf (" Time of posting write requests min/max = %8.4f / %8.4f\n " , min_dbl [3 ], max_dbl [3 ]);
260- printf (" Time of write flushing min/max = %8.4f / %8.4f\n " , min_dbl [4 ], max_dbl [4 ]);
261- printf (" Time of close min/max = %8.4f / %8.4f\n " , min_dbl [5 ], max_dbl [5 ]);
262- printf (" end-to-end time min/max = %8.4f / %8.4f\n " , min_dbl [6 ], max_dbl [6 ]);
286+ printf (" Time of I/O preparing min/max = %8.4f / %8.4f\n " , min_t [0 ], max_t [0 ]);
287+ printf (" Time of file open/create min/max = %8.4f / %8.4f\n " , min_t [1 ], max_t [1 ]);
288+ printf (" Time of define variables min/max = %8.4f / %8.4f\n " , min_t [2 ], max_t [2 ]);
289+ printf (" Time of posting write requests min/max = %8.4f / %8.4f\n " , min_t [3 ], max_t [3 ]);
290+ printf (" Time of write flushing min/max = %8.4f / %8.4f\n " , min_t [4 ], max_t [4 ]);
291+ printf (" Time of close min/max = %8.4f / %8.4f\n " , min_t [5 ], max_t [5 ]);
292+ printf (" end-to-end time min/max = %8.4f / %8.4f\n " , min_t [6 ], max_t [6 ]);
263293 printf (" Emulate computation time (sleep) min/max = %8.4f / %8.4f\n " , (double )(cfg->comp_time ), (double )(cfg->comp_time ));
264294 printf (" I/O bandwidth in MiB/sec (write-only) = %.4f\n " ,
265- (double )sum_amount_WR / 1048576.0 / wTime );
295+ (double )sum_amount / 1048576.0 / ioTime );
266296 printf (" I/O bandwidth in MiB/sec (open-to-close) = %.4f\n " ,
267- (double )sum_amount_WR / 1048576.0 / max_dbl [6 ]);
297+ (double )sum_amount / 1048576.0 / max_t [6 ]);
268298 printf (" -----------------------------------------------------------\n " );
269299
270300 /* print MPI-IO hints actually used */
@@ -308,12 +338,11 @@ int print_timing_RD(e3sm_io_config *cfg,
308338 e3sm_io_decom *decom,
309339 case_meta *cmeta)
310340{
311- int i, ndecomp;
312- MPI_Offset off_msg[3 ], sum_off[3 ];
313- MPI_Offset sum_nreqs, sum_amount_RD, max_nreqs, min_nreqs;
341+ int i, j, ndecomp;
314342 MPI_Offset vlen, sum_decomp_varlen;
315- double wTime;
316343 MPI_Comm comm=cfg->io_comm ;
344+ double ioTime;
345+ perf_metrics metrics, *all_metrics;
317346
318347 ndecomp = decom->num_decomp ;
319348
@@ -336,29 +365,54 @@ int print_timing_RD(e3sm_io_config *cfg,
336365 * total size.
337366 */
338367
339- off_msg[0 ] = cmeta->my_nreqs ;
340- off_msg[1 ] = cmeta->amount_RD ;
341- off_msg[2 ] = sum_decomp_varlen;
342- MPI_Reduce (off_msg, sum_off, 3 , MPI_OFFSET, MPI_SUM, 0 , comm);
343- sum_nreqs = sum_off[0 ];
344- sum_amount_RD = sum_off[1 ];
345- sum_decomp_varlen = sum_off[2 ];
346-
347- MPI_Reduce (&cmeta->my_nreqs , &max_nreqs, 1 , MPI_OFFSET, MPI_MAX, 0 , comm);
348- MPI_Reduce (&cmeta->my_nreqs , &min_nreqs, 1 , MPI_OFFSET, MPI_MIN, 0 , comm);
349-
350- double dbl_tmp[7 ], max_dbl[7 ], min_dbl[7 ];
351- dbl_tmp[0 ] = cmeta->pre_time ;
352- dbl_tmp[1 ] = cmeta->open_time ;
353- dbl_tmp[2 ] = cmeta->def_time ;
354- dbl_tmp[3 ] = cmeta->post_time ;
355- dbl_tmp[4 ] = cmeta->flush_time ;
356- dbl_tmp[5 ] = cmeta->close_time ;
357- dbl_tmp[6 ] = cmeta->end2end_time ;
358- MPI_Reduce (dbl_tmp, max_dbl, 7 , MPI_DOUBLE, MPI_MAX, 0 , comm);
359- MPI_Reduce (dbl_tmp, min_dbl, 7 , MPI_DOUBLE, MPI_MIN, 0 , comm);
368+ metrics.nreqs = cmeta->my_nreqs ;
369+ metrics.amount = cmeta->amount_RD ;
370+ metrics.sum_decomp_varlen = sum_decomp_varlen;
371+ metrics.timing [0 ] = cmeta->pre_time ;
372+ metrics.timing [1 ] = cmeta->open_time ;
373+ metrics.timing [2 ] = cmeta->def_time ;
374+ metrics.timing [3 ] = cmeta->post_time ;
375+ metrics.timing [4 ] = cmeta->flush_time ;
376+ metrics.timing [5 ] = cmeta->close_time ;
377+ metrics.timing [6 ] = cmeta->end2end_time ;
378+
379+ if (cfg->rank == 0 )
380+ all_metrics = (perf_metrics*) malloc (sizeof (perf_metrics) * cfg->np );
381+
382+ MPI_Gather (&metrics, sizeof (perf_metrics), MPI_BYTE,
383+ all_metrics, sizeof (perf_metrics), MPI_BYTE, 0 , comm);
360384
361385 if (cfg->rank == 0 ) {
386+ MPI_Offset sum_nreqs, sum_amount, max_nreqs, min_nreqs, max_amnt, min_amnt;
387+ double max_t [7 ], min_t [7 ];
388+
389+ sum_nreqs = 0 ;
390+ sum_amount = 0 ;
391+ sum_decomp_varlen = 0 ;
392+ max_nreqs = 0 ;
393+ max_amnt = 0 ;
394+ min_nreqs = LONG_MAX;
395+ min_amnt = LONG_MAX;
396+ for (j=0 ; j<7 ; j++) {
397+ max_t [j] = 0 ;
398+ min_t [j] = (double )LONG_MAX;
399+ }
400+
401+ for (i=0 ; i<cfg->np ; i++) {
402+ sum_nreqs += all_metrics[i].nreqs ;
403+ sum_amount += all_metrics[i].amount ;
404+ sum_decomp_varlen += all_metrics[i].sum_decomp_varlen ;
405+ max_nreqs = MAX (max_nreqs, all_metrics[i].nreqs );
406+ max_amnt = MAX (max_amnt, all_metrics[i].amount );
407+ min_nreqs = MIN (min_nreqs, all_metrics[i].nreqs );
408+ min_amnt = MIN (min_amnt, all_metrics[i].amount );
409+ for (j=0 ; j<7 ; j++) {
410+ max_t [j] = MAX (max_t [j], all_metrics[i].timing [j]);
411+ min_t [j] = MIN (min_t [j], all_metrics[i].timing [j]);
412+ }
413+ }
414+ free (all_metrics);
415+
362416 int nvars_noD = cmeta->nvars ;
363417 for (i=0 ; i<ndecomp; i++) nvars_noD -= cmeta->nvars_D [i];
364418
@@ -379,40 +433,64 @@ int print_timing_RD(e3sm_io_config *cfg,
379433 printf (" Variable read order: same as variables are defined\n " );
380434
381435 if (cfg->strategy == canonical) {
382- if (cfg->api == pnetcdf)
436+ if (cfg->api == pnetcdf) {
383437 printf (" ==== PnetCDF canonical I/O using varn API ============\n " );
384- else if (cfg->api == hdf5)
438+ printf (" PnetCDF library version = %s\n " ,cfg->pnetcdf_ver );
439+ }
440+ else if (cfg->api == hdf5) {
385441 printf (" ==== HDF5 canonical I/O ==============================\n " );
386- else if (cfg->api == hdf5_md)
442+ printf (" HDF5 library version = %s\n " ,cfg->hdf5_ver );
443+ }
444+ else if (cfg->api == hdf5_md) {
387445 printf (" ==== HDF5 canonical I/O using multi-dataset API ======\n " );
388- else if (cfg->api == netcdf4)
446+ printf (" HDF5 library version = %s\n " ,cfg->hdf5_ver );
447+ }
448+ else if (cfg->api == netcdf4) {
389449 printf (" ==== NetCDF-4 canonical I/O ==========================\n " );
450+ printf (" NetCDF4 library version = %s\n " ,cfg->netcdf4_ver );
451+ }
390452 }
391453 else if (cfg->strategy == log) {
392- if (cfg->api == hdf5)
454+ if (cfg->api == hdf5) {
393455 printf (" ==== HDF5 using log-based VOL through native APIs=====\n " );
394- else if (cfg->api == hdf5_log)
456+ printf (" HDF5 library version = %s\n " ,cfg->hdf5_ver );
457+ printf (" HDF5 Log VOL connector version = %s\n " ,cfg->hdf5_log_ver );
458+ }
459+ else if (cfg->api == hdf5_log) {
395460 printf (" ==== HDF5 using log-based VOL APIs ===================\n " );
396- else if (cfg->api == netcdf4)
461+ printf (" HDF5 library version = %s\n " ,cfg->hdf5_ver );
462+ printf (" HDF5 Log VOL connector version = %s\n " ,cfg->hdf5_log_ver );
463+ }
464+ else if (cfg->api == netcdf4) {
397465 printf (" ==== NetCDF-4 using HDF5 log-based VOL ===============\n " );
466+ printf (" NetCDF4 library version = %s\n " ,cfg->netcdf4_ver );
467+ printf (" HDF5 library version = %s\n " ,cfg->hdf5_ver );
468+ printf (" HDF5 Log VOL connector version = %s\n " ,cfg->hdf5_log_ver );
469+ }
398470 }
399471 else if (cfg->strategy == blob) {
400- if (cfg->api == pnetcdf)
472+ if (cfg->api == pnetcdf) {
401473 printf (" ==== PnetCDF blob I/O ================================\n " );
402- else if (cfg->api == hdf5)
474+ printf (" PnetCDF library version = %s\n " ,cfg->pnetcdf_ver );
475+ }
476+ else if (cfg->api == hdf5) {
403477 printf (" ==== HDF5 blob I/O ===================================\n " );
404- else if (cfg->api == adios)
478+ printf (" HDF5 library version = %s\n " ,cfg->hdf5_ver );
479+ }
480+ else if (cfg->api == adios) {
405481 printf (" ==== ADIOS blob I/O ==================================\n " );
482+ printf (" ADIOS library version = %s\n " ,cfg->adios_ver );
483+ }
406484 }
407485
408486 if (cfg->strategy == canonical && (cfg->api == netcdf4 || cfg->api == hdf5))
409- wTime = max_dbl [3 ];
487+ ioTime = max_t [3 ];
410488 else if (cfg->strategy == log && cfg->api == netcdf4)
411- wTime = max_dbl [5 ];
489+ ioTime = max_t [5 ];
412490 else if (cfg->api == pnetcdf || cfg->strategy == log || cfg->api == hdf5_md)
413- wTime = max_dbl [4 ];
491+ ioTime = max_t [4 ];
414492 else /* read happens at file close for hdf5 blob and adios blob */
415- wTime = max_dbl [5 ];
493+ ioTime = max_t [5 ];
416494
417495 if (cfg->strategy == log && (cfg->api == hdf5_log || cfg->api == netcdf4)) {
418496 if (cfg->num_subfiles != 0 ) {
@@ -465,27 +543,35 @@ int print_timing_RD(e3sm_io_config *cfg,
465543 printf (" I/O flush frequency = %6d\n " , cmeta->ffreq );
466544 printf (" No. I/O flush calls = %6d\n " , cmeta->num_flushes );
467545 printf (" -----------------------------------------------------------\n " );
468- printf (" Total read amount = %.2f MiB = %.2f GiB\n " ,
469- (double )sum_amount_RD / 1048576 , (double )sum_amount_RD / 1073741824 );
470- printf (" Time of I/O preparing min/max = %8.4f / %8.4f\n " , min_dbl[0 ], max_dbl[0 ]);
471- printf (" Time of file open/create min/max = %8.4f / %8.4f\n " , min_dbl[1 ], max_dbl[1 ]);
472- printf (" Time of define variables min/max = %8.4f / %8.4f\n " , min_dbl[2 ], max_dbl[2 ]);
473- printf (" Time of posting read requests min/max = %8.4f / %8.4f\n " , min_dbl[3 ], max_dbl[3 ]);
474- printf (" Time of read flushing min/max = %8.4f / %8.4f\n " , min_dbl[4 ], max_dbl[4 ]);
475- printf (" Time of close min/max = %8.4f / %8.4f\n " , min_dbl[5 ], max_dbl[5 ]);
476- printf (" end-to-end time min/max = %8.4f / %8.4f\n " , min_dbl[6 ], max_dbl[6 ]);
546+ printf (" Indiv write amount MAX = %.2f MiB = %.2f GiB\n " ,
547+ (double )max_amnt / 1048576 , (double )max_amnt / 1073741824 );
548+ printf (" Indiv write amount MIN = %.2f MiB = %.2f GiB\n " ,
549+ (double )min_amnt / 1048576 , (double )min_amnt / 1073741824 );
550+ printf (" Total read amount = %.2f MiB = %.2f GiB\n " ,
551+ (double )sum_amount / 1048576 , (double )sum_amount / 1073741824 );
552+ printf (" Time of I/O preparing min/max = %8.4f / %8.4f\n " , min_t [0 ], max_t [0 ]);
553+ printf (" Time of file open/create min/max = %8.4f / %8.4f\n " , min_t [1 ], max_t [1 ]);
554+ printf (" Time of define variables min/max = %8.4f / %8.4f\n " , min_t [2 ], max_t [2 ]);
555+ printf (" Time of posting read requests min/max = %8.4f / %8.4f\n " , min_t [3 ], max_t [3 ]);
556+ printf (" Time of read flushing min/max = %8.4f / %8.4f\n " , min_t [4 ], max_t [4 ]);
557+ printf (" Time of close min/max = %8.4f / %8.4f\n " , min_t [5 ], max_t [5 ]);
558+ printf (" end-to-end time min/max = %8.4f / %8.4f\n " , min_t [6 ], max_t [6 ]);
477559 printf (" Emulate computation time (sleep) min/max = %8.4f / %8.4f\n " , (double )(cfg->comp_time ), (double )(cfg->comp_time ));
478560 printf (" I/O bandwidth in MiB/sec (read-only) = %.4f\n " ,
479- (double )sum_amount_RD / 1048576.0 / wTime );
561+ (double )sum_amount / 1048576.0 / ioTime );
480562 printf (" I/O bandwidth in MiB/sec (open-to-close) = %.4f\n " ,
481- (double )sum_amount_RD / 1048576.0 / max_dbl [6 ]);
563+ (double )sum_amount / 1048576.0 / max_t [6 ]);
482564 printf (" -----------------------------------------------------------\n " );
483565
484566 /* print MPI-IO hints actually used */
485567 if (cfg->verbose ) print_info (&cmeta->info_used );
486568 }
487569 fflush (stdout);
488570
571+ #ifdef ENABLE_PNC
572+ pnetcdf_check_mem_usage (comm);
573+ #endif
574+
489575 if (cmeta->info_used != MPI_INFO_NULL) MPI_Info_free (&cmeta->info_used );
490576
491577 return 0 ;
0 commit comments