Skip to content

Commit dcf5ae0

Browse files
committed
Enable hint file, run the benchmark once per line in the hint file
hint file name is "e3sm_io_hints.txt"
1 parent 4759892 commit dcf5ae0

File tree

2 files changed

+168
-46
lines changed

2 files changed

+168
-46
lines changed

e3sm_io_hints.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
start_iodevice=10;striping_unit=1048576;striping_factor=8;lustre_overstriping_ratio=1;cb_nodes=8
2+
start_iodevice=10;striping_unit=1048576;striping_factor=8;lustre_overstriping_ratio=1;cb_nodes=8;nc_num_aggrs_per_node=32

src/e3sm_io.c

Lines changed: 166 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -76,21 +76,124 @@ void check_connector_env(e3sm_io_config *cfg) {
7676
free(env_str);
7777
}
7878

79-
static inline int set_info (e3sm_io_config *cfg, e3sm_io_decom *decom) {
79+
static
80+
void parse_hint_file(e3sm_io_config *cfg,
81+
const char *filename,
82+
int *num_hint_lines,
83+
char **hint_lines)
84+
{
85+
/* read I/O hints from file e3sm_io_hints.txt, run e3sm_io_core() one per
86+
* line in the hint file. Each line contains hints as if it is set in the
87+
* environment variable PNETCDF_HINTS.
88+
*/
89+
MPI_Offset fsize = 0;
90+
char *hint_buf=NULL;
91+
92+
*num_hint_lines=0;
93+
94+
if (cfg->rank == 0) {
95+
FILE *fptr = fopen(filename, "r");
96+
if (fptr != NULL) {
97+
fseek(fptr, 0, SEEK_END);
98+
fsize = ftell(fptr) + 1;
99+
fseek(fptr, 0, SEEK_SET);
100+
hint_buf = (char*) malloc(fsize);
101+
fread(hint_buf, 1, fsize, fptr);
102+
hint_buf[fsize-1] = '\0';
103+
fclose(fptr);
104+
}
105+
}
106+
107+
MPI_Bcast(&fsize, 1, MPI_OFFSET, 0, cfg->io_comm);
108+
if (fsize > 0) {
109+
if (cfg->rank > 0) hint_buf = (char*) malloc(fsize);
110+
MPI_Bcast(hint_buf, fsize, MPI_BYTE, 0, cfg->io_comm);
111+
112+
char *hint_str = strtok(hint_buf, "\n");
113+
if (hint_str != NULL && strlen(hint_str) > 0)
114+
hint_lines[(*num_hint_lines)++] = strdup(hint_str);
115+
116+
while (strtok(NULL, "\n") != NULL)
117+
hint_lines[(*num_hint_lines)++] = strdup(hint_str);
118+
119+
free(hint_buf);
120+
}
121+
}
122+
123+
static
124+
int parse_hint_line(e3sm_io_config *cfg,
125+
const char *hint_str)
126+
{
127+
char *warn_str="Warning: skip ill-formed hint set in PNETCDF_HINTS";
128+
char *hint_saved, *ptr, *key, *val, *deli;
129+
int err=MPI_SUCCESS;
130+
131+
if (hint_str == NULL) return 1;
132+
133+
/* skip blank lines */
134+
hint_saved = strdup(hint_str);
135+
if (strlen(hint_saved) == 0 || strtok(hint_saved, " \t") == NULL) {
136+
free(hint_saved);
137+
return 1;
138+
}
139+
ptr = hint_saved;
140+
141+
do {
142+
if (*ptr == '\0') break; /* done with this line */
143+
144+
key = ptr;
145+
deli = strchr(ptr, ';');
146+
if (deli != NULL) {
147+
*deli = '\0'; /* add terminate char */
148+
ptr = deli + 1;
149+
}
150+
else
151+
ptr += strlen(ptr); /* last hint */
152+
153+
/* hint key */
154+
deli = strchr(key, '=');
155+
if (deli == NULL) {
156+
/* expect one token before = */
157+
printf("xxxx %s: '%s'\n", warn_str, key);
158+
break;
159+
}
160+
*deli = '\0'; /* add terminate char */
161+
162+
/* hint value */
163+
val = deli + 1;
164+
165+
/* override previouse set hint or add a new one */
166+
err = MPI_Info_set(cfg->info, key, val);
167+
CHECK_MPIERR
168+
169+
} while (*ptr != '\0');
170+
171+
err_out:
172+
free(hint_saved);
173+
return err;
174+
}
175+
176+
static
177+
int set_info(e3sm_io_config *cfg,
178+
const char *hint_str)
179+
{
80180
int err;
81181

82182
/* set MPI-IO hints */
83183

184+
err = MPI_Info_create (&(cfg->info));
185+
CHECK_MPIERR
186+
84187
/* collective write */
85188
err = MPI_Info_set (cfg->info, "romio_cb_write", "enable");
86189
CHECK_MPIERR
87190

88191
/* HDF5 may do independent I/O internally */
89192

90193
if (cfg->api == pnetcdf) {
91-
/* no independent MPI-IO */
92-
err = MPI_Info_set (cfg->info, "romio_no_indep_rw", "true");
93-
CHECK_MPIERR
194+
/* set MPI-IO hints here */
195+
// err = MPI_Info_set (cfg->info, "romio_no_indep_rw", "true");
196+
// CHECK_MPIERR
94197

95198
/* set PnetCDF I/O hints */
96199

@@ -121,6 +224,9 @@ static inline int set_info (e3sm_io_config *cfg, e3sm_io_decom *decom) {
121224
/* in-place byte swap */
122225
err = MPI_Info_set (cfg->info, "nc_in_place_swap", "enable");
123226
CHECK_MPIERR
227+
228+
err = parse_hint_line(cfg, hint_str);
229+
if (err == 0) goto err_out;
124230
}
125231

126232
err_out:
@@ -204,7 +310,7 @@ static void usage (char *argv0) {
204310

205311
/*----< main() >-------------------------------------------------------------*/
206312
int main (int argc, char **argv) {
207-
int i, err, nrecs=1, ffreq;
313+
int i, j, err, nrecs=1, ffreq;
208314
double timing[5], max_t[5];
209315
e3sm_io_config cfg;
210316
e3sm_io_decom decom;
@@ -224,7 +330,7 @@ int main (int argc, char **argv) {
224330
CHECK_ERR;
225331
#endif
226332

227-
timing[0] = MPI_Wtime();
333+
timing[1] = MPI_Wtime();
228334

229335
MPI_Comm_rank (MPI_COMM_WORLD, &(cfg.rank));
230336
MPI_Comm_size (MPI_COMM_WORLD, &(cfg.np));
@@ -562,7 +668,11 @@ int main (int argc, char **argv) {
562668
PRINT_MSG (1, "Input data file/folder name = %s\n", cfg.in_path);
563669
PRINT_MSG (1, "Output data file/folder name = %s\n", cfg.out_path);
564670

565-
timing[1] = MPI_Wtime() - timing[0];
671+
char *hint_lines[64];
672+
int num_hint_lines;
673+
parse_hint_file(&cfg, "e3sm_io_hints.txt", &num_hint_lines, hint_lines);
674+
675+
timing[1] = MPI_Wtime() - timing[1];
566676
MPI_Barrier(MPI_COMM_WORLD);
567677
timing[2] = MPI_Wtime();
568678

@@ -584,41 +694,65 @@ int main (int argc, char **argv) {
584694
cfg.run_case = unknown;
585695

586696
timing[2] = MPI_Wtime() - timing[2];
587-
MPI_Barrier(MPI_COMM_WORLD);
588-
timing[3] = MPI_Wtime();
589697

590-
/* set MPI-IO and PnetCDF hints */
591-
err = MPI_Info_create (&(cfg.info));
592-
CHECK_MPIERR
593-
err = set_info (&cfg, &decom);
594-
if (err < 0) goto err_out;
698+
j = 0;
699+
do {
595700

596-
/* the core of this benchmark */
597-
err = e3sm_io_core (&cfg, &decom);
598-
CHECK_ERR
701+
char *hint_str = (num_hint_lines == 0) ? NULL : hint_lines[j];
599702

600-
timing[3] = MPI_Wtime() - timing[3];
601-
MPI_Barrier(MPI_COMM_WORLD);
602-
timing[4] = MPI_Wtime();
703+
if (cfg.rank == 0) printf("\nHINTS: %s\n\n", (hint_str) ? hint_str : "");
603704

604-
/* report timing breakdowns */
605-
if (cfg.rd) {
606-
report_timing_RD(&cfg, &decom);
607-
}
608-
else{
609-
report_timing_WR(&cfg, &decom);
610-
}
705+
/* set MPI-IO and PnetCDF hints */
706+
err = set_info(&cfg, hint_str);
707+
if (err < 0) goto err_out;
708+
709+
/* the core of this benchmark */
710+
MPI_Barrier(MPI_COMM_WORLD);
711+
timing[3] = MPI_Wtime();
712+
713+
err = e3sm_io_core(&cfg, &decom);
714+
CHECK_ERR
715+
716+
timing[3] = MPI_Wtime() - timing[3];
717+
718+
/* report timing breakdowns */
719+
if (cfg.rd)
720+
report_timing_RD(&cfg, &decom);
721+
else
722+
report_timing_WR(&cfg, &decom);
611723

612724
#ifdef E3SM_IO_PROFILING
613-
if (cfg.profiling) e3sm_io_print_profile(&cfg);
725+
if (cfg.profiling) e3sm_io_print_profile(&cfg);
614726
#else
615-
if (cfg.profiling && cfg.rank == 0)
616-
printf("\nWarning: E3SM-IO internal time profiling was disabled at configure time\n\n");
727+
if (cfg.profiling && cfg.rank == 0)
728+
printf("\nWarning: E3SM-IO internal time profiling was disabled at configure time\n\n");
617729
#endif
618730

731+
for (i = 0; i < MAX_NUM_DECOMP; i++) {
732+
if (decom.w_starts[i] != NULL) {
733+
free(decom.w_starts[i][0]);
734+
free(decom.w_starts[i]);
735+
}
736+
}
737+
738+
if (cfg.info != MPI_INFO_NULL) MPI_Info_free (&(cfg.info));
739+
740+
timing[0] = timing[1] + timing[2] + timing[3];
741+
MPI_Reduce(timing, max_t, 4, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
742+
if (cfg.rank == 0) {
743+
printf("init=%.2f read_decomp=%.2f e3sm_io_core=%.2f end-to-end=%.2f\n",
744+
max_t[1],max_t[2],max_t[3],max_t[0]);
745+
printf("-----------------------------------------------------------\n");
746+
printf("\n\n");
747+
}
748+
749+
j++;
750+
} while (j < num_hint_lines);
751+
752+
for (j=0; j<num_hint_lines; j++)
753+
free(hint_lines[j]);
754+
619755
err_out:
620-
if (cfg.info != MPI_INFO_NULL)
621-
MPI_Info_free (&(cfg.info));
622756
if (cfg.io_comm != MPI_COMM_WORLD && cfg.io_comm != MPI_COMM_NULL)
623757
MPI_Comm_free (&(cfg.io_comm));
624758
if (cfg.env_log_info != NULL)
@@ -629,20 +763,6 @@ int main (int argc, char **argv) {
629763
if (decom.blocklens[i]) free (decom.blocklens[i]);
630764
if (decom.disps[i]) free (decom.disps[i]);
631765
if (decom.raw_offsets[i]) free (decom.raw_offsets[i]);
632-
if (decom.w_starts[i] != NULL) {
633-
free(decom.w_starts[i][0]);
634-
free(decom.w_starts[i]);
635-
}
636-
}
637-
638-
timing[4] = MPI_Wtime() - timing[4];
639-
timing[0] = MPI_Wtime() - timing[0];
640-
MPI_Reduce(timing, max_t, 5, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
641-
if (cfg.rank == 0) {
642-
printf("init=%.2f read_decomp=%.2f e3sm_io_core=%.2f final=%.2f end-to-end=%.2f\n",
643-
max_t[1],max_t[2],max_t[3],max_t[4],max_t[0]);
644-
printf("-----------------------------------------------------------\n");
645-
printf("\n\n");
646766
}
647767

648768
MPI_Finalize ();

0 commit comments

Comments
 (0)