Skip to content

Commit 7d019dc

Browse files
authored
Merge pull request #464 from hpc/feature-latency
Feature latency
2 parents 4c741b6 + 6911b54 commit 7d019dc

File tree

7 files changed, with 149 additions (+149) and 20 deletions (-20).

src/ior.c

Lines changed: 26 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ static char **ParseFileName(char *, int *);
6666
static void InitTests(IOR_test_t *);
6767
static void TestIoSys(IOR_test_t *);
6868
static void ValidateTests(IOR_param_t * params, MPI_Comm com);
69-
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
69+
static IOR_offset_t WriteOrRead(IOR_param_t *test, int rep, IOR_results_t *results,
7070
aiori_fd_t *fd, const int access,
7171
IOR_io_buffers *ioBuffers);
7272

@@ -1267,7 +1267,7 @@ static void TestIoSys(IOR_test_t *test)
12671267
CurrentTimeString());
12681268
}
12691269
timer[IOR_TIMER_RDWR_START] = GetTimeStamp();
1270-
dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers);
1270+
dataMoved = WriteOrRead(params, rep, &results[rep], fd, WRITE, &ioBuffers);
12711271
if (params->verbose >= VERBOSE_4) {
12721272
fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
12731273
fflush(out_logfile);
@@ -1318,7 +1318,7 @@ static void TestIoSys(IOR_test_t *test)
13181318
params->open = WRITECHECK;
13191319
fd = backend->open(testFileName, IOR_RDONLY, params->backend_options);
13201320
if(fd == NULL) FAIL("Cannot open file");
1321-
dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers);
1321+
dataMoved = WriteOrRead(params, rep, &results[rep], fd, WRITECHECK, &ioBuffers);
13221322
backend->close(fd, params->backend_options);
13231323
rankOffset = 0;
13241324
}
@@ -1397,7 +1397,7 @@ static void TestIoSys(IOR_test_t *test)
13971397
CurrentTimeString());
13981398
}
13991399
timer[IOR_TIMER_RDWR_START] = GetTimeStamp();
1400-
dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers);
1400+
dataMoved = WriteOrRead(params, rep, &results[rep], fd, operation_flag, &ioBuffers);
14011401
timer[IOR_TIMER_RDWR_STOP] = GetTimeStamp();
14021402
if (params->intraTestBarriers)
14031403
MPI_CHECK(MPI_Barrier(testComm),
@@ -1647,15 +1647,17 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offs
16471647
return (offsetArray);
16481648
}
16491649

1650-
static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){
1650+
static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access, OpTimer* ot, double startTime){
16511651
IOR_offset_t amtXferred = 0;
16521652

16531653
void *buffer = ioBuffers->buffer;
16541654
if (access == WRITE) {
16551655
/* fills each transfer with a unique pattern
16561656
* containing the offset into the file */
16571657
update_write_memory_pattern(offset, ioBuffers->buffer, transfer, test->setTimeStampSignature, pretendRank, test->dataPacketType, test->gpuMemoryFlags);
1658+
double start = GetTimeStamp();
16581659
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1660+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16591661
if (amtXferred != transfer)
16601662
ERR("cannot write to file");
16611663
if (test->fsyncPerWrite)
@@ -1665,7 +1667,9 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_
16651667
nanosleep( & wait, NULL);
16661668
}
16671669
} else if (access == READ) {
1670+
double start = GetTimeStamp();
16681671
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1672+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16691673
if (amtXferred != transfer)
16701674
ERR("cannot read from file");
16711675
if (test->interIODelay > 0){
@@ -1674,13 +1678,17 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_
16741678
}
16751679
} else if (access == WRITECHECK) {
16761680
invalidate_buffer_pattern(buffer, transfer, test->gpuMemoryFlags);
1681+
double start = GetTimeStamp();
16771682
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1683+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16781684
if (amtXferred != transfer)
16791685
ERR("cannot read from file write check");
16801686
*errors += CompareData(buffer, transfer, test, offset, pretendRank, WRITECHECK);
16811687
} else if (access == READCHECK) {
16821688
invalidate_buffer_pattern(buffer, transfer, test->gpuMemoryFlags);
1689+
double start = GetTimeStamp();
16831690
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1691+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16841692
if (amtXferred != transfer){
16851693
ERR("cannot read from file");
16861694
}
@@ -1703,7 +1711,7 @@ static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pr
17031711
} else {
17041712
offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
17051713
}
1706-
WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & errors, test, fd, ioBuffers, WRITE);
1714+
WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & errors, test, fd, ioBuffers, WRITE, NULL, 0);
17071715
}
17081716
}
17091717
ioBuffers->buffer = oldBuffer;
@@ -1713,7 +1721,7 @@ static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pr
17131721
* Write or Read data to file(s). This loops through the strides, writing
17141722
* out the data to each block in transfer sizes, until the remainder left is 0.
17151723
*/
1716-
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
1724+
static IOR_offset_t WriteOrRead(IOR_param_t *test, int rep, IOR_results_t *results,
17171725
aiori_fd_t *fd, const int access, IOR_io_buffers *ioBuffers)
17181726
{
17191727
int errors = 0;
@@ -1746,7 +1754,14 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
17461754
memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize);
17471755
}
17481756

1749-
// start timer after random offset was generated
1757+
/* Per operation statistics */
1758+
OpTimer * ot = NULL;
1759+
if(test->savePerOpDataCSV != NULL) {
1760+
char fname[FILENAME_MAX];
1761+
sprintf(fname, "%s-%d-%05d.csv", test->savePerOpDataCSV, rep, rank);
1762+
ot = OpTimerInit(fname, test->transferSize);
1763+
}
1764+
// start timer after random offset was generated
17501765
startForStonewall = GetTimeStamp();
17511766
hitStonewall = 0;
17521767

@@ -1787,7 +1802,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
17871802
offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
17881803
}
17891804
}
1790-
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access);
1805+
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access, ot, startForStonewall);
17911806
pairCnt++;
17921807

17931808
hitStonewall = ((test->deadlineForStonewalling != 0
@@ -1850,7 +1865,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
18501865
offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
18511866
}
18521867
}
1853-
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access);
1868+
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access, ot, startForStonewall);
18541869
pairCnt++;
18551870
}
18561871
j = 0;
@@ -1860,6 +1875,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
18601875
point->pairs_accessed = pairCnt;
18611876
}
18621877

1878+
OpTimerFree(& ot);
18631879
totalErrorCount += CountErrors(test, access, errors);
18641880

18651881
if (access == WRITE && test->fsync == TRUE) {

src/ior.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,7 @@ typedef struct
115115
IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */
116116
IOR_offset_t randomPrefillBlocksize; /* prefill option for random IO, the amount of data used for prefill */
117117

118+
char * savePerOpDataCSV; /* filename prefix for the per-repetition, per-rank CSV files ("<prefix>-<rep>-<rank>.csv") that record details about each I/O operation */
118119
char * saveRankDetailsCSV; /* save the details about the performance to a file */
119120
int summary_every_test; /* flag to print summary every test, not just at end */
120121
int uniqueDir; /* use unique directory for each fpp */

src/mdtest.c

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ typedef struct {
178178
int global_dir_layout;
179179
#endif /* HAVE_LUSTRE_LUSTREAPI */
180180
char * saveRankDetailsCSV; /* save the details about the performance to a file */
181+
char * savePerOpDataCSV;
181182
const char *prologue;
182183
const char *epilogue;
183184

@@ -197,6 +198,7 @@ static mdtest_options_t o;
197198

198199
/* This structure describes the processing status for stonewalling */
199200
typedef struct{
201+
OpTimer * ot; /* Operation timer*/
200202
double start_time;
201203

202204
int stone_wall_timer_seconds;
@@ -238,6 +240,7 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ...
238240
fflush(out_logfile);
239241
}
240242
}
243+
char const * mdtest_test_name(int i);
241244

242245
void parse_dirpath(char *dirpath_arg) {
243246
char * tmp, * token;
@@ -443,11 +446,13 @@ void create_remove_items_helper(const int dirs, const int create, const char *pa
443446

444447
for (uint64_t i = progress->items_start; i < progress->items_per_dir ; ++i) {
445448
if (!dirs) {
449+
double start = GetTimeStamp();
446450
if (create) {
447451
create_file (path, itemNum + i);
448452
} else {
449453
remove_file (path, itemNum + i);
450454
}
455+
if(progress->ot) OpTimerValue(progress->ot, start - progress->start_time, GetTimeStamp() - start);
451456
} else {
452457
create_remove_dirs (path, create, itemNum + i);
453458
}
@@ -644,14 +649,16 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch
644649

645650
/* below temp used to be hiername */
646651
VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item);
652+
double start = GetTimeStamp();
647653
if (-1 == o.backend->stat (item, &buf, o.backend_options)) {
648654
WARNF("unable to stat %s %s", dirs ? "directory" : "file", item);
649655
}
656+
if(progress->ot) OpTimerValue(progress->ot, start - progress->start_time, GetTimeStamp() - start);
650657
}
651658
}
652659

653660
/* reads all of the items created as specified by the input parameters */
654-
void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
661+
void mdtest_read(int random, int dirs, const long dir_iter, char *path, rank_progress_t * progress) {
655662
uint64_t parent_dir, item_num = 0;
656663
char item[MAX_PATHLEN], temp[MAX_PATHLEN];
657664
aiori_fd_t *aiori_fh;
@@ -732,6 +739,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
732739

733740
o.hints.filePerProc = ! o.shared_file;
734741

742+
double start = GetTimeStamp();
735743
/* open file for reading */
736744
aiori_fh = o.backend->open (item, O_RDONLY, o.backend_options);
737745
if (NULL == aiori_fh) {
@@ -746,7 +754,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
746754
WARNF("unable to read file %s", item);
747755
o.verification_error += 1;
748756
continue;
749-
}
757+
}
750758
int pretend_rank = (2 * o.nstride + rank) % o.size;
751759
if(o.verify_read){
752760
if (o.shared_file) {
@@ -759,6 +767,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
759767
}
760768
}
761769
}
770+
if(progress->ot) OpTimerValue(progress->ot, start - progress->start_time, GetTimeStamp() - start);
762771

763772
/* close file */
764773
o.backend->close (aiori_fh, o.backend_options);
@@ -1200,7 +1209,7 @@ void file_test_create(const int iteration, const int ntasks, const char *path, r
12001209
}
12011210
MPI_Barrier(testComm);
12021211
}
1203-
1212+
12041213
/* create files */
12051214
create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
12061215
if(o.stone_wall_timer_seconds){
@@ -1244,6 +1253,11 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
12441253
/* create phase */
12451254
if (o.create_only ) {
12461255
phase_prepare();
1256+
if(o.savePerOpDataCSV != NULL) {
1257+
char path[MAX_PATHLEN];
1258+
sprintf(path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_CREATE_NUM), rank);
1259+
progress->ot = OpTimerInit(path, o.write_bytes > 0 ? o.write_bytes : 1);
1260+
}
12471261
t_start = GetTimeStamp();
12481262
#ifdef HAVE_GPFSCREATESHARING_T
12491263
/* Enable createSharingHint */
@@ -1270,6 +1284,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
12701284
}
12711285
#endif /* HAVE_GPFSCREATESHARING_T */
12721286
t_end = GetTimeStamp();
1287+
OpTimerFree(& progress->ot);
12731288
updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier);
12741289
}else{
12751290
if (o.stoneWallingStatusFile){
@@ -1298,7 +1313,13 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
12981313
/* stat phase */
12991314
if (o.stat_only ) {
13001315
phase_prepare();
1316+
if(o.savePerOpDataCSV != NULL) {
1317+
char path[MAX_PATHLEN];
1318+
sprintf(path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_STAT_NUM), rank);
1319+
progress->ot = OpTimerInit(path, 1);
1320+
}
13011321
t_start = GetTimeStamp();
1322+
progress->start_time = t_start;
13021323
for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
13031324
prep_testdir(iteration, dir_iter);
13041325
if (o.unique_dir_per_task) {
@@ -1318,13 +1339,20 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
13181339
t_end_before_barrier = GetTimeStamp();
13191340
phase_end();
13201341
t_end = GetTimeStamp();
1342+
OpTimerFree(& progress->ot);
13211343
updateResult(res, MDTEST_FILE_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier);
13221344
}
13231345

13241346
/* read phase */
13251347
if (o.read_only ) {
13261348
phase_prepare();
1349+
if(o.savePerOpDataCSV != NULL) {
1350+
char path[MAX_PATHLEN];
1351+
sprintf(path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_READ_NUM), rank);
1352+
progress->ot = OpTimerInit(path, o.read_bytes > 0 ? o.read_bytes : 1);
1353+
}
13271354
t_start = GetTimeStamp();
1355+
progress->start_time = t_start;
13281356
for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
13291357
prep_testdir(iteration, dir_iter);
13301358
if (o.unique_dir_per_task) {
@@ -1340,23 +1368,28 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
13401368

13411369
/* read files */
13421370
if (o.random_seed > 0) {
1343-
mdtest_read(1,0, dir_iter, temp_path);
1371+
mdtest_read(1, 0, dir_iter, temp_path, progress);
13441372
} else {
1345-
mdtest_read(0,0, dir_iter, temp_path);
1373+
mdtest_read(0, 0, dir_iter, temp_path, progress);
13461374
}
13471375
}
13481376
t_end_before_barrier = GetTimeStamp();
13491377
phase_end();
13501378
t_end = GetTimeStamp();
1379+
OpTimerFree(& progress->ot);
13511380
updateResult(res, MDTEST_FILE_READ_NUM, o.items, t_start, t_end, t_end_before_barrier);
13521381
}
13531382

13541383
/* remove phase */
13551384
if (o.remove_only) {
13561385
phase_prepare();
1386+
if(o.savePerOpDataCSV != NULL) {
1387+
sprintf(temp_path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_REMOVE_NUM), rank);
1388+
progress->ot = OpTimerInit(temp_path, o.write_bytes > 0 ? o.write_bytes : 1);
1389+
}
13571390
t_start = GetTimeStamp();
1391+
progress->start_time = t_start;
13581392
progress->items_start = 0;
1359-
13601393
for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
13611394
prep_testdir(iteration, dir_iter);
13621395
if (o.unique_dir_per_task) {
@@ -1369,19 +1402,19 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
13691402
}
13701403

13711404
VERBOSE(3,5,"file_test: rm directories path is '%s'", temp_path );
1372-
13731405
if (o.collective_creates) {
13741406
if (rank == 0) {
13751407
collective_create_remove(0, 0, ntasks, temp_path, progress);
13761408
}
13771409
} else {
1378-
VERBOSE(3,5,"gonna create %s", temp_path);
1410+
VERBOSE(3,5,"gonna remove %s", temp_path);
13791411
create_remove_items(0, 0, 0, 0, temp_path, 0, progress);
13801412
}
13811413
}
13821414
t_end_before_barrier = GetTimeStamp();
13831415
phase_end();
13841416
t_end = GetTimeStamp();
1417+
OpTimerFree(& progress->ot);
13851418
updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier);
13861419
}
13871420

@@ -2322,8 +2355,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
23222355
#endif
23232356
{0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors},
23242357
{0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV},
2358+
{0, "savePerOpDataCSV", "Store the performance of each rank into an individual file prefixed with this option.", OPTION_OPTIONAL_ARGUMENT, 's', & o.savePerOpDataCSV},
23252359
{0, "showRankStatistics", "Include statistics per rank", OPTION_FLAG, 'd', & o.show_perrank_statistics},
2326-
23272360
LAST_OPTION
23282361
};
23292362
options_all_t * global_options = airoi_create_all_module_options(options);
@@ -2364,7 +2397,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
23642397
for (i = 1; i < argc; i++) {
23652398
snprintf(&cmd_buffer[strlen(cmd_buffer)], 4096-strlen(cmd_buffer), " '%s'", argv[i]);
23662399
}
2367-
23682400
VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp());
23692401
VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, o.size, numNodes);
23702402
VERBOSE(0,-1,"Command line used: %s", cmd_buffer);

src/mdtest.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#include <mpi.h>
55
#include <stdio.h>
66
#include <stdint.h>
7+
#include <utilities.h>
78

89
typedef enum {
910
MDTEST_DIR_CREATE_NUM = 0,

src/parse_options.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,8 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
118118
fclose(fd);
119119
}
120120
params->saveRankDetailsCSV = strdup(value);
121+
} else if (strcasecmp(option, "savePerOpDataCSV") == 0){
122+
params->savePerOpDataCSV = strdup(value);
121123
} else if (strcasecmp(option, "summaryFormat") == 0) {
122124
if(strcasecmp(value, "default") == 0){
123125
outputFormat = OUTPUT_DEFAULT;
@@ -473,6 +475,7 @@ option_help * createGlobalOptions(IOR_param_t * params){
473475
{.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT},
474476
{.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputting the summary", .arg = OPTION_OPTIONAL_ARGUMENT},
475477
{.help=" -O saveRankPerformanceDetailsCSV=<FILE> -- store the performance of each rank into the named CSV file.", .arg = OPTION_OPTIONAL_ARGUMENT},
478+
{.help=" -O savePerOpDataCSV=<FILE> -- store the performance of each rank into an individual file prefixed with this option.", .arg = OPTION_OPTIONAL_ARGUMENT},
476479
{0, "dryRun", "do not perform any I/Os just run evtl. inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun},
477480
LAST_OPTION,
478481
};

0 commit comments

Comments
 (0)