Skip to content

Commit ed97f54

Browse files
roblatham00 wkliao
authored and committed
add support for annotating DXT segment with pthread_id
* Check the DXT_POSIX and DXT_MPIIO versions of the input log file. The version numbers determine whether the input file contains the extra info field (pthread_id). For DXT_POSIX_VER < 2 and DXT_MPIIO_VER < 3, the buffer passed to darshan_log_get_mod() must not include space for pthread_id; otherwise the contents read will be incorrect.
1 parent 36f584a commit ed97f54

File tree

7 files changed

+148
-44
lines changed

7 files changed

+148
-44
lines changed

darshan-runtime/lib/darshan-dxt.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset,
262262
rec_ref->write_traces[file_rec->write_count].length = length;
263263
rec_ref->write_traces[file_rec->write_count].start_time = start_time;
264264
rec_ref->write_traces[file_rec->write_count].end_time = end_time;
265+
rec_ref->write_traces[file_rec->write_count].pthread_id = (unsigned long)pthread_self();
265266
file_rec->write_count += 1;
266267

267268
DXT_UNLOCK();
@@ -307,6 +308,7 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset,
307308
rec_ref->read_traces[file_rec->read_count].length = length;
308309
rec_ref->read_traces[file_rec->read_count].start_time = start_time;
309310
rec_ref->read_traces[file_rec->read_count].end_time = end_time;
311+
rec_ref->read_traces[file_rec->read_count].pthread_id = (unsigned long)pthread_self();
310312
file_rec->read_count += 1;
311313

312314
DXT_UNLOCK();
@@ -352,6 +354,7 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset,
352354
rec_ref->write_traces[file_rec->write_count].offset = offset;
353355
rec_ref->write_traces[file_rec->write_count].start_time = start_time;
354356
rec_ref->write_traces[file_rec->write_count].end_time = end_time;
357+
rec_ref->write_traces[file_rec->write_count].pthread_id = (unsigned long)pthread_self();
355358
file_rec->write_count += 1;
356359

357360
DXT_UNLOCK();
@@ -397,6 +400,7 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset,
397400
rec_ref->read_traces[file_rec->read_count].offset = offset;
398401
rec_ref->read_traces[file_rec->read_count].start_time = start_time;
399402
rec_ref->read_traces[file_rec->read_count].end_time = end_time;
403+
rec_ref->read_traces[file_rec->read_count].pthread_id = (unsigned long)pthread_self();
400404
file_rec->read_count += 1;
401405

402406
DXT_UNLOCK();

darshan-util/darshan-dxt-logutils.c

Lines changed: 108 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ static void dxt_swap_segments(struct dxt_file_record *file_rec)
7070
int i;
7171
segment_info *tmp_seg;
7272

73-
tmp_seg = (segment_info *)((void *)file_rec + sizeof(struct dxt_file_record));
73+
tmp_seg = (segment_info *)((char *)file_rec + sizeof(struct dxt_file_record));
7474
for(i = 0; i < (file_rec->write_count + file_rec->read_count); i++)
7575
{
7676
DARSHAN_BSWAP64(&tmp_seg->offset);
@@ -86,7 +86,7 @@ static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p)
8686
struct dxt_file_record *rec = *((struct dxt_file_record **)dxt_posix_buf_p);
8787
struct dxt_file_record tmp_rec;
8888
int ret;
89-
int64_t io_trace_size;
89+
size_t rw_count;
9090

9191
if(fd->mod_map[DXT_POSIX_MOD].len == 0)
9292
return(0);
@@ -112,42 +112,70 @@ static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p)
112112
dxt_swap_file_record(&tmp_rec);
113113
}
114114

115-
io_trace_size = (tmp_rec.write_count + tmp_rec.read_count) *
116-
sizeof(segment_info);
115+
rw_count = tmp_rec.write_count + tmp_rec.read_count;
117116

118117
if (*dxt_posix_buf_p == NULL)
119118
{
120-
rec = malloc(sizeof(struct dxt_file_record) + io_trace_size);
119+
rec = malloc(sizeof(struct dxt_file_record) + rw_count * sizeof(segment_info));
121120
if (!rec)
122121
return(-1);
123122
}
123+
124+
/* copy over the metadata of dxt_file_record */
124125
memcpy(rec, &tmp_rec, sizeof(struct dxt_file_record));
125126

126-
if (io_trace_size > 0)
127+
if (rw_count > 0)
127128
{
128-
void *tmp_p = (void *)rec + sizeof(struct dxt_file_record);
129+
char *buf;
130+
int64_t io_trace_size;
131+
132+
/* Check POSIX DXT format version. When > 1, segment_info contains an additional pthread ID */
133+
if (fd->mod_ver[DXT_POSIX_MOD] == 1) {
134+
io_trace_size = rw_count * (sizeof(segment_info) - sizeof(unsigned long));
135+
buf = (char*) malloc(io_trace_size);
136+
if (!buf) return(-1);
137+
}
138+
else {
139+
io_trace_size = rw_count * sizeof(segment_info);
140+
buf = (char *)rec + sizeof(struct dxt_file_record);
141+
}
129142

130-
ret = darshan_log_get_mod(fd, DXT_POSIX_MOD, tmp_p,
131-
io_trace_size);
143+
ret = darshan_log_get_mod(fd, DXT_POSIX_MOD, buf, io_trace_size);
132144
if (ret < io_trace_size)
133145
ret = -1;
134146
else
135147
{
148+
if (fd->mod_ver[DXT_POSIX_MOD] == 1) {
149+
/* copy record data over to rec */
150+
size_t j;
151+
char *src = buf;
152+
char *dest = (char *)rec + sizeof(struct dxt_file_record);
153+
size_t rec_size = sizeof(segment_info) - sizeof(unsigned long);
154+
for (j=0; j<rw_count; j++) {
155+
memcpy(dest, src, rec_size);
156+
src += rec_size;
157+
dest += sizeof(segment_info);
158+
}
159+
}
160+
136161
ret = 1;
137162
if(fd->swap_flag)
138163
{
139164
/* byte swap trace data if necessary */
140165
dxt_swap_segments(rec);
141166
}
142167
}
168+
169+
if (fd->mod_ver[DXT_POSIX_MOD] == 1)
170+
free(buf);
143171
}
144172
else
145173
{
146174
ret = 1;
147175
}
148176

149177
if(*dxt_posix_buf_p == NULL)
150-
{
178+
{
151179
if(ret == 1)
152180
*dxt_posix_buf_p = rec;
153181
else
@@ -163,7 +191,7 @@ static int dxt_log_get_mpiio_file(darshan_fd fd, void** dxt_mpiio_buf_p)
163191
struct dxt_file_record tmp_rec;
164192
int i;
165193
int ret;
166-
int64_t io_trace_size;
194+
size_t rw_count;
167195

168196
if(fd->mod_map[DXT_MPIIO_MOD].len == 0)
169197
return(0);
@@ -189,27 +217,52 @@ static int dxt_log_get_mpiio_file(darshan_fd fd, void** dxt_mpiio_buf_p)
189217
dxt_swap_file_record(&tmp_rec);
190218
}
191219

192-
io_trace_size = (tmp_rec.write_count + tmp_rec.read_count) *
193-
sizeof(segment_info);
220+
rw_count = tmp_rec.write_count + tmp_rec.read_count;
194221

195222
if (*dxt_mpiio_buf_p == NULL)
196223
{
197-
rec = malloc(sizeof(struct dxt_file_record) + io_trace_size);
224+
rec = malloc(sizeof(struct dxt_file_record) + rw_count * sizeof(segment_info));
198225
if (!rec)
199226
return(-1);
200227
}
228+
229+
/* copy over the metadata of dxt_file_record */
201230
memcpy(rec, &tmp_rec, sizeof(struct dxt_file_record));
202231

203-
if (io_trace_size > 0)
232+
if (rw_count > 0)
204233
{
205-
void *tmp_p = (void *)rec + sizeof(struct dxt_file_record);
234+
char *buf;
235+
int64_t io_trace_size;
236+
237+
/* Check MPIIO DXT format version. When > 2, segment_info contains an additional pthread ID */
238+
if (fd->mod_ver[DXT_MPIIO_MOD] < 3) {
239+
io_trace_size = rw_count * (sizeof(segment_info) - sizeof(unsigned long));
240+
buf = (char*) malloc(io_trace_size);
241+
if (!buf) return(-1);
242+
}
243+
else {
244+
io_trace_size = rw_count * sizeof(segment_info);
245+
buf = (char *)rec + sizeof(struct dxt_file_record);
246+
}
206247

207-
ret = darshan_log_get_mod(fd, DXT_MPIIO_MOD, tmp_p,
208-
io_trace_size);
248+
ret = darshan_log_get_mod(fd, DXT_MPIIO_MOD, buf, io_trace_size);
209249
if (ret < io_trace_size)
210250
ret = -1;
211251
else
212252
{
253+
if (fd->mod_ver[DXT_MPIIO_MOD] < 3) {
254+
/* copy record data over to rec */
255+
size_t j;
256+
char *src = buf;
257+
char *dest = (char *)rec + sizeof(struct dxt_file_record);
258+
size_t rec_size = sizeof(segment_info) - sizeof(unsigned long);
259+
for (j=0; j<rw_count; j++) {
260+
memcpy(dest, src, rec_size);
261+
src += rec_size;
262+
dest += sizeof(segment_info);
263+
}
264+
}
265+
213266
ret = 1;
214267
if(fd->swap_flag)
215268
{
@@ -220,12 +273,16 @@ static int dxt_log_get_mpiio_file(darshan_fd fd, void** dxt_mpiio_buf_p)
220273
if(fd->mod_ver[DXT_MPIIO_MOD] == 1)
221274
{
222275
/* make sure to indicate offsets are invalid in version 1 */
223-
for(i = 0; i < (tmp_rec.write_count + tmp_rec.read_count); i++)
276+
segment_info *tmp_p = (segment_info*)((char *)rec + sizeof(struct dxt_file_record));
277+
for(i = 0; i < rw_count; i++)
224278
{
225-
((segment_info *)tmp_p)[i].offset = -1;
279+
tmp_p[i].offset = -1;
226280
}
227281
}
228282
}
283+
284+
if (fd->mod_ver[DXT_MPIIO_MOD] < 3)
285+
free(buf);
229286
}
230287
else
231288
{
@@ -286,7 +343,7 @@ static void dxt_log_print_mpiio_file_darshan(void *file_rec, char *file_name,
286343
}
287344

288345
void dxt_log_print_posix_file(void *posix_file_rec, char *file_name,
289-
char *mnt_pt, char *fs_type, struct lustre_record_ref *lustre_rec_ref)
346+
char *mnt_pt, char *fs_type, struct lustre_record_ref *lustre_rec_ref, uint32_t *mod_ver)
290347
{
291348
struct dxt_file_record *file_rec =
292349
(struct dxt_file_record *)posix_file_rec;
@@ -303,7 +360,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name,
303360
int64_t write_count = file_rec->write_count;
304361
int64_t read_count = file_rec->read_count;
305362
segment_info *io_trace = (segment_info *)
306-
((void *)file_rec + sizeof(struct dxt_file_record));
363+
((char *)file_rec + sizeof(struct dxt_file_record));
307364

308365
/* Lustre File System */
309366
struct darshan_lustre_record *rec;
@@ -349,12 +406,12 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name,
349406
}
350407

351408
/* Print header */
352-
printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)");
409+
printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)");
353410

354411
if (lustreFS) {
355412
printf(" [OST]");
356413
}
357-
printf("\n");
414+
printf(" Pthread-ID\n");
358415

359416
/* Print IO Traces information */
360417
for (i = 0; i < write_count; i++) {
@@ -363,7 +420,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name,
363420
start_time = io_trace[i].start_time;
364421
end_time = io_trace[i].end_time;
365422

366-
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "write", i, offset, length, start_time, end_time);
423+
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "write", i, offset, length, start_time, end_time);
367424

368425
if (lustreFS) {
369426
cur_file_offset = offset;
@@ -398,9 +455,12 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name,
398455
}
399456
cur_ost_offset += stripe_count;
400457
}
458+
if (rec->num_comps) printf(" ");
401459
}
402-
403-
printf("\n");
460+
if (mod_ver[DXT_POSIX_MOD] == 1)
461+
printf(" -1\n");
462+
else
463+
printf(" %-20lu\n", io_trace[i].pthread_id);
404464
}
405465

406466
for (i = write_count; i < write_count + read_count; i++) {
@@ -409,7 +469,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name,
409469
start_time = io_trace[i].start_time;
410470
end_time = io_trace[i].end_time;
411471

412-
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "read", (int)(i - write_count), offset, length, start_time, end_time);
472+
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "read", (int)(i - write_count), offset, length, start_time, end_time);
413473

414474
if (lustreFS) {
415475
cur_file_offset = offset;
@@ -444,15 +504,19 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name,
444504
}
445505
cur_ost_offset += stripe_count;
446506
}
507+
if (rec->num_comps) printf(" ");
447508
}
448509

449-
printf("\n");
510+
if (mod_ver[DXT_POSIX_MOD] == 1)
511+
printf(" -1\n");
512+
else
513+
printf(" %-20lu\n", io_trace[i].pthread_id);
450514
}
451515
return;
452516
}
453517

454518
void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name,
455-
char *mnt_pt, char *fs_type)
519+
char *mnt_pt, char *fs_type, uint32_t *mod_ver)
456520
{
457521
struct dxt_file_record *file_rec =
458522
(struct dxt_file_record *)mpiio_file_rec;
@@ -471,7 +535,7 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name,
471535
int64_t read_count = file_rec->read_count;
472536

473537
segment_info *io_trace = (segment_info *)
474-
((void *)file_rec + sizeof(struct dxt_file_record));
538+
((char *)file_rec + sizeof(struct dxt_file_record));
475539

476540
printf("\n# DXT, file_id: %" PRIu64 ", file_name: %s\n", f_id, file_name);
477541
printf("# DXT, rank: %" PRId64 ", hostname: %s\n", rank, hostname);
@@ -481,7 +545,7 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name,
481545
printf("# DXT, mnt_pt: %s, fs_type: %s\n", mnt_pt, fs_type);
482546

483547
/* Print header */
484-
printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)\n");
548+
printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID\n");
485549

486550
/* Print IO Traces information */
487551
for (i = 0; i < write_count; i++) {
@@ -490,7 +554,12 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name,
490554
start_time = io_trace[i].start_time;
491555
end_time = io_trace[i].end_time;
492556

493-
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f\n", "X_MPIIO", rank, "write", i, offset, length, start_time, end_time);
557+
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_MPIIO", rank, "write", i, offset, length, start_time, end_time);
558+
559+
if (mod_ver[DXT_MPIIO_MOD] <= 2)
560+
printf(" -1\n");
561+
else
562+
printf(" %-20lu\n", io_trace[i].pthread_id);
494563
}
495564

496565
for (i = write_count; i < write_count + read_count; i++) {
@@ -499,7 +568,12 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name,
499568
start_time = io_trace[i].start_time;
500569
end_time = io_trace[i].end_time;
501570

502-
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f\n", "X_MPIIO", rank, "read", (int)(i - write_count), offset, length, start_time, end_time);
571+
printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_MPIIO", rank, "read", (int)(i - write_count), offset, length, start_time, end_time);
572+
573+
if (mod_ver[DXT_MPIIO_MOD] <= 2)
574+
printf(" -1\n");
575+
else
576+
printf(" %-20lu\n", io_trace[i].pthread_id);
503577
}
504578

505579
return;

darshan-util/darshan-dxt-logutils.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ extern struct darshan_mod_logutil_funcs dxt_posix_logutils;
1212
extern struct darshan_mod_logutil_funcs dxt_mpiio_logutils;
1313

1414
void dxt_log_print_posix_file(void *file_rec, char *file_name,
15-
char *mnt_pt, char *fs_type, struct lustre_record_ref *rec_ref);
15+
char *mnt_pt, char *fs_type, struct lustre_record_ref *rec_ref, uint32_t *mod_ver);
1616
void dxt_log_print_mpiio_file(void *file_rec,
17-
char *file_name, char *mnt_pt, char *fs_type);
17+
char *file_name, char *mnt_pt, char *fs_type, uint32_t *mod_ver);
1818

1919
#endif

darshan-util/darshan-dxt-parser.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ int main(int argc, char **argv)
4848
char *token;
4949
char *save;
5050
char buffer[DARSHAN_JOB_METADATA_LEN];
51-
struct lustre_record_ref *lustre_rec_ref, *tmp_lustre_rec_ref;
51+
struct lustre_record_ref *lustre_rec_ref=NULL, *tmp_lustre_rec_ref;
5252
struct lustre_record_ref *lustre_rec_hash = NULL;
5353
char *mod_buf = NULL;
5454

@@ -316,11 +316,11 @@ int main(int argc, char **argv)
316316
HASH_FIND(hlink, lustre_rec_hash, &(base_rec->id),
317317
sizeof(darshan_record_id), lustre_rec_ref);
318318

319-
dxt_log_print_posix_file(mod_buf, rec_name,
320-
mnt_pt, fs_type, lustre_rec_ref);
319+
dxt_log_print_posix_file(mod_buf, rec_name, mnt_pt, fs_type,
320+
lustre_rec_ref, fd->mod_ver);
321321
} else if (i == DXT_MPIIO_MOD){
322-
dxt_log_print_mpiio_file(mod_buf, rec_name,
323-
mnt_pt, fs_type);
322+
dxt_log_print_mpiio_file(mod_buf, rec_name, mnt_pt, fs_type,
323+
fd->mod_ver);
324324
}
325325

326326
free(mod_buf);

darshan-util/pydarshan/darshan/backend/api_def_c.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@
205205
int64_t length;
206206
double start_time;
207207
double end_time;
208+
unsigned long pthread_id;
208209
} segment_info;
209210
210211
/* counter names */

0 commit comments

Comments
 (0)