diff --git a/darshan-runtime/lib/darshan-dxt.c b/darshan-runtime/lib/darshan-dxt.c index 7412b4e4c..33d7b33e8 100644 --- a/darshan-runtime/lib/darshan-dxt.c +++ b/darshan-runtime/lib/darshan-dxt.c @@ -262,6 +262,7 @@ void dxt_posix_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].length = length; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; + rec_ref->write_traces[file_rec->write_count].pthread_id = (unsigned long)pthread_self(); file_rec->write_count += 1; DXT_UNLOCK(); @@ -307,6 +308,7 @@ void dxt_posix_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].length = length; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; + rec_ref->read_traces[file_rec->read_count].pthread_id = (unsigned long)pthread_self(); file_rec->read_count += 1; DXT_UNLOCK(); @@ -352,6 +354,7 @@ void dxt_mpiio_write(darshan_record_id rec_id, int64_t offset, rec_ref->write_traces[file_rec->write_count].offset = offset; rec_ref->write_traces[file_rec->write_count].start_time = start_time; rec_ref->write_traces[file_rec->write_count].end_time = end_time; + rec_ref->write_traces[file_rec->write_count].pthread_id = (unsigned long)pthread_self(); file_rec->write_count += 1; DXT_UNLOCK(); @@ -397,6 +400,7 @@ void dxt_mpiio_read(darshan_record_id rec_id, int64_t offset, rec_ref->read_traces[file_rec->read_count].offset = offset; rec_ref->read_traces[file_rec->read_count].start_time = start_time; rec_ref->read_traces[file_rec->read_count].end_time = end_time; + rec_ref->read_traces[file_rec->read_count].pthread_id = (unsigned long)pthread_self(); file_rec->read_count += 1; DXT_UNLOCK(); diff --git a/darshan-test/tst_mpio_pthread.c b/darshan-test/tst_mpio_pthread.c new file mode 100644 index 000000000..47b805f2b --- /dev/null +++ 
b/darshan-test/tst_mpio_pthread.c @@ -0,0 +1,241 @@ +/* + * (C) 2025 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#include +#include +#include /* strcpy(), strncpy() */ +#include /* _POSIX_BARRIERS */ +#include /* open() */ +#include /* open() */ +#include /* open() */ +#include /* errno */ + +#include + +#include + +#define NTHREADS 3 +#define LEN 100 + +#define ERR \ + if (err != MPI_SUCCESS) { \ + int errorStringLen; \ + char errorString[MPI_MAX_ERROR_STRING]; \ + MPI_Error_string(err, errorString, &errorStringLen); \ + printf("Error at line %d: %s\n",__LINE__, errorString); \ + nerrs++; \ + } + + +#if !defined(_POSIX_BARRIERS) || _POSIX_BARRIERS <= 0 +/* According to opengroup.org, barriers are defined in the optional part of + * POSIX standard. For example, Mac OSX does not have pthread_barrier. If + * barriers were implemented, the _POSIX_BARRIERS macro is defined as a + * positive number. + */ + +typedef int pthread_barrierattr_t; +typedef struct { + pthread_mutex_t mutex; + pthread_cond_t cond; + int count; + int numThreads; +} pthread_barrier_t; + +static int pthread_barrier_init(pthread_barrier_t *barrier, + const pthread_barrierattr_t *attr, + unsigned int count) +{ + if (count == 0) { + errno = EINVAL; + return -1; + } + + if (pthread_mutex_init(&barrier->mutex, 0) < 0) + return -1; + + if (pthread_cond_init(&barrier->cond, 0) < 0) { + pthread_mutex_destroy(&barrier->mutex); + return -1; + } + barrier->numThreads = count; + barrier->count = 0; + + return 0; +} + +static int pthread_barrier_destroy(pthread_barrier_t *barrier) +{ + pthread_cond_destroy(&barrier->cond); + pthread_mutex_destroy(&barrier->mutex); + return 0; +} + +static int pthread_barrier_wait(pthread_barrier_t *barrier) +{ + int ret; + pthread_mutex_lock(&barrier->mutex); + ++(barrier->count); + if (barrier->count >= barrier->numThreads) { + barrier->count = 0; + pthread_cond_broadcast(&barrier->cond); + ret = 1; + } else { + 
pthread_cond_wait(&barrier->cond, &barrier->mutex); + ret = 0; + } + pthread_mutex_unlock(&barrier->mutex); + return ret; +} +#endif + +/* pthread barrier object */ +static pthread_barrier_t barr; + +typedef struct { + int id; /* globally unique thread ID */ + MPI_File fh; /* file handler */ + int nprocs; /* number of MPI processes */ + int rank; /* MPI rank ID */ + size_t count; /* write length */ + char fname[256]; /* output file name base */ +} thread_arg; + +pthread_mutex_t env_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int setenv_thread_safe(const char *name, const char *value, int overwrite) { + int err; + pthread_mutex_lock(&env_mutex); + err = setenv(name, value, overwrite); + pthread_mutex_unlock(&env_mutex); + return err; +} + +/*----< thread_func() >------------------------------------------------------*/ +static +void* thread_func(void *arg) +{ + char filename[512]; + int i, id, err, nerrs=0, nprocs, rank, *ret; + size_t count; + off_t off; + char buf[LEN], annotation[64]; + MPI_File fh; + MPI_Status status; + + /* make a unique file name for each thread */ + id = ((thread_arg*)arg)->id; + fh = ((thread_arg*)arg)->fh; + count = ((thread_arg*)arg)->count; + nprocs = ((thread_arg*)arg)->nprocs; + rank = ((thread_arg*)arg)->rank; + + for (i=0; i-------------------------------------------------------------*/ +int main(int argc, char **argv) { + extern int optind; + char filename[256]; + int i, err, nerrs=0, rank=0, nprocs, providedT; + MPI_File fh; + MPI_Status status; + pthread_t threads[NTHREADS]; + + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &providedT); + + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (providedT != MPI_THREAD_MULTIPLE) { + if (!rank) + printf("Error: MPI does not support MPI_THREAD_MULTIPLE\n"); + MPI_Finalize(); + return 0; + } + + if (argc == 1) strcpy(filename, "testfile"); + else strcpy(filename, argv[1]); + + /* create a file */ + err = MPI_File_open(MPI_COMM_SELF, filename, 
MPI_MODE_CREATE | MPI_MODE_RDWR, + MPI_INFO_NULL, &fh); + ERR + + char buf[LEN*NTHREADS]; + size_t count = LEN * NTHREADS; + MPI_Offset off = rank * count; + for (i=0; i 0); +} + diff --git a/darshan-util/darshan-dxt-logutils.c b/darshan-util/darshan-dxt-logutils.c index af1520e38..2b697f9ed 100644 --- a/darshan-util/darshan-dxt-logutils.c +++ b/darshan-util/darshan-dxt-logutils.c @@ -70,7 +70,7 @@ static void dxt_swap_segments(struct dxt_file_record *file_rec) int i; segment_info *tmp_seg; - tmp_seg = (segment_info *)((void *)file_rec + sizeof(struct dxt_file_record)); + tmp_seg = (segment_info *)((char *)file_rec + sizeof(struct dxt_file_record)); for(i = 0; i < (file_rec->write_count + file_rec->read_count); i++) { DARSHAN_BSWAP64(&tmp_seg->offset); @@ -86,7 +86,7 @@ static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p) struct dxt_file_record *rec = *((struct dxt_file_record **)dxt_posix_buf_p); struct dxt_file_record tmp_rec; int ret; - int64_t io_trace_size; + size_t rw_count; if(fd->mod_map[DXT_POSIX_MOD].len == 0) return(0); @@ -112,27 +112,52 @@ static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p) dxt_swap_file_record(&tmp_rec); } - io_trace_size = (tmp_rec.write_count + tmp_rec.read_count) * - sizeof(segment_info); + rw_count = tmp_rec.write_count + tmp_rec.read_count; if (*dxt_posix_buf_p == NULL) { - rec = malloc(sizeof(struct dxt_file_record) + io_trace_size); + rec = malloc(sizeof(struct dxt_file_record) + rw_count * sizeof(segment_info)); if (!rec) return(-1); } + + /* copy over the metadta of dxt_file_record */ memcpy(rec, &tmp_rec, sizeof(struct dxt_file_record)); - if (io_trace_size > 0) + if (rw_count > 0) { - void *tmp_p = (void *)rec + sizeof(struct dxt_file_record); + char *buf; + int64_t io_trace_size; + + /* Check POSIX DXT format version. 
When > 1, segment_info contains an additional pthread ID */ + if (fd->mod_ver[DXT_POSIX_MOD] == 1) { + io_trace_size = rw_count * (sizeof(segment_info) - sizeof(unsigned long)); + buf = (char*) malloc(io_trace_size); + if (!buf) return(-1); + } + else { + io_trace_size = rw_count * sizeof(segment_info); + buf = (char *)rec + sizeof(struct dxt_file_record); + } - ret = darshan_log_get_mod(fd, DXT_POSIX_MOD, tmp_p, - io_trace_size); + ret = darshan_log_get_mod(fd, DXT_POSIX_MOD, buf, io_trace_size); if (ret < io_trace_size) ret = -1; else { + if (fd->mod_ver[DXT_POSIX_MOD] == 1) { + /* copy record data over to rec */ + size_t j; + char *src = buf; + char *dest = (char *)rec + sizeof(struct dxt_file_record); + size_t rec_size = sizeof(segment_info) - sizeof(unsigned long); + for (j=0; jswap_flag) { @@ -140,6 +165,9 @@ static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p) dxt_swap_segments(rec); } } + + if (fd->mod_ver[DXT_POSIX_MOD] == 1) + free(buf); } else { @@ -147,7 +175,7 @@ static int dxt_log_get_posix_file(darshan_fd fd, void** dxt_posix_buf_p) } if(*dxt_posix_buf_p == NULL) - { + { if(ret == 1) *dxt_posix_buf_p = rec; else @@ -163,7 +191,7 @@ static int dxt_log_get_mpiio_file(darshan_fd fd, void** dxt_mpiio_buf_p) struct dxt_file_record tmp_rec; int i; int ret; - int64_t io_trace_size; + size_t rw_count; if(fd->mod_map[DXT_MPIIO_MOD].len == 0) return(0); @@ -189,27 +217,52 @@ static int dxt_log_get_mpiio_file(darshan_fd fd, void** dxt_mpiio_buf_p) dxt_swap_file_record(&tmp_rec); } - io_trace_size = (tmp_rec.write_count + tmp_rec.read_count) * - sizeof(segment_info); + rw_count = tmp_rec.write_count + tmp_rec.read_count; if (*dxt_mpiio_buf_p == NULL) { - rec = malloc(sizeof(struct dxt_file_record) + io_trace_size); + rec = malloc(sizeof(struct dxt_file_record) + rw_count * sizeof(segment_info)); if (!rec) return(-1); } + + /* copy over the metadta of dxt_file_record */ memcpy(rec, &tmp_rec, sizeof(struct dxt_file_record)); - if (io_trace_size 
> 0) + if (rw_count > 0) { - void *tmp_p = (void *)rec + sizeof(struct dxt_file_record); + char *buf; + int64_t io_trace_size; + + /* Check MPIIO DXT format version. When > 2, segment_info contains an additional pthread ID */ + if (fd->mod_ver[DXT_MPIIO_MOD] < 3) { + io_trace_size = rw_count * (sizeof(segment_info) - sizeof(unsigned long)); + buf = (char*) malloc(io_trace_size); + if (!buf) return(-1); + } + else { + io_trace_size = rw_count * sizeof(segment_info); + buf = (char *)rec + sizeof(struct dxt_file_record); + } - ret = darshan_log_get_mod(fd, DXT_MPIIO_MOD, tmp_p, - io_trace_size); + ret = darshan_log_get_mod(fd, DXT_MPIIO_MOD, buf, io_trace_size); if (ret < io_trace_size) ret = -1; else { + if (fd->mod_ver[DXT_MPIIO_MOD] < 3) { + /* copy record data over to rec */ + size_t j; + char *src = buf; + char *dest = (char *)rec + sizeof(struct dxt_file_record); + size_t rec_size = sizeof(segment_info) - sizeof(unsigned long); + for (j=0; jswap_flag) { @@ -220,12 +273,16 @@ static int dxt_log_get_mpiio_file(darshan_fd fd, void** dxt_mpiio_buf_p) if(fd->mod_ver[DXT_MPIIO_MOD] == 1) { /* make sure to indicate offsets are invalid in version 1 */ - for(i = 0; i < (tmp_rec.write_count + tmp_rec.read_count); i++) + segment_info *tmp_p = (segment_info*)((char *)rec + sizeof(struct dxt_file_record)); + for(i = 0; i < rw_count; i++) { - ((segment_info *)tmp_p)[i].offset = -1; + tmp_p[i].offset = -1; } } } + + if (fd->mod_ver[DXT_MPIIO_MOD] < 3) + free(buf); } else { @@ -286,7 +343,7 @@ static void dxt_log_print_mpiio_file_darshan(void *file_rec, char *file_name, } void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, - char *mnt_pt, char *fs_type, struct lustre_record_ref *lustre_rec_ref) + char *mnt_pt, char *fs_type, struct lustre_record_ref *lustre_rec_ref, uint32_t *mod_ver) { struct dxt_file_record *file_rec = (struct dxt_file_record *)posix_file_rec; @@ -303,7 +360,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, int64_t 
write_count = file_rec->write_count; int64_t read_count = file_rec->read_count; segment_info *io_trace = (segment_info *) - ((void *)file_rec + sizeof(struct dxt_file_record)); + ((char *)file_rec + sizeof(struct dxt_file_record)); /* Lustre File System */ struct darshan_lustre_record *rec; @@ -349,12 +406,12 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } /* Print header */ - printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)"); + printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)"); if (lustreFS) { printf(" [OST]"); } - printf("\n"); + printf(" Pthread-ID\n"); /* Print IO Traces information */ for (i = 0; i < write_count; i++) { @@ -363,7 +420,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, start_time = io_trace[i].start_time; end_time = io_trace[i].end_time; - printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "write", i, offset, length, start_time, end_time); + printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "write", i, offset, length, start_time, end_time); if (lustreFS) { cur_file_offset = offset; @@ -398,9 +455,12 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } cur_ost_offset += stripe_count; } + if (rec->num_comps) printf(" "); } - - printf("\n"); + if (mod_ver[DXT_POSIX_MOD] == 1) + printf(" N/A\n"); + else + printf(" %-20lu\n", io_trace[i].pthread_id); } for (i = write_count; i < write_count + read_count; i++) { @@ -409,7 +469,7 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, start_time = io_trace[i].start_time; end_time = io_trace[i].end_time; - printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "read", (int)(i - write_count), offset, length, start_time, end_time); + printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_POSIX", rank, "read", (int)(i - write_count), offset, length, start_time, 
end_time); if (lustreFS) { cur_file_offset = offset; @@ -444,15 +504,19 @@ void dxt_log_print_posix_file(void *posix_file_rec, char *file_name, } cur_ost_offset += stripe_count; } + if (rec->num_comps) printf(" "); } - printf("\n"); + if (mod_ver[DXT_POSIX_MOD] == 1) + printf(" N/A\n"); + else + printf(" %-20lu\n", io_trace[i].pthread_id); } return; } void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, - char *mnt_pt, char *fs_type) + char *mnt_pt, char *fs_type, uint32_t *mod_ver) { struct dxt_file_record *file_rec = (struct dxt_file_record *)mpiio_file_rec; @@ -471,7 +535,7 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, int64_t read_count = file_rec->read_count; segment_info *io_trace = (segment_info *) - ((void *)file_rec + sizeof(struct dxt_file_record)); + ((char *)file_rec + sizeof(struct dxt_file_record)); printf("\n# DXT, file_id: %" PRIu64 ", file_name: %s\n", f_id, file_name); printf("# DXT, rank: %" PRId64 ", hostname: %s\n", rank, hostname); @@ -481,7 +545,7 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, printf("# DXT, mnt_pt: %s, fs_type: %s\n", mnt_pt, fs_type); /* Print header */ - printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s)\n"); + printf("# Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID\n"); /* Print IO Traces information */ for (i = 0; i < write_count; i++) { @@ -490,7 +554,12 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, start_time = io_trace[i].start_time; end_time = io_trace[i].end_time; - printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f\n", "X_MPIIO", rank, "write", i, offset, length, start_time, end_time); + printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_MPIIO", rank, "write", i, offset, length, start_time, end_time); + + if (mod_ver[DXT_MPIIO_MOD] <= 2) + printf(" N/A\n"); + else + printf(" %-20lu\n", io_trace[i].pthread_id); } for (i = write_count; i < write_count 
+ read_count; i++) { @@ -499,7 +568,12 @@ void dxt_log_print_mpiio_file(void *mpiio_file_rec, char *file_name, start_time = io_trace[i].start_time; end_time = io_trace[i].end_time; - printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f\n", "X_MPIIO", rank, "read", (int)(i - write_count), offset, length, start_time, end_time); + printf("%8s%8" PRId64 "%7s%9d%16" PRId64 "%16" PRId64 "%12.4f%12.4f ", "X_MPIIO", rank, "read", (int)(i - write_count), offset, length, start_time, end_time); + + if (mod_ver[DXT_MPIIO_MOD] <= 2) + printf(" N/A\n"); + else + printf(" %-20lu\n", io_trace[i].pthread_id); } return; diff --git a/darshan-util/darshan-dxt-logutils.h b/darshan-util/darshan-dxt-logutils.h index f58df8e39..098accc28 100644 --- a/darshan-util/darshan-dxt-logutils.h +++ b/darshan-util/darshan-dxt-logutils.h @@ -12,8 +12,8 @@ extern struct darshan_mod_logutil_funcs dxt_posix_logutils; extern struct darshan_mod_logutil_funcs dxt_mpiio_logutils; void dxt_log_print_posix_file(void *file_rec, char *file_name, - char *mnt_pt, char *fs_type, struct lustre_record_ref *rec_ref); + char *mnt_pt, char *fs_type, struct lustre_record_ref *rec_ref, uint32_t *mod_ver); void dxt_log_print_mpiio_file(void *file_rec, - char *file_name, char *mnt_pt, char *fs_type); + char *file_name, char *mnt_pt, char *fs_type, uint32_t *mod_ver); #endif diff --git a/darshan-util/darshan-dxt-parser.c b/darshan-util/darshan-dxt-parser.c index dbab3f055..1d723b515 100644 --- a/darshan-util/darshan-dxt-parser.c +++ b/darshan-util/darshan-dxt-parser.c @@ -48,7 +48,7 @@ int main(int argc, char **argv) char *token; char *save; char buffer[DARSHAN_JOB_METADATA_LEN]; - struct lustre_record_ref *lustre_rec_ref, *tmp_lustre_rec_ref; + struct lustre_record_ref *lustre_rec_ref=NULL, *tmp_lustre_rec_ref; struct lustre_record_ref *lustre_rec_hash = NULL; char *mod_buf = NULL; @@ -316,11 +316,11 @@ int main(int argc, char **argv) HASH_FIND(hlink, lustre_rec_hash, &(base_rec->id), 
sizeof(darshan_record_id), lustre_rec_ref); - dxt_log_print_posix_file(mod_buf, rec_name, - mnt_pt, fs_type, lustre_rec_ref); + dxt_log_print_posix_file(mod_buf, rec_name, mnt_pt, fs_type, + lustre_rec_ref, fd->mod_ver); } else if (i == DXT_MPIIO_MOD){ - dxt_log_print_mpiio_file(mod_buf, rec_name, - mnt_pt, fs_type); + dxt_log_print_mpiio_file(mod_buf, rec_name, mnt_pt, fs_type, + fd->mod_ver); } free(mod_buf); diff --git a/darshan-util/doc/darshan-util.rst b/darshan-util/doc/darshan-util.rst index cb17a81b3..9e22ab210 100644 --- a/darshan-util/doc/darshan-util.rst +++ b/darshan-util/doc/darshan-util.rst @@ -1352,37 +1352,75 @@ captured from both POSIX and MPI-IO interfaces. Example output is given below: # DXT_POSIX module data # *************************************************** - # DXT, file_id: 16457598720760448348, file_name: /tmp/test/testFile - # DXT, rank: 0, hostname: shane-thinkpad - # DXT, write_count: 4, read_count: 4 - # DXT, mnt_pt: /, fs_type: ext4 - # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) - X_POSIX 0 write 0 0 262144 0.0029 0.0032 - X_POSIX 0 write 1 262144 262144 0.0032 0.0035 - X_POSIX 0 write 2 524288 262144 0.0035 0.0038 - X_POSIX 0 write 3 786432 262144 0.0038 0.0040 - X_POSIX 0 read 0 0 262144 0.0048 0.0048 - X_POSIX 0 read 1 262144 262144 0.0049 0.0049 - X_POSIX 0 read 2 524288 262144 0.0049 0.0050 - X_POSIX 0 read 3 786432 262144 0.0050 0.0051 + # DXT, file_id: 12998263815182938503, file_name: /tmp/test/testFile + # DXT, rank: 0, hostname: acfs.ece.northwestern.edu + # DXT, write_count: 4, read_count: 3 + # DXT, mnt_pt: /homes, fs_type: autofs + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID + X_POSIX 0 write 0 0 30 0.0018 0.0018 139823019705024 + X_POSIX 0 write 1 120 10 0.0019 0.0020 139822945752832 + X_POSIX 0 write 2 130 10 0.0021 0.0021 139822937360128 + X_POSIX 0 write 3 140 10 0.0022 0.0022 139822861645568 + X_POSIX 0 read 0 0 10 0.0019 0.0019 139822945752832 + X_POSIX 0 read 1 10 10 0.0020 
0.0020 139822937360128 + X_POSIX 0 read 2 20 10 0.0022 0.0022 139822861645568 + + # DXT, file_id: 12998263815182938503, file_name: /tmp/test/testFile + # DXT, rank: 1, hostname: acfs.ece.northwestern.edu + # DXT, write_count: 4, read_count: 3 + # DXT, mnt_pt: /homes, fs_type: autofs + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID + X_POSIX 1 write 0 30 30 0.0018 0.0018 140244736549568 + X_POSIX 1 write 1 180 10 0.0019 0.0019 140244662597376 + X_POSIX 1 write 2 190 10 0.0022 0.0022 140244654204672 + X_POSIX 1 write 3 200 10 0.0022 0.0022 140244573746944 + X_POSIX 1 read 0 60 10 0.0019 0.0019 140244662597376 + X_POSIX 1 read 1 70 10 0.0022 0.0022 140244654204672 + X_POSIX 1 read 2 80 10 0.0022 0.0022 140244573746944 # *************************************************** # DXT_MPIIO module data # *************************************************** - # DXT, file_id: 16457598720760448348, file_name: /tmp/test/testFile - # DXT, rank: 0, hostname: shane-thinkpad - # DXT, write_count: 4, read_count: 4 - # DXT, mnt_pt: /, fs_type: ext4 - # Module Rank Wt/Rd Segment Length Start(s) End(s) - X_MPIIO 0 write 0 262144 0.0029 0.0032 - X_MPIIO 0 write 1 262144 0.0032 0.0035 - X_MPIIO 0 write 2 262144 0.0035 0.0038 - X_MPIIO 0 write 3 262144 0.0038 0.0040 - X_MPIIO 0 read 0 262144 0.0048 0.0049 - X_MPIIO 0 read 1 262144 0.0049 0.0049 - X_MPIIO 0 read 2 262144 0.0049 0.0050 - X_MPIIO 0 read 3 262144 0.0050 0.0051 + # DXT, file_id: 12998263815182938503, file_name: /tmp/test/testFile + # DXT, rank: 0, hostname: acfs.ece.northwestern.edu + # DXT, write_count: 4, read_count: 3 + # DXT, mnt_pt: /homes, fs_type: autofs + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID + X_MPIIO 0 write 0 0 30 0.0018 0.0018 139823019705024 + X_MPIIO 0 write 1 120 10 0.0019 0.0020 139822945752832 + X_MPIIO 0 write 2 130 10 0.0020 0.0021 139822937360128 + X_MPIIO 0 write 3 140 10 0.0022 0.0022 139822861645568 + X_MPIIO 0 read 0 0 10 0.0019 0.0019 139822945752832 + 
X_MPIIO 0 read 1 10 10 0.0020 0.0020 139822937360128 + X_MPIIO 0 read 2 20 10 0.0022 0.0022 139822861645568 + + # DXT, file_id: 12998263815182938503, file_name: /tmp/test/testFile + # DXT, rank: 1, hostname: acfs.ece.northwestern.edu + # DXT, write_count: 4, read_count: 3 + # DXT, mnt_pt: /homes, fs_type: autofs + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID + X_MPIIO 1 write 0 30 30 0.0018 0.0018 140244736549568 + X_MPIIO 1 write 1 180 10 0.0019 0.0019 140244662597376 + X_MPIIO 1 write 2 190 10 0.0022 0.0022 140244654204672 + X_MPIIO 1 write 3 200 10 0.0022 0.0022 140244573746944 + X_MPIIO 1 read 0 60 10 0.0019 0.0019 140244662597376 + X_MPIIO 1 read 1 70 10 0.0019 0.0022 140244654204672 + X_MPIIO 1 read 2 80 10 0.0022 0.0022 140244573746944 + +.. note:: + Starting from Darshan version 3.6.0, there is an additional column at the + end showing the pthread ID. Darshan log files generated by earlier + versions will show N/A as Pthread-ID. An example is given below. + + .. code-block:: + + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID + X_POSIX 2 write 0 16 8 0.0022 0.0022 N/A + + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID + X_MPIIO 3 write 0 24 8 0.0022 0.0022 N/A + DXT POSIX module ------------------------------------- @@ -1406,7 +1444,17 @@ The output format for each individual I/O operation segment is: .. code-block:: - # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) Pthread-ID + +.. note:: + Starting from Darshan version 3.6.0, there is an additional column at the + end showing the pthread ID. When using the DXT parser of an earlier version + to parse a Darshan log file generated by an earlier version, the columns will + not include "Pthread-ID", for example: + + .. 
code-block:: + + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) * Module: corresponding DXT module (DXT_POSIX or DXT_MPIIO) * Rank: process rank responsible for I/O operation @@ -1416,6 +1464,9 @@ The output format for each individual I/O operation segment is: * Length: length of the I/O operation in bytes * Start: timestamp of the start of the operation (w.r.t. application start time) * End: timestamp of the end of the operation (w.r.t. application start time) +* Pthread-ID: pthread ID of the calling thread. When using the DXT parser from + version 3.6.0 or later to parse log files generated by earlier versions of + Darshan, the value of Pthread-ID is shown as N/A. DXT MPI-IO module ------------------------------------- diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index d0cb6be3a..224c51720 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -205,6 +205,7 @@ int64_t length; double start_time; double end_time; + unsigned long pthread_id; } segment_info; /* counter names */ diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 34fcfa580..0489c0950 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -595,6 +595,22 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): mod_type = _structdefs[mod_name] #name_records = log_get_name_records(log) + if 'DXT_POSIX' in log['modules']: + # retrieve file's DXT_POSIX version number + dxt_posix_ver = log['modules']['DXT_POSIX']['ver'] + + if 'DXT_MPIIO' in log['modules']: + # retrieve file's DXT_MPIIO version number + dxt_mpiio_ver = log['modules']['DXT_MPIIO']['ver'] + + append_pthread_id = True + if mod_name == 'DXT_POSIX' and dxt_posix_ver < 2: + # DXT_POSIX_VER 2 and later has pthread_id field 
added + append_pthread_id = False + elif mod_name == 'DXT_MPIIO' and dxt_mpiio_ver < 3: + # DXT_MPIIO_VER 3 and later has pthread_id field added + append_pthread_id = False + rec = {} buf = ffi.new("void **") r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) @@ -620,7 +636,6 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): size_of = ffi.sizeof("struct dxt_file_record") segments = ffi.cast("struct segment_info *", buf[0] + size_of ) - for i in range(wcnt): seg = { "offset": segments[i].offset, @@ -628,19 +643,28 @@ def log_get_dxt_record(log, mod_name, reads=True, writes=True, dtype='dict'): "start_time": segments[i].start_time, "end_time": segments[i].end_time } - rec['write_segments'].append(seg) + if append_pthread_id: + # append field of pthread_id + seg["pthread_id"] = segments[i].pthread_id + + rec['write_segments'].append(seg) for i in range(rcnt): i = i + wcnt + seg = { "offset": segments[i].offset, "length": segments[i].length, "start_time": segments[i].start_time, "end_time": segments[i].end_time } - rec['read_segments'].append(seg) + if append_pthread_id: + # append field of pthread_id + seg["pthread_id"] = segments[i].pthread_id + + rec['read_segments'].append(seg) if dtype == "pandas": rec['read_segments'] = pd.DataFrame(rec['read_segments']) diff --git a/include/darshan-dxt-log-format.h b/include/darshan-dxt-log-format.h index b31fc9928..5f11612c1 100644 --- a/include/darshan-dxt-log-format.h +++ b/include/darshan-dxt-log-format.h @@ -7,8 +7,8 @@ #define __DARSHAN_DXT_LOG_FORMAT_H /* current DXT log format version */ -#define DXT_POSIX_VER 1 -#define DXT_MPIIO_VER 2 +#define DXT_POSIX_VER 2 +#define DXT_MPIIO_VER 3 #define HOSTNAME_SIZE 64 @@ -21,6 +21,7 @@ typedef struct segment_info { int64_t length; double start_time; double end_time; + unsigned long pthread_id; } segment_info; #define X(a) a,