diff --git a/darshan-runtime/configure.ac b/darshan-runtime/configure.ac
index cef95d577..b15f3bd37 100644
--- a/darshan-runtime/configure.ac
+++ b/darshan-runtime/configure.ac
@@ -487,6 +487,46 @@ if test "x$enable_darshan_runtime" = xyes ; then
                      use --with-pnetcdf to provide the PnetCDF install prefix, if needed.]))
     fi
 
+    # inform about DAOS installs not found in default locations
+    AC_ARG_WITH([daos],
+        [AS_HELP_STRING([--with-daos@<:@=DIR@:>@],
+            [Installation directory for DAOS.])],
+        [], [with_daos=no]
+    )
+
+    # DAOS module (disabled by default)
+    AC_ARG_ENABLE([daos-mod],
+        [AS_HELP_STRING([--enable-daos-mod],
+            [Enables compilation and use of DAOS module])],
+        [], [enable_daos_mod=no]
+    )
+    if test "x$enable_daos_mod" = xyes ; then
+        AC_CHECK_HEADERS(m4_normalize([daos_types.h daos_prop.h daos_pool.h daos_cont.h
+                         daos_obj.h daos_array.h daos_fs.h]),
+            [],
+            [AC_MSG_ERROR([Cannot find required DAOS headers])],
+            [[
+            #ifdef HAVE_DAOS_TYPES_H
+            # include <daos_types.h>
+            #endif
+            #ifdef HAVE_DAOS_PROP_H
+            # include <daos_prop.h>
+            #endif
+            #ifdef HAVE_DAOS_CONT_H
+            # include <daos_cont.h>
+            #endif
+            #ifdef HAVE_DAOS_OBJ_H
+            # include <daos_obj.h>
+            #endif
+            #ifdef HAVE_DAOS_ARRAY_H
+            # include <daos_array.h>
+            #endif
+            ]])
+    elif test "x$enable_daos_mod" != xno ; then
+        AC_MSG_ERROR(m4_normalize([--enable-daos-mod does not take any argument,
+                     use --with-daos to provide the DAOS install prefix, if needed.]))
+    fi
+
     # BG/Q module
     AC_ARG_ENABLE([bgq-mod],
         [AS_HELP_STRING([--disable-bgq-mod],
@@ -809,6 +849,7 @@ AM_CONDITIONAL(BUILD_MDHIM_MODULE, [test "x$enable_mdhim_mod" = xyes])
 AM_CONDITIONAL(BUILD_APMPI_MODULE, [test "x$enable_apmpi_mod" = xyes])
 AM_CONDITIONAL(BUILD_APXC_MODULE, [test "x$enable_apxc_mod" = xyes])
 AM_CONDITIONAL(BUILD_HEATMAP_MODULE,[test "x$enable_heatmap_mod" = xyes])
+AM_CONDITIONAL(BUILD_DAOS_MODULE, [test "x$enable_daos_mod" = xyes])
 AM_CONDITIONAL(HAVE_LDMS, [test "x$enable_ldms_mod" = xyes])
 
 AC_CONFIG_FILES(Makefile \
@@ -884,6 +925,7 @@ if test "x$enable_darshan_runtime" = xyes ; then
     AUTOPERF MPI module support - $enable_apmpi_mod
     AUTOPERF XC module support - $enable_apxc_mod
     HDF5 module support - $enable_hdf5_mod
+    DAOS module support - $enable_daos_mod
     PnetCDF module support - $enable_pnetcdf_mod
     BG/Q module support - $enable_bgq_mod
     Lustre module support - $enable_lustre_mod
diff --git a/darshan-runtime/doc/darshan-runtime.txt b/darshan-runtime/doc/darshan-runtime.txt
index 76c2a6ee8..8d5155112 100644
--- a/darshan-runtime/doc/darshan-runtime.txt
+++ b/darshan-runtime/doc/darshan-runtime.txt
@@ -146,6 +146,10 @@ make install
 ** NOTE: PnetCDF instrumentation only works on PnetCDF library versions >=1.8
 * `--disable-lustre-mod`: disables compilation and use of Darshan's Lustre module
   (default=enabled)
+* `--enable-daos-mod`: enables compilation and use of Darshan's DAOS module
+  (default=disabled)
+* `--with-daos=DIR`: installation directory for DAOS
+** NOTE: Users must pass `--enable-daos-mod` to enable the DAOS module; `--with-daos` only supplies the DAOS install prefix, if needed.
 * `--enable-mdhim-mod`: enables compilation and use of Darshan's MDHIM module
   (default=disabled)
 * `--enable-ldms-mod`: enables compilation and use of Darshan's LDMS runtime module
   (default=disabled)
diff --git a/darshan-runtime/lib/Makefile.am b/darshan-runtime/lib/Makefile.am
index 9538b7217..103a3535e 100644
--- a/darshan-runtime/lib/Makefile.am
+++ b/darshan-runtime/lib/Makefile.am
@@ -71,6 +71,11 @@ if BUILD_HEATMAP_MODULE
     AM_CPPFLAGS += -DDARSHAN_HEATMAP
 endif
 
+if BUILD_DAOS_MODULE
+    C_SRCS += darshan-dfs.c darshan-daos.c
+    AM_CPPFLAGS += -DDARSHAN_DAOS
+endif
+
 .m4.c:
 	$(M4) $(AM_M4FLAGS) $(M4FLAGS) $< >$@
 
@@ -138,5 +143,7 @@ EXTRA_DIST = $(H_SRCS) \
               darshan-bgq.c \
               darshan-lustre.c \
               darshan-mdhim.c \
-              darshan-heatmap.c
+              darshan-heatmap.c \
+              darshan-dfs.c \
+              darshan-daos.c
diff --git a/darshan-runtime/lib/darshan-core.c b/darshan-runtime/lib/darshan-core.c
index 0b25eadeb..96f31ec18 100644
--- a/darshan-runtime/lib/darshan-core.c
+++ b/darshan-runtime/lib/darshan-core.c
@@ -116,7 +116,7 @@ static int darshan_should_instrument_rank(
     struct darshan_core_runtime *core);
 static void darshan_fs_info_from_path(
     const char *path, struct darshan_fs_info *fs_info);
-static int darshan_add_name_record_ref(
+static int darshan_update_name_record_ref(
     struct darshan_core_runtime *core, darshan_record_id rec_id,
     const char *name, darshan_module_id mod_id);
 static void darshan_get_user_name(
@@ -1301,51 +1301,87 @@ static void darshan_fs_info_from_path(const char *path, struct darshan_fs_info *
     return;
 }
 
-static int darshan_add_name_record_ref(struct darshan_core_runtime *core,
+static int darshan_update_name_record_ref(struct darshan_core_runtime *core,
     darshan_record_id rec_id, const char *name, darshan_module_id mod_id)
 {
-    struct darshan_core_name_record_ref *ref;
-    struct darshan_core_name_record_ref *check_ref;
-    int record_size = sizeof(darshan_record_id) + strlen(name) + 1;
+    int is_new_rec = 0;
+    struct darshan_core_name_record_ref *ref, *check_ref;
 
-    if((record_size + core->name_mem_used) > core->config.name_mem)
-        return(0);
+    /* if no name given, use the empty string */
+    if(!name) name = "";
 
-    /* drop core lock while we allocate reference. Note that
-     * this means we must check for existence again in hash table once we
-     * re-acquire the lock, but this code path will only happen once per
-     * file.
-     */
-    __DARSHAN_CORE_UNLOCK();
-    ref = malloc(sizeof(*ref));
-    __DARSHAN_CORE_LOCK();
+    /* check to see if we've already stored the id->name mapping for this record */
+    HASH_FIND(hlink, core->name_hash, &rec_id, sizeof(rec_id), ref);
     if(!ref)
     {
-        return(0);
-    }
-    memset(ref, 0, sizeof(*ref));
+        /* drop core lock while we allocate reference. Note that
+         * this means we must check for existence again in hash table once we
+         * re-acquire the lock, but this code path will only happen once per
+         * file.
+         */
+        __DARSHAN_CORE_UNLOCK();
+        ref = malloc(sizeof(*ref));
+        __DARSHAN_CORE_LOCK();
+        if(!ref)
+        {
+            return(0);
+        }
+        memset(ref, 0, sizeof(*ref));
 
-    /* make sure no one else added it while we dropped the lock */
-    HASH_FIND(hlink, core->name_hash, &rec_id,
-        sizeof(darshan_record_id), check_ref);
-    if(check_ref)
-        return(1);
+        HASH_FIND(hlink, core->name_hash, &rec_id, sizeof(rec_id), check_ref);
+        if(check_ref)
+        {
+            /* someone else added the ref while we dropped the lock */
+            free(ref);
+            ref = check_ref;
+        }
+        else
+        {
+            /* we need to allocate and add a new record ref */
+            is_new_rec = 1;
+        }
+    }
 
-    /* initialize the name record */
-    ref->name_record = (struct darshan_name_record *)
-        ((char *)core->log_name_p + core->name_mem_used);
-    memset(ref->name_record, 0, record_size);
-    ref->name_record->id = rec_id;
-    strcpy(ref->name_record->name, name);
-    DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
+    /* set a new name record reference in 2 scenarios:
+     *   1.) creation of a new record ref
+     *   2.) detecting zero-length name on an existing record ref
+     *       (i.e., initial creator of the ref didn't specify a name)
+     */
+    if(is_new_rec || ((strlen(ref->name_record->name) == 0) && strlen(name) > 0))
+    {
+        int record_size = sizeof(darshan_record_id) + strlen(name) + 1;
+        if((record_size + core->name_mem_used) > core->config.name_mem)
+        {
+            /* no more room for this name record */
+            if(is_new_rec) free(ref);
+            return(0);
+        }
+        else
+        {
+            /* initialize new name record structure */
+            ref->name_record = (struct darshan_name_record *)
+                ((char *)core->log_name_p + core->name_mem_used);
+            memset(ref->name_record, 0, record_size);
+            ref->name_record->id = rec_id;
+            strcpy(ref->name_record->name, name);
 
-    HASH_ADD(hlink, core->name_hash, name_record->id,
-        sizeof(darshan_record_id), ref);
-    core->name_mem_used += record_size;
+            core->name_mem_used += record_size;
 #ifdef __DARSHAN_ENABLE_MMAP_LOGS
-    core->log_hdr_p->name_map.len += record_size;
+            core->log_hdr_p->name_map.len += record_size;
 #endif
+        }
+    }
+
+    DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
+    if(is_new_rec)
+    {
+        /* add new record reference */
+        HASH_ADD(hlink, core->name_hash, name_record->id,
+            sizeof(darshan_record_id), ref);
+    }
+
+    /* successfully updated core record ref */
     return(1);
 }
 
@@ -2207,6 +2243,9 @@ static int darshan_core_name_is_excluded(const char *name, darshan_module_id mod
     int tmp_index = 0;
     struct darshan_core_regex *regex;
 
+    if(!name)
+        return(0);
+
     /* set flag if this module's record names are based on file paths */
     name_is_path = 1;
     if((mod_id == DARSHAN_APMPI_MOD) || (mod_id == DARSHAN_APXC_MOD) ||
@@ -2606,9 +2645,7 @@ void *darshan_core_register_record(
     size_t rec_size,
     struct darshan_fs_info *fs_info)
 {
-    struct darshan_core_name_record_ref *ref;
     void *rec_buf;
-    int ret;
 
     __DARSHAN_CORE_LOCK();
     if(!__darshan_core)
@@ -2625,35 +2662,19 @@ void *darshan_core_register_record(
         return(NULL);
     }
 
-    /* register a name record if a name is given for this record */
-    if(name)
+    if(darshan_core_name_is_excluded(name, mod_id))
     {
-        if(darshan_core_name_is_excluded(name, mod_id))
-        {
-            /* do not register record if name matches any exclusion rules */
-            __DARSHAN_CORE_UNLOCK();
-            return(NULL);
-        }
+        /* do not register record if name matches any exclusion rules */
+        __DARSHAN_CORE_UNLOCK();
+        return(NULL);
     }
 
-    /* check to see if we've already stored the id->name mapping for
-     * this record, and add a new name record if not
-     */
-    HASH_FIND(hlink, __darshan_core->name_hash, &rec_id,
-        sizeof(darshan_record_id), ref);
-    if(!ref)
+    if(!darshan_update_name_record_ref(__darshan_core, rec_id, name, mod_id))
     {
-        ret = darshan_add_name_record_ref(__darshan_core, rec_id, name, mod_id);
-        if(ret == 0)
-        {
-            DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id);
-            __DARSHAN_CORE_UNLOCK();
-            return(NULL);
-        }
-    }
-    else
-    {
-        DARSHAN_MOD_FLAG_SET(ref->mod_flags, mod_id);
+        /* unable to update record ref, fail and set this module's partial flag */
+        DARSHAN_MOD_FLAG_SET(__darshan_core->log_hdr_p->partial_flag, mod_id);
+        __DARSHAN_CORE_UNLOCK();
+        return(NULL);
    }
 
     __darshan_core->mod_array[mod_id]->rec_mem_avail -= rec_size;
@@ -2683,7 +2704,7 @@ void *darshan_core_register_record(
     if(fs_info)
         darshan_fs_info_from_path(name, fs_info);
 
-    return(rec_buf);;
+    return(rec_buf);
 }
 
 char *darshan_core_lookup_record_name(darshan_record_id rec_id)
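
[Review note, not part of the patch: the hunks above make the record name optional at
registration time and allow a later registration to attach it, which the DAOS module
relies on. A minimal sketch of the calling pattern this enables; the POSIX record type
and path below are illustrative placeholders only.]

```c
/* Hypothetical module-side sketch: register a record by ID before its
 * name is known, then re-register once the name becomes available. */
darshan_record_id rec_id = darshan_core_gen_record_id("/some/file");

/* first registration: a NULL name now yields an empty name record */
void *rec = darshan_core_register_record(rec_id, NULL, DARSHAN_POSIX_MOD,
    sizeof(struct darshan_posix_file), NULL);

/* a later registration for the same ID supplies the name; scenario 2 in
 * darshan_update_name_record_ref() fills it into the existing record */
rec = darshan_core_register_record(rec_id, "/some/file", DARSHAN_POSIX_MOD,
    sizeof(struct darshan_posix_file), NULL);
```
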
diff --git a/darshan-runtime/lib/darshan-daos.c b/darshan-runtime/lib/darshan-daos.c
new file mode 100644
index 000000000..b944dfcc1
--- /dev/null
+++ b/darshan-runtime/lib/darshan-daos.c
@@ -0,0 +1,2267 @@
+/*
+ * Copyright (C) 2020 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE
+
+#include "darshan-runtime-config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <search.h>
+#include <pthread.h>
+#include <uuid/uuid.h>
+
+#include "uthash.h"
+#include "darshan.h"
+#include "darshan-dynamic.h"
+#include "darshan-heatmap.h"
+
+#include <daos_types.h>
+#include <daos_prop.h>
+#include <daos_pool.h>
+#include <daos_cont.h>
+#include <daos_obj.h>
+#include <daos_array.h>
+
+/* container access routines intercepted for maintaining pool/container UUIDs */
+DARSHAN_FORWARD_DECL(daos_cont_open, int, (daos_handle_t poh, const char *cont, unsigned int flags, daos_handle_t *coh, daos_cont_info_t *info, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_cont_global2local, int, (daos_handle_t poh, d_iov_t glob, daos_handle_t *coh));
+DARSHAN_FORWARD_DECL(daos_cont_close, int, (daos_handle_t coh, daos_event_t *ev));
+
+/* multi-level key array API */
+DARSHAN_FORWARD_DECL(daos_obj_open, int, (daos_handle_t coh, daos_obj_id_t oid, unsigned int mode, daos_handle_t *oh, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_fetch, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, daos_key_t *dkey, unsigned int nr, daos_iod_t *iods, d_sg_list_t *sgls, daos_iom_t *ioms, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_update, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, daos_key_t *dkey, unsigned int nr, daos_iod_t *iods, d_sg_list_t *sgls, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_punch, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_punch_dkeys, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, unsigned int nr, daos_key_t *dkeys, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_punch_akeys, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, daos_key_t *dkey, unsigned int nr, daos_key_t *akeys, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_list_dkey, int, (daos_handle_t oh, daos_handle_t th, uint32_t *nr, daos_key_desc_t *kds, d_sg_list_t *sgl, daos_anchor_t *anchor, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_list_akey, int, (daos_handle_t oh, daos_handle_t th, daos_key_t *dkey, uint32_t *nr, daos_key_desc_t *kds, d_sg_list_t *sgl, daos_anchor_t *anchor, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_list_recx, int, (daos_handle_t oh, daos_handle_t th, daos_key_t *dkey, daos_key_t *akey, daos_size_t *size, uint32_t *nr, daos_recx_t *recxs, daos_epoch_range_t *eprs, daos_anchor_t *anchor, bool incr_order, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_obj_close, int, (daos_handle_t oh, daos_event_t *ev));
+
+/* array API */
+DARSHAN_FORWARD_DECL(daos_array_create, int, (daos_handle_t coh, daos_obj_id_t oid, daos_handle_t th, daos_size_t cell_size, daos_size_t chunk_size, daos_handle_t *oh, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_open, int, (daos_handle_t coh, daos_obj_id_t oid, daos_handle_t th, unsigned int mode, daos_size_t *cell_size, daos_size_t *chunk_size, daos_handle_t *oh, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_open_with_attr, int, (daos_handle_t coh, daos_obj_id_t oid, daos_handle_t th, unsigned int mode, daos_size_t cell_size, daos_size_t chunk_size, daos_handle_t *oh, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_read, int, (daos_handle_t oh, daos_handle_t th, daos_array_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_write, int, (daos_handle_t oh, daos_handle_t th, daos_array_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_get_size, int, (daos_handle_t oh, daos_handle_t th, daos_size_t *size, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_set_size, int, (daos_handle_t oh, daos_handle_t th, daos_size_t size, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_stat, int, (daos_handle_t oh, daos_handle_t th, daos_array_stbuf_t *stbuf, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_punch, int, (daos_handle_t oh, daos_handle_t th, daos_array_iod_t *iod, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_destroy, int, (daos_handle_t oh, daos_handle_t th, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_array_close, int, (daos_handle_t oh, daos_event_t *ev));
+
+/* key-value API */
+DARSHAN_FORWARD_DECL(daos_kv_open, int, (daos_handle_t coh, daos_obj_id_t oid, unsigned int mode, daos_handle_t *oh, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_kv_get, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, const char *key, daos_size_t *size, void *buf, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_kv_put, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, const char *key, daos_size_t size, const void *buf, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_kv_remove, int, (daos_handle_t oh, daos_handle_t th, uint64_t flags, const char *key, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_kv_list, int, (daos_handle_t oh, daos_handle_t th, uint32_t *nr, daos_key_desc_t *kds, d_sg_list_t *sgl, daos_anchor_t *anchor, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_kv_destroy, int, (daos_handle_t oh, daos_handle_t th, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(daos_kv_close, int, (daos_handle_t oh, daos_event_t *ev));
+
+/* The daos_object_record_ref structure maintains necessary runtime metadata
+ * for the DAOS object record (darshan_daos_object structure, defined in
+ * darshan-daos-log-format.h) pointed to by 'object_rec'. This metadata
+ * assists with the instrumenting of specific statistics in the object record.
+ *
+ * RATIONALE: the DAOS module needs to track some stateful, volatile
+ * information about each open object (like the most recent
+ * access time, etc.) to aid in instrumentation, but this information can't be
+ * stored in the darshan_daos_object struct because we don't want it to appear in
+ * the final darshan log file. We therefore associate a daos_object_record_ref
+ * struct with each darshan_daos_object struct in order to track this information
+ * (i.e., the mapping between daos_object_record_ref structs and darshan_daos_object
+ * structs is one-to-one).
+ *
+ * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to
+ * associate different types of handles with this daos_object_record_ref struct.
+ * This allows us to index this struct (and the underlying object record) by using
+ * either the corresponding Darshan record identifier (derived from the object OID)
+ * or by a DAOS object handle, for instance. Note that, while there should
+ * only be a single Darshan record identifier that indexes a daos_object_record_ref,
+ * there could be multiple open object handles that index it.
+ */
+struct daos_object_record_ref
+{
+    struct darshan_daos_object *object_rec;
+    enum darshan_io_type last_io_type;
+    double last_meta_end;
+    double last_read_end;
+    double last_write_end;
+    void *access_root;
+    int access_count;
+};
+
+struct daos_poolcont_info
+{
+    daos_handle_t coh;
+    uuid_t pool_uuid;
+    uuid_t cont_uuid;
+    UT_hash_handle hlink;
+};
+
+struct daos_runtime
+{
+    struct daos_poolcont_info *poolcont_hash;
+    void *rec_id_hash;
+    void *oh_hash;
+    int obj_rec_count;
+    darshan_record_id heatmap_id;
+    int frozen; /* flag to indicate that the counters should no longer be modified */
+};
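
[Review note, not part of the patch: a sketch of the dual-key indexing the NOTE above
describes, using the darshan-common record-ref calls exactly as this file uses them.
The variables `rec_ref`, `rec_id`, and `oh` are assumed to already be in scope.]

```c
/* the same ref is registered under its Darshan record ID ... */
darshan_add_record_ref(&(daos_runtime->rec_id_hash), &rec_id,
    sizeof(darshan_record_id), rec_ref);

/* ... and under each open DAOS object handle that refers to it */
darshan_add_record_ref(&(daos_runtime->oh_hash), &oh,
    sizeof(daos_handle_t), rec_ref);

/* wrappers can then resolve either key back to the same ref */
rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, &oh,
    sizeof(daos_handle_t));
```
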
+
+static void daos_runtime_initialize();
+static struct daos_object_record_ref *daos_track_new_object_record(
+    darshan_record_id rec_id, daos_obj_id_t oid, struct daos_poolcont_info *poolcont_info);
+static void daos_finalize_object_records(
+    void *rec_ref_p, void *user_ptr);
+#ifdef HAVE_MPI
+static void daos_record_reduction_op(
+    void* inobj_v, void* inoutobj_v, int *len, MPI_Datatype *datatype);
+static void daos_mpi_redux(
+    void *daos_buf, MPI_Comm mod_comm,
+    darshan_record_id *shared_recs, int shared_rec_count);
+#endif
+static void daos_output(
+    void **daos_buf, int *daos_buf_sz);
+static void daos_cleanup(
+    void);
+
+static struct daos_runtime *daos_runtime = NULL;
+static pthread_mutex_t daos_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+static int daos_runtime_init_attempted = 0;
+static int my_rank = -1;
+
+#define DAOS_LOCK() pthread_mutex_lock(&daos_runtime_mutex)
+#define DAOS_UNLOCK() pthread_mutex_unlock(&daos_runtime_mutex)
+
+#define DAOS_WTIME() \
+    __darshan_disabled ? 0 : darshan_core_wtime();
+
+#define DAOS_PRE_RECORD() do { \
+    if(!ret && !__darshan_disabled) { \
+        DAOS_LOCK(); \
+        if(!daos_runtime && !daos_runtime_init_attempted) \
+            daos_runtime_initialize(); \
+        if(daos_runtime && !daos_runtime->frozen) break; \
+        DAOS_UNLOCK(); \
+    } \
+    return(ret); \
+} while(0)
+
+#define DAOS_POST_RECORD() do { \
+    DAOS_UNLOCK(); \
+} while(0)
+
+#define DAOS_STORE_POOLCONT_INFO(__poh, __coh_p) do { \
+    int __query_ret; \
+    daos_pool_info_t __pool_info; \
+    daos_cont_info_t __cont_info; \
+    struct daos_poolcont_info *__poolcont_info; \
+    __query_ret = daos_pool_query(__poh, NULL, &__pool_info, NULL, NULL); \
+    if(__query_ret == 0) { \
+        __query_ret = daos_cont_query(*__coh_p, &__cont_info, NULL, NULL); \
+        if(__query_ret == 0) { \
+            __poolcont_info = malloc(sizeof(*__poolcont_info)); \
+            if(__poolcont_info) { \
+                uuid_copy(__poolcont_info->pool_uuid, __pool_info.pi_uuid); \
+                uuid_copy(__poolcont_info->cont_uuid, __cont_info.ci_uuid); \
+                __poolcont_info->coh = *__coh_p; \
+                HASH_ADD(hlink, daos_runtime->poolcont_hash, coh, sizeof(*__coh_p), __poolcont_info); \
+            } \
+        } \
+    } \
+} while(0)
+
+#define DAOS_GET_POOLCONT_INFO(__coh, __poolcont_info) \
+    HASH_FIND(hlink, daos_runtime->poolcont_hash, &__coh, sizeof(__coh), __poolcont_info)
+
+#define DAOS_FREE_POOLCONT_INFO(__poolcont_info) do { \
+    HASH_DELETE(hlink, daos_runtime->poolcont_hash, __poolcont_info); \
+    free(__poolcont_info); \
+} while(0)
+
+#define ID_GLOB_SIZE (sizeof(daos_obj_id_t) + (2*sizeof(uuid_t)))
+#define DAOS_RECORD_OBJ_OPEN(__coh, __oh_p, __oid, __counter, __cell_sz, __chunk_sz, __is_async, __tm1, __tm2) do { \
+    struct daos_poolcont_info *__poolcont_info; \
+    unsigned char __id_glob[ID_GLOB_SIZE]; \
+    darshan_record_id __rec_id; \
+    struct daos_object_record_ref *__rec_ref; \
+    DAOS_GET_POOLCONT_INFO(__coh, __poolcont_info); \
+    if(!__poolcont_info) break; \
+    memcpy(__id_glob, __poolcont_info->pool_uuid, sizeof(__poolcont_info->pool_uuid)); \
+    memcpy(__id_glob+sizeof(__poolcont_info->pool_uuid), __poolcont_info->cont_uuid, sizeof(__poolcont_info->cont_uuid)); \
+    memcpy(__id_glob+sizeof(__poolcont_info->pool_uuid)+sizeof(__poolcont_info->cont_uuid), &__oid, sizeof(__oid)); \
+    __rec_id = darshan_hash(__id_glob, ID_GLOB_SIZE, 0); \
+    __rec_ref = darshan_lookup_record_ref(daos_runtime->rec_id_hash, &__rec_id, \
+        sizeof(darshan_record_id)); \
+    if(!__rec_ref) __rec_ref = daos_track_new_object_record(__rec_id, __oid, __poolcont_info); \
+    if(!__rec_ref) break; \
+    __rec_ref->object_rec->counters[__counter] += 1; \
+    if(__is_async) __rec_ref->object_rec->counters[DAOS_NB_OPS] += 1; \
+    if(__cell_sz) __rec_ref->object_rec->counters[DAOS_ARRAY_CELL_SIZE] = __cell_sz; \
+    if(__chunk_sz) __rec_ref->object_rec->counters[DAOS_ARRAY_CHUNK_SIZE] = __chunk_sz; \
+    __rec_ref->object_rec->counters[DAOS_OBJ_OTYPE] = daos_obj_id2type(__oid); \
+    if(__rec_ref->object_rec->fcounters[DAOS_F_OPEN_START_TIMESTAMP] == 0 || \
+        __rec_ref->object_rec->fcounters[DAOS_F_OPEN_START_TIMESTAMP] > __tm1) \
+        __rec_ref->object_rec->fcounters[DAOS_F_OPEN_START_TIMESTAMP] = __tm1; \
+    __rec_ref->object_rec->fcounters[DAOS_F_OPEN_END_TIMESTAMP] = __tm2; \
+    DARSHAN_TIMER_INC_NO_OVERLAP(__rec_ref->object_rec->fcounters[DAOS_F_META_TIME], \
+        __tm1, __tm2, __rec_ref->last_meta_end); \
+    darshan_add_record_ref(&(daos_runtime->oh_hash), __oh_p, \
+        sizeof(daos_handle_t), __rec_ref); \
+} while(0)
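
[Review note, not part of the patch: the record-ID derivation buried in the macro
above, distilled. A Darshan record ID for a DAOS object is the hash of the
concatenation (pool UUID || container UUID || OID); `pool_uuid`, `cont_uuid`, and
`oid` are assumed to be in scope.]

```c
unsigned char id_glob[ID_GLOB_SIZE];
darshan_record_id rec_id;

/* pack pool UUID, container UUID, and object ID back to back */
memcpy(id_glob, pool_uuid, sizeof(uuid_t));
memcpy(id_glob + sizeof(uuid_t), cont_uuid, sizeof(uuid_t));
memcpy(id_glob + 2 * sizeof(uuid_t), &oid, sizeof(oid));

/* the hash of the packed tuple is the record ID */
rec_id = darshan_hash(id_glob, ID_GLOB_SIZE, 0);
```
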
+
+#define DAOS_RECORD_OBJ_READ(__oh, __counter, __sz, __is_async, __tm1, __tm2) do { \
+    int64_t __tmp_sz = (int64_t)__sz; \
+    struct darshan_common_val_counter *__cvc; \
+    double __elapsed = __tm2-__tm1; \
+    struct daos_object_record_ref *__rec_ref; \
+    __rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, &__oh, \
+        sizeof(daos_handle_t)); \
+    if(!__rec_ref) break; \
+    if(__counter == DAOS_ARRAY_READS) \
+        __tmp_sz *= __rec_ref->object_rec->counters[DAOS_ARRAY_CELL_SIZE];\
+    /* heatmap to record traffic summary */ \
+    heatmap_update(daos_runtime->heatmap_id, HEATMAP_READ, __tmp_sz, __tm1, __tm2); \
+    __rec_ref->object_rec->counters[__counter] += 1; \
+    if(__is_async) __rec_ref->object_rec->counters[DAOS_NB_OPS] += 1; \
+    __rec_ref->object_rec->counters[DAOS_BYTES_READ] += __tmp_sz; \
+    DARSHAN_BUCKET_INC(&(__rec_ref->object_rec->counters[DAOS_SIZE_READ_0_100]), __tmp_sz); \
+    __cvc = darshan_track_common_val_counters(&__rec_ref->access_root, &__tmp_sz, 1, \
+        &__rec_ref->access_count); \
+    if(__cvc) DARSHAN_UPDATE_COMMON_VAL_COUNTERS( \
+        &(__rec_ref->object_rec->counters[DAOS_ACCESS1_ACCESS]), \
+        &(__rec_ref->object_rec->counters[DAOS_ACCESS1_COUNT]), \
+        __cvc->vals, 1, __cvc->freq, 0); \
+    if(__rec_ref->last_io_type == DARSHAN_IO_WRITE) \
+        __rec_ref->object_rec->counters[DAOS_RW_SWITCHES] += 1; \
+    __rec_ref->last_io_type = DARSHAN_IO_READ; \
+    if(__rec_ref->object_rec->fcounters[DAOS_F_READ_START_TIMESTAMP] == 0 || \
+        __rec_ref->object_rec->fcounters[DAOS_F_READ_START_TIMESTAMP] > __tm1) \
+        __rec_ref->object_rec->fcounters[DAOS_F_READ_START_TIMESTAMP] = __tm1; \
+    __rec_ref->object_rec->fcounters[DAOS_F_READ_END_TIMESTAMP] = __tm2; \
+    if(__rec_ref->object_rec->fcounters[DAOS_F_MAX_READ_TIME] < __elapsed) { \
+        __rec_ref->object_rec->fcounters[DAOS_F_MAX_READ_TIME] = __elapsed; \
+        __rec_ref->object_rec->counters[DAOS_MAX_READ_TIME_SIZE] = __tmp_sz; \
+    } \
+    DARSHAN_TIMER_INC_NO_OVERLAP(__rec_ref->object_rec->fcounters[DAOS_F_READ_TIME], \
+        __tm1, __tm2, __rec_ref->last_read_end); \
+} while(0)
+
+#define DAOS_RECORD_OBJ_WRITE(__oh, __counter, __sz, __is_async, __tm1, __tm2) do { \
+    int64_t __tmp_sz = (int64_t)__sz; \
+    struct darshan_common_val_counter *__cvc; \
+    double __elapsed = __tm2-__tm1; \
+    struct daos_object_record_ref *__rec_ref; \
+    __rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, &__oh, \
+        sizeof(daos_handle_t)); \
+    if(!__rec_ref) break; \
+    if(__counter == DAOS_ARRAY_WRITES) \
+        __tmp_sz *= __rec_ref->object_rec->counters[DAOS_ARRAY_CELL_SIZE];\
+    /* heatmap to record traffic summary */ \
+    heatmap_update(daos_runtime->heatmap_id, HEATMAP_WRITE, __tmp_sz, __tm1, __tm2); \
+    __rec_ref->object_rec->counters[__counter] += 1; \
+    if(__is_async) __rec_ref->object_rec->counters[DAOS_NB_OPS] += 1; \
+    __rec_ref->object_rec->counters[DAOS_BYTES_WRITTEN] += __tmp_sz; \
+    DARSHAN_BUCKET_INC(&(__rec_ref->object_rec->counters[DAOS_SIZE_WRITE_0_100]), __tmp_sz); \
+    __cvc = darshan_track_common_val_counters(&__rec_ref->access_root, &__tmp_sz, 1, \
+        &__rec_ref->access_count); \
+    if(__cvc) DARSHAN_UPDATE_COMMON_VAL_COUNTERS( \
+        &(__rec_ref->object_rec->counters[DAOS_ACCESS1_ACCESS]), \
+        &(__rec_ref->object_rec->counters[DAOS_ACCESS1_COUNT]), \
+        __cvc->vals, 1, __cvc->freq, 0); \
+    if(__rec_ref->last_io_type == DARSHAN_IO_READ) \
+        __rec_ref->object_rec->counters[DAOS_RW_SWITCHES] += 1; \
+    __rec_ref->last_io_type = DARSHAN_IO_WRITE; \
+    if(__rec_ref->object_rec->fcounters[DAOS_F_WRITE_START_TIMESTAMP] == 0 || \
+        __rec_ref->object_rec->fcounters[DAOS_F_WRITE_START_TIMESTAMP] > __tm1) \
+        __rec_ref->object_rec->fcounters[DAOS_F_WRITE_START_TIMESTAMP] = __tm1; \
+    __rec_ref->object_rec->fcounters[DAOS_F_WRITE_END_TIMESTAMP] = __tm2; \
+    if(__rec_ref->object_rec->fcounters[DAOS_F_MAX_WRITE_TIME] < __elapsed) { \
+        __rec_ref->object_rec->fcounters[DAOS_F_MAX_WRITE_TIME] = __elapsed; \
+        __rec_ref->object_rec->counters[DAOS_MAX_WRITE_TIME_SIZE] = __tmp_sz; \
+    } \
+    DARSHAN_TIMER_INC_NO_OVERLAP(__rec_ref->object_rec->fcounters[DAOS_F_WRITE_TIME], \
+        __tm1, __tm2, __rec_ref->last_write_end); \
+} while(0)
+
+#define DAOS_RECORD_OBJ_CLOSE(__oh, __tm1, __tm2) do { \
+    struct daos_object_record_ref *__rec_ref; \
+    __rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, &__oh, \
+        sizeof(daos_handle_t)); \
+    if(!__rec_ref) break; \
+    if(__rec_ref->object_rec->fcounters[DAOS_F_CLOSE_START_TIMESTAMP] == 0 || \
+        __rec_ref->object_rec->fcounters[DAOS_F_CLOSE_START_TIMESTAMP] > __tm1) \
+        __rec_ref->object_rec->fcounters[DAOS_F_CLOSE_START_TIMESTAMP] = __tm1; \
+    __rec_ref->object_rec->fcounters[DAOS_F_CLOSE_END_TIMESTAMP] = __tm2; \
+    DARSHAN_TIMER_INC_NO_OVERLAP(__rec_ref->object_rec->fcounters[DAOS_F_META_TIME], \
+        __tm1, __tm2, __rec_ref->last_meta_end); \
+    darshan_delete_record_ref(&(daos_runtime->oh_hash), &__oh, sizeof(daos_handle_t)); \
+} while(0)
+
+/* DAOS callback routine to measure end of async open calls */
+struct daos_open_event_tracker
+{
+    double tm1;
+    daos_handle_t coh;
+    daos_obj_id_t oid;
+    int op;
+    int resolve_sizes;
+    union
+    {
+        daos_size_t cell_size;
+        daos_size_t *cell_size_p;
+    };
+    union
+    {
+        daos_size_t chunk_size;
+        daos_size_t *chunk_size_p;
+    };
+    daos_handle_t *oh_p;
+};
+int darshan_daos_open_comp_cb(void *arg, daos_event_t *ev, int ret)
+{
+    struct daos_open_event_tracker *tracker = (struct daos_open_event_tracker *)arg;
+
+    if (ret == 0)
+    {
+        /* async operation completed successfully, capture Darshan statistics */
+        double tm2 = darshan_core_wtime();
+        if (!tracker->resolve_sizes)
+            DAOS_RECORD_OBJ_OPEN(tracker->coh, tracker->oh_p, tracker->oid, tracker->op,
+                tracker->cell_size, tracker->chunk_size, 1, tracker->tm1, tm2);
+        else
+            DAOS_RECORD_OBJ_OPEN(tracker->coh, tracker->oh_p, tracker->oid, tracker->op,
+                *(tracker->cell_size_p), *(tracker->chunk_size_p), 1, tracker->tm1, tm2);
+    }
+    free(tracker);
+
+    return 0;
+}
+
+/* DAOS callback routine to measure end of async read calls */
+struct daos_read_event_tracker
+{
+    double tm1;
+    daos_handle_t oh;
+    int op;
+    union
+    {
+        daos_size_t read_size;
+        daos_size_t *read_size_p;
+    };
+};
+int darshan_daos_read_comp_cb(void *arg, daos_event_t *ev, int ret)
+{
+    struct daos_read_event_tracker *tracker = (struct daos_read_event_tracker *)arg;
+
+    if (ret == 0)
+    {
+        /* async operation completed successfully, capture Darshan statistics */
+        double tm2 = darshan_core_wtime();
+        if (tracker->op != DAOS_KV_GETS)
+            DAOS_RECORD_OBJ_READ(tracker->oh, tracker->op, tracker->read_size, 1,
+                tracker->tm1, tm2);
+        else
+            DAOS_RECORD_OBJ_READ(tracker->oh, tracker->op, *(tracker->read_size_p), 1,
+                tracker->tm1, tm2);
+    }
+    free(tracker);
+
+    return 0;
+}
+
+/* DAOS callback routine to measure end of async write calls */
+struct daos_write_event_tracker
+{
+    double tm1;
+    daos_handle_t oh;
+    int op;
+    daos_size_t write_size;
+};
+int darshan_daos_write_comp_cb(void *arg, daos_event_t *ev, int ret)
+{
+    struct daos_write_event_tracker *tracker = (struct daos_write_event_tracker *)arg;
+
+    if (ret == 0)
+    {
+        /* async operation completed successfully, capture Darshan statistics */
+        double tm2 = darshan_core_wtime();
+        DAOS_RECORD_OBJ_WRITE(tracker->oh, tracker->op, tracker->write_size, 1,
+            tracker->tm1, tm2);
+    }
+    free(tracker);
+
+    return 0;
+}
+
+/* DAOS callback routine to measure end of async "metadata" calls */
+struct daos_meta_event_tracker
+{
+    double tm1;
+    daos_handle_t oh;
+    int op;
+};
+int darshan_daos_meta_comp_cb(void *arg, daos_event_t *ev, int ret)
+{
+    struct daos_meta_event_tracker *tracker = (struct daos_meta_event_tracker *)arg;
+
+    if (ret == 0)
+    {
+        /* async operation completed successfully, capture Darshan statistics */
+        double tm2 = darshan_core_wtime();
+        struct daos_object_record_ref *rec_ref;
+        rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash,
+            &tracker->oh, sizeof(daos_handle_t));
+        if(rec_ref)
+        {
+            DARSHAN_TIMER_INC_NO_OVERLAP(
+                rec_ref->object_rec->fcounters[DAOS_F_META_TIME],
+                tracker->tm1, tm2, rec_ref->last_meta_end);
+            rec_ref->object_rec->counters[tracker->op] += 1;
+            rec_ref->object_rec->counters[DAOS_NB_OPS] += 1;
+        }
+
+    }
+    free(tracker);
+
+    return 0;
+}
+
+/* DAOS callback routine to measure end of async close calls */
+struct daos_close_event_tracker
+{
+    double tm1;
+    daos_handle_t oh;
+};
+int darshan_daos_close_comp_cb(void *arg, daos_event_t *ev, int ret)
+{
+    struct daos_close_event_tracker *tracker = (struct daos_close_event_tracker *)arg;
+
+    if (ret == 0)
+    {
+        /* async operation completed successfully, capture Darshan statistics */
+        double tm2 = darshan_core_wtime();
+        DAOS_RECORD_OBJ_CLOSE(tracker->oh, tracker->tm1, tm2);
+    }
+    free(tracker);
+
+    return 0;
+}
+
+/* DAOS callback routine to capture key details from container open calls */
+struct daos_contopen_event_tracker
+{
+    daos_handle_t poh;
+    daos_handle_t *coh_p;
+};
+int darshan_daos_contopen_comp_cb(void *arg, daos_event_t *ev, int ret)
+{
+    struct daos_contopen_event_tracker *tracker = (struct daos_contopen_event_tracker *)arg;
+
+    if (ret == 0)
+    {
+        /* async operation completed successfully, capture container info */
+        DAOS_STORE_POOLCONT_INFO(tracker->poh, tracker->coh_p);
+    }
+    free(tracker);
+
+    return 0;
+}
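
[Review note, not part of the patch: the trackers above end timing when an async
operation actually completes rather than when the wrapped call returns, by attaching a
completion callback to the caller's event. A sketch of what that looks like from the
application side, under assumed standard DAOS event-queue API usage; `oh`, `iod`, and
`sgl` are placeholders.]

```c
daos_handle_t eqh;
daos_event_t ev, *evp = &ev;

daos_eq_create(&eqh);
daos_event_init(&ev, eqh, NULL);

/* intercepted: the wrapper registers darshan_daos_read_comp_cb on ev
 * and records its own start time in the tracker */
daos_array_read(oh, DAOS_TX_NONE, &iod, &sgl, &ev);

/* when the event completes, the callback fires and records the byte
 * count and elapsed time against the object's record */
daos_eq_poll(eqh, 0, DAOS_EQ_WAIT, 1, &evp);
```
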
poolcont_info); + if(poolcont_info) + DAOS_FREE_POOLCONT_INFO(poolcont_info); + } + DAOS_UNLOCK(); + } + + ret = __real_daos_cont_close(coh, ev); + + return(ret); +} + +/* multi-level key array API */ + +int DARSHAN_DECL(daos_obj_open)(daos_handle_t coh, daos_obj_id_t oid, unsigned int mode, daos_handle_t *oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_obj_open); + + if(ev) + { + /* setup callback to record the open operation upon completion */ + struct daos_open_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->coh = coh; + tracker->oid = oid; + tracker->op = DAOS_OBJ_OPENS; + tracker->resolve_sizes = 0; + tracker->cell_size = 0; + tracker->chunk_size = 0; + tracker->oh_p = oh; + daos_event_register_comp_cb(ev, darshan_daos_open_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_open(coh, oid, mode, oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_OPEN(coh, oh, oid, DAOS_OBJ_OPENS, 0, 0, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +#define DAOS_OBJ_IOD_SZ(__iods, __nr, __sz) do { \ + int __i, __j; \ + __sz = 0; \ + for(__i = 0; __i < __nr; __i++) { \ + if(__iods[__i].iod_type == DAOS_IOD_SINGLE) \ + __sz += __iods[__i].iod_size; \ + else if(__iods[__i].iod_recxs) \ + for(__j = 0; __j < __iods[__i].iod_nr; __j++) \ + __sz += (__iods[__i].iod_size * __iods[__i].iod_recxs[__j].rx_nr); \ + } \ +} while(0) + +int DARSHAN_DECL(daos_obj_fetch)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + daos_key_t *dkey, unsigned int nr, daos_iod_t *iods, d_sg_list_t *sgls, + daos_iom_t *ioms, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t fetch_sz; + + MAP_OR_FAIL(daos_obj_fetch); + + DAOS_OBJ_IOD_SZ(iods, nr, fetch_sz); + + if(ev) + { + /* setup callback to record the read operation upon completion */ + struct daos_read_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_FETCHES; + tracker->read_size = fetch_sz; + daos_event_register_comp_cb(ev, darshan_daos_read_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_fetch(oh, th, flags, dkey, nr, iods, sgls, ioms, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_READ(oh, DAOS_OBJ_FETCHES, fetch_sz, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_obj_update)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + daos_key_t *dkey, unsigned int nr, daos_iod_t *iods, d_sg_list_t *sgls, + daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t update_sz; + + MAP_OR_FAIL(daos_obj_update); + + DAOS_OBJ_IOD_SZ(iods, nr, update_sz); + + if(ev) + { + /* setup callback to record the write operation upon completion */ + struct daos_write_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_UPDATES; + tracker->write_size = update_sz; + daos_event_register_comp_cb(ev, darshan_daos_write_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_update(oh, th, flags, dkey, nr, iods, sgls, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_WRITE(oh, DAOS_OBJ_UPDATES, update_sz, 0, tm1, 
tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_obj_punch)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_obj_punch); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_PUNCHES; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_punch(oh, th, flags, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_OBJ_PUNCHES] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_obj_punch_dkeys)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + unsigned int nr, daos_key_t *dkeys, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_obj_punch_dkeys); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_DKEY_PUNCHES; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_punch_dkeys(oh, th, flags, nr, dkeys, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_OBJ_DKEY_PUNCHES] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_obj_punch_akeys)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + daos_key_t *dkey, unsigned int nr, daos_key_t *akeys, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_obj_punch_akeys); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_AKEY_PUNCHES; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_punch_akeys(oh, th, flags, dkey, nr, akeys, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_OBJ_AKEY_PUNCHES] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + + +int DARSHAN_DECL(daos_obj_list_dkey)(daos_handle_t oh, daos_handle_t th, 
uint32_t *nr, + daos_key_desc_t *kds, d_sg_list_t *sgl, daos_anchor_t *anchor, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_obj_list_dkey); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_DKEY_LISTS; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_list_dkey(oh, th, nr, kds, sgl, anchor, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_OBJ_DKEY_LISTS] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_obj_list_akey)(daos_handle_t oh, daos_handle_t th, + daos_key_t *dkey, uint32_t *nr, daos_key_desc_t *kds, d_sg_list_t *sgl, + daos_anchor_t *anchor, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_obj_list_akey); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_AKEY_LISTS; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_list_akey(oh, th, dkey, nr, kds, sgl, anchor, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_OBJ_AKEY_LISTS] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_obj_list_recx)(daos_handle_t oh, daos_handle_t th, + daos_key_t *dkey, daos_key_t *akey, daos_size_t *size, uint32_t *nr, + daos_recx_t *recxs, daos_epoch_range_t *eprs, daos_anchor_t *anchor, + bool incr_order, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_obj_list_recx); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_OBJ_RECX_LISTS; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_list_recx(oh, th, dkey, akey, size, nr, recxs, + eprs, anchor, incr_order, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_OBJ_RECX_LISTS] += 1; + } + 
DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_obj_close)(daos_handle_t oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_obj_close); + + if(ev) + { + /* setup callback to record the close operation upon completion */ + struct daos_close_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + daos_event_register_comp_cb(ev, darshan_daos_close_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_obj_close(oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_CLOSE(oh, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +/* array API */ + +int DARSHAN_DECL(daos_array_create)(daos_handle_t coh, daos_obj_id_t oid, + daos_handle_t th, daos_size_t cell_size, daos_size_t chunk_size, + daos_handle_t *oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_array_create); + + if(ev) + { + /* setup callback to record the open operation upon completion */ + struct daos_open_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->coh = coh; + tracker->oid = oid; + tracker->op = DAOS_ARRAY_OPENS; + tracker->resolve_sizes = 0; + tracker->cell_size = cell_size; + tracker->chunk_size = chunk_size; + tracker->oh_p = oh; + daos_event_register_comp_cb(ev, darshan_daos_open_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_create(coh, oid, th, cell_size, chunk_size, oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_OPEN(coh, oh, oid, DAOS_ARRAY_OPENS, cell_size, chunk_size, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_open)(daos_handle_t coh, daos_obj_id_t oid, + daos_handle_t th, unsigned int mode, daos_size_t *cell_size, daos_size_t *chunk_size, + daos_handle_t *oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_array_open); + + if(ev) + { + /* setup callback to record the open operation upon completion */ + struct daos_open_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->coh = coh; + tracker->oid = oid; + tracker->op = DAOS_ARRAY_OPENS; + tracker->resolve_sizes = 1; + tracker->cell_size_p = cell_size; + tracker->chunk_size_p = chunk_size; + tracker->oh_p = oh; + daos_event_register_comp_cb(ev, darshan_daos_open_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_open(coh, oid, th, mode, cell_size, chunk_size, oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_OPEN(coh, oh, oid, DAOS_ARRAY_OPENS, *cell_size, *chunk_size, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_open_with_attr)(daos_handle_t coh, daos_obj_id_t oid, daos_handle_t th, unsigned int mode, daos_size_t cell_size, daos_size_t chunk_size, daos_handle_t *oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_array_open_with_attr); + + if(ev) + { + /* setup callback to record the open operation upon completion */ + struct daos_open_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->coh = coh; + tracker->oid = oid; + tracker->op = DAOS_ARRAY_OPENS; + 
tracker->resolve_sizes = 0; + tracker->cell_size = cell_size; + tracker->chunk_size = chunk_size; + tracker->oh_p = oh; + daos_event_register_comp_cb(ev, darshan_daos_open_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_open_with_attr(coh, oid, th, mode, cell_size, chunk_size, oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_OPEN(coh, oh, oid, DAOS_ARRAY_OPENS, cell_size, chunk_size, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +/* XXX daos_array_global2local not supported, as there is no way to map from a + * global representation to underlying object ID used to reference record + */ + +#define DAOS_ARRAY_IOD_SZ(__iod, __sz) do { \ + int __i; \ + __sz = 0; \ + for(__i = 0; __i < __iod->arr_nr; __i++) \ + __sz += __iod->arr_rgs[__i].rg_len; \ +} while(0) + +int DARSHAN_DECL(daos_array_read)(daos_handle_t oh, daos_handle_t th, + daos_array_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t read_sz; + + MAP_OR_FAIL(daos_array_read); + + DAOS_ARRAY_IOD_SZ(iod, read_sz); + + if(ev) + { + /* setup callback to record the read operation upon completion */ + struct daos_read_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_ARRAY_READS; + tracker->read_size = read_sz; + daos_event_register_comp_cb(ev, darshan_daos_read_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_read(oh, th, iod, sgl, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_READ(oh, DAOS_ARRAY_READS, read_sz, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_write)(daos_handle_t oh, daos_handle_t th, + daos_array_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t write_sz; + + MAP_OR_FAIL(daos_array_write); + + DAOS_ARRAY_IOD_SZ(iod, write_sz); + + if(ev) + { + /* setup callback to record the write operation upon completion */ + struct daos_write_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_ARRAY_WRITES; + tracker->write_size = write_sz; + daos_event_register_comp_cb(ev, darshan_daos_write_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_write(oh, th, iod, sgl, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_WRITE(oh, DAOS_ARRAY_WRITES, write_sz, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_get_size)(daos_handle_t oh, daos_handle_t th, + daos_size_t *size, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_array_get_size); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_ARRAY_GET_SIZES; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_get_size(oh, th, size, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + 
DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_ARRAY_GET_SIZES] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_set_size)(daos_handle_t oh, daos_handle_t th, + daos_size_t size, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_array_set_size); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_ARRAY_SET_SIZES; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_set_size(oh, th, size, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_ARRAY_SET_SIZES] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_stat)(daos_handle_t oh, daos_handle_t th, daos_array_stbuf_t *stbuf, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_array_stat); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_ARRAY_STATS; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_stat(oh, th, stbuf, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_ARRAY_STATS] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_punch)(daos_handle_t oh, daos_handle_t th, + daos_array_iod_t *iod, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_array_punch); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_ARRAY_PUNCHES; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_punch(oh, th, iod, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + 
rec_ref->object_rec->counters[DAOS_ARRAY_PUNCHES] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_destroy)(daos_handle_t oh, daos_handle_t th, + daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_array_destroy); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_ARRAY_DESTROYS; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_destroy(oh, th, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_ARRAY_DESTROYS] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_array_close)(daos_handle_t oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_array_close); + + if(ev) + { + /* setup callback to record the close operation upon completion */ + struct daos_close_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + daos_event_register_comp_cb(ev, darshan_daos_close_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_array_close(oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_CLOSE(oh, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +/* key-value API */ + +int DARSHAN_DECL(daos_kv_open)(daos_handle_t coh, daos_obj_id_t oid, unsigned int mode, + daos_handle_t *oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_kv_open); + + if(ev) + { + /* setup callback to record the open operation upon completion */ + struct daos_open_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->coh = coh; + tracker->oid = oid; + tracker->op = DAOS_KV_OPENS; + tracker->resolve_sizes = 0; + tracker->cell_size = 0; + tracker->chunk_size = 0; + tracker->oh_p = oh; + daos_event_register_comp_cb(ev, darshan_daos_open_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_kv_open(coh, oid, mode, oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_OPEN(coh, oh, oid, DAOS_KV_OPENS, 0, 0, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_kv_get)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + const char *key, daos_size_t *size, void *buf, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_kv_get); + + if(ev) + { + /* setup callback to record the read operation upon completion */ + struct daos_read_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_KV_GETS; + tracker->read_size_p = size; + daos_event_register_comp_cb(ev, darshan_daos_read_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_kv_get(oh, th, flags, key, size, buf, ev); + tm2 
= DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_READ(oh, DAOS_KV_GETS, *(size), 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_kv_put)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + const char *key, daos_size_t size, const void *buf, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_kv_put); + + if(ev) + { + /* setup callback to record the write operation upon completion */ + struct daos_write_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_KV_PUTS; + tracker->write_size = size; + daos_event_register_comp_cb(ev, darshan_daos_write_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_kv_put(oh, th, flags, key, size, buf, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_WRITE(oh, DAOS_KV_PUTS, size, 0, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_kv_remove)(daos_handle_t oh, daos_handle_t th, uint64_t flags, + const char *key, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_kv_remove); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_KV_REMOVES; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_kv_remove(oh, th, flags, key, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_KV_REMOVES] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_kv_list)(daos_handle_t oh, daos_handle_t th, uint32_t *nr, + daos_key_desc_t *kds, d_sg_list_t *sgl, daos_anchor_t *anchor, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + MAP_OR_FAIL(daos_kv_list); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_KV_LISTS; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_kv_list(oh, th, nr, kds, sgl, anchor, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_KV_LISTS] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_kv_destroy)(daos_handle_t oh, daos_handle_t th, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + struct daos_object_record_ref *rec_ref; + + 
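+    /* resolve the address of the real daos_kv_destroy() before instrumenting;
+     * MAP_OR_FAIL() is a no-op once the underlying symbol has been resolved
+     */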
MAP_OR_FAIL(daos_kv_destroy); + + if(ev) + { + /* setup callback to record the metadata operation upon completion */ + struct daos_meta_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + tracker->op = DAOS_KV_DESTROYS; + daos_event_register_comp_cb(ev, darshan_daos_meta_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_kv_destroy(oh, th, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(daos_runtime->oh_hash, + &oh, sizeof(daos_handle_t)); + if(rec_ref) + { + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->object_rec->fcounters[DAOS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + rec_ref->object_rec->counters[DAOS_KV_DESTROYS] += 1; + } + DAOS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(daos_kv_close)(daos_handle_t oh, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(daos_kv_close); + + if(ev) + { + /* setup callback to record the close operation upon completion */ + struct daos_close_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->oh = oh; + daos_event_register_comp_cb(ev, darshan_daos_close_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_daos_kv_close(oh, ev); + tm2 = DAOS_WTIME(); + + if(!ev) + { + /* only record here for synchronous I/O operations */ + DAOS_PRE_RECORD(); + DAOS_RECORD_OBJ_CLOSE(oh, tm1, tm2); + DAOS_POST_RECORD(); + } + + return(ret); +} + +/********************************************************* + * Internal functions for manipulating DAOS module state * + *********************************************************/ + +static void daos_runtime_initialize() +{ + int ret; + size_t daos_rec_count; + darshan_module_funcs mod_funcs = { +#ifdef HAVE_MPI + .mod_redux_func = &daos_mpi_redux, +#endif + .mod_output_func = &daos_output, + .mod_cleanup_func = &daos_cleanup + }; + + /* if this attempt at initializing fails, we won't try again */ + daos_runtime_init_attempted = 1; + + /* try to store a default number of records for this module */ + daos_rec_count = DARSHAN_DEF_MOD_REC_COUNT; + + /* register the DAOS module with darshan core */ + ret = darshan_core_register_module( + DARSHAN_DAOS_MOD, + mod_funcs, + sizeof(struct darshan_daos_object), + &daos_rec_count, + &my_rank, + NULL); + if(ret < 0) + return; + + daos_runtime = malloc(sizeof(*daos_runtime)); + if(!daos_runtime) + { + darshan_core_unregister_module(DARSHAN_DAOS_MOD); + return; + } + memset(daos_runtime, 0, sizeof(*daos_runtime)); + + /* register a heatmap */ + daos_runtime->heatmap_id = heatmap_register("heatmap:DAOS"); + + return; +} + +static struct daos_object_record_ref *daos_track_new_object_record( + darshan_record_id rec_id, daos_obj_id_t oid, struct daos_poolcont_info *poolcont_info) +{ + struct darshan_daos_object *object_rec = NULL; + struct daos_object_record_ref *rec_ref = NULL; + int ret; + + rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) + return(NULL); + memset(rec_ref, 0, sizeof(*rec_ref)); + + /* add a reference to this object record based on record id */ + ret = darshan_add_record_ref(&(daos_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) + { + free(rec_ref); + return(NULL); + } + + /* register the actual object record with darshan-core so it is persisted + * in the log file + */ + object_rec = darshan_core_register_record( + rec_id, + NULL, + 
DARSHAN_DAOS_MOD, + sizeof(struct darshan_daos_object), + NULL); + + if(!object_rec) + { + darshan_delete_record_ref(&(daos_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); + return(NULL); + } + + /* registering this object record was successful, so initialize some fields */ + object_rec->base_rec.id = rec_id; + object_rec->base_rec.rank = my_rank; + uuid_copy(object_rec->pool_uuid, poolcont_info->pool_uuid); + uuid_copy(object_rec->cont_uuid, poolcont_info->cont_uuid); + object_rec->oid_hi = oid.hi; + object_rec->oid_lo = oid.lo; + rec_ref->object_rec = object_rec; + daos_runtime->obj_rec_count++; + + return(rec_ref); +} + +static void daos_finalize_object_records(void *rec_ref_p, void *user_ptr) +{ + struct daos_object_record_ref *rec_ref = + (struct daos_object_record_ref *)rec_ref_p; + + tdestroy(rec_ref->access_root, free); + return; +} + +#ifdef HAVE_MPI +static void daos_record_reduction_op( + void* inobj_v, void* inoutobj_v, int *len, MPI_Datatype *datatype) +{ + struct darshan_daos_object tmp_obj; + struct darshan_daos_object *inobj = inobj_v; + struct darshan_daos_object *inoutobj = inoutobj_v; + int i, j, k; + + for(i=0; i<*len; i++) + { + memset(&tmp_obj, 0, sizeof(struct darshan_daos_object)); + tmp_obj.base_rec.id = inobj->base_rec.id; + tmp_obj.base_rec.rank = -1; + uuid_copy(tmp_obj.pool_uuid, inobj->pool_uuid); + uuid_copy(tmp_obj.cont_uuid, inobj->cont_uuid); + tmp_obj.oid_hi = inobj->oid_hi; + tmp_obj.oid_lo = inobj->oid_lo; + + /* sum */ + for(j=DAOS_OBJ_OPENS; j<=DAOS_RW_SWITCHES; j++) + { + tmp_obj.counters[j] = inobj->counters[j] + inoutobj->counters[j]; + if(tmp_obj.counters[j] < 0) /* make sure invalid counters are -1 exactly */ + tmp_obj.counters[j] = -1; + } + + /* skip DAOS_MAX_*_TIME_SIZE; handled in floating point section */ + + for(j=DAOS_SIZE_READ_0_100; j<=DAOS_SIZE_WRITE_1G_PLUS; j++) + { + tmp_obj.counters[j] = inobj->counters[j] + inoutobj->counters[j]; + } + + /* common access counters */ + + /* first collapse any duplicates */ + for(j=DAOS_ACCESS1_ACCESS; j<=DAOS_ACCESS4_ACCESS; j++) + { + for(k=DAOS_ACCESS1_ACCESS; k<=DAOS_ACCESS4_ACCESS; k++) + { + if(inobj->counters[j] == inoutobj->counters[k]) + { + inobj->counters[j+4] += inoutobj->counters[k+4]; + inoutobj->counters[k] = 0; + inoutobj->counters[k+4] = 0; + } + } + } + + /* first set */ + for(j=DAOS_ACCESS1_ACCESS; j<=DAOS_ACCESS4_ACCESS; j++) + { + DARSHAN_UPDATE_COMMON_VAL_COUNTERS( + &(tmp_obj.counters[DAOS_ACCESS1_ACCESS]), + &(tmp_obj.counters[DAOS_ACCESS1_COUNT]), + &inobj->counters[j], 1, inobj->counters[j+4], 1); + } + /* second set */ + for(j=DAOS_ACCESS1_ACCESS; j<=DAOS_ACCESS4_ACCESS; j++) + { + DARSHAN_UPDATE_COMMON_VAL_COUNTERS( + &(tmp_obj.counters[DAOS_ACCESS1_ACCESS]), + &(tmp_obj.counters[DAOS_ACCESS1_COUNT]), + &inoutobj->counters[j], 1, inoutobj->counters[j+4], 1); + } + + tmp_obj.counters[DAOS_OBJ_OTYPE] = inobj->counters[DAOS_OBJ_OTYPE]; + tmp_obj.counters[DAOS_ARRAY_CELL_SIZE] = inobj->counters[DAOS_ARRAY_CELL_SIZE]; + tmp_obj.counters[DAOS_ARRAY_CHUNK_SIZE] = inobj->counters[DAOS_ARRAY_CHUNK_SIZE]; + + /* min non-zero (if available) value */ + for(j=DAOS_F_OPEN_START_TIMESTAMP; j<=DAOS_F_CLOSE_START_TIMESTAMP; j++) + { + if((inobj->fcounters[j] < inoutobj->fcounters[j] && + inobj->fcounters[j] > 0) || inoutobj->fcounters[j] == 0) + tmp_obj.fcounters[j] = inobj->fcounters[j]; + else + tmp_obj.fcounters[j] = inoutobj->fcounters[j]; + } + + /* max */ + for(j=DAOS_F_OPEN_END_TIMESTAMP; j<=DAOS_F_CLOSE_END_TIMESTAMP; j++) + { + if(inobj->fcounters[j] 
> inoutobj->fcounters[j]) + tmp_obj.fcounters[j] = inobj->fcounters[j]; + else + tmp_obj.fcounters[j] = inoutobj->fcounters[j]; + } + + /* sum */ + for(j=DAOS_F_READ_TIME; j<=DAOS_F_META_TIME; j++) + { + tmp_obj.fcounters[j] = inobj->fcounters[j] + inoutobj->fcounters[j]; + } + + /* max (special case) */ + if(inobj->fcounters[DAOS_F_MAX_READ_TIME] > + inoutobj->fcounters[DAOS_F_MAX_READ_TIME]) + { + tmp_obj.fcounters[DAOS_F_MAX_READ_TIME] = + inobj->fcounters[DAOS_F_MAX_READ_TIME]; + tmp_obj.counters[DAOS_MAX_READ_TIME_SIZE] = + inobj->counters[DAOS_MAX_READ_TIME_SIZE]; + } + else + { + tmp_obj.fcounters[DAOS_F_MAX_READ_TIME] = + inoutobj->fcounters[DAOS_F_MAX_READ_TIME]; + tmp_obj.counters[DAOS_MAX_READ_TIME_SIZE] = + inoutobj->counters[DAOS_MAX_READ_TIME_SIZE]; + } + + if(inobj->fcounters[DAOS_F_MAX_WRITE_TIME] > + inoutobj->fcounters[DAOS_F_MAX_WRITE_TIME]) + { + tmp_obj.fcounters[DAOS_F_MAX_WRITE_TIME] = + inobj->fcounters[DAOS_F_MAX_WRITE_TIME]; + tmp_obj.counters[DAOS_MAX_WRITE_TIME_SIZE] = + inobj->counters[DAOS_MAX_WRITE_TIME_SIZE]; + } + else + { + tmp_obj.fcounters[DAOS_F_MAX_WRITE_TIME] = + inoutobj->fcounters[DAOS_F_MAX_WRITE_TIME]; + tmp_obj.counters[DAOS_MAX_WRITE_TIME_SIZE] = + inoutobj->counters[DAOS_MAX_WRITE_TIME_SIZE]; + } + + /* min (zeroes are ok here; some procs don't do I/O) */ + if(inobj->fcounters[DAOS_F_FASTEST_RANK_TIME] < + inoutobj->fcounters[DAOS_F_FASTEST_RANK_TIME]) + { + tmp_obj.counters[DAOS_FASTEST_RANK] = + inobj->counters[DAOS_FASTEST_RANK]; + tmp_obj.counters[DAOS_FASTEST_RANK_BYTES] = + inobj->counters[DAOS_FASTEST_RANK_BYTES]; + tmp_obj.fcounters[DAOS_F_FASTEST_RANK_TIME] = + inobj->fcounters[DAOS_F_FASTEST_RANK_TIME]; + } + else + { + tmp_obj.counters[DAOS_FASTEST_RANK] = + inoutobj->counters[DAOS_FASTEST_RANK]; + tmp_obj.counters[DAOS_FASTEST_RANK_BYTES] = + inoutobj->counters[DAOS_FASTEST_RANK_BYTES]; + tmp_obj.fcounters[DAOS_F_FASTEST_RANK_TIME] = + inoutobj->fcounters[DAOS_F_FASTEST_RANK_TIME]; + } + + /* max */ + if(inobj->fcounters[DAOS_F_SLOWEST_RANK_TIME] > + inoutobj->fcounters[DAOS_F_SLOWEST_RANK_TIME]) + { + tmp_obj.counters[DAOS_SLOWEST_RANK] = + inobj->counters[DAOS_SLOWEST_RANK]; + tmp_obj.counters[DAOS_SLOWEST_RANK_BYTES] = + inobj->counters[DAOS_SLOWEST_RANK_BYTES]; + tmp_obj.fcounters[DAOS_F_SLOWEST_RANK_TIME] = + inobj->fcounters[DAOS_F_SLOWEST_RANK_TIME]; + } + else + { + tmp_obj.counters[DAOS_SLOWEST_RANK] = + inoutobj->counters[DAOS_SLOWEST_RANK]; + tmp_obj.counters[DAOS_SLOWEST_RANK_BYTES] = + inoutobj->counters[DAOS_SLOWEST_RANK_BYTES]; + tmp_obj.fcounters[DAOS_F_SLOWEST_RANK_TIME] = + inoutobj->fcounters[DAOS_F_SLOWEST_RANK_TIME]; + } + + /* update pointers */ + *inoutobj = tmp_obj; + inoutobj++; + inobj++; + } + + return; +} +#endif + +/********************************************************************************* + * shutdown functions exported by this module for coordinating with darshan-core * + *********************************************************************************/ + +#ifdef HAVE_MPI +static void daos_mpi_redux( + void *daos_buf, MPI_Comm mod_comm, + darshan_record_id *shared_recs, int shared_rec_count) +{ + int daos_rec_count; + struct daos_object_record_ref *rec_ref; + struct darshan_daos_object *daos_rec_buf = (struct darshan_daos_object *)daos_buf; + double daos_time; + struct darshan_daos_object *red_send_buf = NULL; + struct darshan_daos_object *red_recv_buf = NULL; + MPI_Datatype red_type; + MPI_Op red_op; + int i; + + DAOS_LOCK(); + assert(daos_runtime); + + daos_rec_count = 
daos_runtime->obj_rec_count; + + /* necessary initialization of shared records */ + for(i = 0; i < shared_rec_count; i++) + { + rec_ref = darshan_lookup_record_ref(daos_runtime->rec_id_hash, + &shared_recs[i], sizeof(darshan_record_id)); + assert(rec_ref); + + daos_time = + rec_ref->object_rec->fcounters[DAOS_F_READ_TIME] + + rec_ref->object_rec->fcounters[DAOS_F_WRITE_TIME] + + rec_ref->object_rec->fcounters[DAOS_F_META_TIME]; + + /* initialize fastest/slowest info prior to the reduction */ + rec_ref->object_rec->counters[DAOS_FASTEST_RANK] = + rec_ref->object_rec->base_rec.rank; + rec_ref->object_rec->counters[DAOS_FASTEST_RANK_BYTES] = + rec_ref->object_rec->counters[DAOS_BYTES_READ] + + rec_ref->object_rec->counters[DAOS_BYTES_WRITTEN]; + rec_ref->object_rec->fcounters[DAOS_F_FASTEST_RANK_TIME] = + daos_time; + + /* until reduction occurs, we assume that this rank is both + * the fastest and slowest. It is up to the reduction operator + * to find the true min and max. + */ + rec_ref->object_rec->counters[DAOS_SLOWEST_RANK] = + rec_ref->object_rec->counters[DAOS_FASTEST_RANK]; + rec_ref->object_rec->counters[DAOS_SLOWEST_RANK_BYTES] = + rec_ref->object_rec->counters[DAOS_FASTEST_RANK_BYTES]; + rec_ref->object_rec->fcounters[DAOS_F_SLOWEST_RANK_TIME] = + rec_ref->object_rec->fcounters[DAOS_F_FASTEST_RANK_TIME]; + + rec_ref->object_rec->base_rec.rank = -1; + } + + /* sort the array of records so we get all of the shared records + * (marked by rank -1) in a contiguous portion at end of the array + */ + darshan_record_sort(daos_rec_buf, daos_rec_count, + sizeof(struct darshan_daos_object)); + + /* make send_buf point to the shared records at the end of sorted array */ + red_send_buf = &(daos_rec_buf[daos_rec_count-shared_rec_count]); + + /* allocate memory for the reduction output on rank 0 */ + if(my_rank == 0) + { + red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_daos_object)); + if(!red_recv_buf) + { + DAOS_UNLOCK(); + return; + } + } + + /* construct a datatype for a DAOS object record. 
This is serving no purpose
+     * except to make sure we can do a reduction on proper boundaries
+     */
+    PMPI_Type_contiguous(sizeof(struct darshan_daos_object),
+        MPI_BYTE, &red_type);
+    PMPI_Type_commit(&red_type);
+
+    /* register a DAOS object record reduction operator */
+    PMPI_Op_create(daos_record_reduction_op, 1, &red_op);
+
+    /* reduce shared DAOS object records */
+    PMPI_Reduce(red_send_buf, red_recv_buf,
+        shared_rec_count, red_type, red_op, 0, mod_comm);
+
+    /* update module state to account for shared file reduction */
+    if(my_rank == 0)
+    {
+        /* overwrite local shared records with globally reduced records */
+        int tmp_ndx = daos_rec_count - shared_rec_count;
+        memcpy(&(daos_rec_buf[tmp_ndx]), red_recv_buf,
+            shared_rec_count * sizeof(struct darshan_daos_object));
+        free(red_recv_buf);
+    }
+    else
+    {
+        /* drop shared records on non-zero ranks */
+        daos_runtime->obj_rec_count -= shared_rec_count;
+    }
+
+    PMPI_Type_free(&red_type);
+    PMPI_Op_free(&red_op);
+
+    DAOS_UNLOCK();
+    return;
+}
+#endif
+
+static void daos_output(
+    void **daos_buf, int *daos_buf_sz)
+{
+    int daos_rec_count;
+    struct darshan_daos_object *daos_rec_buf = *(struct darshan_daos_object **)daos_buf;
+    int i, j;
+    int ops;
+
+    DAOS_LOCK();
+    assert(daos_runtime);
+
+    daos_rec_count = daos_runtime->obj_rec_count;
+
+    /* filter out records that have been opened, but don't have any
+     * I/O operations
+     */
+    for(i=0; i<daos_rec_count; i++)
+    {
+        for(j=DAOS_OBJ_OPENS; j<=DAOS_RW_SWITCHES; j++)
+        {
+            ops = daos_rec_buf[i].counters[j];
+            if(ops) break;
+        }
+        if(!ops)
+        {
+            if(i != (daos_rec_count-1))
+            {
+                memcpy(&daos_rec_buf[i], &daos_rec_buf[daos_rec_count-1],
+                    sizeof(struct darshan_daos_object));
+                i--;
+            }
+            daos_rec_count--;
+        }
+    }
+
+    /* pass back the buffer size, which may have shrunk after filtering */
+    *daos_buf_sz = daos_rec_count * sizeof(struct darshan_daos_object);
+
+    /* freeze the counters so they are no longer modified during shutdown */
+    daos_runtime->frozen = 1;
+
+    DAOS_UNLOCK();
+    return;
+}
+
+static void daos_cleanup()
+{
+    struct daos_poolcont_info *poolcont_info, *tmp;
+
+    DAOS_LOCK();
+    assert(daos_runtime);
+
+    /* cleanup internal structures used for instrumenting */
+    darshan_iter_record_refs(daos_runtime->rec_id_hash,
+        &daos_finalize_object_records, NULL);
+    darshan_clear_record_refs(&(daos_runtime->oh_hash), 0);
+    darshan_clear_record_refs(&(daos_runtime->rec_id_hash), 1);
+
+    HASH_ITER(hlink, daos_runtime->poolcont_hash, poolcont_info, tmp)
+    {
+        HASH_DELETE(hlink, daos_runtime->poolcont_hash, poolcont_info);
+        free(poolcont_info);
+    }
+
+    free(daos_runtime);
+    daos_runtime = NULL;
+    daos_runtime_init_attempted = 0;
+
+    DAOS_UNLOCK();
+    return;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ * End:
+ *
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/darshan-runtime/lib/darshan-dfs.c b/darshan-runtime/lib/darshan-dfs.c
new file mode 100644
index 000000000..4d7bf3964
--- /dev/null
+++ b/darshan-runtime/lib/darshan-dfs.c
@@ -0,0 +1,1404 @@
+/*
+ * Copyright (C) 2020 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#define _XOPEN_SOURCE 500
+#define _GNU_SOURCE
+
+#include "darshan-runtime-config.h"
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <search.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "uthash.h"
+#include "darshan.h"
+#include "darshan-dynamic.h"
+#include "darshan-heatmap.h"
+
+#include <daos_types.h>
+#include <daos_prop.h>
+#include <daos_pool.h>
+#include <daos_cont.h>
+#include <daos_obj.h>
+#include <daos_array.h>
+#include <daos_fs.h>
+
+DARSHAN_FORWARD_DECL(dfs_mount, int, (daos_handle_t poh, daos_handle_t coh, int flags, dfs_t **dfs));
+DARSHAN_FORWARD_DECL(dfs_global2local, int, (daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob, dfs_t **dfs));
+DARSHAN_FORWARD_DECL(dfs_umount, int, (dfs_t *dfs));
+DARSHAN_FORWARD_DECL(dfs_lookup, int, (dfs_t *dfs, const char *path, int flags, dfs_obj_t **obj, mode_t *mode, struct stat *stbuf));
+DARSHAN_FORWARD_DECL(dfs_lookup_rel, int, (dfs_t *dfs, dfs_obj_t *parent, const char *name, int flags, dfs_obj_t **obj, mode_t *mode, struct stat *stbuf));
+DARSHAN_FORWARD_DECL(dfs_open, int, (dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode, int flags, daos_oclass_id_t cid, daos_size_t chunk_size, const char *value, dfs_obj_t **obj));
+DARSHAN_FORWARD_DECL(dfs_dup, int, (dfs_t *dfs, dfs_obj_t *obj, int flags, dfs_obj_t **new_obj));
+DARSHAN_FORWARD_DECL(dfs_obj_global2local, int, (dfs_t *dfs, int flags, d_iov_t glob, dfs_obj_t **obj));
+DARSHAN_FORWARD_DECL(dfs_release, int, (dfs_obj_t *obj));
+DARSHAN_FORWARD_DECL(dfs_read, int, (dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size_t *read_size, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(dfs_readx, int, (dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_size_t *read_size, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(dfs_write, int, (dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(dfs_writex, int, (dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev));
+DARSHAN_FORWARD_DECL(dfs_get_size, int, (dfs_t *dfs, dfs_obj_t *obj, daos_size_t *size));
+DARSHAN_FORWARD_DECL(dfs_punch, int, (dfs_t *dfs, dfs_obj_t *obj, daos_off_t offset, daos_size_t len));
+DARSHAN_FORWARD_DECL(dfs_remove, int, (dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj_id_t *oid));
+#if 0
+DARSHAN_FORWARD_DECL(dfs_stat, int, (dfs_t *dfs, dfs_obj_t *parent, const char *name, struct stat *stbuf));
+#endif
+DARSHAN_FORWARD_DECL(dfs_ostat, int, (dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf));
+DARSHAN_FORWARD_DECL(dfs_osetattr, int, (dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags));
+
+/* The dfs_file_record_ref structure maintains necessary runtime metadata
+ * for the DFS file record (darshan_dfs_file structure, defined in
+ * darshan-dfs-log-format.h) pointed to by 'file_rec'. This metadata
+ * assists with the instrumenting of specific statistics in the file record.
+ *
+ * RATIONALE: the DFS module needs to track some stateful, volatile
+ * information about each open file (like the current file offset, most recent
+ * access time, etc.) to aid in instrumentation, but this information can't be
+ * stored in the darshan_dfs_file struct because we don't want it to appear in
+ * the final darshan log file. We therefore associate a dfs_file_record_ref
+ * struct with each darshan_dfs_file struct in order to track this information
+ * (i.e., the mapping from dfs_file_record_ref structs to darshan_dfs_file
+ * structs is one-to-one).
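+ * For example, 'last_io_type' is what lets the wrappers count read/write
+ * switches, and the 'last_*_end' timestamps let them accumulate
+ * non-overlapping time in the cumulative timers, but none of these fields
+ * belong in the log file itself.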
+ *
+ * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to
+ * associate different types of handles with this dfs_file_record_ref struct.
+ * This allows us to index this struct (and the underlying file record) by using
+ * either the corresponding Darshan record identifier (derived from the underlying
+ * object OID) or by a DFS file object, for instance. Note that, while there should
+ * only be a single Darshan record identifier that indexes a dfs_file_record_ref,
+ * there could be multiple open file objects that index it.
+ */
+struct dfs_file_record_ref
+{
+    struct darshan_dfs_file *file_rec;
+    enum darshan_io_type last_io_type;
+    double last_meta_end;
+    double last_read_end;
+    double last_write_end;
+    void *access_root;
+    int access_count;
+};
+
+struct dfs_mount_info
+{
+    uuid_t pool_uuid;
+    uuid_t cont_uuid;
+    UT_hash_handle hlink;
+};
+
+struct dfs_runtime
+{
+    struct dfs_mount_info *mount_hash;
+    void *rec_id_hash;
+    void *file_obj_hash;
+    int file_rec_count;
+    darshan_record_id heatmap_id;
+    int frozen; /* flag to indicate that the counters should no longer be modified */
+};
+
+static void dfs_runtime_initialize();
+static struct dfs_file_record_ref *dfs_track_new_file_record(
+    darshan_record_id rec_id, const char *path, struct dfs_mount_info *mnt_info);
+static void dfs_finalize_file_records(
+    void *rec_ref_p, void *user_ptr);
+#ifdef HAVE_MPI
+static void dfs_record_reduction_op(
+    void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype);
+static void dfs_mpi_redux(
+    void *dfs_buf, MPI_Comm mod_comm,
+    darshan_record_id *shared_recs, int shared_rec_count);
+#endif
+static void dfs_output(
+    void **dfs_buf, int *dfs_buf_sz);
+static void dfs_cleanup(
+    void);
+
+static struct dfs_runtime *dfs_runtime = NULL;
+static pthread_mutex_t dfs_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+static int dfs_runtime_init_attempted = 0;
+static int my_rank = -1;
+
+#define DFS_LOCK() pthread_mutex_lock(&dfs_runtime_mutex)
+#define DFS_UNLOCK() pthread_mutex_unlock(&dfs_runtime_mutex)
+
+#define DAOS_WTIME() \
+    __darshan_disabled ? 0 : darshan_core_wtime();
+
+#define DFS_PRE_RECORD() do { \
+    if(!ret && !__darshan_disabled) { \
+        DFS_LOCK(); \
+        if(!dfs_runtime && !dfs_runtime_init_attempted) \
+            dfs_runtime_initialize(); \
+        if(dfs_runtime && !dfs_runtime->frozen) break; \
+        DFS_UNLOCK(); \
+    } \
+    return(ret); \
+} while(0)
+
+#define DFS_POST_RECORD() do { \
+    DFS_UNLOCK(); \
+} while(0)
+
+#define DFS_STORE_MOUNT_INFO(__poh, __coh, __dfs_p) do { \
+    int __query_ret; \
+    daos_pool_info_t __pool_info; \
+    daos_cont_info_t __cont_info; \
+    struct dfs_mount_info *__mnt_info; \
+    __query_ret = daos_pool_query(__poh, NULL, &__pool_info, NULL, NULL); \
+    if(__query_ret == 0) { \
+        __query_ret = daos_cont_query(__coh, &__cont_info, NULL, NULL); \
+        if(__query_ret == 0) { \
+            __mnt_info = malloc(sizeof(*__mnt_info)); \
+            if(__mnt_info) { \
+                uuid_copy(__mnt_info->pool_uuid, __pool_info.pi_uuid); \
+                uuid_copy(__mnt_info->cont_uuid, __cont_info.ci_uuid); \
+                HASH_ADD_KEYPTR(hlink, dfs_runtime->mount_hash, *__dfs_p, sizeof(void *), __mnt_info); \
+            } \
+        } \
+    } \
+} while(0)
+
+#define DFS_GET_MOUNT_INFO(__dfs, __mnt_info) \
+    HASH_FIND(hlink, dfs_runtime->mount_hash, __dfs, sizeof(void *), __mnt_info)
+
+#define DFS_FREE_MOUNT_INFO(__mnt_info) do { \
+    HASH_DELETE(hlink, dfs_runtime->mount_hash, __mnt_info); \
+    free(__mnt_info); \
+} while(0)
+
+#define DFS_RESOLVE_OBJ_REC_NAME(__parent_obj, __name, __obj_rec_name) do { \
+    struct dfs_file_record_ref *__parent_rec_ref; \
+    char *__parent_rec_name = NULL; \
+    if (__parent_obj) { \
+        __parent_rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, \
+            &__parent_obj, sizeof(__parent_obj)); \
+        if(__parent_rec_ref) \
+            __parent_rec_name = darshan_core_lookup_record_name(__parent_rec_ref->file_rec->base_rec.id); \
+    } \
+    else { \
+        __parent_rec_name = "/"; \
+    } \
+    int __obj_rec_name_len = (__parent_rec_name ? strlen(__parent_rec_name) : 0) + strlen(__name) + 1; \
+    __obj_rec_name = malloc(__obj_rec_name_len); \
+    if(!__obj_rec_name) break; \
+    memset(__obj_rec_name, 0, __obj_rec_name_len); \
+    if(__parent_rec_name) \
+        strcat(__obj_rec_name, __parent_rec_name); \
+    strcat(__obj_rec_name, __name); \
+} while(0)
+
+/* Generate a DFS record ID based on the OID (and pool/container IDs) */
+#define ID_GLOB_SIZE (sizeof(daos_obj_id_t) + (2*sizeof(uuid_t)))
+#define DFS_GEN_DARSHAN_REC_ID(__oid_p, __mnt_info, __rec_id) do { \
+    unsigned char __id_glob[ID_GLOB_SIZE]; \
+    memset(__id_glob, 0, ID_GLOB_SIZE); \
+    if(__mnt_info) { \
+        memcpy(__id_glob, __mnt_info->pool_uuid, sizeof(uuid_t)); \
+        memcpy(__id_glob+sizeof(uuid_t), __mnt_info->cont_uuid, sizeof(uuid_t)); \
+    } \
+    memcpy(__id_glob+(2*sizeof(uuid_t)), __oid_p, sizeof(*__oid_p)); \
+    __rec_id = darshan_hash(__id_glob, ID_GLOB_SIZE, 0); \
+} while(0)
+
+/* NOTE: the following macro captures details about open(), lookup(),
+ * and obj_global2local() calls. Separate operation counters
+ * are maintained for each, but all calls share the same floating
+ * point counters (i.e., OPEN_START_TIMESTAMP, OPEN_END_TIMESTAMP).
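+ * For example, a dfs_lookup() followed by a dfs_open() on the same file
+ * increments both DFS_LOOKUPS and DFS_OPENS, while both calls fold into
+ * the same DFS_F_OPEN_START/END_TIMESTAMP range and DFS_F_META_TIME.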
+ */ +#define DFS_RECORD_FILE_OBJ_OPEN(__dfs, __obj_name, __counter, __obj_p, __tm1, __tm2) do { \ + struct dfs_mount_info *__mnt_info; \ + daos_obj_id_t __oid; \ + darshan_record_id __rec_id; \ + struct dfs_file_record_ref *__rec_ref; \ + DFS_GET_MOUNT_INFO(__dfs, __mnt_info); \ + if(dfs_obj2id(*__obj_p, &__oid)) break; \ + DFS_GEN_DARSHAN_REC_ID(&__oid, __mnt_info, __rec_id); \ + __rec_ref = darshan_lookup_record_ref(dfs_runtime->rec_id_hash, &__rec_id, sizeof(__rec_id)); \ + if(!__rec_ref) __rec_ref = dfs_track_new_file_record(__rec_id, __obj_name, __mnt_info); \ + DFS_RECORD_FILE_OBJREF_OPEN(__rec_ref, __counter, __obj_p, __tm1, __tm2); \ +} while(0) + +#define DFS_RECORD_FILE_OBJREF_OPEN(__rec_ref, __counter, __obj_p, __tm1, __tm2) do { \ + if(!__rec_ref) break; \ + __rec_ref->file_rec->counters[__counter] += 1; \ + if(__rec_ref->file_rec->fcounters[DFS_F_OPEN_START_TIMESTAMP] == 0 || \ + __rec_ref->file_rec->fcounters[DFS_F_OPEN_START_TIMESTAMP] > __tm1) \ + __rec_ref->file_rec->fcounters[DFS_F_OPEN_START_TIMESTAMP] = __tm1; \ + __rec_ref->file_rec->fcounters[DFS_F_OPEN_END_TIMESTAMP] = __tm2; \ + DARSHAN_TIMER_INC_NO_OVERLAP(__rec_ref->file_rec->fcounters[DFS_F_META_TIME], \ + __tm1, __tm2, __rec_ref->last_meta_end); \ + darshan_add_record_ref(&(dfs_runtime->file_obj_hash), __obj_p, sizeof(*__obj_p), __rec_ref); \ +} while(0) + +#define DFS_RECORD_READ(__obj, __read_size, __counter, __is_async, __tm1, __tm2) do { \ + struct dfs_file_record_ref *__rec_ref; \ + struct darshan_common_val_counter *__cvc; \ + double __elapsed = __tm2-__tm1; \ + int64_t __sz = (int64_t)__read_size; \ + daos_size_t __chunk_size; \ + __rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &__obj, sizeof(__obj)); \ + if(!__rec_ref) break; \ + /* heatmap to record traffic summary */ \ + heatmap_update(dfs_runtime->heatmap_id, HEATMAP_READ, __sz, __tm1, __tm2); \ + __rec_ref->file_rec->counters[__counter] += 1; \ + if(__is_async) \ + __rec_ref->file_rec->counters[DFS_NB_READS] += 1; \ + __rec_ref->file_rec->counters[DFS_BYTES_READ] += __sz; \ + DARSHAN_BUCKET_INC(&(__rec_ref->file_rec->counters[DFS_SIZE_READ_0_100]), __sz); \ + __cvc = darshan_track_common_val_counters(&__rec_ref->access_root, &__sz, 1, \ + &__rec_ref->access_count); \ + if(__cvc) DARSHAN_UPDATE_COMMON_VAL_COUNTERS( \ + &(__rec_ref->file_rec->counters[DFS_ACCESS1_ACCESS]), \ + &(__rec_ref->file_rec->counters[DFS_ACCESS1_COUNT]), \ + __cvc->vals, 1, __cvc->freq, 0); \ + if(__rec_ref->last_io_type == DARSHAN_IO_WRITE) \ + __rec_ref->file_rec->counters[DFS_RW_SWITCHES] += 1; \ + __rec_ref->last_io_type = DARSHAN_IO_READ; \ + if(__rec_ref->file_rec->counters[DFS_CHUNK_SIZE] == 0) \ + if(dfs_get_chunk_size(__obj, &__chunk_size) == 0) \ + __rec_ref->file_rec->counters[DFS_CHUNK_SIZE] = __chunk_size; \ + if(__rec_ref->file_rec->fcounters[DFS_F_READ_START_TIMESTAMP] == 0 || \ + __rec_ref->file_rec->fcounters[DFS_F_READ_START_TIMESTAMP] > __tm1) \ + __rec_ref->file_rec->fcounters[DFS_F_READ_START_TIMESTAMP] = __tm1; \ + __rec_ref->file_rec->fcounters[DFS_F_READ_END_TIMESTAMP] = __tm2; \ + if(__rec_ref->file_rec->fcounters[DFS_F_MAX_READ_TIME] < __elapsed) { \ + __rec_ref->file_rec->fcounters[DFS_F_MAX_READ_TIME] = __elapsed; \ + __rec_ref->file_rec->counters[DFS_MAX_READ_TIME_SIZE] = __sz; \ + } \ + DARSHAN_TIMER_INC_NO_OVERLAP(__rec_ref->file_rec->fcounters[DFS_F_READ_TIME], \ + __tm1, __tm2, __rec_ref->last_read_end); \ +} while(0) + +#define DFS_RECORD_WRITE(__obj, __write_size, __counter, __is_async, __tm1, __tm2) do { \ + struct 
dfs_file_record_ref *__rec_ref; \ + struct darshan_common_val_counter *__cvc; \ + double __elapsed = __tm2-__tm1; \ + int64_t __sz = (int64_t)__write_size; \ + daos_size_t __chunk_size; \ + __rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &__obj, sizeof(__obj)); \ + if(!__rec_ref) break; \ + /* heatmap to record traffic summary */ \ + heatmap_update(dfs_runtime->heatmap_id, HEATMAP_WRITE, __sz, __tm1, __tm2); \ + __rec_ref->file_rec->counters[__counter] += 1; \ + if(__is_async) \ + __rec_ref->file_rec->counters[DFS_NB_WRITES] += 1; \ + __rec_ref->file_rec->counters[DFS_BYTES_WRITTEN] += __sz; \ + DARSHAN_BUCKET_INC(&(__rec_ref->file_rec->counters[DFS_SIZE_WRITE_0_100]), __sz); \ + __cvc = darshan_track_common_val_counters(&__rec_ref->access_root, &__sz, 1, \ + &__rec_ref->access_count); \ + if(__cvc) DARSHAN_UPDATE_COMMON_VAL_COUNTERS( \ + &(__rec_ref->file_rec->counters[DFS_ACCESS1_ACCESS]), \ + &(__rec_ref->file_rec->counters[DFS_ACCESS1_COUNT]), \ + __cvc->vals, 1, __cvc->freq, 0); \ + if(__rec_ref->last_io_type == DARSHAN_IO_READ) \ + __rec_ref->file_rec->counters[DFS_RW_SWITCHES] += 1; \ + __rec_ref->last_io_type = DARSHAN_IO_WRITE; \ + if(__rec_ref->file_rec->counters[DFS_CHUNK_SIZE] == 0) \ + if(dfs_get_chunk_size(__obj, &__chunk_size) == 0) \ + __rec_ref->file_rec->counters[DFS_CHUNK_SIZE] = __chunk_size; \ + if(__rec_ref->file_rec->fcounters[DFS_F_WRITE_START_TIMESTAMP] == 0 || \ + __rec_ref->file_rec->fcounters[DFS_F_WRITE_START_TIMESTAMP] > __tm1) \ + __rec_ref->file_rec->fcounters[DFS_F_WRITE_START_TIMESTAMP] = __tm1; \ + __rec_ref->file_rec->fcounters[DFS_F_WRITE_END_TIMESTAMP] = __tm2; \ + if(__rec_ref->file_rec->fcounters[DFS_F_MAX_WRITE_TIME] < __elapsed) { \ + __rec_ref->file_rec->fcounters[DFS_F_MAX_WRITE_TIME] = __elapsed; \ + __rec_ref->file_rec->counters[DFS_MAX_WRITE_TIME_SIZE] = __sz; \ + } \ + DARSHAN_TIMER_INC_NO_OVERLAP(__rec_ref->file_rec->fcounters[DFS_F_WRITE_TIME], \ + __tm1, __tm2, __rec_ref->last_write_end); \ +} while(0) + +/***************************************************** + * Wrappers for DAOS functions of interest * + *****************************************************/ + +int DARSHAN_DECL(dfs_mount)(daos_handle_t poh, daos_handle_t coh, int flags, dfs_t **dfs) +{ + int ret; + + MAP_OR_FAIL(dfs_mount); + + ret = __real_dfs_mount(poh, coh, flags, dfs); + + DFS_PRE_RECORD(); + DFS_STORE_MOUNT_INFO(poh, coh, dfs); + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_global2local)(daos_handle_t poh, daos_handle_t coh, int flags, d_iov_t glob, dfs_t **dfs) +{ + int ret; + + MAP_OR_FAIL(dfs_global2local); + + ret = __real_dfs_global2local(poh, coh, flags, glob, dfs); + + DFS_PRE_RECORD(); + DFS_STORE_MOUNT_INFO(poh, coh, dfs); + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_umount)(dfs_t *dfs) +{ + int ret; + struct dfs_mount_info *mnt_info; + + MAP_OR_FAIL(dfs_umount); + + if(!__darshan_disabled) + { + DFS_LOCK(); + if(dfs_runtime && !dfs_runtime->frozen) + { + DFS_GET_MOUNT_INFO(dfs, mnt_info); + if(mnt_info) + DFS_FREE_MOUNT_INFO(mnt_info); + } + DFS_UNLOCK(); + } + + ret = __real_dfs_umount(dfs); + + return(ret); +} + +int DARSHAN_DECL(dfs_lookup)(dfs_t *dfs, const char *path, int flags, dfs_obj_t **obj, mode_t *mode, struct stat *stbuf) +{ + int ret; + double tm1, tm2; + + MAP_OR_FAIL(dfs_lookup); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_lookup(dfs, path, flags, obj, mode, stbuf); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + DFS_RECORD_FILE_OBJ_OPEN(dfs, path, DFS_LOOKUPS, obj, tm1, tm2); + 
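+    /* the returned object handle is now associated with this record in
+     * file_obj_hash, so later reads/writes on it resolve to the same record
+     */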
DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_lookup_rel)(dfs_t *dfs, dfs_obj_t *parent, const char *name, int flags, dfs_obj_t **obj, mode_t *mode, struct stat *stbuf) +{ + int ret; + double tm1, tm2; + char *obj_rec_name = NULL; + + MAP_OR_FAIL(dfs_lookup_rel); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_lookup_rel(dfs, parent, name, flags, obj, mode, stbuf); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + DFS_RESOLVE_OBJ_REC_NAME(parent, name, obj_rec_name); + if(obj_rec_name) + { + DFS_RECORD_FILE_OBJ_OPEN(dfs, obj_rec_name, DFS_LOOKUPS, obj, tm1, tm2); + free(obj_rec_name); + } + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_open)(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode, int flags, daos_oclass_id_t cid, daos_size_t chunk_size, const char *value, dfs_obj_t **obj) +{ + int ret; + double tm1, tm2; + char *obj_rec_name = NULL; + + MAP_OR_FAIL(dfs_open); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_open(dfs, parent, name, mode, flags, cid, chunk_size, value, obj); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + DFS_RESOLVE_OBJ_REC_NAME(parent, name, obj_rec_name); + if(obj_rec_name) + { + DFS_RECORD_FILE_OBJ_OPEN(dfs, obj_rec_name, DFS_OPENS, obj, tm1, tm2); + free(obj_rec_name); + } + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_dup)(dfs_t *dfs, dfs_obj_t *obj, int flags, dfs_obj_t **new_obj) +{ + int ret; + double tm1, tm2; + struct dfs_file_record_ref *rec_ref = NULL; + + MAP_OR_FAIL(dfs_dup); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_dup(dfs, obj, flags, new_obj); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &obj, sizeof(obj)); + DFS_RECORD_FILE_OBJREF_OPEN(rec_ref, DFS_DUPS, new_obj, tm1, tm2); + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_obj_global2local)(dfs_t *dfs, int flags, d_iov_t glob, dfs_obj_t **obj) +{ + int ret; + double tm1, tm2; + char *obj_rec_name = NULL; + + MAP_OR_FAIL(dfs_obj_global2local); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_obj_global2local(dfs, flags, glob, obj); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + DFS_RECORD_FILE_OBJ_OPEN(dfs, obj_rec_name, DFS_GLOBAL_OPENS, obj, tm1, tm2); + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_release)(dfs_obj_t *obj) +{ + int ret; + double tm1, tm2; + struct dfs_file_record_ref *rec_ref = NULL; + + MAP_OR_FAIL(dfs_release); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_release(obj); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &obj, sizeof(obj)); + if(rec_ref) + { + if(rec_ref->file_rec->fcounters[DFS_F_CLOSE_START_TIMESTAMP] == 0 || + rec_ref->file_rec->fcounters[DFS_F_CLOSE_START_TIMESTAMP] > tm1) + rec_ref->file_rec->fcounters[DFS_F_CLOSE_START_TIMESTAMP] = tm1; + rec_ref->file_rec->fcounters[DFS_F_CLOSE_END_TIMESTAMP] = tm2; + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->file_rec->fcounters[DFS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + darshan_delete_record_ref(&(dfs_runtime->file_obj_hash), &obj, sizeof(obj)); + } + DFS_POST_RECORD(); + + return(ret); +} + +/* DAOS callback routine to measure end of async read calls */ +struct dfs_read_event_tracker +{ + double tm1; + dfs_obj_t *obj; + int op; + daos_size_t *read_size; +}; +int darshan_dfs_read_comp_cb(void *arg, daos_event_t *ev, int ret) +{ + struct dfs_read_event_tracker *tracker = (struct dfs_read_event_tracker *)arg; + + if (ret == 0) + { + /* async operation completed successfully, capture Darshan statistics */ + double tm2 
= darshan_core_wtime(); + DFS_RECORD_READ(tracker->obj, *(tracker->read_size), tracker->op, 1, tracker->tm1, tm2); + } + free(tracker); + + return 0; +} + +int DARSHAN_DECL(dfs_read)(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size_t *read_size, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t rdsize; + + MAP_OR_FAIL(dfs_read); + + if (ev) + { + /* setup callback to record the read operation upon completion */ + struct dfs_read_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->obj = obj; + tracker->op = DFS_READS; + tracker->read_size = read_size; + daos_event_register_comp_cb(ev, darshan_dfs_read_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_dfs_read(dfs, obj, sgl, off, read_size, ev); + tm2 = DAOS_WTIME(); + + if (!ev) + { + /* only record here for synchronous I/O operations */ + DFS_PRE_RECORD(); + /* no need to calculate read_size, it's returned to user */ + rdsize = *read_size; + DFS_RECORD_READ(obj, rdsize, DFS_READS, 0, tm1, tm2); + DFS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(dfs_readx)(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_size_t *read_size, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t rdsize; + + MAP_OR_FAIL(dfs_readx); + + if (ev) + { + /* setup callback to record the read operation upon completion */ + struct dfs_read_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->obj = obj; + tracker->op = DFS_READXS; + tracker->read_size = read_size; + daos_event_register_comp_cb(ev, darshan_dfs_read_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_dfs_readx(dfs, obj, iod, sgl, read_size, ev); + tm2 = DAOS_WTIME(); + + if (!ev) + { + /* only record here for synchronous I/O operations */ + DFS_PRE_RECORD(); + /* no need to calculate read_size, it's returned to user */ + rdsize = *read_size; + DFS_RECORD_READ(obj, rdsize, DFS_READXS, 0, tm1, tm2); + DFS_POST_RECORD(); + } + + return(ret); +} + +/* DAOS callback routine to measure end of async write calls */ +struct dfs_write_event_tracker +{ + double tm1; + dfs_obj_t *obj; + int op; + daos_size_t write_size; +}; +int darshan_dfs_write_comp_cb(void *arg, daos_event_t *ev, int ret) +{ + struct dfs_write_event_tracker *tracker = (struct dfs_write_event_tracker *)arg; + + if (ret == 0) + { + /* async operation completed successfully, capture Darshan statistics */ + double tm2 = darshan_core_wtime(); + DFS_RECORD_WRITE(tracker->obj, tracker->write_size, tracker->op, 1, tracker->tm1, tm2); + } + free(tracker); + + return 0; +} + +int DARSHAN_DECL(dfs_write)(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t wrsize; + int i; + + MAP_OR_FAIL(dfs_write); + + /* calculate write size first */ + for (i = 0, wrsize = 0; i < sgl->sg_nr; i++) + wrsize += sgl->sg_iovs[i].iov_len; + + if (ev) + { + /* setup callback to record the write operation upon completion */ + struct dfs_write_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->obj = obj; + tracker->op = DFS_WRITES; + tracker->write_size = wrsize; + daos_event_register_comp_cb(ev, darshan_dfs_write_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_dfs_write(dfs, obj, sgl, off, ev); + tm2 = DAOS_WTIME(); + + if (!ev) + { + /* only record here for synchronous I/O operations */ + 
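+        /* (asynchronous writes are instead recorded by
+         * darshan_dfs_write_comp_cb() once the DAOS event completes
+         * successfully)
+         */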
DFS_PRE_RECORD(); + DFS_RECORD_WRITE(obj, wrsize, DFS_WRITES, 0, tm1, tm2); + DFS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(dfs_writex)(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_event_t *ev) +{ + int ret; + double tm1, tm2; + daos_size_t wrsize; + int i; + + MAP_OR_FAIL(dfs_writex); + + /* calculate write size first */ + for (i = 0, wrsize = 0; i < sgl->sg_nr; i++) + wrsize += sgl->sg_iovs[i].iov_len; + + if (ev) + { + /* setup callback to record the write operation upon completion */ + struct dfs_write_event_tracker *tracker = malloc(sizeof(*tracker)); + if (tracker) + { + tracker->tm1 = DAOS_WTIME(); + tracker->obj = obj; + tracker->op = DFS_WRITEXS; + tracker->write_size = wrsize; + daos_event_register_comp_cb(ev, darshan_dfs_write_comp_cb, tracker); + } + } + + tm1 = DAOS_WTIME(); + ret = __real_dfs_writex(dfs, obj, iod, sgl, ev); + tm2 = DAOS_WTIME(); + + if (!ev) + { + /* only record here for synchronous I/O operations */ + DFS_PRE_RECORD(); + DFS_RECORD_WRITE(obj, wrsize, DFS_WRITEXS, 0, tm1, tm2); + DFS_POST_RECORD(); + } + + return(ret); +} + +int DARSHAN_DECL(dfs_get_size)(dfs_t *dfs, dfs_obj_t *obj, daos_size_t *size) +{ + int ret; + double tm1, tm2; + struct dfs_file_record_ref *rec_ref = NULL; + + MAP_OR_FAIL(dfs_get_size); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_get_size(dfs, obj, size); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &obj, sizeof(obj)); + if(rec_ref) + { + rec_ref->file_rec->counters[DFS_GET_SIZES] += 1; + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->file_rec->fcounters[DFS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + } + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_punch)(dfs_t *dfs, dfs_obj_t *obj, daos_off_t offset, daos_size_t len) +{ + int ret; + double tm1, tm2; + struct dfs_file_record_ref *rec_ref = NULL; + + MAP_OR_FAIL(dfs_punch); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_punch(dfs, obj, offset, len); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &obj, sizeof(obj)); + if(rec_ref) + { + rec_ref->file_rec->counters[DFS_PUNCHES] += 1; + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->file_rec->fcounters[DFS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + } + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_remove)(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, + daos_obj_id_t *oid) +{ + int ret; + double tm1, tm2; + daos_obj_id_t the_oid; + struct dfs_mount_info *mnt_info; + darshan_record_id rec_id; + struct dfs_file_record_ref *rec_ref = NULL; + char *obj_rec_name = NULL; + + /* ask for the OID if user doesn't -- used to compute record ID */ + if(!oid) + oid = &the_oid; + + MAP_OR_FAIL(dfs_remove); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_remove(dfs, parent, name, force, oid); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + DFS_GET_MOUNT_INFO(dfs, mnt_info); + DFS_GEN_DARSHAN_REC_ID(oid, mnt_info, rec_id); + rec_ref = darshan_lookup_record_ref(dfs_runtime->rec_id_hash, + &rec_id, sizeof(rec_id)); + if(!rec_ref) + { + DFS_RESOLVE_OBJ_REC_NAME(parent, name, obj_rec_name); + if(obj_rec_name) + { + rec_ref = dfs_track_new_file_record(rec_id, obj_rec_name, mnt_info); + free(obj_rec_name); + } + } + if(rec_ref) + { + rec_ref->file_rec->counters[DFS_REMOVES] += 1; + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->file_rec->fcounters[DFS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + } + DFS_POST_RECORD(); + + return(ret); +} + +#if 0 +/* 
XXX: we can't instrument this call because we have no way to obtain + * the associated OID, which is used to lookup the Darshan record + */ +int DARSHAN_DECL(dfs_stat)(dfs_t *dfs, dfs_obj_t *parent, const char *name, struct stat *stbuf) +{ + int ret; + double tm1, tm2; + struct dfs_file_record_ref *rec_ref = NULL; + char *parent_rec_name, *rec_name; + int rec_len; + darshan_record_id rec_id; + + MAP_OR_FAIL(dfs_stat); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_stat(dfs, parent, name, stbuf); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + DFS_RESOLVE_PARENT_REC_NAME(dfs, parent, parent_rec_name); + if(parent_rec_name) + { + rec_len = strlen(parent_rec_name) + strlen(name) + 1; + rec_name = malloc(rec_len); + if(rec_name) + { + memset(rec_name, 0, rec_len); + strcat(rec_name, parent_rec_name); + strcat(rec_name, name); + rec_id = darshan_core_gen_record_id(rec_name); + rec_ref = darshan_lookup_record_ref(dfs_runtime->rec_id_hash, &rec_id, sizeof(rec_id)); + if(!rec_ref) rec_ref = dfs_track_new_file_record(rec_id, rec_name); + if(rec_ref) + { + rec_ref->file_rec->counters[DFS_STATS] += 1; + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->file_rec->fcounters[DFS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + } + free(rec_name); + } + if(!parent) free(parent_rec_name); + } + DFS_POST_RECORD(); + + return(ret); +} +#endif + +int DARSHAN_DECL(dfs_ostat)(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf) +{ + int ret; + double tm1, tm2; + struct dfs_file_record_ref *rec_ref = NULL; + + MAP_OR_FAIL(dfs_ostat); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_ostat(dfs, obj, stbuf); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &obj, sizeof(obj)); + if(rec_ref) + { + rec_ref->file_rec->counters[DFS_STATS] += 1; + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->file_rec->fcounters[DFS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + } + DFS_POST_RECORD(); + + return(ret); +} + +int DARSHAN_DECL(dfs_osetattr)(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags) +{ + int ret; + double tm1, tm2; + struct dfs_file_record_ref *rec_ref = NULL; + + MAP_OR_FAIL(dfs_osetattr); + + tm1 = DAOS_WTIME(); + ret = __real_dfs_osetattr(dfs, obj, stbuf, flags); + tm2 = DAOS_WTIME(); + + DFS_PRE_RECORD(); + rec_ref = darshan_lookup_record_ref(dfs_runtime->file_obj_hash, &obj, sizeof(obj)); + if(rec_ref) + { + rec_ref->file_rec->counters[DFS_STATS] += 1; + DARSHAN_TIMER_INC_NO_OVERLAP( + rec_ref->file_rec->fcounters[DFS_F_META_TIME], + tm1, tm2, rec_ref->last_meta_end); + } + DFS_POST_RECORD(); + + return(ret); +} + +/********************************************************* + * Internal functions for manipulating DAOS module state * + *********************************************************/ + +static void dfs_runtime_initialize() +{ + int ret; + size_t dfs_rec_count; + darshan_module_funcs mod_funcs = { +#ifdef HAVE_MPI + .mod_redux_func = &dfs_mpi_redux, +#endif + .mod_output_func = &dfs_output, + .mod_cleanup_func = &dfs_cleanup + }; + + /* if this attempt at initializing fails, we won't try again */ + dfs_runtime_init_attempted = 1; + + /* try to store a default number of records for this module */ + dfs_rec_count = DARSHAN_DEF_MOD_REC_COUNT; + + /* register the DFS module with darshan core */ + ret = darshan_core_register_module( + DARSHAN_DFS_MOD, + mod_funcs, + sizeof(struct darshan_dfs_file), + &dfs_rec_count, + &my_rank, + NULL); + if(ret < 0) + return; + + dfs_runtime = malloc(sizeof(*dfs_runtime)); + if(!dfs_runtime) + { + 
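+        /* failed to allocate module state; back out of the registration
+         * with darshan-core
+         */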
darshan_core_unregister_module(DARSHAN_DFS_MOD); + return; + } + memset(dfs_runtime, 0, sizeof(*dfs_runtime)); + + /* register a heatmap */ + dfs_runtime->heatmap_id = heatmap_register("heatmap:DFS"); + + return; +} + +static struct dfs_file_record_ref *dfs_track_new_file_record( + darshan_record_id rec_id, const char *path, struct dfs_mount_info *mnt_info) +{ + struct darshan_dfs_file *file_rec = NULL; + struct dfs_file_record_ref *rec_ref = NULL; + int ret; + + rec_ref = malloc(sizeof(*rec_ref)); + if(!rec_ref) + return(NULL); + memset(rec_ref, 0, sizeof(*rec_ref)); + + /* add a reference to this file record based on record id */ + ret = darshan_add_record_ref(&(dfs_runtime->rec_id_hash), &rec_id, + sizeof(darshan_record_id), rec_ref); + if(ret == 0) + { + free(rec_ref); + return(NULL); + } + + /* register the actual file record with darshan-core so it is persisted + * in the log file + */ + file_rec = darshan_core_register_record( + rec_id, + path, + DARSHAN_DFS_MOD, + sizeof(struct darshan_dfs_file), + NULL); + + if(!file_rec) + { + darshan_delete_record_ref(&(dfs_runtime->rec_id_hash), + &rec_id, sizeof(darshan_record_id)); + free(rec_ref); + return(NULL); + } + + /* registering this file record was successful, so initialize some fields */ + file_rec->base_rec.id = rec_id; + file_rec->base_rec.rank = my_rank; + if(mnt_info) + { + uuid_copy(file_rec->pool_uuid, mnt_info->pool_uuid); + uuid_copy(file_rec->cont_uuid, mnt_info->cont_uuid); + } + rec_ref->file_rec = file_rec; + dfs_runtime->file_rec_count++; + + return(rec_ref); +} + +static void dfs_finalize_file_records(void *rec_ref_p, void *user_ptr) +{ + struct dfs_file_record_ref *rec_ref = + (struct dfs_file_record_ref *)rec_ref_p; + + tdestroy(rec_ref->access_root, free); + return; +} + +#ifdef HAVE_MPI +static void dfs_record_reduction_op( + void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype) +{ + struct darshan_dfs_file tmp_file; + struct darshan_dfs_file *infile = infile_v; + struct darshan_dfs_file *inoutfile = inoutfile_v; + int i, j, k; + + for(i=0; i<*len; i++) + { + memset(&tmp_file, 0, sizeof(struct darshan_dfs_file)); + tmp_file.base_rec.id = infile->base_rec.id; + tmp_file.base_rec.rank = -1; + uuid_copy(tmp_file.pool_uuid, infile->pool_uuid); + uuid_copy(tmp_file.cont_uuid, infile->cont_uuid); + + /* sum */ + for(j=DFS_OPENS; j<=DFS_RW_SWITCHES; j++) + { + tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j]; + if(tmp_file.counters[j] < 0) /* make sure invalid counters are -1 exactly */ + tmp_file.counters[j] = -1; + } + + /* skip DFS_MAX_*_TIME_SIZE; handled in floating point section */ + + for(j=DFS_SIZE_READ_0_100; j<=DFS_SIZE_WRITE_1G_PLUS; j++) + { + tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j]; + } + + /* common access counters */ + + /* first collapse any duplicates */ + for(j=DFS_ACCESS1_ACCESS; j<=DFS_ACCESS4_ACCESS; j++) + { + for(k=DFS_ACCESS1_ACCESS; k<=DFS_ACCESS4_ACCESS; k++) + { + if(infile->counters[j] == inoutfile->counters[k]) + { + infile->counters[j+4] += inoutfile->counters[k+4]; + inoutfile->counters[k] = 0; + inoutfile->counters[k+4] = 0; + } + } + } + + /* first set */ + for(j=DFS_ACCESS1_ACCESS; j<=DFS_ACCESS4_ACCESS; j++) + { + DARSHAN_UPDATE_COMMON_VAL_COUNTERS( + &(tmp_file.counters[DFS_ACCESS1_ACCESS]), + &(tmp_file.counters[DFS_ACCESS1_COUNT]), + &infile->counters[j], 1, infile->counters[j+4], 1); + } + /* second set */ + for(j=DFS_ACCESS1_ACCESS; j<=DFS_ACCESS4_ACCESS; j++) + { + DARSHAN_UPDATE_COMMON_VAL_COUNTERS( + 
&(tmp_file.counters[DFS_ACCESS1_ACCESS]), + &(tmp_file.counters[DFS_ACCESS1_COUNT]), + &inoutfile->counters[j], 1, inoutfile->counters[j+4], 1); + } + + tmp_file.counters[DFS_CHUNK_SIZE] = infile->counters[DFS_CHUNK_SIZE]; + + /* min non-zero (if available) value */ + for(j=DFS_F_OPEN_START_TIMESTAMP; j<=DFS_F_CLOSE_START_TIMESTAMP; j++) + { + if((infile->fcounters[j] < inoutfile->fcounters[j] && + infile->fcounters[j] > 0) || inoutfile->fcounters[j] == 0) + tmp_file.fcounters[j] = infile->fcounters[j]; + else + tmp_file.fcounters[j] = inoutfile->fcounters[j]; + } + + /* max */ + for(j=DFS_F_OPEN_END_TIMESTAMP; j<=DFS_F_CLOSE_END_TIMESTAMP; j++) + { + if(infile->fcounters[j] > inoutfile->fcounters[j]) + tmp_file.fcounters[j] = infile->fcounters[j]; + else + tmp_file.fcounters[j] = inoutfile->fcounters[j]; + } + + /* sum */ + for(j=DFS_F_READ_TIME; j<=DFS_F_META_TIME; j++) + { + tmp_file.fcounters[j] = infile->fcounters[j] + inoutfile->fcounters[j]; + } + + /* max (special case) */ + if(infile->fcounters[DFS_F_MAX_READ_TIME] > + inoutfile->fcounters[DFS_F_MAX_READ_TIME]) + { + tmp_file.fcounters[DFS_F_MAX_READ_TIME] = + infile->fcounters[DFS_F_MAX_READ_TIME]; + tmp_file.counters[DFS_MAX_READ_TIME_SIZE] = + infile->counters[DFS_MAX_READ_TIME_SIZE]; + } + else + { + tmp_file.fcounters[DFS_F_MAX_READ_TIME] = + inoutfile->fcounters[DFS_F_MAX_READ_TIME]; + tmp_file.counters[DFS_MAX_READ_TIME_SIZE] = + inoutfile->counters[DFS_MAX_READ_TIME_SIZE]; + } + + if(infile->fcounters[DFS_F_MAX_WRITE_TIME] > + inoutfile->fcounters[DFS_F_MAX_WRITE_TIME]) + { + tmp_file.fcounters[DFS_F_MAX_WRITE_TIME] = + infile->fcounters[DFS_F_MAX_WRITE_TIME]; + tmp_file.counters[DFS_MAX_WRITE_TIME_SIZE] = + infile->counters[DFS_MAX_WRITE_TIME_SIZE]; + } + else + { + tmp_file.fcounters[DFS_F_MAX_WRITE_TIME] = + inoutfile->fcounters[DFS_F_MAX_WRITE_TIME]; + tmp_file.counters[DFS_MAX_WRITE_TIME_SIZE] = + inoutfile->counters[DFS_MAX_WRITE_TIME_SIZE]; + } + + /* min (zeroes are ok here; some procs don't do I/O) */ + if(infile->fcounters[DFS_F_FASTEST_RANK_TIME] < + inoutfile->fcounters[DFS_F_FASTEST_RANK_TIME]) + { + tmp_file.counters[DFS_FASTEST_RANK] = + infile->counters[DFS_FASTEST_RANK]; + tmp_file.counters[DFS_FASTEST_RANK_BYTES] = + infile->counters[DFS_FASTEST_RANK_BYTES]; + tmp_file.fcounters[DFS_F_FASTEST_RANK_TIME] = + infile->fcounters[DFS_F_FASTEST_RANK_TIME]; + } + else + { + tmp_file.counters[DFS_FASTEST_RANK] = + inoutfile->counters[DFS_FASTEST_RANK]; + tmp_file.counters[DFS_FASTEST_RANK_BYTES] = + inoutfile->counters[DFS_FASTEST_RANK_BYTES]; + tmp_file.fcounters[DFS_F_FASTEST_RANK_TIME] = + inoutfile->fcounters[DFS_F_FASTEST_RANK_TIME]; + } + + /* max */ + if(infile->fcounters[DFS_F_SLOWEST_RANK_TIME] > + inoutfile->fcounters[DFS_F_SLOWEST_RANK_TIME]) + { + tmp_file.counters[DFS_SLOWEST_RANK] = + infile->counters[DFS_SLOWEST_RANK]; + tmp_file.counters[DFS_SLOWEST_RANK_BYTES] = + infile->counters[DFS_SLOWEST_RANK_BYTES]; + tmp_file.fcounters[DFS_F_SLOWEST_RANK_TIME] = + infile->fcounters[DFS_F_SLOWEST_RANK_TIME]; + } + else + { + tmp_file.counters[DFS_SLOWEST_RANK] = + inoutfile->counters[DFS_SLOWEST_RANK]; + tmp_file.counters[DFS_SLOWEST_RANK_BYTES] = + inoutfile->counters[DFS_SLOWEST_RANK_BYTES]; + tmp_file.fcounters[DFS_F_SLOWEST_RANK_TIME] = + inoutfile->fcounters[DFS_F_SLOWEST_RANK_TIME]; + } + + /* update pointers */ + *inoutfile = tmp_file; + inoutfile++; + infile++; + } + + return; +} +#endif + +/********************************************************************************* + * shutdown functions 
exported by this module for coordinating with darshan-core * + *********************************************************************************/ + +#ifdef HAVE_MPI +static void dfs_mpi_redux( + void *dfs_buf, MPI_Comm mod_comm, + darshan_record_id *shared_recs, int shared_rec_count) +{ + int dfs_rec_count; + struct dfs_file_record_ref *rec_ref; + struct darshan_dfs_file *dfs_rec_buf = (struct darshan_dfs_file *)dfs_buf; + double dfs_time; + struct darshan_dfs_file *red_send_buf = NULL; + struct darshan_dfs_file *red_recv_buf = NULL; + MPI_Datatype red_type; + MPI_Op red_op; + int i; + + DFS_LOCK(); + assert(dfs_runtime); + + dfs_rec_count = dfs_runtime->file_rec_count; + + /* necessary initialization of shared records */ + for(i = 0; i < shared_rec_count; i++) + { + rec_ref = darshan_lookup_record_ref(dfs_runtime->rec_id_hash, + &shared_recs[i], sizeof(darshan_record_id)); + assert(rec_ref); + + dfs_time = + rec_ref->file_rec->fcounters[DFS_F_READ_TIME] + + rec_ref->file_rec->fcounters[DFS_F_WRITE_TIME] + + rec_ref->file_rec->fcounters[DFS_F_META_TIME]; + + /* initialize fastest/slowest info prior to the reduction */ + rec_ref->file_rec->counters[DFS_FASTEST_RANK] = + rec_ref->file_rec->base_rec.rank; + rec_ref->file_rec->counters[DFS_FASTEST_RANK_BYTES] = + rec_ref->file_rec->counters[DFS_BYTES_READ] + + rec_ref->file_rec->counters[DFS_BYTES_WRITTEN]; + rec_ref->file_rec->fcounters[DFS_F_FASTEST_RANK_TIME] = + dfs_time; + + /* until reduction occurs, we assume that this rank is both + * the fastest and slowest. It is up to the reduction operator + * to find the true min and max. + */ + rec_ref->file_rec->counters[DFS_SLOWEST_RANK] = + rec_ref->file_rec->counters[DFS_FASTEST_RANK]; + rec_ref->file_rec->counters[DFS_SLOWEST_RANK_BYTES] = + rec_ref->file_rec->counters[DFS_FASTEST_RANK_BYTES]; + rec_ref->file_rec->fcounters[DFS_F_SLOWEST_RANK_TIME] = + rec_ref->file_rec->fcounters[DFS_F_FASTEST_RANK_TIME]; + + rec_ref->file_rec->base_rec.rank = -1; + } + + /* sort the array of records so we get all of the shared records + * (marked by rank -1) in a contiguous portion at end of the array + */ + darshan_record_sort(dfs_rec_buf, dfs_rec_count, + sizeof(struct darshan_dfs_file)); + + /* make send_buf point to the shared files at the end of sorted array */ + red_send_buf = &(dfs_rec_buf[dfs_rec_count-shared_rec_count]); + + /* allocate memory for the reduction output on rank 0 */ + if(my_rank == 0) + { + red_recv_buf = malloc(shared_rec_count * sizeof(struct darshan_dfs_file)); + if(!red_recv_buf) + { + DFS_UNLOCK(); + return; + } + } + + /* construct a datatype for a DFS file record. 
This is serving no purpose
+     * except to make sure we can do a reduction on proper boundaries
+     */
+    PMPI_Type_contiguous(sizeof(struct darshan_dfs_file),
+        MPI_BYTE, &red_type);
+    PMPI_Type_commit(&red_type);
+
+    /* register a DFS file record reduction operator */
+    PMPI_Op_create(dfs_record_reduction_op, 1, &red_op);
+
+    /* reduce shared DFS file records */
+    PMPI_Reduce(red_send_buf, red_recv_buf,
+        shared_rec_count, red_type, red_op, 0, mod_comm);
+
+    /* update module state to account for shared file reduction */
+    if(my_rank == 0)
+    {
+        /* overwrite local shared records with globally reduced records */
+        int tmp_ndx = dfs_rec_count - shared_rec_count;
+        memcpy(&(dfs_rec_buf[tmp_ndx]), red_recv_buf,
+            shared_rec_count * sizeof(struct darshan_dfs_file));
+        free(red_recv_buf);
+    }
+    else
+    {
+        /* drop shared records on non-zero ranks */
+        dfs_runtime->file_rec_count -= shared_rec_count;
+    }
+
+    PMPI_Type_free(&red_type);
+    PMPI_Op_free(&red_op);
+
+    DFS_UNLOCK();
+    return;
+}
+#endif
+
+static void dfs_output(
+    void **dfs_buf, int *dfs_buf_sz)
+{
+    int dfs_rec_count;
+    struct darshan_dfs_file *dfs_rec_buf = *(struct darshan_dfs_file **)dfs_buf;
+    int i, j;
+    int ops;
+
+    DFS_LOCK();
+    assert(dfs_runtime);
+
+    dfs_rec_count = dfs_runtime->file_rec_count;
+
+    /* filter out records that have been opened, but don't have any
+     * I/O operations (e.g., open directories, etc.)
+     */
+    for(i=0; i<dfs_rec_count; i++)
+    {
+        ops = 0;
+        for(j=DFS_OPENS; j<=DFS_STATS; j++)
+            ops += dfs_rec_buf[i].counters[j];
+        if(!ops)
+        {
+            if(i != (dfs_rec_count-1))
+            {
+                memmove(&dfs_rec_buf[i], &dfs_rec_buf[i+1],
+                    (dfs_rec_count-i-1) * sizeof(struct darshan_dfs_file));
+                i--;
+            }
+            dfs_rec_count--;
+        }
+    }
+
+    /* pass back the buffer size actually consumed by DFS records */
+    *dfs_buf_sz = dfs_rec_count * sizeof(struct darshan_dfs_file);
+
+    dfs_runtime->frozen = 1;
+
+    DFS_UNLOCK();
+    return;
+}
+
+static void dfs_cleanup()
+{
+    struct dfs_mount_info *mnt_info, *tmp;
+
+    DFS_LOCK();
+    assert(dfs_runtime);
+
+    /* cleanup internal structures used for instrumenting */
+    darshan_iter_record_refs(dfs_runtime->rec_id_hash,
+        &dfs_finalize_file_records, NULL);
+    darshan_clear_record_refs(&(dfs_runtime->file_obj_hash), 0);
+    darshan_clear_record_refs(&(dfs_runtime->rec_id_hash), 1);
+
+    HASH_ITER(hlink, dfs_runtime->mount_hash, mnt_info, tmp)
+    {
+        HASH_DELETE(hlink, dfs_runtime->mount_hash, mnt_info);
+        free(mnt_info);
+    }
+
+    free(dfs_runtime);
+    dfs_runtime = NULL;
+    dfs_runtime_init_attempted = 0;
+
+    DFS_UNLOCK();
+    return;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ * End:
+ *
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/darshan-runtime/share/ld-opts/Makefile.am b/darshan-runtime/share/ld-opts/Makefile.am
index aa053bbd7..281de77f0 100644
--- a/darshan-runtime/share/ld-opts/Makefile.am
+++ b/darshan-runtime/share/ld-opts/Makefile.am
@@ -28,6 +28,9 @@ endif
 if BUILD_MDHIM_MODULE
 dist_ld_opts_DATA += darshan-mdhim-ld-opts
 endif
+if BUILD_DAOS_MODULE
+dist_ld_opts_DATA += darshan-dfs-ld-opts darshan-daos-ld-opts
+endif
 if BUILD_APMPI_MODULE
 nodist_ld_opts_DATA += autoperf-apmpi-ld-opts
 BUILT_SOURCES += autoperf-apmpi-ld-opts
@@ -57,6 +60,10 @@ endif
 if BUILD_MDHIM_MODULE
 	echo '@$(datadir)/ld-opts/darshan-mdhim-ld-opts' >> $@
 endif
+if BUILD_DAOS_MODULE
+	echo '@$(datadir)/ld-opts/darshan-dfs-ld-opts' >> $@
+	echo '@$(datadir)/ld-opts/darshan-daos-ld-opts' >> $@
+endif
 if BUILD_APMPI_MODULE
 	echo '@$(datadir)/ld-opts/autoperf-apmpi-ld-opts' >> $@
 endif
diff --git a/darshan-runtime/share/ld-opts/darshan-daos-ld-opts b/darshan-runtime/share/ld-opts/darshan-daos-ld-opts
new file mode 100644
index 000000000..618d95ed5
--- /dev/null
+++ b/darshan-runtime/share/ld-opts/darshan-daos-ld-opts
@@ -0,0 +1,31 @@
+--wrap=daos_cont_open
+--wrap=daos_cont_global2local
+--wrap=daos_cont_close
+--wrap=daos_obj_open
+--wrap=daos_obj_fetch
+--wrap=daos_obj_update
+--wrap=daos_obj_punch
+--wrap=daos_obj_punch_dkeys
+--wrap=daos_obj_punch_akeys +--wrap=daos_obj_list_dkey +--wrap=daos_obj_list_akey +--wrap=daos_obj_list_recx +--wrap=daos_obj_close +--wrap=daos_array_create +--wrap=daos_array_open +--wrap=daos_array_open_with_attr +--wrap=daos_array_read +--wrap=daos_array_write +--wrap=daos_array_get_size +--wrap=daos_array_set_size +--wrap=daos_array_stat +--wrap=daos_array_punch +--wrap=daos_array_destroy +--wrap=daos_array_close +--wrap=daos_kv_open +--wrap=daos_kv_get +--wrap=daos_kv_put +--wrap=daos_kv_remove +--wrap=daos_kv_list +--wrap=daos_kv_destroy +--wrap=daos_kv_close diff --git a/darshan-runtime/share/ld-opts/darshan-dfs-ld-opts b/darshan-runtime/share/ld-opts/darshan-dfs-ld-opts new file mode 100644 index 000000000..41473984c --- /dev/null +++ b/darshan-runtime/share/ld-opts/darshan-dfs-ld-opts @@ -0,0 +1,18 @@ +--wrap=dfs_mount +--wrap=dfs_global2local +--wrap=dfs_umount +--wrap=dfs_lookup +--wrap=dfs_lookup_rel +--wrap=dfs_open +--wrap=dfs_dup +--wrap=dfs_obj_global2local +--wrap=dfs_release +--wrap=dfs_read +--wrap=dfs_readx +--wrap=dfs_write +--wrap=dfs_writex +--wrap=dfs_get_size +--wrap=dfs_punch +--wrap=dfs_remove +--wrap=dfs_ostat +--wrap=dfs_osetattr diff --git a/darshan-util/Makefile.am b/darshan-util/Makefile.am index 2a28e50ae..1132b94de 100644 --- a/darshan-util/Makefile.am +++ b/darshan-util/Makefile.am @@ -22,6 +22,8 @@ libdarshan_util_la_SOURCES = darshan-null-logutils.c \ darshan-dxt-logutils.c \ darshan-heatmap-logutils.c \ darshan-mdhim-logutils.c \ + darshan-dfs-logutils.c \ + darshan-daos-logutils.c \ darshan-logutils-accumulator.c include_HEADERS = darshan-null-logutils.h \ @@ -36,6 +38,8 @@ include_HEADERS = darshan-null-logutils.h \ darshan-dxt-logutils.h \ darshan-heatmap-logutils.h \ darshan-mdhim-logutils.h \ + darshan-dfs-logutils.h \ + darshan-daos-logutils.h \ ../include/darshan-bgq-log-format.h \ ../include/darshan-dxt-log-format.h \ ../include/darshan-heatmap-log-format.h \ @@ -47,7 +51,9 @@ include_HEADERS = darshan-null-logutils.h \ ../include/darshan-null-log-format.h \ ../include/darshan-pnetcdf-log-format.h \ ../include/darshan-posix-log-format.h \ - ../include/darshan-stdio-log-format.h + ../include/darshan-stdio-log-format.h \ + ../include/darshan-dfs-log-format.h \ + ../include/darshan-daos-log-format.h bin_PROGRAMS = darshan-analyzer \ darshan-convert \ @@ -58,8 +64,6 @@ bin_PROGRAMS = darshan-analyzer \ noinst_PROGRAMS = jenkins-hash-gen -# LIBS += @LIBBZ2@ - jenkins_hash_gen_SOURCES = jenkins-hash-gen.c lookup3.c jenkins_hash_gen_LDADD = libdarshan-util.la diff --git a/darshan-util/configure.ac b/darshan-util/configure.ac index bdec8d7a2..26764d0b0 100644 --- a/darshan-util/configure.ac +++ b/darshan-util/configure.ac @@ -40,6 +40,10 @@ if test "x$enable_darshan_util" = xyes ; then # bz2 is optional CHECK_BZLIB + # uuid headers/library are optional dependencies for DAOS modules + AC_CHECK_HEADER([uuid/uuid.h], + [AC_CHECK_LIB([uuid], [uuid_unparse])]) + # checks to see how we can print 64 bit values on this architecture gt_INTTYPES_PRI if test "x$PRI_MACROS_BROKEN" = x1 ; then diff --git a/darshan-util/darshan-daos-logutils.c b/darshan-util/darshan-daos-logutils.c new file mode 100644 index 000000000..2b0f8b693 --- /dev/null +++ b/darshan-util/darshan-daos-logutils.c @@ -0,0 +1,687 @@ +/* + * Copyright (C) 2020 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ *
+ */
+
+#define _GNU_SOURCE
+#include "darshan-util-config.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <search.h>
+#include <assert.h>
+#include <inttypes.h>
+
+#include "darshan-logutils.h"
+
+#ifdef HAVE_LIBUUID
+#include <uuid/uuid.h>
+#endif
+
+/* counter name strings for the DAOS module */
+#define X(a) #a,
+char *daos_counter_names[] = {
+    DAOS_COUNTERS
+};
+
+char *daos_f_counter_names[] = {
+    DAOS_F_COUNTERS
+};
+#undef X
+
+static int darshan_log_get_daos_object(darshan_fd fd, void** daos_buf_p);
+static int darshan_log_put_daos_object(darshan_fd fd, void* daos_buf);
+static void darshan_log_print_daos_object(void *object_rec,
+    char *object_name, char *mnt_pt, char *fs_type);
+static void darshan_log_print_daos_description(int ver);
+static void darshan_log_print_daos_object_diff(void *obj_rec1, char *obj_name1,
+    void *obj_rec2, char *obj_name2);
+static void darshan_log_agg_daos_objects(void *rec, void *agg_rec, int init_flag);
+static int darshan_log_sizeof_daos_object(void* daos_buf_p);
+static int darshan_log_record_metrics_daos_object(void* daos_buf_p,
+    uint64_t* rec_id,
+    int64_t* r_bytes,
+    int64_t* w_bytes,
+    int64_t* max_offset,
+    double* io_total_time,
+    double* md_only_time,
+    double* rw_only_time,
+    int64_t* rank,
+    int64_t* nprocs);
+
+struct darshan_mod_logutil_funcs daos_logutils =
+{
+    .log_get_record = &darshan_log_get_daos_object,
+    .log_put_record = &darshan_log_put_daos_object,
+    .log_print_record = &darshan_log_print_daos_object,
+    .log_print_description = &darshan_log_print_daos_description,
+    .log_print_diff = &darshan_log_print_daos_object_diff,
+    .log_agg_records = &darshan_log_agg_daos_objects,
+    .log_sizeof_record = &darshan_log_sizeof_daos_object,
+    .log_record_metrics = &darshan_log_record_metrics_daos_object
+};
+
+static int darshan_log_sizeof_daos_object(void* daos_buf_p)
+{
+    /* daos records have a fixed size */
+    return(sizeof(struct darshan_daos_object));
+}
+
+static int darshan_log_record_metrics_daos_object(void* daos_buf_p,
+    uint64_t* rec_id,
+    int64_t* r_bytes,
+    int64_t* w_bytes,
+    int64_t* max_offset,
+    double* io_total_time,
+    double* md_only_time,
+    double* rw_only_time,
+    int64_t* rank,
+    int64_t* nprocs)
+{
+    struct darshan_daos_object *daos_rec = (struct darshan_daos_object *)daos_buf_p;
+
+    *rec_id = daos_rec->base_rec.id;
+    *r_bytes = daos_rec->counters[DAOS_BYTES_READ];
+    *w_bytes = daos_rec->counters[DAOS_BYTES_WRITTEN];
+
+    /* the daos module doesn't report this */
+    *max_offset = -1;
+
+    *rank = daos_rec->base_rec.rank;
+    /* nprocs is 1 per record, unless rank is negative, in which case we
+     * report -1 as the nprocs value to represent "all"
+     */
+    if(daos_rec->base_rec.rank < 0)
+        *nprocs = -1;
+    else
+        *nprocs = 1;
+
+    if(daos_rec->base_rec.rank < 0) {
+        /* shared object records populate a counter with the slowest rank time
+         * (derived during reduction). They do not have a breakdown of meta
+         * and rw time, though.
+         */
+        *io_total_time = daos_rec->fcounters[DAOS_F_SLOWEST_RANK_TIME];
+        *md_only_time = 0;
+        *rw_only_time = 0;
+    }
+    else {
+        /* non-shared records have separate meta, read, and write values
+         * that we can combine as needed
+         */
+        *io_total_time = daos_rec->fcounters[DAOS_F_META_TIME] +
+                         daos_rec->fcounters[DAOS_F_READ_TIME] +
+                         daos_rec->fcounters[DAOS_F_WRITE_TIME];
+        *md_only_time = daos_rec->fcounters[DAOS_F_META_TIME];
+        *rw_only_time = daos_rec->fcounters[DAOS_F_READ_TIME] +
+                        daos_rec->fcounters[DAOS_F_WRITE_TIME];
+    }
+
+    return(0);
+}
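A note on how these static entry points are reached: darshan-util consumers go through the generic per-module function table (`mod_logutils[]`, declared in darshan-logutils.h) rather than calling them by name. A minimal hedged sketch, with illustrative variable names and assuming `fd` came from `darshan_log_open()`:

    void *rec_buf = NULL;
    uint64_t rec_id;
    int64_t r_bytes, w_bytes, max_offset, rank, nprocs;
    double io_time, md_time, rw_time;

    /* fetch one DAOS object record, then derive its summary metrics */
    if(mod_logutils[DARSHAN_DAOS_MOD]->log_get_record(fd, &rec_buf) == 1)
    {
        mod_logutils[DARSHAN_DAOS_MOD]->log_record_metrics(rec_buf,
            &rec_id, &r_bytes, &w_bytes, &max_offset,
            &io_time, &md_time, &rw_time, &rank, &nprocs);
        free(rec_buf);
    }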
+
+static int darshan_log_get_daos_object(darshan_fd fd, void** daos_buf_p)
+{
+    struct darshan_daos_object *obj = *((struct darshan_daos_object **)daos_buf_p);
+    int rec_len;
+    int i;
+    int ret = -1;
+
+    if(fd->mod_map[DARSHAN_DAOS_MOD].len == 0)
+        return(0);
+
+    if(fd->mod_ver[DARSHAN_DAOS_MOD] == 0 ||
+        fd->mod_ver[DARSHAN_DAOS_MOD] > DARSHAN_DAOS_VER)
+    {
+        fprintf(stderr, "Error: Invalid DAOS module version number (got %d)\n",
+            fd->mod_ver[DARSHAN_DAOS_MOD]);
+        return(-1);
+    }
+
+    if(*daos_buf_p == NULL)
+    {
+        obj = malloc(sizeof(*obj));
+        if(!obj)
+            return(-1);
+    }
+
+    if(fd->mod_ver[DARSHAN_DAOS_MOD] == DARSHAN_DAOS_VER)
+    {
+        /* log format is in current version, so we don't need to do any
+         * translation of counters while reading
+         */
+        rec_len = sizeof(struct darshan_daos_object);
+        ret = darshan_log_get_mod(fd, DARSHAN_DAOS_MOD, obj, rec_len);
+    }
+    else
+    {
+        assert(0);
+    }
+
+    if(*daos_buf_p == NULL)
+    {
+        if(ret == rec_len)
+            *daos_buf_p = obj;
+        else
+            free(obj);
+    }
+
+    if(ret < 0)
+        return(-1);
+    else if(ret < rec_len)
+        return(0);
+    else
+    {
+        /* if the read was successful, do any necessary byte-swapping */
+        if(fd->swap_flag)
+        {
+            DARSHAN_BSWAP64(&obj->base_rec.id);
+            DARSHAN_BSWAP64(&obj->base_rec.rank);
+            for(i=0; i<DAOS_NUM_INDICES; i++)
+                DARSHAN_BSWAP64(&obj->counters[i]);
+            for(i=0; i<DAOS_F_NUM_INDICES; i++)
+                DARSHAN_BSWAP64(&obj->fcounters[i]);
+            DARSHAN_BSWAP128(&obj->pool_uuid);
+            DARSHAN_BSWAP128(&obj->cont_uuid);
+            DARSHAN_BSWAP64(&obj->oid_hi);
+            DARSHAN_BSWAP64(&obj->oid_lo);
+        }
+
+        return(1);
+    }
+}
+
+static int darshan_log_put_daos_object(darshan_fd fd, void* daos_buf)
+{
+    struct darshan_daos_object *obj = (struct darshan_daos_object *)daos_buf;
+    int ret;
+
+    ret = darshan_log_put_mod(fd, DARSHAN_DAOS_MOD, obj,
+        sizeof(struct darshan_daos_object), DARSHAN_DAOS_VER);
+    if(ret < 0)
+        return(-1);
+
+    return(0);
+}
+
+static void darshan_log_print_daos_object(void *object_rec, char *object_name,
+    char *mnt_pt, char *fs_type)
+{
+    int i;
+    struct darshan_daos_object *daos_object_rec =
+        (struct darshan_daos_object *)object_rec;
+    char oid[64];
+    char pool_cont_uuid_str[128] = {0};
+
+    sprintf(oid, "%lu.%lu", daos_object_rec->oid_hi, daos_object_rec->oid_lo);
+    object_name = oid;
+
+#ifdef HAVE_LIBUUID
+    uuid_unparse(daos_object_rec->pool_uuid, pool_cont_uuid_str);
+#else
+    strcat(pool_cont_uuid_str, "N/A");
+#endif
+    strcat(pool_cont_uuid_str, ":");
+#ifdef HAVE_LIBUUID
+    uuid_unparse(daos_object_rec->cont_uuid, pool_cont_uuid_str+strlen(pool_cont_uuid_str));
+#else
+    strcat(pool_cont_uuid_str, "N/A");
+#endif
+
+    mnt_pt = pool_cont_uuid_str;
+    fs_type = "N/A";
+
+    for(i=0; i<DAOS_NUM_INDICES; i++)
+    {
+        DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+            daos_object_rec->base_rec.rank, daos_object_rec->base_rec.id,
+            daos_counter_names[i], daos_object_rec->counters[i],
+            object_name, mnt_pt, fs_type);
+    }
+
+    for(i=0; i<DAOS_F_NUM_INDICES; i++)
+    {
+        DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+            daos_object_rec->base_rec.rank, daos_object_rec->base_rec.id,
+            daos_f_counter_names[i], daos_object_rec->fcounters[i],
+            object_name, mnt_pt, fs_type);
+    }
+
+    return;
+}
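The getter above follows darshan-util's usual iteration contract: 1 means a record was returned, 0 means the module's data is exhausted, and a negative value means an error. A hedged sketch of the resulting read loop (the record buffer is allocated on the first call and reused on later ones):

    void *daos_buf = NULL;
    int ret;

    while((ret = mod_logutils[DARSHAN_DAOS_MOD]->log_get_record(fd, &daos_buf)) == 1)
    {
        struct darshan_daos_object *obj = daos_buf;
        printf("object %" PRIu64 ": %" PRId64 " bytes read\n",
            obj->base_rec.id, obj->counters[DAOS_BYTES_READ]);
    }
    if(ret < 0)
        fprintf(stderr, "Error: failed to read DAOS object records\n");
    free(daos_buf);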
+
+static void darshan_log_print_daos_description(int ver)
+{
+    printf("\n# description of DAOS counters:\n");
+    printf("# DAOS_*: DAOS operation counts.\n");
+    printf("# OBJ_OPENS,OBJ_FETCHES,OBJ_UPDATES,OBJ_PUNCHES,OBJ_DKEY_PUNCHES,OBJ_AKEY_PUNCHES,OBJ_DKEY_LISTS,OBJ_AKEY_LISTS,OBJ_RECX_LISTS are types of DAOS object operations.\n");
+    printf("# ARRAY_OPENS,ARRAY_READS,ARRAY_WRITES,ARRAY_GET_SIZES,ARRAY_SET_SIZES,ARRAY_STATS,ARRAY_PUNCHES,ARRAY_DESTROYS are types of DAOS array operations.\n");
+    printf("# KV_OPENS,KV_GETS,KV_PUTS,KV_REMOVES,KV_LISTS,KV_DESTROYS are types of DAOS kv operations.\n");
+    printf("# DAOS_BYTES_*: total bytes read and written using all DAOS object APIs.\n");
+    printf("# DAOS_RW_SWITCHES: number of times access alternated between read and write.\n");
+    printf("# DAOS_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
+    printf("# DAOS_SIZE_*_*: histogram of read and write access sizes.\n");
+    printf("# DAOS_ACCESS*_ACCESS: the four most common access sizes.\n");
+    printf("# DAOS_ACCESS*_COUNT: count of the four most common access sizes.\n");
+    printf("# DAOS_OBJ_OTYPE: DAOS otype value for the object.\n");
+    printf("# DAOS_ARRAY_CELL_SIZE: the cell size for DAOS array objects.\n");
+    printf("# DAOS_ARRAY_CHUNK_SIZE: the chunk size for DAOS array objects.\n");
+    printf("# DAOS_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared objects).\n");
+    printf("# DAOS_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared objects).\n");
+    printf("# DAOS_F_*_START_TIMESTAMP: timestamp of first open/read/write/close.\n");
+    printf("# DAOS_F_*_END_TIMESTAMP: timestamp of last open/read/write/close.\n");
+    printf("# DAOS_F_READ/WRITE/META_TIME: cumulative time spent in read, write, or metadata operations.\n");
+    printf("# DAOS_F_MAX_*_TIME: duration of the slowest read and write operations.\n");
+    printf("# DAOS_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared objects).\n");
+
+    return;
+}
+
+static void darshan_log_print_daos_object_diff(void *obj_rec1, char *obj_name1,
+    void *obj_rec2, char *obj_name2)
+{
+    struct darshan_daos_object *obj1 = (struct darshan_daos_object *)obj_rec1;
+    struct darshan_daos_object *obj2 = (struct darshan_daos_object *)obj_rec2;
+    char obj_oid1[64], obj_oid2[64];
+    int i;
+
+    /* only format an OID string for records that are actually present */
+    if(obj1)
+        sprintf(obj_oid1, "%lu.%lu", obj1->oid_hi, obj1->oid_lo);
+    if(obj2)
+        sprintf(obj_oid2, "%lu.%lu", obj2->oid_hi, obj2->oid_lo);
+
+    /* NOTE: we assume that both input records are the same module format version */
+
+    for(i=0; i<DAOS_NUM_INDICES; i++)
+    {
+        if(!obj2)
+        {
+            printf("- ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj1->base_rec.rank, obj1->base_rec.id, daos_counter_names[i],
+                obj1->counters[i], obj_oid1, "", "");
+        }
+        else if(!obj1)
+        {
+            printf("+ ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj2->base_rec.rank, obj2->base_rec.id, daos_counter_names[i],
+                obj2->counters[i], obj_oid2, "", "");
+        }
+        else if(obj1->counters[i] != obj2->counters[i])
+        {
+            printf("- ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj1->base_rec.rank, obj1->base_rec.id, daos_counter_names[i],
+                obj1->counters[i], obj_oid1, "", "");
+            printf("+ ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj2->base_rec.rank, obj2->base_rec.id, daos_counter_names[i],
+                obj2->counters[i], obj_oid2, "", "");
+        }
+    }
+
+    for(i=0; i<DAOS_F_NUM_INDICES; i++)
+    {
+        if(!obj2)
+        {
+            printf("- ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj1->base_rec.rank, obj1->base_rec.id, daos_f_counter_names[i],
+                obj1->fcounters[i], obj_oid1, "", "");
+        }
+        else if(!obj1)
+        {
+            printf("+ ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj2->base_rec.rank, obj2->base_rec.id, daos_f_counter_names[i],
+                obj2->fcounters[i], obj_oid2, "", "");
+        }
+        else if(obj1->fcounters[i] != obj2->fcounters[i])
+        {
+            printf("- ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj1->base_rec.rank, obj1->base_rec.id, daos_f_counter_names[i],
+                obj1->fcounters[i], obj_oid1, "", "");
+            printf("+ ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DAOS_MOD],
+                obj2->base_rec.rank, obj2->base_rec.id, daos_f_counter_names[i],
+                obj2->fcounters[i], obj_oid2, "", "");
+        }
+    }
+
+    return;
+}
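The aggregator that follows is likewise invoked through the function table; a caller folding a set of records into one summary record would drive it roughly as in this hedged sketch (`recs` and `nrecs` are illustrative names, not part of the patch):

    struct darshan_daos_object agg;
    int r;

    memset(&agg, 0, sizeof(agg));
    for(r = 0; r < nrecs; r++)
    {
        /* init_flag is nonzero only for the first record folded in */
        mod_logutils[DARSHAN_DAOS_MOD]->log_agg_records(&recs[r], &agg, (r == 0));
    }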
+
+static void darshan_log_agg_daos_objects(void *rec, void *agg_rec, int init_flag)
+{
+    struct darshan_daos_object *daos_rec = (struct darshan_daos_object *)rec;
+    struct darshan_daos_object *agg_daos_rec = (struct darshan_daos_object *)agg_rec;
+    int i, j, k;
+    int total_count;
+    int64_t tmp_val[4];
+    int64_t tmp_cnt[4];
+    int duplicate_mask[4] = {0};
+    int tmp_ndx;
+    int64_t daos_fastest_rank, daos_slowest_rank,
+        daos_fastest_bytes, daos_slowest_bytes;
+    double daos_fastest_time, daos_slowest_time;
+    int shared_file_flag = 0;
+
+    /* For the incoming record, we need to determine what values to use for
+     * subsequent comparison against the aggregate record's fastest and
+     * slowest fields. This is complicated by the fact that shared file
+     * records already have derived values, while unique file records do
+     * not. Handle both cases here so that this function can be generic.
+     */
+    if(daos_rec->base_rec.rank == -1)
+    {
+        /* shared files should have pre-calculated fastest and slowest
+         * counters */
+        daos_fastest_rank = daos_rec->counters[DAOS_FASTEST_RANK];
+        daos_slowest_rank = daos_rec->counters[DAOS_SLOWEST_RANK];
+        daos_fastest_bytes = daos_rec->counters[DAOS_FASTEST_RANK_BYTES];
+        daos_slowest_bytes = daos_rec->counters[DAOS_SLOWEST_RANK_BYTES];
+        daos_fastest_time = daos_rec->fcounters[DAOS_F_FASTEST_RANK_TIME];
+        daos_slowest_time = daos_rec->fcounters[DAOS_F_SLOWEST_RANK_TIME];
+    }
+    else
+    {
+        /* for non-shared files, derive bytes and time using data from this
+         * rank
+         */
+        daos_fastest_rank = daos_rec->base_rec.rank;
+        daos_slowest_rank = daos_fastest_rank;
+        daos_fastest_bytes = daos_rec->counters[DAOS_BYTES_READ] +
+            daos_rec->counters[DAOS_BYTES_WRITTEN];
+        daos_slowest_bytes = daos_fastest_bytes;
+        daos_fastest_time = daos_rec->fcounters[DAOS_F_READ_TIME] +
+            daos_rec->fcounters[DAOS_F_WRITE_TIME] +
+            daos_rec->fcounters[DAOS_F_META_TIME];
+        daos_slowest_time = daos_fastest_time;
+    }
+
+    /* if this is our first record, store base id and rank */
+    if(init_flag)
+    {
+        agg_daos_rec->base_rec.rank = daos_rec->base_rec.rank;
+        agg_daos_rec->base_rec.id = daos_rec->base_rec.id;
+    }
+
+    /* so far do all of the records reference the same file? */
+    if(agg_daos_rec->base_rec.id == daos_rec->base_rec.id)
+        shared_file_flag = 1;
+    else
+        agg_daos_rec->base_rec.id = 0;
+
+    /* so far do all of the records reference the same rank?
*/ + if(agg_daos_rec->base_rec.rank != daos_rec->base_rec.rank) + agg_daos_rec->base_rec.rank = -1; + + for(i = 0; i < DAOS_NUM_INDICES; i++) + { + switch(i) + { + case DAOS_OBJ_OPENS: + case DAOS_OBJ_FETCHES: + case DAOS_OBJ_UPDATES: + case DAOS_OBJ_PUNCHES: + case DAOS_OBJ_DKEY_PUNCHES: + case DAOS_OBJ_AKEY_PUNCHES: + case DAOS_OBJ_DKEY_LISTS: + case DAOS_OBJ_AKEY_LISTS: + case DAOS_OBJ_RECX_LISTS: + case DAOS_ARRAY_OPENS: + case DAOS_ARRAY_READS: + case DAOS_ARRAY_WRITES: + case DAOS_ARRAY_GET_SIZES: + case DAOS_ARRAY_SET_SIZES: + case DAOS_ARRAY_STATS: + case DAOS_ARRAY_PUNCHES: + case DAOS_ARRAY_DESTROYS: + case DAOS_KV_OPENS: + case DAOS_KV_GETS: + case DAOS_KV_PUTS: + case DAOS_KV_REMOVES: + case DAOS_KV_LISTS: + case DAOS_KV_DESTROYS: + case DAOS_BYTES_READ: + case DAOS_BYTES_WRITTEN: + case DAOS_RW_SWITCHES: + case DAOS_SIZE_READ_0_100: + case DAOS_SIZE_READ_100_1K: + case DAOS_SIZE_READ_1K_10K: + case DAOS_SIZE_READ_10K_100K: + case DAOS_SIZE_READ_100K_1M: + case DAOS_SIZE_READ_1M_4M: + case DAOS_SIZE_READ_4M_10M: + case DAOS_SIZE_READ_10M_100M: + case DAOS_SIZE_READ_100M_1G: + case DAOS_SIZE_READ_1G_PLUS: + case DAOS_SIZE_WRITE_0_100: + case DAOS_SIZE_WRITE_100_1K: + case DAOS_SIZE_WRITE_1K_10K: + case DAOS_SIZE_WRITE_10K_100K: + case DAOS_SIZE_WRITE_100K_1M: + case DAOS_SIZE_WRITE_1M_4M: + case DAOS_SIZE_WRITE_4M_10M: + case DAOS_SIZE_WRITE_10M_100M: + case DAOS_SIZE_WRITE_100M_1G: + case DAOS_SIZE_WRITE_1G_PLUS: + /* sum */ + agg_daos_rec->counters[i] += daos_rec->counters[i]; + if(agg_daos_rec->counters[i] < 0) /* make sure invalid counters are -1 exactly */ + agg_daos_rec->counters[i] = -1; + break; + case DAOS_OBJ_OTYPE: + case DAOS_ARRAY_CELL_SIZE: + case DAOS_ARRAY_CHUNK_SIZE: + /* just set to the input value */ + agg_daos_rec->counters[i] = daos_rec->counters[i]; + break; + case DAOS_MAX_READ_TIME_SIZE: + case DAOS_MAX_WRITE_TIME_SIZE: + case DAOS_FASTEST_RANK: + case DAOS_FASTEST_RANK_BYTES: + case DAOS_SLOWEST_RANK: + case DAOS_SLOWEST_RANK_BYTES: + /* these are set with the FP counters */ + break; + case DAOS_ACCESS1_ACCESS: + /* increment common value counters */ + + /* first, collapse duplicates */ + for(j = i; j < i + 4; j++) + { + for(k = 0; k < 4; k++) + { + if(agg_daos_rec->counters[i + k] == daos_rec->counters[j]) + { + agg_daos_rec->counters[i + k + 4] += daos_rec->counters[j + 4]; + /* flag that we should ignore this one now */ + duplicate_mask[j-i] = 1; + } + } + } + + /* second, add new counters */ + for(j = i; j < i + 4; j++) + { + /* skip any that were handled above already */ + if(duplicate_mask[j-i]) + continue; + tmp_ndx = 0; + memset(tmp_val, 0, 4 * sizeof(int64_t)); + memset(tmp_cnt, 0, 4 * sizeof(int64_t)); + + if(daos_rec->counters[j] == 0) break; + for(k = 0; k < 4; k++) + { + if(agg_daos_rec->counters[i + k] == daos_rec->counters[j]) + { + total_count = agg_daos_rec->counters[i + k + 4] + + daos_rec->counters[j + 4]; + break; + } + } + if(k == 4) total_count = daos_rec->counters[j + 4]; + + for(k = 0; k < 4; k++) + { + if((agg_daos_rec->counters[i + k + 4] > total_count) || + ((agg_daos_rec->counters[i + k + 4] == total_count) && + (agg_daos_rec->counters[i + k] > daos_rec->counters[j]))) + { + tmp_val[tmp_ndx] = agg_daos_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_daos_rec->counters[i + k + 4]; + tmp_ndx++; + } + else break; + } + if(tmp_ndx == 4) break; + + tmp_val[tmp_ndx] = daos_rec->counters[j]; + tmp_cnt[tmp_ndx] = daos_rec->counters[j + 4]; + tmp_ndx++; + + while(tmp_ndx != 4) + { + if(agg_daos_rec->counters[i + k] != 
daos_rec->counters[j]) + { + tmp_val[tmp_ndx] = agg_daos_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_daos_rec->counters[i + k + 4]; + tmp_ndx++; + } + k++; + } + memcpy(&(agg_daos_rec->counters[i]), tmp_val, 4 * sizeof(int64_t)); + memcpy(&(agg_daos_rec->counters[i + 4]), tmp_cnt, 4 * sizeof(int64_t)); + } + break; + case DAOS_ACCESS2_ACCESS: + case DAOS_ACCESS3_ACCESS: + case DAOS_ACCESS4_ACCESS: + case DAOS_ACCESS1_COUNT: + case DAOS_ACCESS2_COUNT: + case DAOS_ACCESS3_COUNT: + case DAOS_ACCESS4_COUNT: + /* these are set all at once with common counters above */ + break; + } + } + + for(i = 0; i < DAOS_F_NUM_INDICES; i++) + { + switch(i) + { + case DAOS_F_READ_TIME: + case DAOS_F_WRITE_TIME: + case DAOS_F_META_TIME: + /* sum */ + agg_daos_rec->fcounters[i] += daos_rec->fcounters[i]; + break; + case DAOS_F_OPEN_START_TIMESTAMP: + case DAOS_F_READ_START_TIMESTAMP: + case DAOS_F_WRITE_START_TIMESTAMP: + case DAOS_F_CLOSE_START_TIMESTAMP: + /* minimum non-zero */ + if((daos_rec->fcounters[i] > 0) && + ((agg_daos_rec->fcounters[i] == 0) || + (daos_rec->fcounters[i] < agg_daos_rec->fcounters[i]))) + { + agg_daos_rec->fcounters[i] = daos_rec->fcounters[i]; + } + break; + case DAOS_F_OPEN_END_TIMESTAMP: + case DAOS_F_READ_END_TIMESTAMP: + case DAOS_F_WRITE_END_TIMESTAMP: + case DAOS_F_CLOSE_END_TIMESTAMP: + /* maximum */ + if(daos_rec->fcounters[i] > agg_daos_rec->fcounters[i]) + { + agg_daos_rec->fcounters[i] = daos_rec->fcounters[i]; + } + break; + case DAOS_F_MAX_READ_TIME: + if(daos_rec->fcounters[i] > agg_daos_rec->fcounters[i]) + { + agg_daos_rec->fcounters[i] = daos_rec->fcounters[i]; + agg_daos_rec->counters[DAOS_MAX_READ_TIME_SIZE] = + daos_rec->counters[DAOS_MAX_READ_TIME_SIZE]; + } + break; + case DAOS_F_MAX_WRITE_TIME: + if(daos_rec->fcounters[i] > agg_daos_rec->fcounters[i]) + { + agg_daos_rec->fcounters[i] = daos_rec->fcounters[i]; + agg_daos_rec->counters[DAOS_MAX_WRITE_TIME_SIZE] = + daos_rec->counters[DAOS_MAX_WRITE_TIME_SIZE]; + } + break; + case DAOS_F_FASTEST_RANK_TIME: + + if(!shared_file_flag) + { + /* The fastest counters are only valid under these + * conditions when aggregating records that all refer to + * the same file. + */ + agg_daos_rec->counters[DAOS_FASTEST_RANK] = -1; + agg_daos_rec->counters[DAOS_FASTEST_RANK_BYTES] = -1; + agg_daos_rec->fcounters[DAOS_F_FASTEST_RANK_TIME] = 0.0; + break; + } + if (init_flag || + daos_fastest_time < agg_daos_rec->fcounters[DAOS_F_FASTEST_RANK_TIME]) { + /* The incoming record wins if a) this is the first + * record we are aggregating or b) it is the fastest + * record we have seen so far. + */ + agg_daos_rec->counters[DAOS_FASTEST_RANK] + = daos_fastest_rank; + agg_daos_rec->counters[DAOS_FASTEST_RANK_BYTES] + = daos_fastest_bytes; + agg_daos_rec->fcounters[DAOS_F_FASTEST_RANK_TIME] + = daos_fastest_time; + } + break; + case DAOS_F_SLOWEST_RANK_TIME: + if(!shared_file_flag) + { + /* The slowest counters are only valid under these + * conditions when aggregating records that all refer to + * the same file. + */ + agg_daos_rec->counters[DAOS_SLOWEST_RANK] = -1; + agg_daos_rec->counters[DAOS_SLOWEST_RANK_BYTES] = -1; + agg_daos_rec->fcounters[DAOS_F_SLOWEST_RANK_TIME] = 0.0; + break; + } + if (init_flag || + daos_slowest_time > agg_daos_rec->fcounters[DAOS_F_SLOWEST_RANK_TIME]) { + /* The incoming record wins if a) this is the first + * record we are aggregating or b) it is the slowest + * record we have seen so far. 
+             */
+            agg_daos_rec->counters[DAOS_SLOWEST_RANK]
+                = daos_slowest_rank;
+            agg_daos_rec->counters[DAOS_SLOWEST_RANK_BYTES]
+                = daos_slowest_bytes;
+            agg_daos_rec->fcounters[DAOS_F_SLOWEST_RANK_TIME]
+                = daos_slowest_time;
+            }
+            break;
+        }
+    }
+
+    return;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ * End:
+ *
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/darshan-util/darshan-daos-logutils.h b/darshan-util/darshan-daos-logutils.h
new file mode 100644
index 000000000..3bc56f60c
--- /dev/null
+++ b/darshan-util/darshan-daos-logutils.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2020 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#ifndef __DARSHAN_DAOS_LOG_UTILS_H
+#define __DARSHAN_DAOS_LOG_UTILS_H
+
+extern char *daos_counter_names[];
+extern char *daos_f_counter_names[];
+
+extern struct darshan_mod_logutil_funcs daos_logutils;
+
+#endif
diff --git a/darshan-util/darshan-dfs-logutils.c b/darshan-util/darshan-dfs-logutils.c
new file mode 100644
index 000000000..3ae948cbc
--- /dev/null
+++ b/darshan-util/darshan-dfs-logutils.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright (C) 2020 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ */
+
+#define _GNU_SOURCE
+#include "darshan-util-config.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <search.h>
+#include <assert.h>
+#include <inttypes.h>
+
+#include "darshan-logutils.h"
+
+#ifdef HAVE_LIBUUID
+#include <uuid/uuid.h>
+#endif
+
+/* counter name strings for the DFS module */
+#define X(a) #a,
+char *dfs_counter_names[] = {
+    DFS_COUNTERS
+};
+
+char *dfs_f_counter_names[] = {
+    DFS_F_COUNTERS
+};
+#undef X
+
+static int darshan_log_get_dfs_file(darshan_fd fd, void** dfs_buf_p);
+static int darshan_log_put_dfs_file(darshan_fd fd, void* dfs_buf);
+static void darshan_log_print_dfs_file(void *file_rec,
+    char *file_name, char *mnt_pt, char *fs_type);
+static void darshan_log_print_dfs_description(int ver);
+static void darshan_log_print_dfs_file_diff(void *file_rec1, char *file_name1,
+    void *file_rec2, char *file_name2);
+static void darshan_log_agg_dfs_files(void *rec, void *agg_rec, int init_flag);
+static int darshan_log_sizeof_dfs_file(void* dfs_buf_p);
+static int darshan_log_record_metrics_dfs_file(void* dfs_buf_p,
+    uint64_t* rec_id,
+    int64_t* r_bytes,
+    int64_t* w_bytes,
+    int64_t* max_offset,
+    double* io_total_time,
+    double* md_only_time,
+    double* rw_only_time,
+    int64_t* rank,
+    int64_t* nprocs);
+
+struct darshan_mod_logutil_funcs dfs_logutils =
+{
+    .log_get_record = &darshan_log_get_dfs_file,
+    .log_put_record = &darshan_log_put_dfs_file,
+    .log_print_record = &darshan_log_print_dfs_file,
+    .log_print_description = &darshan_log_print_dfs_description,
+    .log_print_diff = &darshan_log_print_dfs_file_diff,
+    .log_agg_records = &darshan_log_agg_dfs_files,
+    .log_sizeof_record = &darshan_log_sizeof_dfs_file,
+    .log_record_metrics = &darshan_log_record_metrics_dfs_file
+};
+
+static int darshan_log_sizeof_dfs_file(void* dfs_buf_p)
+{
+    /* dfs records have a fixed size */
+    return(sizeof(struct darshan_dfs_file));
+}
+
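Because `log_sizeof_record` reports a fixed size for this module, the number of DFS records in a log can be derived directly from the module region's length; a hedged sketch using fields already referenced in this patch:

    /* log_sizeof_record ignores its argument for fixed-size modules */
    int rec_size = dfs_logutils.log_sizeof_record(NULL);
    int nrecs = fd->mod_map[DARSHAN_DFS_MOD].len / rec_size;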
+static int darshan_log_record_metrics_dfs_file(void* dfs_buf_p,
+    uint64_t* rec_id,
+    int64_t* r_bytes,
+    int64_t* w_bytes,
+    int64_t* max_offset,
+    double* io_total_time,
+    double* md_only_time,
+    double* rw_only_time,
+    int64_t* rank,
+    int64_t* nprocs)
+{
+    struct darshan_dfs_file *dfs_rec = (struct darshan_dfs_file *)dfs_buf_p;
+
+    *rec_id = dfs_rec->base_rec.id;
+    *r_bytes = dfs_rec->counters[DFS_BYTES_READ];
+    *w_bytes = dfs_rec->counters[DFS_BYTES_WRITTEN];
+
+    /* the dfs module doesn't report this */
+    *max_offset = -1;
+
+    *rank = dfs_rec->base_rec.rank;
+    /* nprocs is 1 per record, unless rank is negative, in which case we
+     * report -1 as the nprocs value to represent "all"
+     */
+    if(dfs_rec->base_rec.rank < 0)
+        *nprocs = -1;
+    else
+        *nprocs = 1;
+
+    if(dfs_rec->base_rec.rank < 0) {
+        /* shared file records populate a counter with the slowest rank time
+         * (derived during reduction). They do not have a breakdown of meta
+         * and rw time, though.
+         */
+        *io_total_time = dfs_rec->fcounters[DFS_F_SLOWEST_RANK_TIME];
+        *md_only_time = 0;
+        *rw_only_time = 0;
+    }
+    else {
+        /* non-shared records have separate meta, read, and write values
+         * that we can combine as needed
+         */
+        *io_total_time = dfs_rec->fcounters[DFS_F_META_TIME] +
+                         dfs_rec->fcounters[DFS_F_READ_TIME] +
+                         dfs_rec->fcounters[DFS_F_WRITE_TIME];
+        *md_only_time = dfs_rec->fcounters[DFS_F_META_TIME];
+        *rw_only_time = dfs_rec->fcounters[DFS_F_READ_TIME] +
+                        dfs_rec->fcounters[DFS_F_WRITE_TIME];
+    }
+
+    return(0);
+}
+
+static int darshan_log_get_dfs_file(darshan_fd fd, void** dfs_buf_p)
+{
+    struct darshan_dfs_file *file = *((struct darshan_dfs_file **)dfs_buf_p);
+    int rec_len;
+    int i;
+    int ret = -1;
+
+    if(fd->mod_map[DARSHAN_DFS_MOD].len == 0)
+        return(0);
+
+    if(fd->mod_ver[DARSHAN_DFS_MOD] == 0 ||
+        fd->mod_ver[DARSHAN_DFS_MOD] > DARSHAN_DFS_VER)
+    {
+        fprintf(stderr, "Error: Invalid DFS module version number (got %d)\n",
+            fd->mod_ver[DARSHAN_DFS_MOD]);
+        return(-1);
+    }
+
+    if(*dfs_buf_p == NULL)
+    {
+        file = malloc(sizeof(*file));
+        if(!file)
+            return(-1);
+    }
+
+    if(fd->mod_ver[DARSHAN_DFS_MOD] == DARSHAN_DFS_VER)
+    {
+        /* log format is in current version, so we don't need to do any
+         * translation of counters while reading
+         */
+        rec_len = sizeof(struct darshan_dfs_file);
+        ret = darshan_log_get_mod(fd, DARSHAN_DFS_MOD, file, rec_len);
+    }
+    else
+    {
+        assert(0);
+    }
+
+    if(*dfs_buf_p == NULL)
+    {
+        if(ret == rec_len)
+            *dfs_buf_p = file;
+        else
+            free(file);
+    }
+
+    if(ret < 0)
+        return(-1);
+    else if(ret < rec_len)
+        return(0);
+    else
+    {
+        /* if the read was successful, do any necessary byte-swapping */
+        if(fd->swap_flag)
+        {
+            DARSHAN_BSWAP64(&file->base_rec.id);
+            DARSHAN_BSWAP64(&file->base_rec.rank);
+            for(i=0; i<DFS_NUM_INDICES; i++)
+                DARSHAN_BSWAP64(&file->counters[i]);
+            for(i=0; i<DFS_F_NUM_INDICES; i++)
+                DARSHAN_BSWAP64(&file->fcounters[i]);
+            DARSHAN_BSWAP128(&file->pool_uuid);
+            DARSHAN_BSWAP128(&file->cont_uuid);
+        }
+
+        return(1);
+    }
+}
+
+static int darshan_log_put_dfs_file(darshan_fd fd, void* dfs_buf)
+{
+    struct darshan_dfs_file *file = (struct darshan_dfs_file *)dfs_buf;
+    int ret;
+
+    ret = darshan_log_put_mod(fd, DARSHAN_DFS_MOD, file,
+        sizeof(struct darshan_dfs_file), DARSHAN_DFS_VER);
+    if(ret < 0)
+        return(-1);
+
+    return(0);
+}
+
+static void darshan_log_print_dfs_file(void *file_rec, char *file_name,
+    char *mnt_pt, char *fs_type)
+{
+    int i;
+    struct darshan_dfs_file *dfs_file_rec =
+        (struct darshan_dfs_file *)file_rec;
+    char pool_cont_uuid_str[128] = {0};
+
+#ifdef HAVE_LIBUUID
+    if(!uuid_is_null(dfs_file_rec->pool_uuid) && !uuid_is_null(dfs_file_rec->cont_uuid))
+    {
+        uuid_unparse(dfs_file_rec->pool_uuid, pool_cont_uuid_str);
+        strcat(pool_cont_uuid_str, ":");
+        uuid_unparse(dfs_file_rec->cont_uuid, pool_cont_uuid_str+strlen(pool_cont_uuid_str));
+    }
+    else
+        strcat(pool_cont_uuid_str, "UNKNOWN");
+#else
+    strcat(pool_cont_uuid_str, "N/A");
+#endif
+
+    mnt_pt = pool_cont_uuid_str;
+    fs_type = "N/A";
+
+    for(i=0; i<DFS_NUM_INDICES; i++)
+    {
+        DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+            dfs_file_rec->base_rec.rank, dfs_file_rec->base_rec.id,
+            dfs_counter_names[i], dfs_file_rec->counters[i],
+            file_name, mnt_pt, fs_type);
+    }
+
+    for(i=0; i<DFS_F_NUM_INDICES; i++)
+    {
+        DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+            dfs_file_rec->base_rec.rank, dfs_file_rec->base_rec.id,
+            dfs_f_counter_names[i], dfs_file_rec->fcounters[i],
+            file_name, mnt_pt, fs_type);
+    }
+
+    return;
+}
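The "pool:container" label built by the printer above is ordinary libuuid usage; as a standalone illustration (the helper name here is ours, not part of the patch):

    #include <string.h>
    #include <uuid/uuid.h>

    /* format two raw 16-byte UUIDs as "pool-uuid:cont-uuid" */
    static void format_pool_cont(const unsigned char pool[16],
        const unsigned char cont[16], char out[80])
    {
        uuid_unparse(pool, out);                /* 36 chars + NUL */
        strcat(out, ":");
        uuid_unparse(cont, out + strlen(out));
    }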
+
+static void darshan_log_print_dfs_description(int ver)
+{
+    printf("\n# description of DFS counters:\n");
+    printf("# DFS_*: DFS operation counts.\n");
+    printf("# OPENS,GLOBAL_OPENS,LOOKUPS,DUPS,READS,READXS,WRITES,WRITEXS,GET_SIZES,PUNCHES,REMOVES,STATS are types of operations.\n");
+    printf("# DFS_BYTES_*: total bytes read and written.\n");
+    printf("# DFS_RW_SWITCHES: number of times access alternated between read and write.\n");
+    printf("# DFS_MAX_*_TIME_SIZE: size of the slowest read and write operations.\n");
+    printf("# DFS_SIZE_*_*: histogram of read and write access sizes.\n");
+    printf("# DFS_ACCESS*_ACCESS: the four most common access sizes.\n");
+    printf("# DFS_ACCESS*_COUNT: count of the four most common access sizes.\n");
+    printf("# DFS_CHUNK_SIZE: DFS file chunk size.\n");
+    printf("# DFS_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).\n");
+    printf("# DFS_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).\n");
+    printf("# DFS_F_*_START_TIMESTAMP: timestamp of first open/read/write/close.\n");
+    printf("# DFS_F_*_END_TIMESTAMP: timestamp of last open/read/write/close.\n");
+    printf("# DFS_F_READ/WRITE/META_TIME: cumulative time spent in read, write, or metadata operations.\n");
+    printf("# DFS_F_MAX_*_TIME: duration of the slowest read and write operations.\n");
+    printf("# DFS_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).\n");
+
+    return;
+}
+
+static void darshan_log_print_dfs_file_diff(void *file_rec1, char *file_name1,
+    void *file_rec2, char *file_name2)
+{
+    struct darshan_dfs_file *file1 = (struct darshan_dfs_file *)file_rec1;
+    struct darshan_dfs_file *file2 = (struct darshan_dfs_file *)file_rec2;
+    int i;
+
+    /* NOTE: we assume that both input records are the same module format version */
+
+    for(i=0; i<DFS_NUM_INDICES; i++)
+    {
+        if(!file2)
+        {
+            printf("- ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file1->base_rec.rank, file1->base_rec.id, dfs_counter_names[i],
+                file1->counters[i], file_name1, "", "");
+        }
+        else if(!file1)
+        {
+            printf("+ ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file2->base_rec.rank, file2->base_rec.id, dfs_counter_names[i],
+                file2->counters[i], file_name2, "", "");
+        }
+        else if(file1->counters[i] != file2->counters[i])
+        {
+            printf("- ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file1->base_rec.rank, file1->base_rec.id, dfs_counter_names[i],
+                file1->counters[i], file_name1, "", "");
+            printf("+ ");
+            DARSHAN_D_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file2->base_rec.rank, file2->base_rec.id, dfs_counter_names[i],
+                file2->counters[i], file_name2, "", "");
+        }
+    }
+
+    for(i=0; i<DFS_F_NUM_INDICES; i++)
+    {
+        if(!file2)
+        {
+            printf("- ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file1->base_rec.rank, file1->base_rec.id, dfs_f_counter_names[i],
+                file1->fcounters[i], file_name1, "", "");
+        }
+        else if(!file1)
+        {
+            printf("+ ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file2->base_rec.rank, file2->base_rec.id, dfs_f_counter_names[i],
+                file2->fcounters[i], file_name2, "", "");
+        }
+        else if(file1->fcounters[i] != file2->fcounters[i])
+        {
+            printf("- ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file1->base_rec.rank, file1->base_rec.id, dfs_f_counter_names[i],
+                file1->fcounters[i], file_name1, "", "");
+            printf("+ ");
+            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_DFS_MOD],
+                file2->base_rec.rank, file2->base_rec.id, dfs_f_counter_names[i],
+                file2->fcounters[i], file_name2, "", "");
+        }
+    }
+
+    return;
+}
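Worked example for the ACCESS1-4 merge performed by the aggregator below: if the aggregate so far holds common access sizes {1 MiB x6, 64 KiB x2} and the incoming record holds {1 MiB x3, 4 KiB x5}, the duplicate 1 MiB entries are first collapsed to x9, and the remaining sizes are then re-inserted in descending count order, yielding {1 MiB x9, 4 KiB x5, 64 KiB x2}.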
+
+static void darshan_log_agg_dfs_files(void *rec, void *agg_rec, int init_flag)
+{
+    struct darshan_dfs_file *dfs_rec = (struct darshan_dfs_file *)rec;
+    struct darshan_dfs_file *agg_dfs_rec = (struct darshan_dfs_file *)agg_rec;
+    int i, j, k;
+    int total_count;
+    int64_t tmp_val[4];
+    int64_t tmp_cnt[4];
+    int duplicate_mask[4] = {0};
+    int tmp_ndx;
+    int64_t dfs_fastest_rank, dfs_slowest_rank,
+        dfs_fastest_bytes, dfs_slowest_bytes;
+    double dfs_fastest_time, dfs_slowest_time;
+    int shared_file_flag = 0;
+
+    /* For the incoming record, we need to determine what values to use for
+     * subsequent comparison against the aggregate record's fastest and
+     * slowest fields. This is complicated by the fact that shared file
+     * records already have derived values, while unique file records do
+     * not. Handle both cases here so that this function can be generic.
+     */
+    if(dfs_rec->base_rec.rank == -1)
+    {
+        /* shared files should have pre-calculated fastest and slowest
+         * counters */
+        dfs_fastest_rank = dfs_rec->counters[DFS_FASTEST_RANK];
+        dfs_slowest_rank = dfs_rec->counters[DFS_SLOWEST_RANK];
+        dfs_fastest_bytes = dfs_rec->counters[DFS_FASTEST_RANK_BYTES];
+        dfs_slowest_bytes = dfs_rec->counters[DFS_SLOWEST_RANK_BYTES];
+        dfs_fastest_time = dfs_rec->fcounters[DFS_F_FASTEST_RANK_TIME];
+        dfs_slowest_time = dfs_rec->fcounters[DFS_F_SLOWEST_RANK_TIME];
+    }
+    else
+    {
+        /* for non-shared files, derive bytes and time using data from this
+         * rank
+         */
+        dfs_fastest_rank = dfs_rec->base_rec.rank;
+        dfs_slowest_rank = dfs_fastest_rank;
+        dfs_fastest_bytes = dfs_rec->counters[DFS_BYTES_READ] +
+            dfs_rec->counters[DFS_BYTES_WRITTEN];
+        dfs_slowest_bytes = dfs_fastest_bytes;
+        dfs_fastest_time = dfs_rec->fcounters[DFS_F_READ_TIME] +
+            dfs_rec->fcounters[DFS_F_WRITE_TIME] +
+            dfs_rec->fcounters[DFS_F_META_TIME];
+        dfs_slowest_time = dfs_fastest_time;
+    }
+
+    /* if this is our first record, store base id and rank */
+    if(init_flag)
+    {
+        agg_dfs_rec->base_rec.rank = dfs_rec->base_rec.rank;
+        agg_dfs_rec->base_rec.id = dfs_rec->base_rec.id;
+    }
+
+    /* so far do all of the records reference the same file? */
+    if(agg_dfs_rec->base_rec.id == dfs_rec->base_rec.id)
+        shared_file_flag = 1;
+    else
+        agg_dfs_rec->base_rec.id = 0;
+
+    /* so far do all of the records reference the same rank?
*/ + if(agg_dfs_rec->base_rec.rank != dfs_rec->base_rec.rank) + agg_dfs_rec->base_rec.rank = -1; + + for(i = 0; i < DFS_NUM_INDICES; i++) + { + switch(i) + { + case DFS_OPENS: + case DFS_GLOBAL_OPENS: + case DFS_LOOKUPS: + case DFS_DUPS: + case DFS_READS: + case DFS_READXS: + case DFS_WRITES: + case DFS_WRITEXS: + case DFS_NB_READS: + case DFS_NB_WRITES: + case DFS_GET_SIZES: + case DFS_PUNCHES: + case DFS_REMOVES: + case DFS_STATS: + case DFS_BYTES_READ: + case DFS_BYTES_WRITTEN: + case DFS_RW_SWITCHES: + case DFS_SIZE_READ_0_100: + case DFS_SIZE_READ_100_1K: + case DFS_SIZE_READ_1K_10K: + case DFS_SIZE_READ_10K_100K: + case DFS_SIZE_READ_100K_1M: + case DFS_SIZE_READ_1M_4M: + case DFS_SIZE_READ_4M_10M: + case DFS_SIZE_READ_10M_100M: + case DFS_SIZE_READ_100M_1G: + case DFS_SIZE_READ_1G_PLUS: + case DFS_SIZE_WRITE_0_100: + case DFS_SIZE_WRITE_100_1K: + case DFS_SIZE_WRITE_1K_10K: + case DFS_SIZE_WRITE_10K_100K: + case DFS_SIZE_WRITE_100K_1M: + case DFS_SIZE_WRITE_1M_4M: + case DFS_SIZE_WRITE_4M_10M: + case DFS_SIZE_WRITE_10M_100M: + case DFS_SIZE_WRITE_100M_1G: + case DFS_SIZE_WRITE_1G_PLUS: + /* sum */ + agg_dfs_rec->counters[i] += dfs_rec->counters[i]; + if(agg_dfs_rec->counters[i] < 0) /* make sure invalid counters are -1 exactly */ + agg_dfs_rec->counters[i] = -1; + break; + case DFS_CHUNK_SIZE: + /* just set to the input value */ + agg_dfs_rec->counters[i] = dfs_rec->counters[i]; + break; + case DFS_MAX_READ_TIME_SIZE: + case DFS_MAX_WRITE_TIME_SIZE: + case DFS_FASTEST_RANK: + case DFS_FASTEST_RANK_BYTES: + case DFS_SLOWEST_RANK: + case DFS_SLOWEST_RANK_BYTES: + /* these are set with the FP counters */ + break; + case DFS_ACCESS1_ACCESS: + /* increment common value counters */ + + /* first, collapse duplicates */ + for(j = i; j < i + 4; j++) + { + for(k = 0; k < 4; k++) + { + if(agg_dfs_rec->counters[i + k] == dfs_rec->counters[j]) + { + agg_dfs_rec->counters[i + k + 4] += dfs_rec->counters[j + 4]; + /* flag that we should ignore this one now */ + duplicate_mask[j-i] = 1; + } + } + } + + /* second, add new counters */ + for(j = i; j < i + 4; j++) + { + /* skip any that were handled above already */ + if(duplicate_mask[j-i]) + continue; + tmp_ndx = 0; + memset(tmp_val, 0, 4 * sizeof(int64_t)); + memset(tmp_cnt, 0, 4 * sizeof(int64_t)); + + if(dfs_rec->counters[j] == 0) break; + for(k = 0; k < 4; k++) + { + if(agg_dfs_rec->counters[i + k] == dfs_rec->counters[j]) + { + total_count = agg_dfs_rec->counters[i + k + 4] + + dfs_rec->counters[j + 4]; + break; + } + } + if(k == 4) total_count = dfs_rec->counters[j + 4]; + + for(k = 0; k < 4; k++) + { + if((agg_dfs_rec->counters[i + k + 4] > total_count) || + ((agg_dfs_rec->counters[i + k + 4] == total_count) && + (agg_dfs_rec->counters[i + k] > dfs_rec->counters[j]))) + { + tmp_val[tmp_ndx] = agg_dfs_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_dfs_rec->counters[i + k + 4]; + tmp_ndx++; + } + else break; + } + if(tmp_ndx == 4) break; + + tmp_val[tmp_ndx] = dfs_rec->counters[j]; + tmp_cnt[tmp_ndx] = dfs_rec->counters[j + 4]; + tmp_ndx++; + + while(tmp_ndx != 4) + { + if(agg_dfs_rec->counters[i + k] != dfs_rec->counters[j]) + { + tmp_val[tmp_ndx] = agg_dfs_rec->counters[i + k]; + tmp_cnt[tmp_ndx] = agg_dfs_rec->counters[i + k + 4]; + tmp_ndx++; + } + k++; + } + memcpy(&(agg_dfs_rec->counters[i]), tmp_val, 4 * sizeof(int64_t)); + memcpy(&(agg_dfs_rec->counters[i + 4]), tmp_cnt, 4 * sizeof(int64_t)); + } + break; + case DFS_ACCESS2_ACCESS: + case DFS_ACCESS3_ACCESS: + case DFS_ACCESS4_ACCESS: + case DFS_ACCESS1_COUNT: + case DFS_ACCESS2_COUNT: 
+ case DFS_ACCESS3_COUNT: + case DFS_ACCESS4_COUNT: + /* these are set all at once with common counters above */ + break; + } + } + + for(i = 0; i < DFS_F_NUM_INDICES; i++) + { + switch(i) + { + case DFS_F_READ_TIME: + case DFS_F_WRITE_TIME: + case DFS_F_META_TIME: + /* sum */ + agg_dfs_rec->fcounters[i] += dfs_rec->fcounters[i]; + break; + case DFS_F_OPEN_START_TIMESTAMP: + case DFS_F_READ_START_TIMESTAMP: + case DFS_F_WRITE_START_TIMESTAMP: + case DFS_F_CLOSE_START_TIMESTAMP: + /* minimum non-zero */ + if((dfs_rec->fcounters[i] > 0) && + ((agg_dfs_rec->fcounters[i] == 0) || + (dfs_rec->fcounters[i] < agg_dfs_rec->fcounters[i]))) + { + agg_dfs_rec->fcounters[i] = dfs_rec->fcounters[i]; + } + break; + case DFS_F_OPEN_END_TIMESTAMP: + case DFS_F_READ_END_TIMESTAMP: + case DFS_F_WRITE_END_TIMESTAMP: + case DFS_F_CLOSE_END_TIMESTAMP: + /* maximum */ + if(dfs_rec->fcounters[i] > agg_dfs_rec->fcounters[i]) + { + agg_dfs_rec->fcounters[i] = dfs_rec->fcounters[i]; + } + break; + case DFS_F_MAX_READ_TIME: + if(dfs_rec->fcounters[i] > agg_dfs_rec->fcounters[i]) + { + agg_dfs_rec->fcounters[i] = dfs_rec->fcounters[i]; + agg_dfs_rec->counters[DFS_MAX_READ_TIME_SIZE] = + dfs_rec->counters[DFS_MAX_READ_TIME_SIZE]; + } + break; + case DFS_F_MAX_WRITE_TIME: + if(dfs_rec->fcounters[i] > agg_dfs_rec->fcounters[i]) + { + agg_dfs_rec->fcounters[i] = dfs_rec->fcounters[i]; + agg_dfs_rec->counters[DFS_MAX_WRITE_TIME_SIZE] = + dfs_rec->counters[DFS_MAX_WRITE_TIME_SIZE]; + } + break; + case DFS_F_FASTEST_RANK_TIME: + + if(!shared_file_flag) + { + /* The fastest counters are only valid under these + * conditions when aggregating records that all refer to + * the same file. + */ + agg_dfs_rec->counters[DFS_FASTEST_RANK] = -1; + agg_dfs_rec->counters[DFS_FASTEST_RANK_BYTES] = -1; + agg_dfs_rec->fcounters[DFS_F_FASTEST_RANK_TIME] = 0.0; + break; + } + if (init_flag || + dfs_fastest_time < agg_dfs_rec->fcounters[DFS_F_FASTEST_RANK_TIME]) { + /* The incoming record wins if a) this is the first + * record we are aggregating or b) it is the fastest + * record we have seen so far. + */ + agg_dfs_rec->counters[DFS_FASTEST_RANK] + = dfs_fastest_rank; + agg_dfs_rec->counters[DFS_FASTEST_RANK_BYTES] + = dfs_fastest_bytes; + agg_dfs_rec->fcounters[DFS_F_FASTEST_RANK_TIME] + = dfs_fastest_time; + } + break; + case DFS_F_SLOWEST_RANK_TIME: + if(!shared_file_flag) + { + /* The slowest counters are only valid under these + * conditions when aggregating records that all refer to + * the same file. + */ + agg_dfs_rec->counters[DFS_SLOWEST_RANK] = -1; + agg_dfs_rec->counters[DFS_SLOWEST_RANK_BYTES] = -1; + agg_dfs_rec->fcounters[DFS_F_SLOWEST_RANK_TIME] = 0.0; + break; + } + if (init_flag || + dfs_slowest_time > agg_dfs_rec->fcounters[DFS_F_SLOWEST_RANK_TIME]) { + /* The incoming record wins if a) this is the first + * record we are aggregating or b) it is the slowest + * record we have seen so far. + */ + agg_dfs_rec->counters[DFS_SLOWEST_RANK] + = dfs_slowest_rank; + agg_dfs_rec->counters[DFS_SLOWEST_RANK_BYTES] + = dfs_slowest_bytes; + agg_dfs_rec->fcounters[DFS_F_SLOWEST_RANK_TIME] + = dfs_slowest_time; + } + break; + } + } + + return; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/darshan-util/darshan-dfs-logutils.h b/darshan-util/darshan-dfs-logutils.h new file mode 100644 index 000000000..1fd185084 --- /dev/null +++ b/darshan-util/darshan-dfs-logutils.h @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2020 University of Chicago. 
+ * See COPYRIGHT notice in top-level directory. + * + */ + +#ifndef __DARSHAN_DFS_LOG_UTILS_H +#define __DARSHAN_DFS_LOG_UTILS_H + +extern char *dfs_counter_names[]; +extern char *dfs_f_counter_names[]; + +extern struct darshan_mod_logutil_funcs dfs_logutils; + +#endif diff --git a/darshan-util/darshan-logutils.c b/darshan-util/darshan-logutils.c index 46073c635..438046339 100644 --- a/darshan-util/darshan-logutils.c +++ b/darshan-util/darshan-logutils.c @@ -1007,7 +1007,7 @@ static int darshan_log_get_namerecs(void *name_rec_buf, int buf_len, * to handle incomplete mappings temporarily here */ name_rec = (struct darshan_name_record *)name_rec_buf; - while(buf_len > sizeof(darshan_record_id) + 1) + while(buf_len >= sizeof(darshan_record_id) + 1) { if(strnlen(name_rec->name, buf_len - sizeof(darshan_record_id)) == (buf_len - sizeof(darshan_record_id))) @@ -1046,6 +1046,19 @@ static int darshan_log_get_namerecs(void *name_rec_buf, int buf_len, /* add this record to the hash */ HASH_ADD(hlink, *hash, name_record->id, sizeof(darshan_record_id), ref); } + else if(ref && !strlen(ref->name_record->name) && strlen(name_rec->name) > 0) + { + free(ref->name_record); + ref->name_record = malloc(rec_len); + if(!ref->name_record) + { + free(ref); + return(-1); + } + + /* copy the name record over from the hash buffer */ + memcpy(ref->name_record, name_rec, rec_len); + } tmp_p = (char *)name_rec + rec_len; name_rec = (struct darshan_name_record *)tmp_p; diff --git a/darshan-util/darshan-logutils.h b/darshan-util/darshan-logutils.h index ee4033e11..529ec5856 100644 --- a/darshan-util/darshan-logutils.h +++ b/darshan-util/darshan-logutils.h @@ -183,10 +183,12 @@ extern struct darshan_mod_logutil_funcs *mod_logutils[]; #include "darshan-lustre-logutils.h" #include "darshan-stdio-logutils.h" #include "darshan-heatmap-logutils.h" +#include "darshan-mdhim-logutils.h" +#include "darshan-dfs-logutils.h" +#include "darshan-daos-logutils.h" /* DXT */ #include "darshan-dxt-logutils.h" -#include "darshan-mdhim-logutils.h" #ifdef DARSHAN_USE_APXC #include "darshan-apxc-logutils.h" @@ -275,6 +277,27 @@ void darshan_free(void *ptr); } while(0) /* naive byte swap implementation */ +#define DARSHAN_BSWAP128(__ptr) do {\ + char __dst_char[16]; \ + char* __src_char = (char*)__ptr; \ + __dst_char[0] = __src_char[15]; \ + __dst_char[1] = __src_char[14]; \ + __dst_char[2] = __src_char[13]; \ + __dst_char[3] = __src_char[12]; \ + __dst_char[4] = __src_char[11]; \ + __dst_char[5] = __src_char[10]; \ + __dst_char[6] = __src_char[9]; \ + __dst_char[7] = __src_char[8]; \ + __dst_char[8] = __src_char[7]; \ + __dst_char[9] = __src_char[6]; \ + __dst_char[10] = __src_char[5]; \ + __dst_char[11] = __src_char[4]; \ + __dst_char[12] = __src_char[3]; \ + __dst_char[13] = __src_char[2]; \ + __dst_char[14] = __src_char[1]; \ + __dst_char[15] = __src_char[0]; \ + memcpy(__ptr, __dst_char, 16); \ +} while(0) #define DARSHAN_BSWAP64(__ptr) do {\ char __dst_char[8]; \ char* __src_char = (char*)__ptr; \ diff --git a/darshan-util/darshan-parser.c b/darshan-util/darshan-parser.c index 83c97eefc..bdeeca4f5 100644 --- a/darshan-util/darshan-parser.c +++ b/darshan-util/darshan-parser.c @@ -51,6 +51,8 @@ void posix_print_total_file(struct darshan_posix_file *pfile, int posix_ver); void mpiio_print_total_file(struct darshan_mpiio_file *mfile, int mpiio_ver); void stdio_print_total_file(struct darshan_stdio_file *pfile, int stdio_ver); +void dfs_print_total_file(struct darshan_dfs_file *pfile, int dfs_ver); +void daos_print_total_file(struct 
darshan_daos_object *pfile, int daos_ver); int usage (char *exename) { @@ -318,11 +320,12 @@ int main(int argc, char **argv) /* always ignore DXT modules -- those have a standalone parsing utility */ else if (i == DXT_POSIX_MOD || i == DXT_MPIIO_MOD) continue; - /* currently only POSIX, MPIIO, and STDIO modules support non-base + /* currently only POSIX, MPIIO, STDIO, and DAOS modules support non-base * parsing */ else if((i != DARSHAN_POSIX_MOD) && (i != DARSHAN_MPIIO_MOD) && - (i != DARSHAN_STDIO_MOD) && !(mask & OPTION_BASE)) + (i != DARSHAN_STDIO_MOD) && (i != DARSHAN_DFS_MOD) && + (i != DARSHAN_DAOS_MOD) && !(mask & OPTION_BASE)) continue; /* this module has data to be parsed and printed */ @@ -452,10 +455,11 @@ int main(int argc, char **argv) if(acc) darshan_accumulator_emit(acc, &metrics, mod_buf); - /* we calculate more detailed stats for POSIX and MPI-IO modules, + /* we calculate more detailed stats for POSIX, MPI-IO, STDIO, and DAOS modules, * if the parser is executed with more than the base option */ - if(i != DARSHAN_POSIX_MOD && i != DARSHAN_MPIIO_MOD && i != DARSHAN_STDIO_MOD) + if(i != DARSHAN_POSIX_MOD && i != DARSHAN_MPIIO_MOD && i != DARSHAN_STDIO_MOD && + i != DARSHAN_DFS_MOD && i != DARSHAN_DAOS_MOD) continue; /* Total Calc */ @@ -473,6 +477,14 @@ int main(int argc, char **argv) { stdio_print_total_file((struct darshan_stdio_file*)mod_buf, fd->mod_ver[i]); } + else if(i == DARSHAN_DFS_MOD) + { + dfs_print_total_file((struct darshan_dfs_file*)mod_buf, fd->mod_ver[i]); + } + else if(i == DARSHAN_DAOS_MOD) + { + daos_print_total_file((struct darshan_daos_object*)mod_buf, fd->mod_ver[i]); + } } /* File Calc */ @@ -637,6 +649,44 @@ void mpiio_print_total_file(struct darshan_mpiio_file *mfile, int mpiio_ver) return; } +void dfs_print_total_file(struct darshan_dfs_file *pfile, int dfs_ver) +{ + int i; + + mod_logutils[DARSHAN_DFS_MOD]->log_print_description(dfs_ver); + printf("\n"); + for(i = 0; i < DFS_NUM_INDICES; i++) + { + printf("total_%s: %"PRId64"\n", + dfs_counter_names[i], pfile->counters[i]); + } + for(i = 0; i < DFS_F_NUM_INDICES; i++) + { + printf("total_%s: %lf\n", + dfs_f_counter_names[i], pfile->fcounters[i]); + } + return; +} + +void daos_print_total_file(struct darshan_daos_object *pfile, int daos_ver) +{ + int i; + + mod_logutils[DARSHAN_DAOS_MOD]->log_print_description(daos_ver); + printf("\n"); + for(i = 0; i < DAOS_NUM_INDICES; i++) + { + printf("total_%s: %"PRId64"\n", + daos_counter_names[i], pfile->counters[i]); + } + for(i = 0; i < DAOS_F_NUM_INDICES; i++) + { + printf("total_%s: %lf\n", + daos_f_counter_names[i], pfile->fcounters[i]); + } + return; +} + /* * Local variables: * c-indent-level: 4 diff --git a/darshan-util/doc/darshan-util.txt b/darshan-util/doc/darshan-util.txt index 44bf18eff..28aa82f1d 100644 --- a/darshan-util/doc/darshan-util.txt +++ b/darshan-util/doc/darshan-util.txt @@ -480,6 +480,121 @@ value of 1 MiB for optimal file alignment. 
| PNETCDF_VAR_FILE_REC_ID | Darshan file record ID of the file the variable belongs to |==== +.Lustre module (if enabled, for Lustre file systems) +[cols="40%,60%",options="header"] +|==== +| counter name | description +| LUSTRE_NUM_COMPONENTS | number of instrumented components in the Lustre layout +| LUSTRE_NUM_STRIPES | number of active stripes in the Lustre layout components +| LUSTRE_COMP*_STRIPE_SIZE | stripe size for this file layout component in bytes +| LUSTRE_COMP*_STRIPE_COUNT | number of OSTs over which the file layout component is striped +| LUSTRE_COMP*_STRIPE_PATTERN | pattern (e.g., raid0, mdt, overstriped) for this file layout component +| LUSTRE_COMP*_FLAGS | captured flags (e.g. init, prefwr, stale) for this file layout component +| LUSTRE_COMP*_EXT_START | starting file extent for this file layout component +| LUSTRE_COMP*_EXT_END | ending file extent for this file layout component (-1 means EOF) +| LUSTRE_COMP*_MIRROR_ID | mirror ID for this file layout component, if mirrors are enabled +| LUSTRE_COMP*_POOL_NAME | Lustre OST pool used for this file layout component +| LUSTRE_COMP*\_OST_ID_* | indices of OSTs over which this file layout component is striped +|==== + +.DFS (DAOS File System) module (if enabled) +[cols="40%,60%",options="header"] +|==== +| counter name | description +| DFS_OPENS | DFS file open operation counts +| DFS_GLOBAL_OPENS | DFS file global open operation (i.e., `dfs_obj_global2local()`) counts +| DFS_LOOKUPS | DFS file lookup operation counts +| DFS_DUPS | DFS file dup operation counts +| DFS_READS | DFS file read operation counts +| DFS_READXS | DFS non-contiguous file read operation counts +| DFS_WRITES | DFS file write operation counts +| DFS_WRITEXS | DFS non-contiguous file write operation counts +| DFS_NB_READS | DFS non-blocking file read operation counts (included in read/readx counts) +| DFS_NB_WRITES | DFS non-blocking file write operation counts (included in write/writex counts) +| DFS_GET_SIZES | DFS file get size operation counts +| DFS_PUNCHES | DFS file punch operation counts +| DFS_REMOVES | DFS file remove operation counts +| DFS_STATS | DFS file stat operation counts +| DFS_BYTES_READ | Total number of bytes that were read from the DFS file +| DFS_BYTES_WRITTEN | Total number of bytes that were written to the DFS file +| DFS_RW_SWITCHES | Number of times that access toggled between read and write in consecutive operations +| DFS_MAX_READ_TIME_SIZE | Size of the slowest DFS read operation +| DFS_MAX_WRITE_TIME_SIZE | Size of the slowest DFS write operation +| DFS_SIZE_READ_* | Histogram of read access sizes at DFS level +| DFS_SIZE_WRITE_* | Histogram of write access sizes at DFS level +| DFS_ACCESS[1-4]_ACCESS | 4 most common DFS access sizes +| DFS_ACCESS[1-4]_COUNT | Count of 4 most common DFS access sizes +| DFS_CHUNK_SIZE | DFS file chunk size +| DFS_FASTEST_RANK | The MPI rank with smallest time spent in DFS I/O (cumulative read, write, and meta times) +| DFS_FASTEST_RANK_BYTES | The number of bytes transferred by the rank with smallest time spent in DFS I/O (cumulative read, write, and meta times) +| DFS_SLOWEST_RANK | The MPI rank with largest time spent in DFS I/O (cumulative read, write, and meta times) +| DFS_SLOWEST_RANK_BYTES | The number of bytes transferred by the rank with the largest time spent in DFS I/O (cumulative read, write, and meta times) +| DFS_F_*_START_TIMESTAMP | Timestamp that the first DFS file open/read/write/close operation began +| DFS_F_*_END_TIMESTAMP | Timestamp that the last DFS file 
open/read/write/close operation ended +| DFS_F_READ_TIME | Cumulative time spent reading at the DFS level +| DFS_F_WRITE_TIME | Cumulative time spent writing at the DFS level +| DFS_F_META_TIME | Cumulative time spent in open, dup, lookup, get size, punch, release, remove, and stat at the DFS level +| DFS_F_MAX_READ_TIME | Duration of the slowest individual DFS read operation +| DFS_F_MAX_WRITE_TIME | Duration of the slowest individual DFS write operation +| DFS_F_FASTEST_RANK_TIME | The time of the rank which had the smallest amount of time spent in DFS I/O (cumulative read, write, and meta times) +| DFS_F_SLOWEST_RANK_TIME | The time of the rank which had the largest amount of time spent in DFS I/O (cumulative read, write, and meta times) +|==== + +.DAOS module (if enabled) +[cols="40%,60%",options="header"] +|==== +| counter name | description +| DAOS_OBJ_OPENS | DAOS object open operation counts +| DAOS_OBJ_FETCHES | DAOS object fetch operation counts +| DAOS_OBJ_UPDATES | DAOS object update operation counts +| DAOS_OBJ_PUNCHES | DAOS object punch operation counts +| DAOS_OBJ_DKEY_PUNCHES | DAOS object dkey punch operation counts +| DAOS_OBJ_AKEY_PUNCHES | DAOS object akey punch operation counts +| DAOS_OBJ_DKEY_LISTS | DAOS object dkey list operation counts +| DAOS_OBJ_AKEY_LISTS | DAOS object akey list operation counts +| DAOS_OBJ_RECX_LISTS | DAOS object recx list operation counts +| DAOS_ARRAY_OPENS | DAOS array object open operation counts +| DAOS_ARRAY_READS | DAOS array object read operation counts +| DAOS_ARRAY_WRITES | DAOS array object write operation counts +| DAOS_ARRAY_GET_SIZES | DAOS array object get size operation counts +| DAOS_ARRAY_SET_SIZES | DAOS array object set size operation counts +| DAOS_ARRAY_STATS | DAOS array object stat operation counts +| DAOS_ARRAY_PUNCHES | DAOS array object punch operation counts +| DAOS_ARRAY_DESTROYS | DAOS array object destroy operation counts +| DAOS_KV_OPENS | DAOS kv object open operation counts +| DAOS_KV_GETS | DAOS kv object get operation counts +| DAOS_KV_PUTS | DAOS kv object put operation counts +| DAOS_KV_REMOVES | DAOS kv object remove operation counts +| DAOS_KV_LISTS | DAOS kv object list operation counts +| DAOS_KV_DESTROYS | DAOS kv object destroy operation counts +| DAOS_NB_OPS | DAOS non-blocking I/O operations (includes reads, writes, and metadata operations) +| DAOS_BYTES_READ | Total number of bytes that were read from the DAOS object +| DAOS_BYTES_WRITTEN | Total number of bytes that were written to the DAOS object +| DAOS_RW_SWITCHES | Number of times that access toggled between read and write in consecutive operations +| DAOS_MAX_READ_TIME_SIZE | Size of the slowest DAOS read operation +| DAOS_MAX_WRITE_TIME_SIZE | Size of the slowest DAOS write operation +| DAOS_SIZE_READ_* | Histogram of read access sizes at DAOS level +| DAOS_SIZE_WRITE_* | Histogram of write access sizes at DAOS level +| DAOS_ACCESS[1-4]_ACCESS | 4 most common DAOS access sizes +| DAOS_ACCESS[1-4]_COUNT | Count of 4 most common DAOS access sizes +| DAOS_OBJ_OTYPE | DAOS object otype ID +| DAOS_ARRAY_CELL_SIZE | For DAOS array objects, the array cell size +| DAOS_ARRAY_CHUNK_SIZE | For DAOS array objects, the array chunk size +| DAOS_FASTEST_RANK | The MPI rank with smallest time spent in DAOS I/O (cumulative read, write, and meta times) +| DAOS_FASTEST_RANK_BYTES | The number of bytes transferred by the rank with smallest time spent in DAOS I/O (cumulative read, write, and meta times) +| DAOS_SLOWEST_RANK | The MPI rank with largest time 
spent in DAOS I/O (cumulative read, write, and meta times) +| DAOS_SLOWEST_RANK_BYTES | The number of bytes transferred by the rank with the largest time spent in DAOS I/O (cumulative read, write, and meta times) +| DAOS_F_*_START_TIMESTAMP | Timestamp that the first DAOS object open/read/write/close operation began +| DAOS_F_*_END_TIMESTAMP | Timestamp that the last DAOS object open/read/write/close operation ended +| DAOS_F_READ_TIME | Cumulative time spent reading at the DAOS level +| DAOS_F_WRITE_TIME | Cumulative time spent writing at the DAOS level +| DAOS_F_META_TIME | Cumulative time spent in open, punch, list, get size, set size, stat, destroy, and remove at the DAOS level +| DAOS_F_MAX_READ_TIME | Duration of the slowest individual DAOS read operation +| DAOS_F_MAX_WRITE_TIME | Duration of the slowest individual DAOS write operation +| DAOS_F_FASTEST_RANK_TIME | The time of the rank which had the smallest amount of time spent in DAOS I/O (cumulative read, write, and meta times) +| DAOS_F_SLOWEST_RANK_TIME | The time of the rank which had the largest amount of time spent in DAOS I/O (cumulative read, write, and meta times) +|==== + ===== Heatmap fields Each heatmap module record reports a histogram of the number of bytes read @@ -505,23 +620,6 @@ execution time. ===== Additional modules -.Lustre module (if enabled, for Lustre file systems) -[cols="40%,60%",options="header"] -|==== -| counter name | description -| LUSTRE_NUM_COMPONENTS | number of instrumented components in the Lustre layout -| LUSTRE_NUM_STRIPES | number of active stripes in the Lustre layout components -| LUSTRE_COMP*_STRIPE_SIZE | stripe size for this file layout component in bytes -| LUSTRE_COMP*_STRIPE_COUNT | number of OSTs over which the file layout component is striped -| LUSTRE_COMP*_STRIPE_PATTERN | pattern (e.g., raid0, mdt, overstriped) for this file layout component -| LUSTRE_COMP*_FLAGS | captured flags (e.g. 
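The DFS and DAOS counter tables above map one-to-one onto the record fields that pydarshan exposes. As a quick orientation, here is a minimal sketch of pulling a few of those counters out of a log, using the same `DarshanReport` entry points the tests in this patch use; the log filename is hypothetical.

```python
import darshan

# Minimal sketch, assuming a log captured with the DFS/DAOS modules enabled;
# "ior-dfs.darshan" is a hypothetical filename.
with darshan.DarshanReport("ior-dfs.darshan") as report:
    report.mod_read_all_records("DFS")
    report.mod_read_all_records("DAOS")

    dfs = report.data['records']["DFS"].to_df()["counters"]
    daos = report.data['records']["DAOS"].to_df()["counters"]

    # a few of the counters documented in the tables above
    print(dfs[["DFS_OPENS", "DFS_READS", "DFS_WRITES", "DFS_BYTES_WRITTEN"]])
    print(daos[["DAOS_ARRAY_READS", "DAOS_ARRAY_WRITES", "DAOS_BYTES_READ"]])
```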
init, prefwr, stale) for this file layout component -| LUSTRE_COMP*_EXT_START | starting file extent for this file layout component -| LUSTRE_COMP*_EXT_END | ending file extent for this file layout component (-1 means EOF) -| LUSTRE_COMP*_MIRROR_ID | mirror ID for this file layout component, if mirrors are enabled -| LUSTRE_COMP*_POOL_NAME | Lustre OST pool used for this file layout component -| LUSTRE_COMP*\_OST_ID_* | indices of OSTs over which this file layout component is striped -|==== - .APXC module header record (if enabled, for Cray XC systems) [cols="40%,60%",options="header"] |==== diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 47634f6be..d0cb6be3a 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -95,6 +95,26 @@ double fcounters[17]; }; +struct darshan_dfs_file +{ + struct darshan_base_record base_rec; + int64_t counters[52]; + double fcounters[15]; + unsigned char pool_uuid[16]; + unsigned char cont_uuid[16]; +}; + +struct darshan_daos_object +{ + struct darshan_base_record base_rec; + int64_t counters[64]; + double fcounters[15]; + unsigned char pool_uuid[16]; + unsigned char cont_uuid[16]; + uint64_t oid_hi; + uint64_t oid_lo; +}; + struct darshan_stdio_file { struct darshan_base_record base_rec; @@ -203,6 +223,10 @@ extern char *pnetcdf_var_f_counter_names[]; extern char *posix_counter_names[]; extern char *posix_f_counter_names[]; +extern char *dfs_counter_names[]; +extern char *dfs_f_counter_names[]; +extern char *daos_counter_names[]; +extern char *daos_f_counter_names[]; extern char *stdio_counter_names[]; extern char *stdio_f_counter_names[]; diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index d509218ec..8b64dd490 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -70,6 +70,8 @@ "APXC", "APMPI", "HEATMAP", + "DFS", + "DAOS", ] def mod_name_to_idx(mod_name): return _mod_names.index(mod_name) @@ -86,6 +88,8 @@ def mod_name_to_idx(mod_name): "PNETCDF_FILE": "struct darshan_pnetcdf_file **", "PNETCDF_VAR": "struct darshan_pnetcdf_var **", "POSIX": "struct darshan_posix_file **", + "DFS": "struct darshan_dfs_file **", + "DAOS": "struct darshan_daos_object **", "STDIO": "struct darshan_stdio_file **", "APXC-HEADER": "struct darshan_apxc_header_record **", "APXC-PERF": "struct darshan_apxc_perf_record **", @@ -358,7 +362,7 @@ def log_get_generic_record(log, mod_name, dtype='numpy'): Example: - The typical darshan log record provides two arrays, on for integer counters + The typical darshan log record provides two arrays, one for integer counters and one for floating point counters: >>> darshan.log_get_generic_record(log, "POSIX", "struct darshan_posix_file **") diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py index a7f8ed2f0..bc75ac3e1 100644 --- a/darshan-util/pydarshan/darshan/cli/summary.py +++ b/darshan-util/pydarshan/darshan/cli/summary.py @@ -388,6 +388,8 @@ def register_figures(self): ["DXT_MPIIO", None], ["HEATMAP_POSIX", None], ["DXT_POSIX", None], + ["HEATMAP_DFS", None], + ["HEATMAP_DAOS", None], ["HEATMAP_STDIO", None], ]) if not set(hmap_modules).isdisjoint(modules_avail): @@ -487,7 +489,7 @@ def register_figures(self): # for the operation counts, since the `H5D` variant contains # both modules' 
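The `darshan_dfs_file` and `darshan_daos_object` definitions added to `api_def_c.py` above must stay in lockstep with the C log-format headers introduced at the end of this patch (counter array lengths of 52/15 and 64/15, plus the pool and container UUIDs). A sketch of sanity-checking that through the CFFI backend, assuming `log_open` as the backend's log handle constructor; the log path is hypothetical.

```python
from darshan.backend.cffi_backend import log_open, log_get_generic_record

# Sketch only: "ior-dfs.darshan" is a hypothetical log path.
log = log_open("ior-dfs.darshan")
rec = log_get_generic_record(log, "DFS", dtype="numpy")

# array lengths mirror struct darshan_dfs_file in api_def_c.py
assert len(rec["counters"]) == 52
assert len(rec["fcounters"]) == 15
```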
data, we either want `H5F` or `H5D`, not both - opcounts_mods = ["POSIX", "MPI-IO", "STDIO"] + opcounts_mods = ["POSIX", "MPI-IO", "STDIO", "DFS", "DAOS"] if "H5D" in self.report.modules: opcounts_mods.append("H5D") elif "H5F" in self.report.modules: @@ -513,7 +515,7 @@ def register_figures(self): sect_title = f"Per-Module Statistics: {mod}" try: - if mod in ["POSIX", "MPI-IO", "STDIO"]: + if mod in ["POSIX", "MPI-IO", "STDIO", "DFS", "DAOS"]: # get the module's record dataframe and then pass to # Darshan accumulator interface to generate a cumulative # record and derived metrics @@ -531,9 +533,10 @@ def register_figures(self): fig_grid_area="overview") self.figures.append(mod_overview_fig) + data_type = "File" if mod != "DAOS" else "Object" file_count_summary_fig = ReportFigure( section_title=sect_title, - fig_title=f"File Count Summary
(estimated by {mod} I/O access offsets)", + fig_title=f"{data_type} Count Summary
(estimated by {mod} I/O access offsets)",
                 fig_func=log_file_count_summary_table,
                 fig_args=dict(derived_metrics=acc.derived_metrics,
                               mod_name=mod),
@@ -567,7 +570,7 @@ def register_figures(self):
                 # repo
                 pass
 
-            if mod in ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR"]:
+            if mod in ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR", "DFS", "DAOS"]:
                 access_hist_description = (
                     "Histogram of read and write access sizes. The specific values "
                     "of the most frequently occurring access sizes can be found in "
diff --git a/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py b/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
index 6496f89d6..7759f4dd1 100644
--- a/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
+++ b/darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
@@ -11,20 +11,12 @@ def agg_ioops(self, mode='append'):
         None or dict: Depending on mode
     """
 
-    series = [
-        {'name': 'POSIX', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
-        {'name': 'MPI-IO Indep.', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
-        {'name': 'MPI-IO Coll.', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] },
-        {'name': 'STDIO', 'type': 'bar', 'data': [0, 0, 0, 0, 0, 0, 0] }
-    ]
-
-    # convienience
     recs = self.records
     ctx = {}
 
-    # aggragate
-    mods = ['MPI-IO', 'POSIX', 'STDIO', "H5F", "H5D", "PNETCDF_VAR", "PNETCDF_FILE"]
+    # aggregate
+    mods = ['MPI-IO', 'POSIX', 'STDIO', "H5F", "H5D", "PNETCDF_VAR", "PNETCDF_FILE", "DFS", "DAOS"]
 
     for mod in mods:
 
         # check records for module are present
@@ -112,6 +104,52 @@ def agg_ioops(self, mode='append'):
             ctx[mod] = agg
             ctx[mod + '_simple'] = tmp
 
+        elif mod == "DFS":
+            tmp = {
+                'Read': agg[mod + '_READS'],
+                'Readx': agg[mod + '_READXS'],
+                'Write': agg[mod + '_WRITES'],
+                'Writex': agg[mod + '_WRITEXS'],
+                'Open': agg[mod + '_OPENS'],
+                'GlobalOpen': agg[mod + '_GLOBAL_OPENS'],
+                'Lookup': agg[mod + '_LOOKUPS'],
+                'Get Size': agg[mod + '_GET_SIZES'],
+                'Punch': agg[mod + '_PUNCHES'],
+                'Remove': agg[mod + '_REMOVES'],
+                'Stat': agg[mod + '_STATS'],
+            }
+            ctx[mod] = agg
+            ctx[mod + '_simple'] = tmp
+
+        elif mod == "DAOS":
+            tmp = {
+                'Obj Fetches': agg[mod + '_OBJ_FETCHES'],
+                'Obj Updates': agg[mod + '_OBJ_UPDATES'],
+                'Obj Opens': agg[mod + '_OBJ_OPENS'],
+                'Obj Punches': agg[mod + '_OBJ_PUNCHES'],
+                'Obj Dkey Punches': agg[mod + '_OBJ_DKEY_PUNCHES'],
+                'Obj Akey Punches': agg[mod + '_OBJ_AKEY_PUNCHES'],
+                'Obj Dkey Lists': agg[mod + '_OBJ_DKEY_LISTS'],
+                'Obj Akey Lists': agg[mod + '_OBJ_AKEY_LISTS'],
+                'Obj Recx Lists': agg[mod + '_OBJ_RECX_LISTS'],
+                'Array Reads': agg[mod + '_ARRAY_READS'],
+                'Array Writes': agg[mod + '_ARRAY_WRITES'],
+                'Array Opens': agg[mod + '_ARRAY_OPENS'],
+                'Array Get Sizes': agg[mod + '_ARRAY_GET_SIZES'],
+                'Array Set Sizes': agg[mod + '_ARRAY_SET_SIZES'],
+                'Array Stats': agg[mod + '_ARRAY_STATS'],
+                'Array Punches': agg[mod + '_ARRAY_PUNCHES'],
+                'Array Destroys': agg[mod + '_ARRAY_DESTROYS'],
+                'KV Gets': agg[mod + '_KV_GETS'],
+                'KV Puts': agg[mod + '_KV_PUTS'],
+                'KV Opens': agg[mod + '_KV_OPENS'],
+                'KV Removes': agg[mod + '_KV_REMOVES'],
+                'KV Lists': agg[mod + '_KV_LISTS'],
+                'KV Destroys': agg[mod + '_KV_DESTROYS'],
+            }
+            ctx[mod] = agg
+            ctx[mod + '_simple'] = tmp
+
         else:
             # POSIX and STDIO share most counter names and are handled
             # together for this reason, except for metadata/sync counter
diff --git a/darshan-util/pydarshan/darshan/experimental/aggregators/mod_agg_iohist.py b/darshan-util/pydarshan/darshan/experimental/aggregators/mod_agg_iohist.py
index 232b95428..302f2fac2 100644
---
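With the DFS and DAOS branches above in place, `agg_ioops` makes the per-module operation totals reachable from a report object. A sketch, assuming the experimental aggregators are attached via `darshan.enable_experimental()` and that a non-append `mode` returns the context dict, as the "None or dict" docstring indicates; the log name is hypothetical.

```python
import darshan

darshan.enable_experimental()  # attaches agg_ioops() to DarshanReport

with darshan.DarshanReport("ior-dfs.darshan") as report:  # hypothetical log
    ctx = report.agg_ioops(mode='return')
    print(ctx['DFS_simple'])   # {'Read': ..., 'Readx': ..., 'Open': ...}
    print(ctx['DAOS_simple'])  # {'Obj Fetches': ..., 'KV Gets': ...}
```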
a/darshan-util/pydarshan/darshan/experimental/aggregators/mod_agg_iohist.py +++ b/darshan-util/pydarshan/darshan/experimental/aggregators/mod_agg_iohist.py @@ -12,7 +12,7 @@ def mod_agg_iohist(self, mod, mode='append'): """ # sanitation and guards - supported = ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR"] + supported = ["POSIX", "MPI-IO", "H5D", "PNETCDF_VAR", "DFS", "DAOS"] if mod not in supported: raise Exception("Unsupported mod_name for aggregated iohist.") diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py index 9f8d1961e..c50ace7c5 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/plot_io_cost.py @@ -109,7 +109,7 @@ def get_io_cost_df(report: darshan.DarshanReport) -> Any: """ io_cost_dict = {} - supported_modules = ["POSIX", "MPI-IO", "STDIO", "H5F", "H5D", "PNETCDF_FILE", "PNETCDF_VAR"] + supported_modules = ["POSIX", "MPI-IO", "STDIO", "H5F", "H5D", "PNETCDF_FILE", "PNETCDF_VAR", "DFS", "DAOS"] for mod_key in report.modules: if mod_key in supported_modules and len(report.records[mod_key]) > 0: # collect the records in dataframe form diff --git a/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py b/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py index 1d648c610..b495eaaa2 100644 --- a/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py +++ b/darshan-util/pydarshan/darshan/experimental/plots/plot_opcounts.py @@ -154,6 +154,56 @@ def gather_count_data(report, mod): report.summary['agg_ioops']['PNETCDF_FILE']['PNETCDF_FILE_COLL_WAITS'], ] + elif mod == 'DFS': + labels = ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen', 'Lookup', 'GetSize', 'Punch', 'Remove', 'Stat'] + counts = [ + mod_data['DFS_READS'], + mod_data['DFS_READXS'], + mod_data['DFS_WRITES'], + mod_data['DFS_WRITEXS'], + mod_data['DFS_OPENS'], + mod_data['DFS_GLOBAL_OPENS'], + mod_data['DFS_LOOKUPS'], + mod_data['DFS_GET_SIZES'], + mod_data['DFS_PUNCHES'], + mod_data['DFS_REMOVES'], + mod_data['DFS_STATS'], + ] + + elif mod == 'DAOS': + labels = ['ObjFetch', 'ObjUpdate', 'ObjOpen', + 'ObjPunch', 'ObjDkeyPunch', 'ObjAkeyPunch', 'ObjDkeyList', 'ObjAkeyList', 'ObjRecxList', + 'ArrRead', 'ArrWrite', 'ArrOpen', + 'ArrGetSize', 'ArrSetSize', 'ArrStat', 'ArrPunch', 'ArrDestroy', + 'KVGet', 'KVPut', 'KVOpen', + 'KVRemove', 'KVList', 'KVDestroy', + ] + counts = [ + mod_data['DAOS_OBJ_FETCHES'], + mod_data['DAOS_OBJ_UPDATES'], + mod_data['DAOS_OBJ_OPENS'], + mod_data['DAOS_OBJ_PUNCHES'], + mod_data['DAOS_OBJ_DKEY_PUNCHES'], + mod_data['DAOS_OBJ_AKEY_PUNCHES'], + mod_data['DAOS_OBJ_DKEY_LISTS'], + mod_data['DAOS_OBJ_AKEY_LISTS'], + mod_data['DAOS_OBJ_RECX_LISTS'], + mod_data['DAOS_ARRAY_READS'], + mod_data['DAOS_ARRAY_WRITES'], + mod_data['DAOS_ARRAY_OPENS'], + mod_data['DAOS_ARRAY_GET_SIZES'], + mod_data['DAOS_ARRAY_SET_SIZES'], + mod_data['DAOS_ARRAY_STATS'], + mod_data['DAOS_ARRAY_PUNCHES'], + mod_data['DAOS_ARRAY_DESTROYS'], + mod_data['DAOS_KV_GETS'], + mod_data['DAOS_KV_PUTS'], + mod_data['DAOS_KV_OPENS'], + mod_data['DAOS_KV_REMOVES'], + mod_data['DAOS_KV_LISTS'], + mod_data['DAOS_KV_DESTROYS'], + ] + return labels, counts def plot_opcounts(report, mod, ax=None): diff --git a/darshan-util/pydarshan/darshan/lib/accum.py b/darshan-util/pydarshan/darshan/lib/accum.py index 4d40ee0da..c8d8702f3 100644 --- a/darshan-util/pydarshan/darshan/lib/accum.py +++ 
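The DFS and DAOS label/count pairs above feed straight into `plot_opcounts(report, mod, ax=None)`. A short sketch of rendering the DAOS operation counts, assuming the `agg_ioops` summary is populated first; the log name is hypothetical.

```python
import darshan
from darshan.experimental.plots.plot_opcounts import plot_opcounts

darshan.enable_experimental()
with darshan.DarshanReport("ior-dfs.darshan") as report:  # hypothetical log
    report.agg_ioops(mode='append')      # fills report.summary['agg_ioops']
    fig = plot_opcounts(report, "DAOS")  # bars: ObjFetch, ObjUpdate, ...
```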
b/darshan-util/pydarshan/darshan/lib/accum.py @@ -10,18 +10,19 @@ def log_file_count_summary_table(derived_metrics, mod_name: str): + data_type = "files" if mod_name != "DAOS" else "objects" # the darshan_file_category enum is not really # exposed in CFFI/Python layer, so we effectively # re-export the content indices we need here # so that we can properly index the C-level data - darshan_file_category = {"total files":0, - "read-only files":1, - "write-only files":2, - "read/write files":3} + darshan_file_category = {f"total {data_type}":0, + f"read-only {data_type}":1, + f"write-only {data_type}":2, + f"read/write {data_type}":3} df = pd.DataFrame.from_dict(darshan_file_category, orient="index") df.rename(columns={0:"index"}, inplace=True) df.index.rename('type', inplace=True) - df["number of files"] = np.zeros(4, dtype=int) + df[f"number of {data_type}"] = np.zeros(4, dtype=int) df["avg. size"] = np.zeros(4, dtype=str) df["max size"] = np.zeros(4, dtype=str) @@ -59,9 +60,10 @@ def log_module_overview_table(derived_metrics, mod_overview = [] total_cat = derived_metrics.category_counters[0] - total_files = total_cat.count - indices = ["files accessed", "bytes read", "bytes written", "I/O performance estimate"] - mod_overview.append(f"{total_files}") + total_count = total_cat.count + data_type = "files" if mod_name != "DAOS" else "objects" + indices = [f"{data_type} accessed", "bytes read", "bytes written", "I/O performance estimate"] + mod_overview.append(f"{total_count}") total_bytes_read = total_cat.total_read_volume_bytes total_bytes_read_str = humanize.naturalsize(total_bytes_read, binary=True, format="%.2f") total_bytes_written = total_cat.total_write_volume_bytes diff --git a/darshan-util/pydarshan/darshan/report.py b/darshan-util/pydarshan/darshan/report.py index 0df68efe1..fb8ecde7d 100644 --- a/darshan-util/pydarshan/darshan/report.py +++ b/darshan-util/pydarshan/darshan/report.py @@ -607,7 +607,9 @@ def read_all_heatmap_records(self): _nrecs_heatmap = { 16592106915301738621: "heatmap:POSIX", 3989511027826779520: "heatmap:STDIO", - 3668870418325792824: "heatmap:MPIIO" + 3668870418325792824: "heatmap:MPIIO", + 1597927878319380788: "heatmap:DFS", + 4131494093108637317: "heatmap:DAOS" } def heatmap_rec_to_module_name(rec, nrecs=None): diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py b/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py index 8e29828dc..24e675fec 100644 --- a/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py +++ b/darshan-util/pydarshan/darshan/tests/test_plot_exp_common.py @@ -13,6 +13,20 @@ @pytest.mark.parametrize( "log_path, mod, func, expected_xticklabels", [ + ( + "snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + "DFS", + plot_access_histogram, + ["0-100", "101-1K", "1K-10K", "10K-100K", "100K-1M", + "1M-4M", "4M-10M", "10M-100M", "100M-1G", "1G+"] + ), + ( + "snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + "DAOS", + plot_access_histogram, + ["0-100", "101-1K", "1K-10K", "10K-100K", "100K-1M", + "1M-4M", "4M-10M", "10M-100M", "100M-1G", "1G+"] + ), ( "dxt.darshan", "POSIX", @@ -64,6 +78,20 @@ ["0-100", "101-1K", "1K-10K", "10K-100K", "100K-1M", "1M-4M", "4M-10M", "10M-100M", "100M-1G", "1G+"], ), + ( + "snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + "DFS", + plot_opcounts, + ['Read', 'Readx', 'Write', 'Writex', 'Open', 'GlobalOpen', 'Lookup', 'GetSize', 'Punch', 'Remove', 'Stat'], + ), + ( + 
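The `data_type` switch above is what makes the summary report say "objects" for DAOS where every other module says "files". A sketch of invoking the table builder directly, assuming `acc` is an accumulator handle obtained the same way `summary.py` obtains one (the accumulation step itself is elided here):

```python
from darshan.lib.accum import log_file_count_summary_table

# `acc` is assumed to be a Darshan accumulator produced as in summary.py's
# register_figures(); with mod_name="DAOS" the row labels become
# "total objects", "read-only objects", "write-only objects",
# and "read/write objects".
table = log_file_count_summary_table(acc.derived_metrics, mod_name="DAOS")
```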
"snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + "DAOS", + plot_opcounts, + ['ObjFetch', 'ObjUpdate', 'ObjOpen', 'ObjPunch', 'ObjDkeyPunch', 'ObjAkeyPunch', 'ObjDkeyList', 'ObjAkeyList', 'ObjRecxList', + 'ArrRead', 'ArrWrite', 'ArrOpen', 'ArrGetSize', 'ArrSetSize', 'ArrStat', 'ArrPunch', 'ArrDestroy', + 'KVGet', 'KVPut', 'KVOpen', 'KVRemove', 'KVList', 'KVDestroy',], + ), ( "dxt.darshan", "POSIX", diff --git a/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py b/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py index 2a003d987..efffc11bc 100644 --- a/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py +++ b/darshan-util/pydarshan/darshan/tests/test_plot_io_cost.py @@ -56,6 +56,19 @@ ["Read", "Write", "Meta", "Wait"], ), ), + ( + "snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + pd.DataFrame( + np.array([ + [0.0, 0.0, 0.0, 0.0], + [0.0, 5.93066216e-06, 0.0, 0.0], + [0.002395875, 0.011805813, 0.008395875, 0.0], + [0.002437813, 0.012004437, 0.00816075, 0.0], + ]), + ["POSIX", "STDIO", "DFS", "DAOS"], + ["Read", "Write", "Meta", "Wait"], + ), + ), ], ) def test_get_io_cost_df(logname, expected_df): @@ -87,6 +100,10 @@ def test_get_io_cost_df(logname, expected_df): "noposixopens.darshan", [0.0, 1111.0], ), + ( + "snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + [0.0, 0.61345315], + ), ], ) def test_plot_io_cost_ylims(logname, expected_ylims): @@ -105,24 +122,28 @@ def test_plot_io_cost_ylims(logname, expected_ylims): assert_allclose(actual_ylims, [0.0, 100.0]) @pytest.mark.parametrize( - "logname, expected_yticks", [ + "logname, expected_yticks, expected_yticklabels", [ ( "ior_hdf5_example.darshan", [0.0, 0.2, 0.4, 0.6, 0.8, 1.0], + ['0.0', '0.2', '0.4', '0.6', '0.8', '1.0'], ), ( "sample-badost.darshan", [0, 156, 312, 468, 624, 780], + ['0', '156', '312', '468', '624', '780'], + ), + ( + "snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + [0.0 , 0.12269063, 0.24538126, 0.36807189, 0.49076252, 0.61345315], + ['0.0000', '0.1227', '0.2454', '0.3681', '0.4908', '0.6135'], ), ], ) -def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks): +def test_plot_io_cost_y_ticks_and_labels(logname, expected_yticks, expected_yticklabels): # check the y-axis tick marks are at the appropriate # locations and the labels are as expected - # create the expected y-axis tick labels from the y ticks - expected_yticklabels = [str(i) for i in expected_yticks] - logpath = get_log_path(logname) with darshan.DarshanReport(logpath) as report: fig = plot_io_cost(report=report) diff --git a/darshan-util/pydarshan/darshan/tests/test_report.py b/darshan-util/pydarshan/darshan/tests/test_report.py index e1a7eab70..72ac8dc9e 100644 --- a/darshan-util/pydarshan/darshan/tests/test_report.py +++ b/darshan-util/pydarshan/darshan/tests/test_report.py @@ -85,6 +85,72 @@ def test_load_records_filtered(): assert 1 == len(report.data['records']['POSIX']) assert 1 == len(report.data['records']['MPI-IO']) +def test_dfs_daos_posix_match(): + # the ior runs by Shane with POSIX vs. 
DAOS DFS + # backend should produce matching counters where + # comparable data fields exist + posix_ior_report = darshan.DarshanReport(get_log_path("snyder_ior-POSIX_id1057716-202103_11-8-64415-6936117869459351096_1.darshan")) + dfs_ior_report = darshan.DarshanReport(get_log_path("snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan")) + posix_ior_report.mod_read_all_records("POSIX") + dfs_ior_report.mod_read_all_records("DFS") + dfs_ior_report.mod_read_all_records("DAOS") + posix_data_dict = posix_ior_report.data['records']["POSIX"].to_df()["counters"] + dfs_data_dict = dfs_ior_report.data['records']["DFS"].to_df()["counters"] + daos_data_dict = dfs_ior_report.data['records']["DAOS"].to_df()["counters"] + dfs_ior_name_recs = dfs_ior_report.data["name_records"] + + # also gather counters for the underlying DAOS record for the DFS record + # (they have the same record ID, simplifying this a bit) + dfs_hash = dfs_data_dict["id"][0] + daos_data_dict = daos_data_dict[daos_data_dict["id"] == dfs_hash] + for column_name in dfs_data_dict.columns: + # for some columns we can't reasonably expect a match + # or we need to handle the data differently between POSIX + # and DAOS DFS + if column_name in ["id", "DFS_LOOKUPS", "DFS_DUPS", "DFS_NB_READS", "DFS_NB_WRITES", + "DFS_GET_SIZES", "DFS_PUNCHES", "DFS_REMOVES", "DFS_STATS", + "DFS_CHUNK_SIZE", "DFS_FASTEST_RANK", "DFS_SLOWEST_RANK", + "DFS_FASTEST_RANK_BYTES", "DFS_SLOWEST_RANK_BYTES", + "DFS_MAX_READ_TIME_SIZE", "DFS_MAX_WRITE_TIME_SIZE", + "DFS_GLOBAL_OPENS", "DFS_READXS", "DFS_WRITEXS"]: + continue + elif column_name == "DFS_OPENS": + # sum these together to match the POSIX version + dfs_data = (dfs_data_dict["DFS_GLOBAL_OPENS"] + + dfs_data_dict["DFS_OPENS"]) + elif column_name == "DFS_READS": + # sum these together to match the POSIX version + dfs_data = (dfs_data_dict["DFS_READS"] + + dfs_data_dict["DFS_READXS"]) + # we know the hardcoded value for certain + assert dfs_data.values == 64 + elif column_name == "DFS_WRITES": + # sum these together to match the POSIX version + dfs_data = (dfs_data_dict["DFS_WRITES"] + + dfs_data_dict["DFS_WRITEXS"]) + # we know the hardcoded value for certain + assert dfs_data.values == 64 + else: + dfs_data = dfs_data_dict[column_name] + posix_column_name = column_name.replace("DFS", "POSIX") + posix_data = posix_data_dict[posix_column_name] + assert_allclose(dfs_data.values, posix_data.values) + # also check the DAOS-level data + daos_column_name = column_name.replace("DFS", "DAOS") + if daos_column_name == "DAOS_OPENS": + # this won't match exactly + continue + elif daos_column_name in ["DAOS_READS", "DAOS_WRITES"]: + daos_column_name = daos_column_name.replace("DAOS", "DAOS_ARRAY") + daos_data = daos_data_dict[daos_column_name] + assert_allclose(dfs_data.values, daos_data.values) + if column_name.endswith("BYTES_WRITTEN"): + # we know the hardcoded value for certain + # 256 KiB * 16 + assert dfs_data.values == 16777216 + assert daos_data.values == 16777216 + + @pytest.mark.parametrize("unsupported_record", ["DXT_POSIX", "DXT_MPIIO", "LUSTRE", "APMPI", "APXC"] ) diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py index f5eb71e8a..c61f4bc18 100644 --- a/darshan-util/pydarshan/darshan/tests/test_summary.py +++ b/darshan-util/pydarshan/darshan/tests/test_summary.py @@ -263,12 +263,15 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath): if ("e3sm_io_heatmap_only" in log_filepath or "shane_ior-HDF5" in log_filepath or 
"shane_ior-PNETCDF" in log_filepath or + "snyder_ior-DFS" in log_filepath or (match and int(darshan_log_version[2]) >= 4)): assert actual_runtime_heatmap_titles == 3 elif ("runtime_and_dxt_heatmaps_diagonal_write_only" in log_filepath or "treddy_runtime_heatmap_inactive_ranks" in log_filepath or "h5d_no_h5f" in log_filepath): assert actual_runtime_heatmap_titles == 1 + elif "snyder_ior-POSIX" in log_filepath: + assert actual_runtime_heatmap_titles == 2 else: assert actual_runtime_heatmap_titles == 0 @@ -494,6 +497,23 @@ def test_metadata_table(self, log_path, expected_df): ], ), 2, + ), + ( + "snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan", + pd.DataFrame( + index=[ + "Log Filename", "Runtime Library Version", "Log Format Version", + "POSIX (ver=4)", + "STDIO (ver=2)", "HEATMAP (ver=1)", + "DFS (ver=1)", "DAOS (ver=1)" + ], + data=[ + ["snyder_ior-DFS_id4681120-53379_5-8-15060-3270540599978592154_1.darshan"], ["3.4.7"], ["3.41"], + ["0.07 KiB"], ["0.07 KiB"], ["1.09 KiB"], + ["0.17 KiB"], ["0.40 KiB"], + ], + ), + 0, ) ], ) diff --git a/include/darshan-daos-log-format.h b/include/darshan-daos-log-format.h new file mode 100644 index 000000000..0240c3800 --- /dev/null +++ b/include/darshan-daos-log-format.h @@ -0,0 +1,182 @@ +/* + * (C) 2020 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __DARSHAN_DAOS_LOG_FORMAT_H +#define __DARSHAN_DAOS_LOG_FORMAT_H + +/* current DAOS log format version */ +#define DARSHAN_DAOS_VER 1 + +#define DAOS_COUNTERS \ + /* count of daos obj opens */\ + X(DAOS_OBJ_OPENS) \ + /* count of daos obj fetches */\ + X(DAOS_OBJ_FETCHES) \ + /* count of daos obj updates */\ + X(DAOS_OBJ_UPDATES) \ + /* count of daos obj punches */\ + X(DAOS_OBJ_PUNCHES) \ + /* count of daos obj dkey punches */\ + X(DAOS_OBJ_DKEY_PUNCHES) \ + /* count of daos obj akey punches */\ + X(DAOS_OBJ_AKEY_PUNCHES) \ + /* count of daos obj dkey lists */\ + X(DAOS_OBJ_DKEY_LISTS) \ + /* count of daos obj akey lists */\ + X(DAOS_OBJ_AKEY_LISTS) \ + /* count of daos obj recx lists */\ + X(DAOS_OBJ_RECX_LISTS) \ + /* count of daos array opens */\ + X(DAOS_ARRAY_OPENS) \ + /* count of daos array reads */\ + X(DAOS_ARRAY_READS) \ + /* count of daos array writes */\ + X(DAOS_ARRAY_WRITES) \ + /* count of daos array get sizes */\ + X(DAOS_ARRAY_GET_SIZES) \ + /* count of daos array set sizes */\ + X(DAOS_ARRAY_SET_SIZES) \ + /* count of daos array stats */\ + X(DAOS_ARRAY_STATS) \ + /* count of daos array punches */\ + X(DAOS_ARRAY_PUNCHES) \ + /* count of daos array destroys */\ + X(DAOS_ARRAY_DESTROYS) \ + /* count of daos kv opens */\ + X(DAOS_KV_OPENS) \ + /* count of daos kv gets */\ + X(DAOS_KV_GETS) \ + /* count of daos kv puts */\ + X(DAOS_KV_PUTS) \ + /* count of daos kv removes */\ + X(DAOS_KV_REMOVES) \ + /* count of daos kv lists */\ + X(DAOS_KV_LISTS) \ + /* count of daos kv destroys */\ + X(DAOS_KV_DESTROYS) \ + /* count of daos non-blocking operations */\ + X(DAOS_NB_OPS) \ + /* total bytes read */\ + X(DAOS_BYTES_READ) \ + /* total bytes written */\ + X(DAOS_BYTES_WRITTEN) \ + /* number of times switched between read and write */\ + X(DAOS_RW_SWITCHES) \ + X(DAOS_MAX_READ_TIME_SIZE) \ + X(DAOS_MAX_WRITE_TIME_SIZE) \ + /* buckets for daos read size ranges */\ + X(DAOS_SIZE_READ_0_100) \ + X(DAOS_SIZE_READ_100_1K) \ + X(DAOS_SIZE_READ_1K_10K) \ + X(DAOS_SIZE_READ_10K_100K) \ + X(DAOS_SIZE_READ_100K_1M) \ + X(DAOS_SIZE_READ_1M_4M) \ + X(DAOS_SIZE_READ_4M_10M) \ + X(DAOS_SIZE_READ_10M_100M) \ + X(DAOS_SIZE_READ_100M_1G) \ + 
X(DAOS_SIZE_READ_1G_PLUS) \ + /* buckets for daos write size ranges */\ + X(DAOS_SIZE_WRITE_0_100) \ + X(DAOS_SIZE_WRITE_100_1K) \ + X(DAOS_SIZE_WRITE_1K_10K) \ + X(DAOS_SIZE_WRITE_10K_100K) \ + X(DAOS_SIZE_WRITE_100K_1M) \ + X(DAOS_SIZE_WRITE_1M_4M) \ + X(DAOS_SIZE_WRITE_4M_10M) \ + X(DAOS_SIZE_WRITE_10M_100M) \ + X(DAOS_SIZE_WRITE_100M_1G) \ + X(DAOS_SIZE_WRITE_1G_PLUS) \ + /* the four most frequently appearing access sizes */\ + X(DAOS_ACCESS1_ACCESS) \ + X(DAOS_ACCESS2_ACCESS) \ + X(DAOS_ACCESS3_ACCESS) \ + X(DAOS_ACCESS4_ACCESS) \ + /* count of each of the most frequent access sizes */\ + X(DAOS_ACCESS1_COUNT) \ + X(DAOS_ACCESS2_COUNT) \ + X(DAOS_ACCESS3_COUNT) \ + X(DAOS_ACCESS4_COUNT) \ + /* daos obj otype id */\ + X(DAOS_OBJ_OTYPE) \ + /* cell size of the daos array */\ + X(DAOS_ARRAY_CELL_SIZE) \ + /* chunk size of the daos array */\ + X(DAOS_ARRAY_CHUNK_SIZE) \ + /* rank and number of bytes moved for fastest/slowest ranks */\ + X(DAOS_FASTEST_RANK) \ + X(DAOS_FASTEST_RANK_BYTES) \ + X(DAOS_SLOWEST_RANK) \ + X(DAOS_SLOWEST_RANK_BYTES) \ + /* end of counters */\ + X(DAOS_NUM_INDICES) + +#define DAOS_F_COUNTERS \ + /* timestamp of first open */\ + X(DAOS_F_OPEN_START_TIMESTAMP) \ + /* timestamp of first read */\ + X(DAOS_F_READ_START_TIMESTAMP) \ + /* timestamp of first write */\ + X(DAOS_F_WRITE_START_TIMESTAMP) \ + /* timestamp of first close */\ + X(DAOS_F_CLOSE_START_TIMESTAMP) \ + /* timestamp of last open */\ + X(DAOS_F_OPEN_END_TIMESTAMP) \ + /* timestamp of last read */\ + X(DAOS_F_READ_END_TIMESTAMP) \ + /* timestamp of last write */\ + X(DAOS_F_WRITE_END_TIMESTAMP) \ + /* timestamp of last close */\ + X(DAOS_F_CLOSE_END_TIMESTAMP) \ + /* cumulative daos read time */\ + X(DAOS_F_READ_TIME) \ + /* cumulative daos write time */\ + X(DAOS_F_WRITE_TIME) \ + /* cumulative daos meta time */\ + X(DAOS_F_META_TIME) \ + /* maximum daos read duration */\ + X(DAOS_F_MAX_READ_TIME) \ + /* maximum daos write duration */\ + X(DAOS_F_MAX_WRITE_TIME) \ + /* total i/o and meta time consumed for fastest/slowest ranks */\ + X(DAOS_F_FASTEST_RANK_TIME) \ + X(DAOS_F_SLOWEST_RANK_TIME) \ + /* end of counters */\ + X(DAOS_F_NUM_INDICES) + +#define X(a) a, +/* integer statistics for DAOS object records */ +enum darshan_daos_indices +{ + DAOS_COUNTERS +}; + +/* floating point statistics for DAOS object records */ +enum darshan_daos_f_indices +{ + DAOS_F_COUNTERS +}; +#undef X + +/* record structure for DAOS objects. a record is created and stored for + * every DAOS object opened by the original application. For the DAOS module, + * the record includes: + * - a darshan_base_record structure, which contains the record id & rank + * - integer I/O statistics (open, read/write counts, etc) + * - floating point I/O statistics (timestamps, cumulative timers, etc.) + * - pool and container UUIDs + * - 128-bit OID (upper 64-bits in oid_hi and lower 64-bits in oid_lo) + */ +struct darshan_daos_object +{ + struct darshan_base_record base_rec; + int64_t counters[DAOS_NUM_INDICES]; + double fcounters[DAOS_F_NUM_INDICES]; + unsigned char pool_uuid[16]; + unsigned char cont_uuid[16]; + uint64_t oid_hi; + uint64_t oid_lo; +}; + +#endif /* __DARSHAN_DAOS_LOG_FORMAT_H */ diff --git a/include/darshan-dfs-log-format.h b/include/darshan-dfs-log-format.h new file mode 100644 index 000000000..38fc0979e --- /dev/null +++ b/include/darshan-dfs-log-format.h @@ -0,0 +1,155 @@ +/* + * (C) 2020 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. 
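Both new log formats bin access sizes into the same ten ranges, 0-100 bytes up through 1 GiB and larger. Purely as an illustration of the bucket boundaries implied by the counter names (this helper is not part of the patch, and the exact inclusive/exclusive edges are an assumption):

```python
import bisect

# Hypothetical illustration of the DAOS/DFS SIZE_READ_*/SIZE_WRITE_* buckets.
# Upper bounds of the first nine buckets; anything larger lands in 1G_PLUS.
BUCKET_BOUNDS = [100, 1024, 10 * 1024, 100 * 1024, 1024**2, 4 * 1024**2,
                 10 * 1024**2, 100 * 1024**2, 1024**3]

def size_bucket(size_bytes: int) -> int:
    """Return 0-9, indexing buckets 0_100 through 1G_PLUS."""
    return bisect.bisect_left(BUCKET_BOUNDS, size_bytes)
```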
+ */ + +#ifndef __DARSHAN_DFS_LOG_FORMAT_H +#define __DARSHAN_DFS_LOG_FORMAT_H + +/* current DFS log format version */ +#define DARSHAN_DFS_VER 1 + +#define DFS_COUNTERS \ + /* count of dfs opens */\ + X(DFS_OPENS) \ + /* count of dfs global (global2local) opens */\ + X(DFS_GLOBAL_OPENS) \ + /* count of dfs lookups */\ + X(DFS_LOOKUPS) \ + /* count of dfs dups */\ + X(DFS_DUPS) \ + /* count of dfs reads */\ + X(DFS_READS) \ + /* count of dfs readxs */\ + X(DFS_READXS) \ + /* count of dfs writes */\ + X(DFS_WRITES) \ + /* count of dfs writexs */\ + X(DFS_WRITEXS) \ + /* count of total non-blocking dfs reads (including read/readx) */\ + X(DFS_NB_READS) \ + /* count of total non-blocking dfs writes (including write/writex) */\ + X(DFS_NB_WRITES) \ + /* count of dfs get sizes */\ + X(DFS_GET_SIZES) \ + /* count of dfs punches */\ + X(DFS_PUNCHES) \ + /* count of dfs removes */\ + X(DFS_REMOVES) \ + /* count of dfs stats */\ + X(DFS_STATS) \ + /* total bytes read */\ + X(DFS_BYTES_READ) \ + /* total bytes written */\ + X(DFS_BYTES_WRITTEN) \ + /* number of times switched between read and write */\ + X(DFS_RW_SWITCHES) \ + X(DFS_MAX_READ_TIME_SIZE) \ + X(DFS_MAX_WRITE_TIME_SIZE) \ + /* buckets for dfs read size ranges */\ + X(DFS_SIZE_READ_0_100) \ + X(DFS_SIZE_READ_100_1K) \ + X(DFS_SIZE_READ_1K_10K) \ + X(DFS_SIZE_READ_10K_100K) \ + X(DFS_SIZE_READ_100K_1M) \ + X(DFS_SIZE_READ_1M_4M) \ + X(DFS_SIZE_READ_4M_10M) \ + X(DFS_SIZE_READ_10M_100M) \ + X(DFS_SIZE_READ_100M_1G) \ + X(DFS_SIZE_READ_1G_PLUS) \ + /* buckets for dfs write size ranges */\ + X(DFS_SIZE_WRITE_0_100) \ + X(DFS_SIZE_WRITE_100_1K) \ + X(DFS_SIZE_WRITE_1K_10K) \ + X(DFS_SIZE_WRITE_10K_100K) \ + X(DFS_SIZE_WRITE_100K_1M) \ + X(DFS_SIZE_WRITE_1M_4M) \ + X(DFS_SIZE_WRITE_4M_10M) \ + X(DFS_SIZE_WRITE_10M_100M) \ + X(DFS_SIZE_WRITE_100M_1G) \ + X(DFS_SIZE_WRITE_1G_PLUS) \ + /* the four most frequently appearing access sizes */\ + X(DFS_ACCESS1_ACCESS) \ + X(DFS_ACCESS2_ACCESS) \ + X(DFS_ACCESS3_ACCESS) \ + X(DFS_ACCESS4_ACCESS) \ + /* count of each of the most frequent access sizes */\ + X(DFS_ACCESS1_COUNT) \ + X(DFS_ACCESS2_COUNT) \ + X(DFS_ACCESS3_COUNT) \ + X(DFS_ACCESS4_COUNT) \ + /* dfs file chunk size */\ + X(DFS_CHUNK_SIZE) \ + /* rank and number of bytes moved for fastest/slowest ranks */\ + X(DFS_FASTEST_RANK) \ + X(DFS_FASTEST_RANK_BYTES) \ + X(DFS_SLOWEST_RANK) \ + X(DFS_SLOWEST_RANK_BYTES) \ + /* end of counters */\ + X(DFS_NUM_INDICES) + +#define DFS_F_COUNTERS \ + /* timestamp of first open */\ + X(DFS_F_OPEN_START_TIMESTAMP) \ + /* timestamp of first read */\ + X(DFS_F_READ_START_TIMESTAMP) \ + /* timestamp of first write */\ + X(DFS_F_WRITE_START_TIMESTAMP) \ + /* timestamp of first close */\ + X(DFS_F_CLOSE_START_TIMESTAMP) \ + /* timestamp of last open */\ + X(DFS_F_OPEN_END_TIMESTAMP) \ + /* timestamp of last read */\ + X(DFS_F_READ_END_TIMESTAMP) \ + /* timestamp of last write */\ + X(DFS_F_WRITE_END_TIMESTAMP) \ + /* timestamp of last close */\ + X(DFS_F_CLOSE_END_TIMESTAMP) \ + /* cumulative dfs read time */\ + X(DFS_F_READ_TIME) \ + /* cumulative dfs write time */\ + X(DFS_F_WRITE_TIME) \ + /* cumulative dfs meta time */\ + X(DFS_F_META_TIME) \ + /* maximum dfs read duration */\ + X(DFS_F_MAX_READ_TIME) \ + /* maximum dfs write duration */\ + X(DFS_F_MAX_WRITE_TIME) \ + /* total i/o and meta time consumed for fastest/slowest ranks */\ + X(DFS_F_FASTEST_RANK_TIME) \ + X(DFS_F_SLOWEST_RANK_TIME) \ + /* end of counters */\ + X(DFS_F_NUM_INDICES) + +#define X(a) a, +/* integer statistics for DFS file records */ 
+enum darshan_dfs_indices +{ + DFS_COUNTERS +}; + +/* floating point statistics for DFS file records */ +enum darshan_dfs_f_indices +{ + DFS_F_COUNTERS +}; +#undef X + +/* file record structure for DFS files. a record is created and stored for + * every DFS file opened by the original application. For the DFS module, + * the record includes: + * - a darshan_base_record structure, which contains the record id & rank + * - integer file I/O statistics (open, read/write counts, etc) + * - floating point I/O statistics (timestamps, cumulative timers, etc.) + * - pool and container UUIDs + */ +struct darshan_dfs_file +{ + struct darshan_base_record base_rec; + int64_t counters[DFS_NUM_INDICES]; + double fcounters[DFS_F_NUM_INDICES]; + unsigned char pool_uuid[16]; + unsigned char cont_uuid[16]; +}; + +#endif /* __DARSHAN_DFS_LOG_FORMAT_H */ diff --git a/include/darshan-log-format.h b/include/darshan-log-format.h index 4fbf37b53..a600635b9 100644 --- a/include/darshan-log-format.h +++ b/include/darshan-log-format.h @@ -129,6 +129,8 @@ struct darshan_base_record #include "darshan-apmpi-log-format.h" #endif #include "darshan-heatmap-log-format.h" +#include "darshan-dfs-log-format.h" +#include "darshan-daos-log-format.h" /* X-macro for keeping module ordering consistent */ /* NOTE: first val used to define module enum values, @@ -175,7 +177,9 @@ struct darshan_base_record X(DARSHAN_MDHIM_MOD, "MDHIM", DARSHAN_MDHIM_VER, &mdhim_logutils) \ X(DARSHAN_APXC_MOD, "APXC", __APXC_VER, __apxc_logutils) \ X(DARSHAN_APMPI_MOD, "APMPI", __APMPI_VER, __apmpi_logutils) \ - X(DARSHAN_HEATMAP_MOD, "HEATMAP", DARSHAN_HEATMAP_VER, &heatmap_logutils) + X(DARSHAN_HEATMAP_MOD, "HEATMAP", DARSHAN_HEATMAP_VER, &heatmap_logutils) \ + X(DARSHAN_DFS_MOD, "DFS", DARSHAN_DFS_VER, &dfs_logutils) \ + X(DARSHAN_DAOS_MOD, "DAOS", DARSHAN_DAOS_VER, &daos_logutils) /* unique identifiers to distinguish between available darshan modules */ /* NOTES: - valid ids range from [0...DARSHAN_MAX_MODS-1]
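Note that DFS and DAOS are appended at the tail of the module X-macro, after HEATMAP, so every pre-existing module keeps its numeric ID and older logs remain readable; the `_mod_names` list in `cffi_backend.py` earlier in this patch appends "DFS" and "DAOS" at the matching positions. A small sketch of that invariant on the Python side:

```python
from darshan.backend.cffi_backend import mod_name_to_idx

# The Python-side module list must track the module X-macro ordering in
# darshan-log-format.h; DFS and DAOS were appended after HEATMAP.
assert mod_name_to_idx("DFS") == mod_name_to_idx("HEATMAP") + 1
assert mod_name_to_idx("DAOS") == mod_name_to_idx("DFS") + 1
```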