diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index abd0de459..5edd6333a 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -2,13 +2,30 @@ version: "2"
 build:
   os: "ubuntu-22.04"
+  apt_packages:
+    - autoconf
+    - automake
+    - libtool
+    - libtool-bin
+    - m4
   tools:
     python: "3.10"
+  jobs:
+    pre_install:
+      # configuring and installing darshan-util is required
+      # Note DARSHAN_INSTALL_PREFIX is a readthedocs environment variable which
+      # must be defined in the readthedocs dashboard
+      - cd darshan-util && autoreconf -i
+      - mkdir -p build && cd build && ../darshan-util/configure --prefix=$DARSHAN_INSTALL_PREFIX && make -j4 install
+      # create pydarshan doc files
+      - cd darshan-util/pydarshan && pip install -r requirements_dev.txt && make docs

 python:
   install:
-    - requirements: readthedocs/requirements.txt
+    - method: pip
+      path: darshan-util/pydarshan

 sphinx:
   configuration: conf.py
diff --git a/conf.py b/conf.py
index e09d6c414..efa467581 100644
--- a/conf.py
+++ b/conf.py
@@ -1,29 +1,131 @@
-# Configuration file for the Sphinx documentation builder.
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Documentation build configuration file.
+# This file must be stored at the root directory with name "conf.py".
+#
-# -- Project information
+import darshan
+# -- General configuration ---------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.napoleon',
+]
+
+
+napoleon_google_docstring = True
+napoleon_numpy_docstring = False
+napoleon_include_init_with_doc = True
+napoleon_include_private_with_doc = True
+napoleon_include_special_with_doc = True
+napoleon_use_ivar = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = {'.rst': 'restructuredtext'}
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
 project = u'Darshan'
 copyright = u"2025, Argonne National Laboratory"
 author = u""

-release = '0.1'
-version = '0.1.0'
+# The version info for the project you're documenting, acts as replacement
+# for |version| and |release|, also used in various other places throughout
+# the built documents.
+#
+# The short X.Y version.
+version = darshan.__version__
+# The full version, including alpha/beta/rc tags.
+release = darshan.__version__

-# -- General configuration
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# These patterns also affect html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

-extensions = [
-    'sphinx.ext.duration',
-    'sphinx.ext.doctest',
-    'sphinx.ext.autodoc',
-    'sphinx.ext.autosummary',
-    'sphinx.ext.intersphinx',
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output -------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.
See the documentation for +# a list of builtin themes. +# +#html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a +# theme further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# -- Options for HTMLHelp output --------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'darshandoc' + +# -- Options for LaTeX output ------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass +# [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'darshan.tex', u'Darshan Documentation', u'Argonne National Laboratory', 'manual'), ] +# -- Options for Texinfo output ---------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'darshan', u'Darshan Documentation', author, 'darshan', 'Utilities for Darshan HPC I/O instrumentation.', 'Miscellaneous'), +] + + intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), - 'sphinx': ('https://www.sphinx-doc.org/en/master/', None), + "python": ('https://docs.python.org/', None), + "matplotlib": ("https://matplotlib.org", None), } -intersphinx_disabled_domains = ['std'] - -templates_path = ['_templates'] diff --git a/darshan-runtime/doc/DXT-overhead.jpg b/darshan-runtime/doc/DXT-overhead.jpg new file mode 100644 index 000000000..3ffb18efe Binary files /dev/null and b/darshan-runtime/doc/DXT-overhead.jpg differ diff --git a/darshan-runtime/doc/darshan-runtime.rst b/darshan-runtime/doc/darshan-runtime.rst new file mode 100644 index 000000000..3d14f9164 --- /dev/null +++ b/darshan-runtime/doc/darshan-runtime.rst @@ -0,0 +1,1153 @@ +************************************** +Darshan-runtime installation and usage +************************************** + +Introduction +============================================================================== + +This document describes darshan-runtime, which is the instrumentation portion +of the Darshan characterization tool. It should be installed on the system +where you intend to collect I/O characterization information. + +Darshan instruments applications via either compile time wrappers or dynamic +library preloading. An application that has been instrumented with Darshan +will produce a single log file each time it is executed. This log summarizes +the I/O access patterns used by the application. + +The darshan-runtime instrumentation has traditionally only supported MPI +applications (specifically, those that call ``MPI_Init()`` and +``MPI_Finalize()``), but, as of version 3.2.0, Darshan also supports +instrumentation of non-MPI applications. Regardless of whether MPI is used, +Darshan provides detailed statistics about POSIX level file accesses made by +the application. 
In the case of MPI applications, Darshan additionally
+captures details on MPI-IO and HDF5 level access, as well as limited
+information about PnetCDF access. Note that instrumentation of non-MPI
+applications is currently only supported in Darshan's shared library, which
+applications must ``LD_PRELOAD``.
+
+Starting in version 3.0.0, Darshan also exposes an API that can be used to
+develop and add new instrumentation modules (for other I/O library interfaces
+or to gather system-specific data, for instance), as detailed in
+`Darshan Modularization `_.
+Newly contributed modules include a module for gathering system-specific
+parameters for jobs running on BG/Q systems, a module for gathering Lustre
+striping data for files on Lustre file systems, and a module for instrumenting
+stdio (i.e., stream I/O functions like ``fopen()``, ``fread()``, etc.).
+
+Starting in version 3.1.3, Darshan also allows for full tracing of application
+I/O workloads using the newly developed Darshan eXtended Tracing (DXT)
+instrumentation module. This module can be selectively enabled at runtime to
+provide high-fidelity traces of an application's I/O workload, as opposed to
+the coarse-grained I/O summary data that Darshan has traditionally provided.
+Currently, DXT only traces at the POSIX and MPI-IO layers. The figure below,
+from an example run collected on Cori at NERSC, demonstrates the low overhead
+of DXT tracing, which offers performance comparable to Darshan's traditional
+coarse-grained instrumentation methods.
+
+.. image:: DXT-overhead.jpg
+   :align: center
+   :width: 500
+   :alt: Measured DXT overhead with the IOR benchmark on Cori
+
+Starting in version 3.4.5, Darshan facilitates real-time collection of
+comprehensive application I/O workload traces through the newly integrated
+Darshan LDMS data module, known as the darshanConnector. Leveraging the
+Lightweight Distributed Metric Service (LDMS) streams API, the darshanConnector
+collects, transports, and/or stores traces of application I/O operations
+instrumented by Darshan at runtime. This module can only be enabled if the LDMS
+library is included in the Darshan build process. For more information about
+LDMS or LDMS streams please refer to the official `LDMS documentation
+`_.
+
+This document provides generic installation instructions, but "recipes" for
+several common HPC systems are provided at the end of the document as well.
+
+More information about Darshan can be found at the
+`Darshan web site `_.
+
+Requirements
+==============================================================================
+
+* C compiler (preferably GCC-compatible)
+* zlib development headers and library
+
+Conventional installation
+==============================================================================
+
+Compilation
+----------------------------------------
+
+**Configure and build example (with MPI support)**
+
+  .. code-block:: bash
+
+     tar -xvzf darshan-.tar.gz
+     cd darshan-/
+     ./prepare.sh
+     cd darshan-runtime/
+     ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID CC=mpicc
+     make
+     make install
+
+
+**Configure and build example (without MPI support)**
+
+  ..
code-block:: bash + + tar -xvzf darshan-.tar.gz + cd darshan-/ + ./prepare.sh + cd darshan-runtime/ + ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID --without-mpi CC=gcc + make + make install + +**Explanation of configure arguments:** + +* ``--with-mem-align=NUM``: This value is system-dependent and will be used by + Darshan to determine if the buffer for a read or write operation is + aligned in memory (default is 8). +* ``--with-jobid-env=NAME`` (mandatory): this specifies the environment + variable that Darshan should check to determine the jobid of a job. Common + values are ``PBS_JOBID`` or ``COBALT_JOBID``. If you are not using a + scheduler (or your scheduler does not advertise the job ID) then you can + specify ``NONE`` here. Darshan will fall back to using the pid of the rank 0 + process if the specified environment variable is not set. +* ``--with-username-env=NAME``: this specifies the environment variable that + Darshan should check to determine the username for a job. If not specified, + Darshan will use internal mechanisms to try to determine the username. + + .. note:: + Darshan relies on the ``LOGNAME`` environment variable to determine a + username, but this method isn't always reliable (e.g., on Slurm systems, + ``LOGNAME`` can be wiped when specifying additional environment + variables using the ``--export`` option to ``srun``). This configure + option allows specification of an additional environment variable to + extract a username from (e.g., ``SLURM_JOB_USER``). +* ``--with-log-path=DIR`` (this, or ``--with-log-path-by-env``, is mandatory): + This specifies the parent directory for the directory tree where Darshan logs + will be placed. + + .. note:: + After installation, any user can display the configured path with the + ``darshan-config --log-path`` command. +* ``--with-log-path-by-env=NAME1,NAME2,...``: specifies a comma separated list + of environment variables to check at runtime for log path location before the + one set by ``--with-log-path=DIR`` at configure time. +* ``--with-log-hints=hint1=x;hint2=y,...``: specifies hints to use when writing + the Darshan log file. See ``./configure --help`` for details. +* ``--with-mod-mem=NUM``: specifies the maximum amount of memory (in MiB) that + active Darshan instrumentation modules can collectively consume. +* ``--with-zlib=DIR``: specifies an alternate location for the zlib development + header and library. +* ``--without-mpi``: disables MPI support when building Darshan - MPI support is + assumed if not specified. +* ``--enable-mmap-logs``: enables the use of Darshan's mmap log file mechanism. +* ``--enable-cuserid``: enables use of cuserid() at runtime. +* ``--disable-ld-preload``: disables building of the Darshan ``LD_PRELOAD`` library +* ``--enable-group-readable-logs``: sets Darshan log file permissions to allow + group read access. +* ``--disable-exit-wrapper``: disables wrapping of ``_exit()`` calls as last + ditch shutdown hook for the Darshan library when used in non-MPI mode. +* ``CC=``: specifies the C compiler to use for compilation. 
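+
+For illustration, several of the flags described above might be combined into a
+single configure invocation. This is only a hypothetical sketch; the install
+prefix shown here is a made-up, site-specific value, and the log path and job
+ID variable should match your system:
+
+  .. code-block:: bash
+
+     # example only: adjust prefix, log path, and job ID variable for your site
+     ./configure --prefix=/soft/darshan-install \
+                 --with-log-path=/darshan-logs \
+                 --with-jobid-env=SLURM_JOBID \
+                 CC=mpicc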
+ +**Configure arguments for controlling which Darshan modules to use:** + +* ``--disable-posix-mod``: disables compilation and use of Darshan's POSIX + module (default=enabled) +* ``--disable-mpiio-mod``: disables compilation and use of Darshan's MPI-IO + module (default=enabled) +* ``--disable-stdio-mod``: disables compilation and use of Darshan's STDIO + module (default=enabled) +* ``--disable-dxt-mod``: disables compilation and use of Darshan's DXT module + (default=enabled) +* ``--enable-hdf5-mod``: enables compilation and use of Darshan's HDF5 module + (default=disabled) +* ``--with-hdf5=DIR``: + installation directory for HDF5 + + .. note:: + * Users must call ``--enable-hdf5-mod`` to enable HDF5 modules, + ``--with-hdf5`` is only used to additionally provide an HDF5 install + prefix. + * HDF5 instrumentation only works on HDF5 library versions >= 1.8, and + further requires that the HDF5 library used to build Darshan and the + HDF5 library being linked in either both be version >=1.10 or both be + version < 1.10. + * This option does not work with the profile configuration + instrumentation method described in the "Instrumenting applications" + section :ref:`Sec Use Profile`. +* ``--enable-pnetcdf-mod``: enables compilation and use of Darshan's PnetCDF + module (default=disabled) +* ``--with-pnetcdf=DIR``: + installation directory for PnetCDF + + .. note:: + * Users must call ``--enable-pnetcdf-mod`` to enable PnetCDF modules, + ``--with-pnetcdf`` is only used to additionally provide a PnetCDF + install prefix. + * PnetCDF instrumentation only works on PnetCDF library versions >= 1.8. +* ``--disable-lustre-mod``: disables compilation and use of Darshan's Lustre + module (default=enabled) +* ``--enable-daos-mod``: enables compilation and use of Darshan's DAOS module + (default=disabled) +* ``--with-daos=DIR``: + installation directory for DAOS + + .. note:: + Users must call ``--enable-daos-mod`` to enable DAOS modules, + ``--with-daos`` is only used to additionally provide a DAOS install + prefix. +* ``--enable-mdhim-mod``: enables compilation and use of Darshan's MDHIM module + (default=disabled) +* ``--enable-ldms-mod``: enables compilation and use of Darshan’s LDMS runtime + module (default=disabled) +* ``--with-ldms=DIR``: + installation directory for LDMS + + .. note:: + * Users must use the configuration flags ``--enable-ldms-mod`` and + ``--with-ldms=DIR`` to enable runtime data collection via LDMS. + * To collect runtime I/O information from Darshan, you will need to + configure, initialize, and connect to an LDMS streams daemon. For + detailed instructions please visit + `Running An LDMS Streams Daemon for Darshan `_. + * If LDMS is not installed on the system, please visit “Getting the + Source” and “Building the Source” in the + `LDMS Quick Start Guide `_. + +Environment preparation +---------------------------------------- + +Once darshan-runtime has been installed, you must prepare a location in which +to store the Darshan log files and configure an instrumentation method. + +This step can be safely skipped if you configured darshan-runtime using the +``--with-log-path-by-env`` option. A more typical configuration uses a static +directory hierarchy for Darshan log files. + +The ``darshan-mk-log-dirs.pl`` utility will configure the path specified at +configure time to include subdirectories organized by year, month, and day in +which log files will be placed. 
The deepest subdirectories will have sticky +permissions to enable multiple users to write to the same directory. If the +log directory is shared system-wide across many users then the following script +should be run as root. + + .. code-block:: bash + + darshan-mk-log-dirs.pl + +.. note:: + **A note about finding log paths after installation** - + Regardless of whether a Darshan installation is using the ``--with-log-path`` or + ``--with-log-path-by-env`` option, end users can display the path (and/or + environment variables) at any time by running ``darshan-config --log-path`` + on the command line. + +.. note:: + **A note about log directory permissions** - + All log files written by Darshan have permissions set to only allow + read access by the owner of the file. You can modify this behavior, + however, by specifying the --enable-group-readable-logs option at + configure time. One notable deployment scenario would be to configure + Darshan and the log directories to allow all logs to be readable by both the + end user and a Darshan administrators group. This can be done with the + following steps: + + * set the --enable-group-readable-logs option at configure time + * create the log directories with darshan-mk-log-dirs.pl + * recursively set the group ownership of the log directories to the Darshan + administrators group + * recursively set the setgid bit on the log directories + +Spack installation +============================================================================== + +You can also install Darshan via `Spack `_ as an alternative +to manual download, compilation, and installation. This may be especially +convenient for single-user installs. Darshan is divided into two separate +packages for the command line utilities and runtime instrumentation. You can +install either or both as follows: + + .. code-block:: bash + + spack install darshan-util + spack install darshan-runtime + +.. note:: + Darshan will generally compile and install fine using a variety of + compilers, but we advise using a gcc compiler in Spack to compile Darshan + (regardless of what compiler you will use for your applications) to + ensure maximum runtime compatibility. + +You can use the ``spack info darshan-runtime`` query to view the full list of +variants available for the darshan-runtime Spack package. For example, adding +a ``+slurm`` to the command line (``spack install darshan-runtime+slurm``) will +cause Darshan to be compiled with support for gathering job ID information from +the Slurm scheduler. + +The following commands will load the Darshan packages once they have been +installed: + + .. code-block:: bash + + spack load -r darshan-util + spack load -r darshan-runtime + + +Note that the spack install of darshan-runtime will use an environment variable +named ``$DARSHAN_LOG_DIR_PATH`` to indicate where it should store log files. +This variable is set to the user's home directory by default when the package +is loaded, but it may be overridden. + +On Cray systems, you can also perform an additional step to load a +Cray-specific module file. This will make a module called ``darshan`` available +as described later in this document in the Cray platform recipe. It enables +automatic instrumentation when using the standard Cray compiler wrappers. + + .. code-block:: bash + + module use `spack location -i darshan-runtime`/share/craype-2.x/modulefiles + +Instrumenting applications +============================================================================== + +.. 
note::
+   More specific installation "recipes" are provided later in this document
+   for some platforms. This section of the documentation covers general
+   techniques.
+
+Once Darshan has been installed and a log path has been prepared, the next step
+is to actually instrument applications. The preferred method is to instrument
+applications at compile time.
+
+Option 1: Instrumenting MPI applications at compile time
+--------------------------------------------------------
+
+This method is applicable to C, Fortran, and C++ MPI applications (regardless
+of whether they are static or dynamically linked) and is the most
+straightforward method to apply transparently system-wide. It works by
+injecting additional libraries and options into the linker command line to
+intercept relevant I/O calls.
+
+On Cray platforms you can enable the compile time instrumentation by simply
+loading the Darshan module. It can then be enabled for all users by placing
+that module in the default environment. As of Darshan 3.2.0 this will
+instrument both static and dynamic executables, while in previous versions of
+Darshan this was only sufficient for static executables. See the Cray
+installation recipe for more details.
+
+For other general MPICH-based MPI implementations, you can generate
+Darshan-enabled variants of the standard mpicc/mpicxx/mpif90/mpif77 wrappers
+using the following commands:
+
+  .. code-block:: bash
+
+     darshan-gen-cc.pl `which mpicc` --output mpicc.darshan
+     darshan-gen-cxx.pl `which mpicxx` --output mpicxx.darshan
+     darshan-gen-fortran.pl `which mpif77` --output mpif77.darshan
+     darshan-gen-fortran.pl `which mpif90` --output mpif90.darshan
+
+
+The resulting ``*.darshan`` wrappers will transparently inject Darshan
+instrumentation into the link step without any explicit user intervention.
+They can be renamed and placed in an appropriate PATH to enable automatic
+instrumentation. This method also works correctly for both static and dynamic
+executables as of Darshan 3.2.0.
+
+For other systems you can enable compile-time instrumentation by either
+manually adding the appropriate link options to your command line or modifying
+your default MPI compiler script. The ``darshan-config`` command line tool can
+be used to display the options that you should use:
+
+  .. code-block:: bash
+
+     # Linker options to use for dynamic linking (default on most platforms)
+     # These arguments should go *before* the MPI libraries in the underlying
+     # linker command line to ensure that Darshan can be activated. They should
+     # also ideally go before other libraries that may issue I/O function calls.
+     darshan-config --dyn-ld-flags
+
+     # Linker options to use for static linking
+     # The first set of arguments should go early in the link command line
+     # (before MPI), while the second set should go at the end of the link
+     # command line
+     darshan-config --pre-ld-flags
+     darshan-config --post-ld-flags
+
+.. _Sec Use Profile:
+
+Using a profile configuration
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The MPICH MPI implementation supports the specification of a profiling library
+configuration that can be used to insert Darshan instrumentation without
+modifying the existing MPI compiler script. You can enable a profiling
+configuration using environment variables or command line arguments to the
+compiler scripts:
+
+Example for MPICH 3.1.1 or newer:
+
+  .. code-block:: bash
+
+     export MPICC_PROFILE=$DARSHAN_PREFIX/share/mpi-profile/darshan-cc
+     export MPICXX_PROFILE=$DARSHAN_PREFIX/share/mpi-profile/darshan-cxx
+     export MPIFORT_PROFILE=$DARSHAN_PREFIX/share/mpi-profile/darshan-f
+
+
+Examples for command line use:
+
+  .. code-block:: bash
+
+     mpicc -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-c
+     mpicxx -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-cxx
+     mpif77 -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-f
+     mpif90 -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-f
+
+
+Note that unlike the previously described methods in this section, this method
+*will not* automatically adapt to static and dynamic linking options. The
+example profile configurations shown above only support dynamic linking.
+
+Example profile configurations are also provided with a "-static" suffix if you
+need examples for static linking.
+
+Option 2: Instrumenting MPI applications at runtime
+--------------------------------------------------------
+
+This method is applicable to pre-compiled dynamically linked executables as
+well as interpreted languages such as Python. You do not need to change your
+compile options in any way. This method works by injecting instrumentation at
+runtime. It will not work for statically linked executables.
+
+To use this mechanism, set the ``LD_PRELOAD`` environment variable to the full
+path to the Darshan shared library. The preferred method of inserting Darshan
+instrumentation in this case is to set the ``LD_PRELOAD`` variable specifically
+for the application of interest. Typically this is possible using command line
+arguments offered by the ``mpirun`` or ``mpiexec`` scripts or by the job
+scheduler:
+
+  .. code-block:: bash
+
+     mpiexec -n 4 -env LD_PRELOAD /home/carns/darshan-install/lib/libdarshan.so mpi-io-test
+
+
+  .. code-block:: bash
+
+     srun -n 4 --export=LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so mpi-io-test
+
+
+For sequential invocations of MPI programs, the following will set
+``LD_PRELOAD`` for the duration of the process only:
+
+  .. code-block:: bash
+
+     env LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so mpi-io-test
+
+
+Other environments may have other specific options for controlling this
+behavior. Please check your local site documentation for details.
+
+It is also possible to simply export ``LD_PRELOAD`` as follows, but this is
+discouraged because it can cause Darshan and MPI symbols to be pulled into
+unrelated binaries:
+
+  .. code-block:: bash
+
+     export LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so
+
+
+.. note::
+   For SGI systems running the MPT environment, it may be necessary to set the
+   ``MPI_SHEPHERD`` environment variable equal to ``true`` to avoid deadlock
+   when preloading the Darshan shared library.
+
+Option 3: Instrumenting non-MPI applications at runtime
+--------------------------------------------------------
+
+Similar to the process described in the previous section, Darshan relies on the
+``LD_PRELOAD`` mechanism for instrumenting dynamically-linked non-MPI
+applications. This allows Darshan to instrument dynamically-linked binaries
+produced by non-MPI compilers (e.g., gcc or clang), extending Darshan
+instrumentation to new contexts (like instrumentation of arbitrary Python
+programs or instrumenting serial file transfer utilities like ``cp`` and
+``scp``).
+
+The only additional step required of Darshan non-MPI users is to also set the
+``DARSHAN_ENABLE_NONMPI`` environment variable to signal to Darshan that
+non-MPI instrumentation is requested:
+
+  .. code-block:: bash
+
+     export DARSHAN_ENABLE_NONMPI=1
+
+
+As described in the previous section, it may be desirable for users to limit
+the scope of Darshan's instrumentation by only enabling ``LD_PRELOAD`` on the
+target executable:
+
+  .. code-block:: bash
+
+     env LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so io-test
+
+
+.. note::
+   Recall that Darshan instrumentation of non-MPI applications is only
+   possible with dynamically-linked applications.
+
+Using other profiling tools at the same time as Darshan
+--------------------------------------------------------
+
+As of Darshan version 3.2.0, Darshan does not necessarily interfere with other
+profiling tools (particularly those using the PMPI profiling interface).
+Darshan itself does not use the PMPI interface, and instead uses dynamic linker
+symbol interception or ``--wrap`` function interception for static executables.
+
+As a rule of thumb, most profiling tools should appear in the linker command
+line *before* ``-ldarshan`` if possible.
+
+Using the Darshan eXtended Tracing (DXT) module
+==============================================================================
+
+Darshan's DXT module provides full tracing of MPI-IO and POSIX read/write APIs.
+While the DXT module is able to capture finer-grained details compared to
+traditional Darshan instrumentation, it may exhibit higher runtime and memory
+overheads. For this reason, DXT support is disabled by default in Darshan, but
+users can opt in to DXT instrumentation at runtime by setting their environment
+as follows:
+
+  .. code-block:: bash
+
+     export DXT_ENABLE_IO_TRACE=1
+
+
+DXT will trace each I/O operation to files instrumented by Darshan's MPI-IO and
+POSIX modules, using a default memory limit of 2 MiB for each module (DXT_POSIX
+and DXT_MPIIO). Memory usage and a number of other aspects of DXT tracing can
+be configured as described in section :ref:`Sec Conf Runtime`.
+
+Using AutoPerf instrumentation modules
+==============================================================================
+
+AutoPerf offers two additional Darshan instrumentation modules that may be
+enabled for MPI applications.
+
+* APMPI: Instrumentation of over 70 MPI-3 communication routines, providing
+  operation counts, datatype sizes, and timing information for each application
+  MPI rank.
+* APXC: Instrumentation of Cray XC environments to provide network and compute
+  counters of interest, via PAPI.
+
+Users can request Darshan to build the APMPI and APXC modules by passing
+``--enable-apmpi-mod`` and ``--enable-apxc-mod`` options to configure,
+respectively. Note that these options can be requested independently (i.e., you
+can build Darshan with APMPI support but not APXC support, and vice versa).
+
+The only prerequisite for the APMPI module is that Darshan be configured with
+an MPI-3 compliant compiler. For APXC, the user must obviously be using a Cray
+XC system and must make the PAPI interface available to Darshan (e.g., by
+running ``module load papi`` before building Darshan).
+
+If using the APMPI module, users can additionally specify the
+``--enable-apmpi-coll-sync`` configure option to force Darshan to synchronize
+before calling underlying MPI routines and to capture additional timing
+information on how synchronized processes are.
Users should note that this option imposes additional overhead, but it can
+be useful to help diagnose whether applications are spending a lot of time
+synchronizing as part of collective communication calls. For this reason, we
+do not recommend setting this option for production Darshan deployments.
+
+.. note::
+   The AutoPerf instrumentation modules are provided as Git submodules to
+   Darshan's main repository, so if building Darshan source that has been
+   cloned from Git, it is necessary to first retrieve the AutoPerf submodules
+   by running the following command:
+
+   .. code-block:: bash
+
+      git submodule update --init
+
+
+.. _Sec Conf Runtime:
+
+Configuring Darshan library at runtime
+==============================================================================
+
+To fine-tune Darshan library settings (e.g., internal memory usage,
+instrumentation scope, etc.), Darshan provides a couple of mechanisms:
+
+* user environment variable overrides
+* a configuration file, which users must specify the path to using the
+  ``DARSHAN_CONFIG_PATH`` environment variable
+
+For settings that are specified both via a config file and via an environment
+variable, the environment settings will take precedence.
+
+.. note::
+   Users of facility-provided Darshan installs should be mindful that these
+   installs could define their own default Darshan config file. In this case,
+   users should double check that the ``DARSHAN_CONFIG_PATH`` environment
+   variable is not already set, and if it is, users should consider copying the
+   default config file as a starting point before applying their own settings.
+
+Darshan library config settings
+----------------------------------------
+
+The Darshan library honors the following settings to modify behavior at
+runtime:
+
+**Table 1. Darshan library config settings**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20, 10, 70
+   :class: longtable
+   :align: left
+
+   * - environment variable setting
+     - config file setting
+     - description
+   * - DARSHAN_DISABLE=1
+     - N/A
+     - Disables Darshan instrumentation.
+   * - DARSHAN_ENABLE_NONMPI=1
+     - N/A
+     - Enables Darshan's non-MPI mode, required for applications that do not
+       call MPI_Init and MPI_Finalize.
+   * - DARSHAN_CONFIG_PATH=
+     - N/A
+     - Specifies the path to a Darshan config file to load settings from.
+   * - DARSHAN_DUMP_CONFIG=1
+     - DUMP_CONFIG
+     - Prints the Darshan configuration to stderr at runtime.
+   * - DARSHAN_DISABLE_SHARED_REDUCTION=1
+     - DISABLE_SHARED_REDUCTION
+     - Disables the step in Darshan aggregation in which files that were
+       accessed by all ranks are collapsed into a single cumulative file record
+       at rank 0. This option retains more per-process information at the
+       expense of creating larger log files.
+   * - DARSHAN_INTERNAL_TIMING=1
+     - INTERNAL_TIMING
+     - Enables internal instrumentation that will print the time required to
+       startup and shutdown Darshan to stderr at runtime.
+   * - DARSHAN_MODMEM=
+     - MODMEM
+     - Specifies the amount of memory (in MiB) Darshan instrumentation modules
+       can collectively consume (if not specified, a default 4 MiB quota is
+       used). Overrides any ``--with-mod-mem`` configure argument.
+   * - DARSHAN_NAMEMEM=
+     - NAMEMEM
+     - Specifies the amount of memory (in MiB) Darshan can consume for storing
+       record names (if not specified, a default 1 MiB quota is used).
+       Overrides any ``--with-name-mem`` configure argument.
+   * - DARSHAN_MEMALIGN=
+     - MEMALIGN
+     - Specifies a value for system memory alignment. Overrides any
+       ``--with-mem-align`` configure argument (default is 8 bytes).
+   * - DARSHAN_JOBID=
+     - JOBID
+     - Specifies the name of the environment variable to use for the job
+       identifier, such as PBS_JOBID. Overrides ``--with-jobid-env`` configure
+       argument.
+   * - DARSHAN_LOGHINTS=
+     - LOGHINTS
+     - Specifies the MPI-IO hints to use when storing the Darshan output file.
+       The format is a semicolon-delimited list of key=value pairs, for
+       example: hint1=value1;hint2=value2. Overrides any ``--with-log-hints``
+       configure argument.
+   * - DARSHAN_LOGPATH=
+     - LOGPATH
+     - Specifies the path to write Darshan log files to. Note that this
+       directory needs to be formatted using the darshan-mk-log-dirs script.
+       Overrides any ``--with-log-path`` configure argument.
+   * - DARSHAN_MMAP_LOGPATH=
+     - MMAP_LOGPATH
+     - If Darshan's mmap log file mechanism is enabled, this variable specifies
+       what path the mmap log files should be stored in (if not specified, log
+       files will be stored in ``/tmp``).
+   * - DARSHAN_LOGFILE=
+     - N/A
+     - Specifies the path (directory + Darshan log file name) to write the
+       output Darshan log to. This overrides the default Darshan behavior of
+       automatically generating a log file name and adding it to a log file
+       directory formatted using the darshan-mk-log-dirs script.
+   * - DARSHAN_MOD_DISABLE=
+     - MOD_DISABLE
+     - Specifies a list of comma-separated Darshan module names to disable at
+       runtime.
+   * - DARSHAN_MOD_ENABLE=
+     - MOD_ENABLE
+     - Specifies a list of comma-separated Darshan module names to enable at
+       runtime.
+   * - DARSHAN_APP_EXCLUDE=
+     - APP_EXCLUDE
+     - Specifies a list of comma-separated regexes that match application names
+       that should not be instrumented. This is useful if Darshan is loaded via
+       ``LD_PRELOAD``, in which case logs may be generated for many unintended
+       applications.
+   * - DARSHAN_APP_INCLUDE=
+     - APP_INCLUDE
+     - Specifies a list of comma-separated regexes that match application names
+       that should be instrumented. This setting is used to override any
+       APP_EXCLUDE rules.
+   * - DARSHAN_RANK_EXCLUDE=
+     - RANK_EXCLUDE
+     - Specifies a list of comma-separated ranks (or rank ranges) that should
+       not be instrumented. Rank ranges are formatted like "start:end" (if
+       start or end are not specified, the first or last rank is assumed,
+       respectively). Note that the Darshan library will still run on all
+       processes of an application; this setting just controls whether specific
+       ranks are capturing instrumentation data.
+   * - DARSHAN_RANK_INCLUDE=
+     - RANK_INCLUDE
+     - Specifies a list of comma-separated ranks (or rank ranges) that should
+       be instrumented. This setting is used to override any RANK_EXCLUDE
+       rules.
+   * - DARSHAN_DXT_SMALL_IO_TRIGGER=
+     - DXT_SMALL_IO_TRIGGER
+     - Specifies a floating point percentage (i.e., ".8" would be 80%)
+       indicating a threshold of small I/O operation accesses (defined as
+       accesses smaller than 10 KiB), with DXT trace data being discarded for
+       files that exhibit a percentage of small I/O operations less than this
+       threshold.
+   * - DARSHAN_DXT_UNALIGNED_IO_TRIGGER=
+     - DXT_UNALIGNED_IO_TRIGGER
+     - Specifies a floating point percentage (i.e., ".8" would be 80%)
+       indicating a threshold of unaligned I/O operation accesses (defined as
+       accesses not aligned to the file alignment value determined by Darshan),
+       with DXT trace data being discarded for files that exhibit a percentage
+       of unaligned I/O operations less than this threshold.
+   * - N/A
+     - MAX_RECORDS
+     - Specifies the number of records to pre-allocate for each instrumentation
+       module given in a comma-separated list. Most modules default to tracing
+       1024 file records per-process.
+   * - N/A
+     - NAME_EXCLUDE
+     - Specifies a list of comma-separated regexes that match record names that
+       should not be instrumented for instrumentation modules given in a
+       comma-separated module list.
+   * - N/A
+     - NAME_INCLUDE
+     - Specifies a list of comma-separated regexes that match record names that
+       should be instrumented for instrumentation modules given in a
+       comma-separated module list. This setting is used to override any
+       NAME_EXCLUDE rules.
+   * - DXT_ENABLE_IO_TRACE=1
+     - N/A
+     - (DEPRECATED) Setting this environment variable enables the DXT (Darshan
+       eXtended Tracing) modules at runtime for all files instrumented by
+       Darshan. Replaced by the MOD_ENABLE setting.
+   * - DARSHAN_EXCLUDE_DIRS=
+     - N/A
+     - (DEPRECATED) Specifies a list of comma-separated paths that Darshan will
+       not instrument at runtime (in addition to Darshan's default exclusion
+       list). Replaced by the NAME_EXCLUDE setting.
+   * - DARSHAN_LDMS_ENABLE=
+     - N/A
+     - Switch to initialize LDMS. If not set, no runtime I/O data will be
+       collected. This only needs to be exported (i.e., setting it to a
+       value/string is optional).
+   * - DARSHAN_LDMS_ENABLE_=
+     - N/A
+     - Specifies the module data that will be collected during runtime using
+       the LDMS streams API. These only need to be exported (i.e., setting them
+       to a value/string is optional).
+
+.. note::
+   - Config file settings must be specified one per line, with settings and
+     their parameters separated by any whitespace.
+   - Settings that take a comma-separated list of modules can use "*" as a
+     wildcard to represent all modules.
+   - Some config file settings (specifically, ``MOD_DISABLE``/``ENABLE``,
+     ``APP_EXCLUDE``/``INCLUDE``, ``RANK_EXCLUDE``/``INCLUDE``,
+     ``NAME_EXCLUDE``/``INCLUDE``, and ``MAX_RECORDS``) may be repeated multiple
+     times rather than providing comma-separated values, to ease readability.
+   - Improperly formatted config settings are ignored, with Darshan falling
+     back to its default configuration.
+   - All settings that take regular expressions as input expect them to be
+     formatted according to the POSIX ``regex.h`` interface -- refer to the
+     `regex.h manpage `_
+     for more details on regex syntax.
+
+
+Example Darshan configuration
+----------------------------------------
+
+An example configuration file with annotations is given below (note that
+comments are allowed by prefixing a line with ``#``):
+
+  ..
code-block:: bash
+
+     # enable DXT modules, which are off by default
+     MOD_ENABLE DXT_POSIX,DXT_MPIIO
+
+     # allocate 4096 file records for POSIX and MPI-IO modules
+     # (darshan only allocates 1024 per-module by default)
+     MAX_RECORDS 4096 POSIX,MPI-IO
+
+     # the '*' specifier can be used to apply settings for all modules
+     # in this case, we want all modules to ignore record names
+     # prefixed with "/home" (i.e., stored in our home directory),
+     # with a superseding inclusion for files with a ".out" suffix
+     NAME_EXCLUDE ^/home *
+     NAME_INCLUDE .out$ *
+
+     # bump up Darshan's default memory usage to 8 MiB
+     MODMEM 8
+
+     # avoid generating logs for git and ls binaries
+     APP_EXCLUDE git,ls
+
+     # exclude instrumentation for all ranks first
+     RANK_EXCLUDE 0:
+     # then selectively re-include ranks 0:3 and 12:15
+     RANK_INCLUDE 0:3
+     RANK_INCLUDE 12:15
+
+     # only retain DXT traces for files that were accessed
+     # using small I/O ops 20+% of the time
+     DXT_SMALL_IO_TRIGGER .2
+
+
+This configuration could be similarly set using environment variables, though
+note that both the ``MAX_RECORDS`` and ``NAME_EXCLUDE``/``INCLUDE`` settings do
+not have environment variable counterparts:
+
+  .. code-block:: bash
+
+     export DARSHAN_MOD_ENABLE="DXT_POSIX,DXT_MPIIO"
+     export DARSHAN_MODMEM=8
+     export DARSHAN_APP_EXCLUDE="git,ls"
+     export DARSHAN_RANK_EXCLUDE="0:"
+     export DARSHAN_RANK_INCLUDE="0:3,12:15"
+     export DARSHAN_DXT_SMALL_IO_TRIGGER=.2
+
+
+Darshan installation recipes
+==============================================================================
+
+The following recipes provide examples for prominent HPC systems. These are
+intended to be used as a starting point. You will most likely have to adjust
+paths and options to reflect the specifics of your system.
+
+Cray platforms (XE, XC, or similar)
+----------------------------------------
+
+This section describes how to compile and install Darshan, as well as how to
+use a software module to enable and disable Darshan instrumentation on Cray
+systems.
+
+Building and installing Darshan
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Please set your environment to use the GNU programming environment before
+configuring or compiling Darshan. Although Darshan can be built with a variety
+of compilers, the GNU compiler is recommended because it will produce a Darshan
+library that is interoperable with the widest range of compilers and linkers.
+On most Cray systems you can enable the GNU programming environment with a
+command similar to ``module swap PrgEnv-intel PrgEnv-gnu``. Please see your
+site documentation for information about how to switch programming
+environments.
+
+The following example shows how to configure and build Darshan on a Cray system
+using the GNU programming environment. Adjust the ``--with-log-path`` and
+``--prefix`` arguments to point to the desired log file path and installation
+path, respectively.
+
+  .. code-block:: bash
+
+     module swap PrgEnv-pgi PrgEnv-gnu
+     ./configure \
+        --with-log-path=/shared-file-system/darshan-logs \
+        --prefix=/soft/darshan-3.3.0 \
+        --with-jobid-env=SLURM_JOBID \
+        --with-username-env=SLURM_JOB_USER \
+        CC=cc
+     make install
+     module swap PrgEnv-gnu PrgEnv-pgi
+
+
+Rationale
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+.. note::
+   The job ID is set to ``SLURM_JOBID`` for use with a Slurm-based scheduler.
+   An additional environment variable for querying a job's username
+   (``SLURM_JOB_USER``) is provided as a fallback in case the default
+   environment variable ``LOGNAME`` is not properly set (e.g., as is the case
+   when using Slurm's ``--export`` option to ``srun``). The ``CC`` variable
+   is configured to point to the standard MPI compiler.
+
+If instrumentation of the HDF5 library is desired, additionally load an
+acceptable HDF5 module (e.g., ``module load cray-hdf5-parallel``) prior to
+building and use the ``--enable-hdf5-mod`` configure argument. We additionally
+recommend that you modify Darshan's generated Cray software module to include a
+dependency on the HDF5 software module used -- this is necessary to ensure
+Darshan library dependencies are satisfied at application link and run time.
+
+  .. code-block:: bash
+
+     prereq cray-hdf5-parallel
+
+
+Note that the Darshan-enabled Cray compiler wrappers will always prefer
+user-supplied HDF5 libraries over the library used to build Darshan. However,
+due to ABI changes in the HDF5 library, the two HDF5 libraries used must be
+compatible. Specifically, the HDF5 library versions need to be either both
+greater than or equal to 1.10 or both less than 1.10. If users use an HDF5
+version that is incompatible with Darshan, either link or runtime errors will
+occur and the user will have to switch HDF5 versions or unload the Darshan
+module.
+
+Optional RDTSCP timers for Theta
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.. note::
+   Darshan's default mechanism (``clock_gettime()``) for retrieving timing
+   information may introduce more overhead than expected for statically linked
+   executables on some platforms. The Theta system at the ALCF (as of July
+   2021) is a notable example. It uses static linking by default (which
+   prevents the use of the standard vDSO optimization for ``clock_gettime()``
+   calls), and its CPU architecture exhibits relatively high system call
+   overhead. For Theta and other similar platforms you can explicitly request
+   that Darshan use the ``RDTSCP`` instruction in place of ``clock_gettime()``
+   for timing purposes. ``RDTSCP`` is a non-portable, Intel-specific
+   instruction. It must be enabled explicitly at configure time, and the base
+   clock frequency of the compute node CPU must be specified.
+
+   This mechanism can be activated on Theta by adding
+   ``--enable-rdtscp=1300000000`` to the configure command line (the KNL CPUs
+   on Theta have a base frequency of 1.3 GHz).
+
+   Note that timer overhead is unlikely to be a factor in overall performance
+   unless the application has an edge case workload with frequent sequential
+   I/O operations, such as small I/O accesses to cached data on a single
+   process.
+
+As in any Darshan installation, the darshan-mk-log-dirs.pl script can then be
+used to create the appropriate directory hierarchy for storing Darshan log
+files in the ``--with-log-path`` directory.
+
+Note that Darshan is not currently capable of detecting the stripe size (and
+therefore the Darshan FILE_ALIGNMENT value) on Lustre file systems. If a
+Lustre file system is detected, then Darshan assumes an optimal file alignment
+of 1 MiB.
+
+Enabling Darshan instrumentation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Darshan will automatically install example software module files in the
+following locations (depending on how you specified the ``--prefix`` option in
+the previous section):
+
+  .. code-block:: bash
+
+     /soft/darshan-3.3.0/share/craype-1.x/modulefiles/darshan
+     /soft/darshan-3.3.0/share/craype-2.x/modulefiles/darshan
+
+
+Select the one that is appropriate for your Cray programming environment (see
+the version number of the craype module in ``module list``).
+
+If you are using the Cray Programming Environment version 1.x, then you must
+modify the corresponding modulefile before using it. Please see the comments
+at the end of the file and choose an environment variable method that is
+appropriate for your system. If this is not done, then the compiler may fail
+to link some applications when the Darshan module is loaded.
+
+If you are using the Cray Programming Environment version 2.x then you can
+likely use the modulefile as is. Note that it pulls most of its configuration
+from the lib/pkgconfig/darshan-runtime.pc file installed with Darshan.
+
+The modulefile that you select can be copied to a system location, or the
+install location can be added to your local module path with the following
+command:
+
+  .. code-block:: bash
+
+     module use /soft/darshan-3.3.0/share/craype-/modulefiles
+
+
+From this point, Darshan instrumentation can be enabled for all future
+application compilations by running "module load darshan".
+
+Linux clusters using MPICH
+----------------------------------------
+
+Most MPICH installations produce dynamic executables by default. To configure
+Darshan in this environment you can use the following example. We recommend
+using mpicc with GNU compilers to compile Darshan.
+
+  .. code-block:: bash
+
+     ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID CC=mpicc
+
+
+The ``darshan-gen-*`` scripts described earlier in this document can be used to
+create variants of the standard mpicc/mpicxx/mpif77/mpif90 scripts that are
+Darshan enabled. These scripts will work correctly for both dynamic and
+statically linked executables.
+
+Linux clusters using Intel MPI
+----------------------------------------
+
+Most Intel MPI installations produce dynamic executables by default. To
+configure Darshan in this environment you can use the following example:
+
+.. code-block:: bash
+
+   ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID CC=mpicc
+
+
+Rationale
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. note::
+   There is nothing unusual in this configuration except that you should use
+   the underlying GNU compilers rather than the Intel ICC compilers to compile
+   Darshan itself.
+
+You can enable Darshan instrumentation at compile time by adding
+``darshan-config --dyn-ld-flags`` options to your linker command line.
+
+Alternatively, you can use the ``LD_PRELOAD`` runtime instrumentation method to
+instrument executables that have already been compiled.
+
+Linux clusters using Open MPI
+----------------------------------------
+
+Follow the generic instructions provided at the top of this document for
+compilation, and make sure that the ``CC`` used for compilation is based on a
+GNU compiler.
+
+You can enable Darshan instrumentation at compile time by adding
+``darshan-config --dyn-ld-flags`` options to your linker command line.
+
+Alternatively, you can use the ``LD_PRELOAD`` runtime instrumentation method to
+instrument executables that have already been compiled.
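+
+As a minimal sketch of the compile-time approach described above (the program
+name here is hypothetical), the ``darshan-config`` output can be expanded
+directly on the link command line:
+
+  .. code-block:: bash
+
+     # hypothetical example: link options expand before the MPI libraries
+     # that the compiler wrapper appends, matching the ordering guidance above
+     mpicc -o mpi-io-test mpi-io-test.c $(darshan-config --dyn-ld-flags)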
+
+Debugging
+==============================================================================
+
+No log file
+----------------------------------------
+
+In cases where Darshan is not generating a log file for an application, some
+common things to check are:
+
+* Make sure you are looking in the correct place for logs. Confirm the
+  location with the ``darshan-config --log-path`` command.
+
+* Check stderr to ensure Darshan isn't indicating any internal errors (e.g.,
+  an invalid log file path).
+
+For statically linked executables:
+
+* Ensure that Darshan symbols are present in the underlying executable by
+  running ``nm`` on it:
+
+  .. code-block:: bash
+
+     > nm test | grep darshan
+     0000000000772260 b darshan_core
+     0000000000404440 t darshan_core_cleanup
+     00000000004049b0 T darshan_core_initialize
+     000000000076b660 d darshan_core_mutex
+     00000000004070a0 T darshan_core_register_module
+
+For dynamically linked executables:
+
+* Ensure that the Darshan library is present in the list of shared libraries
+  to be used by the application, and that it appears before the MPI library:
+
+  .. code-block:: bash
+
+     > ldd mpi-io-test
+     linux-vdso.so.1 (0x00007ffd83925000)
+     libdarshan.so => /home/carns/working/install/lib/libdarshan.so (0x00007f0f4a7a6000)
+     libmpi.so.12 => /home/carns/working/src/spack/opt/spack/linux-ubuntu19.10-skylake/gcc-9.2.1/mpich-3.3.2-h3dybprufq7i5kt4hcyfoyihnrnbaogk/lib/libmpi.so.12 (0x00007f0f4a44f000)
+     libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f0f4a241000)
+     ...
+
+
+General:
+
+* Ensure that the linker is correctly linking in Darshan's runtime libraries:
+
+  - A common mistake is to explicitly link in the underlying MPI libraries
+    (e.g., ``-lmpich`` or ``-lmpichf90``) in the link command, which can
+    interfere with Darshan's instrumentation
+
+    * These libraries are usually linked in automatically by the compiler
+
+    * MPICH's ``mpicc`` compiler's ``-show`` flag can be used to examine the
+      invoked link command, for instance
+
+  - The linker's ``-y`` option can be used to verify that Darshan is properly
+    intercepting the MPI_Init function (e.g., by setting
+    ``CFLAGS='-Wl,-yMPI_Init'``), which it uses to initialize its runtime
+    structures
+
+    .. code-block:: bash
+
+       /usr/common/software/darshan/3.0.0-pre3/lib/libdarshan.a(darshan-core-init-finalize.o): definition of MPI_Init
+
diff --git a/darshan-test/RELEASE-CHECKLIST.txt b/darshan-test/RELEASE-CHECKLIST.txt
index 86fc62cbc..e2cdc209b 100644
--- a/darshan-test/RELEASE-CHECKLIST.txt
+++ b/darshan-test/RELEASE-CHECKLIST.txt
@@ -45,6 +45,9 @@ Notes on how to release a new version of Darshan
 12) generate web documentation from asciidoc by running make in
     darshan-util/doc/ and darshan-runtime/doc/ directories
     - upload .html files to CELS server (/nfs/pub_html/gce/projects/darshan/docs)
+    - This step is only applicable to releases 3.4.7 and prior. Documentation
+      for later releases has been migrated to readthedocs.io and is
+      automatically regenerated every time a new commit is pushed.
13) update download page on darshan web site: www.mcs.anl.gov/darshan/download 14) update darshan-runtime and darshan-util Spack packages with new release - consider whether new variants should be exposed based on new features diff --git a/darshan-util/doc/darshan-util.rst b/darshan-util/doc/darshan-util.rst new file mode 100644 index 000000000..2ed0bc347 --- /dev/null +++ b/darshan-util/doc/darshan-util.rst @@ -0,0 +1,1443 @@ +################################### +Darshan-util installation and usage +################################### + +********************************** +Introduction +********************************** + +This document describes darshan-util, a collection of tools for parsing and +summarizing log files produced by Darshan instrumentation. The darshan-util +package can be installed and used on any system regardless of where the logs +were originally generated. Darshan log files are platform-independent. + +More information about Darshan can be found at the +`Darshan web site `_. + +********************************** +Requirements +********************************** + +Darshan-util has only been tested in Linux environments, but will likely +work in other Unix-like environments as well. + +**Hard requirements** + +* C compiler +* zlib development headers and library (zlib-dev or similar) + +**Optional requirements** + +* libbz2 development headers and library (libbz2-dev or similar) +* Perl +* pdflatex +* gnuplot 4.2 or later +* epstopdf + +********************************** +Compilation and installation +********************************** + +**Configure and build example** + +.. code-block:: bash + + tar -xvzf darshan-.tar.gz + cd darshan-/ + ./prepare.sh + cd darshan-util/ + ./configure + make + make install + +The darshan-util package is intended to be used on a login node or workstation. +For most use cases this means that you should either leave ``CC`` to its +default setting or specify a local compiler. This is in contrast to the +darshan-runtime documentation, which suggests setting ``CC`` to mpicc because +the runtime library will be used in the compute node environment. + +You can specify ``--prefix`` to install darshan-util in a specific location +(such as in your home directory for non-root installations). See ``./configure +--help`` for additional optional arguments, including how to specify +alternative paths for zlib and libbz2 development libraries. darshan-util also +supports VPATH or "out-of-tree" builds if you prefer that method of +compilation. + +The ``--enable-shared`` argument to configure can be used to enable compilation +of a shared version of the darshan-util library. + +The ``--enable-apmpi-mod`` and ``--enable-apxc-mod`` configure arguments must +be specified to build darshan-util with support for AutoPerf APMPI and APXC +modules, respectively. + +.. note:: + AutoPerf log analysis code is provided as Git submodules to Darshan's main + repository, so if building Darshan source that has been cloned from Git, it + is necessary to first retrieve the AutoPerf submodules by running the + following command: + + .. code-block:: bash + + git submodule update --init + + +********************************** +Analyzing log files +********************************** + +Each time a darshan-instrumented application is executed, it will generate a +single log file summarizing the I/O activity from that application. 
See the +darshan-runtime documentation for more details, but the log file for a given +application will likely be found in a centralized directory, with the path and +log file name in the following format: + +.. code-block:: bash + + ///_____.darshan + + +This is a binary format file that summarizes I/O activity. As of version 2.0.0 +of Darshan, this file is portable and does not have to be analyzed on the same +system that executed the job. Also, note that Darshan logs generated with +Darshan versions preceding version 3.0 will have the extension ``darshan.gz`` +(or ``darshan.bz2`` if compressed using bzip2 format). These logs are not +compatible with Darshan 3.0 utilities, and thus must be analyzed using an +appropriate version (2.x) of the darshan-util package. + +darshan-job-summary.pl +======================================= + +You can generate a graphical summary of the I/O activity for a job by using the +``darshan-job-summary.pl`` graphical summary tool as in the following example: + +.. code-block:: bash + + darshan-job-summary.pl carns_my-app_id114525_7-27-58921_19.darshan.gz + +This utility requires Perl, pdflatex, epstopdf, and gnuplot in order to +generate its summary. By default, the output is written to a multi-page pdf +file based on the name of the input file (in this case it would produce a +``carns_my-app_id114525_7-27-58921_19.pdf`` output file). You can also +manually specify the name of the output file using the ``--output`` argument. + +An example of the output produced by ``darshan-job-summary.pl`` can be found +`HERE `_. + +.. note:: + The darshan-job-summary tool depends on a few LaTeX packages that may not be + available by default on all systems, including: lastpage, subfigure, and + threeparttable. These packages can be found and installed using your + system's package manager. For instance, the packages can be installed on + Debian or Ubuntu systems as follows: ``apt-get install texlive-latex-extra`` + +darshan-summary-per-file.sh +======================================= + +This utility is similar to darshan-job-summary.pl, except that it produces a +separate pdf summary for every file accessed by an application. It can be +executed as follows: + +.. code-block:: bash + + darshan-summary-per-file.sh carns_my-app_id114525_7-27-58921_19.darshan.gz output-dir + +The second argument is the name of a directory (to be created) that will +contain the collection of pdf files. Note that this utility probably is not +appropriate if your application opens a large number of files. + +You can produce a summary for a specific file of interest with the following +commands: + +.. code-block:: bash + + darshan-convert --file HASH carns_my-app_id114525_7-27-58921_19.darshan.gz interesting_file.darshan.gz + darshan-job-summary.pl interesting_file.darshan.gz + +The "HASH" argument is the hash of a file name as reported by darshan-parser. +The ``interesting_file.darshan.gz`` file produced by darshan-convert is like a +normal Darshan log file, but it will only contain instrumentation for the +specified file. + +darshan-parser +======================================= + +You can use the ``darshan-parser`` command line utility to obtain a complete, +human-readable, text-format dump of all information contained in a log file. +The following example converts the contents of the log file into a fully +expanded text file: + +.. 
+
+   darshan-parser carns_my-app_id114525_7-27-58921_19.darshan.gz > ~/job-characterization.txt
+
+The format of this output is described in the following section.
+
+Guide to darshan-parser output
+=======================================
+
+The beginning of the output from darshan-parser displays a summary of overall
+information about the job. Additional job-level summary information can also be
+produced using the ``--perf``, ``--file``, or ``--total`` command line options.
+See Section :ref:`Sec Additional summary output` for more information about
+those options.
+
+The following table defines the meaning of each line in the default header
+section of the output:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30, 70
+   :class: longtable
+   :align: left
+
+   * - output line
+     - description
+   * - "# darshan log version"
+     - internal version number of the Darshan log file
+   * - "# exe"
+     - name of the executable that generated the log file
+   * - "# uid"
+     - user id that the job ran as
+   * - "# jobid"
+     - job id from the scheduler
+   * - "# start_time"
+     - start time of the job, in seconds since the epoch
+   * - "# start_time_asci"
+     - start time of the job, in human readable format
+   * - "# end_time"
+     - end time of the job, in seconds since the epoch
+   * - "# end_time_asci"
+     - end time of the job, in human readable format
+   * - "# nprocs"
+     - number of MPI processes
+   * - "# run time"
+     - run time of the job in seconds
+
+Log file region sizes
+-------------------------------------
+
+The next portion of the parser output displays the size of each region
+contained within the given log file. Each log file will contain the following
+regions:
+
+* header - constant-sized uncompressed header providing data on how to properly
+  access the log
+* job data - job-level metadata (e.g., start/end time and exe name) for the log
+* record table - a table mapping Darshan record identifiers to full file name paths
+* module data - each module (e.g., POSIX, MPI-IO, etc.) stores its I/O
+  characterization data in distinct regions of the log
+
+All regions of the log file are compressed (in libz or bzip2 format), except
+the header.
+
+Table of mounted file systems
+-------------------------------------
+
+The next portion of the output shows a table of all general purpose file
+systems that were mounted while the job was running. Each line uses the
+following format:
+
+.. code-block:: bash
+
+   <mount point> <fs type>
+
+Format of I/O characterization fields
+-------------------------------------
+
+The remainder of the output will show characteristics for each file that was
+opened by the application. Each line uses the following format:
+
+.. code-block:: bash
+
+   <module> <rank> <record id> <counter name> <counter value> <file name> <mount pt> <fs type>
+
+The ``<module>`` column specifies the module responsible for recording this
+piece of I/O characterization data. The ``<rank>`` column indicates the rank of
+the process that opened the file. A rank value of -1 indicates that all
+processes opened the same file. In that case, the value of the counter
+represents an aggregate across all processes. The ``<record id>`` is a 64-bit
+hash of the file path/name that was opened. It is used as a way to uniquely
+differentiate each file. The ``<counter name>`` is the name of the statistic
+that the line is reporting, while the ``<counter value>`` is the value of that
+statistic. A value of -1 indicates that Darshan was unable to collect
+statistics for that particular counter, and the value should be ignored. The
+``<file name>`` field shows the complete file name the record corresponds to.
+The ``<mount pt>`` is the mount point of the file system that this file
+belongs to and ``<fs type>`` is the type of that file system.
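+
+For example, a single record line following this format might look like the
+sketch below (the specific values shown are purely illustrative):
+
+.. code-block::
+
+   POSIX -1 16457598720760448348 POSIX_OPENS 16 /tmp/test/testFile / ext4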
+
+I/O characterization fields
+-------------------------------------
+
+The following tables show a list of integer statistics that are available for
+each of Darshan's current instrumentation modules, along with a description of
+each. Unless otherwise noted, counters include all variants of the call in
+question, such as ``read()``, ``pread()``, and ``readv()`` for POSIX_READS.
+
+**Table 1. POSIX module**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30, 70
+   :class: longtable
+   :align: left
+
+   * - counter name
+     - description
+   * - POSIX_OPENS
+     - Count of how many times the file was opened (INCLUDING ``fileno`` and ``dup`` operations)
+   * - POSIX_FILENOS
+     - Count of POSIX fileno operations
+   * - POSIX_DUPS
+     - Count of POSIX dup operations
+   * - POSIX_READS
+     - Count of POSIX read operations
+   * - POSIX_WRITES
+     - Count of POSIX write operations
+   * - POSIX_SEEKS
+     - Count of POSIX seek operations
+   * - POSIX_STATS
+     - Count of POSIX stat operations
+   * - POSIX_MMAPS
+     - Count of POSIX mmap operations
+   * - POSIX_FSYNCS
+     - Count of POSIX fsync operations
+   * - POSIX_FDSYNCS
+     - Count of POSIX fdatasync operations
+   * - POSIX_RENAME_SOURCES
+     - Number of times this file was the source of a rename operation
+   * - POSIX_RENAME_TARGETS
+     - Number of times this file was the target of a rename operation
+   * - POSIX_RENAMED_FROM
+     - If this file was a rename target, the Darshan record ID of the first rename source
+   * - POSIX_MODE
+     - Mode that the file was last opened in
+   * - POSIX_BYTES_READ
+     - Total number of bytes that were read from the file
+   * - POSIX_BYTES_WRITTEN
+     - Total number of bytes written to the file
+   * - POSIX_MAX_BYTE_READ
+     - Highest offset in the file that was read
+   * - POSIX_MAX_BYTE_WRITTEN
+     - Highest offset in the file that was written
+   * - POSIX_CONSEC_READS
+     - Number of consecutive reads (that were immediately adjacent to the previous access)
+   * - POSIX_CONSEC_WRITES
+     - Number of consecutive writes (that were immediately adjacent to the previous access)
+   * - POSIX_SEQ_READS
+     - Number of sequential reads (at a higher offset than where the previous access left off)
+   * - POSIX_SEQ_WRITES
+     - Number of sequential writes (at a higher offset than where the previous access left off)
+   * - POSIX_RW_SWITCHES
+     - Number of times that access toggled between read and write in consecutive operations
+   * - POSIX_MEM_NOT_ALIGNED
+     - Number of times that a read or write was not aligned in memory
+   * - POSIX_MEM_ALIGNMENT
+     - Memory alignment value (chosen at compile time)
+   * - POSIX_FILE_NOT_ALIGNED
+     - Number of times that a read or write was not aligned in file
+   * - POSIX_FILE_ALIGNMENT
+     - File alignment value. This value is detected at runtime on most file systems. On Lustre, however, Darshan assumes a default value of 1 MiB for optimal file alignment.
+ * - POSIX_MAX_READ_TIME_SIZE + - Size of the slowest POSIX read operation + * - POSIX_MAX_WRITE_TIME_SIZE + - Size of the slowest POSIX write operation + * - POSIX_SIZE_READ_* + - Histogram of read access sizes at POSIX level + * - POSIX_SIZE_WRITE_* + - Histogram of write access sizes at POSIX level + * - POSIX_STRIDE[1-4]_STRIDE + - Size of 4 most common stride patterns + * - POSIX_STRIDE[1-4]_COUNT + - Count of 4 most common stride patterns + * - POSIX_ACCESS[1-4]_ACCESS + - 4 most common POSIX access sizes + * - POSIX_ACCESS[1-4]_COUNT + - Count of 4 most common POSIX access sizes + * - POSIX_FASTEST_RANK + - The MPI rank with smallest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_SLOWEST_RANK + - The MPI rank with largest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_F_*_START_TIMESTAMP + - Timestamp that the first POSIX file open/read/write/close operation began + * - POSIX_F_*_END_TIMESTAMP + - Timestamp that the last POSIX file open/read/write/close operation ended + * - POSIX_F_READ_TIME + - Cumulative time spent reading at the POSIX level + * - POSIX_F_WRITE_TIME + - Cumulative time spent in write, fsync, and fdatasync at the POSIX level + * - POSIX_F_META_TIME + - Cumulative time spent in open, close, stat, and seek at the POSIX level + * - POSIX_F_MAX_READ_TIME + - Duration of the slowest individual POSIX read operation + * - POSIX_F_MAX_WRITE_TIME + - Duration of the slowest individual POSIX write operation + * - POSIX_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_F_VARIANCE_RANK_TIME + - The population variance for POSIX I/O time of all the ranks + * - POSIX_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks + +**Table 2. MPI-IO module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - MPIIO_INDEP_OPENS + - Count of non-collective MPI opens + * - MPIIO_COLL_OPENS + - Count of collective MPI opens + * - MPIIO_INDEP_READS + - Count of non-collective MPI reads + * - MPIIO_INDEP_WRITES + - Count of non-collective MPI writes + * - MPIIO_COLL_READS + - Count of collective MPI reads + * - MPIIO_COLL_WRITES + - Count of collective MPI writes + * - MPIIO_SPLIT_READS + - Count of MPI split collective reads + * - MPIIO_SPLIT_WRITES + - Count of MPI split collective writes + * - MPIIO_NB_READS + - Count of MPI non-blocking reads + * - MPIIO_NB_WRITES + - Count of MPI non-blocking writes + * - MPIIO_SYNCS + - Count of MPI file syncs + * - MPIIO_HINTS + - Count of MPI file hints used + * - MPIIO_VIEWS + - Count of MPI file views used + * - MPIIO_MODE + - MPI mode that the file was last opened in + * - MPIIO_BYTES_READ + - Total number of bytes that were read from the file at MPI level + * - MPIIO_BYTES_WRITTEN + - Total number of bytes written to the file at MPI level + * - MPIIO_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive MPI operations + * - MPIIO_MAX_READ_TIME_SIZE + - Size of the slowest MPI read operation + * - MPIIO_MAX_WRITE_TIME_SIZE + - Size of the slowest MPI write operation + * - MPIIO_SIZE_READ_AGG_* + - Histogram of total size of read accesses at MPI level, even if access is noncontiguous + * - MPIIO_SIZE_WRITE_AGG_* + - Histogram of total size of write accesses at MPI level, even if access is noncontiguous + * - MPIIO_ACCESS[1-4]_ACCESS + - 4 most common MPI aggregate access sizes + * - MPIIO_ACCESS[1-4]_COUNT + - Count of 4 most common MPI aggregate access sizes + * - MPIIO_FASTEST_RANK + - The MPI rank with smallest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_SLOWEST_RANK + - The MPI rank with largest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_F_*_START_TIMESTAMP + - Timestamp that the first MPIIO file open/read/write/close operation began + * - MPIIO_F_*_END_TIMESTAMP + - Timestamp that the last MPIIO file open/read/write/close operation ended + * - MPIIO_F_READ_TIME + - Cumulative time spent reading at MPI level + * - MPIIO_F_WRITE_TIME + - Cumulative time spent write and sync at MPI level + * - MPIIO_F_META_TIME + - Cumulative time spent in open and close at MPI level + * - MPIIO_F_MAX_READ_TIME + - Duration of the slowest individual MPI read operation + * - MPIIO_F_MAX_WRITE_TIME + - Duration of the slowest individual MPI write operation + * - MPIIO_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_F_VARIANCE_RANK_TIME + - The population variance for MPI I/O time of all the ranks + * - MPIIO_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks at MPI level + +**Table 3. STDIO module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - STDIO_OPENS + - Count of stdio file open operations (INCLUDING ``fdopen`` operations) + * - STDIO_FDOPENS + - Count of stdio fdopen operations + * - STDIO_READS + - Count of stdio read operations + * - STDIO_WRITES + - Count of stdio write operations + * - STDIO_SEEKS + - Count of stdio seek operations + * - STDIO_FLUSHES + - Count of stdio flush operations + * - STDIO_BYTES_WRITTEN + - Total number of bytes written to the file using stdio operations + * - STDIO_BYTES_READ + - Total number of bytes read from the file using stdio operations + * - STDIO_MAX_BYTE_READ + - Highest offset in the file that was read + * - STDIO_MAX_BYTE_WRITTEN + - Highest offset in the file that was written + * - STDIO_FASTEST_RANK + - The MPI rank with the smallest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with the smallest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_SLOWEST_RANK + - The MPI rank with the largest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_F_META_TIME + - Cumulative time spent in stdio open/close/seek operations + * - STDIO_F_WRITE_TIME + - Cumulative time spent in stdio write operations + * - STDIO_F_READ_TIME + - Cumulative time spent in stdio read operations + * - STDIO_F_*_START_TIMESTAMP + - Timestamp that the first stdio file open/read/write/close operation began + * - STDIO_F_*_END_TIMESTAMP + - Timestamp that the last stdio file open/read/write/close operation ended + * - STDIO_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest time spent in stdio I/O (cumulative read, write, and meta times) + * - STDIO_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest time spent in stdio I/O (cumulative read, write, and meta times) + * - STDIO_F_VARIANCE_RANK_TIME + - The population variance for stdio I/O time of all the ranks + * - STDIO_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks + +**Table 4. H5F module** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - H5F_OPENS + - Count of H5F opens + * - H5F_FLUSHES + - Count of H5F flushes + * - H5F_USE_MPIIO + - Flag indicating whether MPI-IO is used for accessing the file + * - H5F_F_*_START_TIMESTAMP + - Timestamp that the first H5F open/close operation began + * - H5F_F_*_END_TIMESTAMP + - Timestamp that the last H5F open/close operation ended + * - H5F_F_META_TIME + - Cumulative time spent in H5F open/close/flush operations + +**Table 5. H5D module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - H5D_OPENS + - Count of H5D opens + * - H5D_READS + - Count of H5D reads + * - H5D_WRITES + - Count of H5D writes + * - H5D_FLUSHES + - Count of H5D flushes + * - H5D_BYTES_READ + - Total number of bytes read from the dataset using H5D + * - H5D_BYTES_WRITTEN + - Total number of bytes written to the dataset using H5D + * - H5D_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive H5D operations + * - H5D_REGULAR_HYPERSLAB_SELECTS + - Number of H5D read/write ops with regular hyperslab selections + * - H5D_IRREGULAR_HYPERSLAB_SELECTS + - Number of H5D read/write ops with irregular hyperslab selections + * - H5D_POINT_SELECTS + - Number of read/write ops with point selections + * - H5D_MAX_READ_TIME_SIZE + - Size of the slowest H5D read operation + * - H5D_MAX_WRITE_TIME_SIZE + - Size of the slowest H5D write operation + * - H5D_SIZE_READ_AGG_* + - Histogram of total size of read accesses at H5D level + * - H5D_SIZE_WRITE_AGG_* + - Histogram of total size of write accesses at H5D level + * - H5D_ACCESS[1-4]_ACCESS + - Sizes of 4 most common H5D accesses + * - H5D_ACCESS[1-4]_LENGTH_D[1-5] + - Access lengths along last 5 dimensions (D5 is fastest changing) of 4 most common H5D accesses + * - H5D_ACCESS[1-4]_STRIDE_D[1-5] + - Access strides along last 5 dimensions (D5 is fastest changing) of 4 most common H5D accesses + * - H5D_ACCESS[1-4]_COUNT + - Count of 4 most common H5D aggregate access sizes + * - H5D_DATASPACE_NDIMS + - Number of dimensions in dataset's dataspace + * - H5D_DATASPACE_NPOINTS + - Number of points in dataset's dataspace + * - H5D_DATATYPE_SIZE + - Total size of dataset elements in bytes + * - H5D_CHUNK_SIZE_D[1-5] + - Chunk sizes in the last 5 dimensions of the dataset (D5 is the fastest changing dimension) + * - H5D_USE_MPIIO_COLLECTIVE + - Flag indicating use of MPI-IO collectives + * - H5D_USE_DEPRECATED + - Flag indicating whether deprecated create/open calls were used + * - H5D_FASTEST_RANK + - The MPI rank with smallest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_SLOWEST_RANK + - The MPI rank with largest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_F_*_START_TIMESTAMP + - Timestamp that the first H5D open/read/write/close operation began + * - H5D_F_*_END_TIMESTAMP + - Timestamp that the last H5D open/read/write/close operation ended + * - H5D_F_READ_TIME + - Cumulative time spent reading at H5D level + * - H5D_F_WRITE_TIME + - Cumulative time spent writing at H5D level + * - H5D_F_META_TIME + - Cumulative time spent in open/close/flush at H5D level + * - H5D_F_MAX_READ_TIME + - Duration of the slowest individual H5D read operation + * - H5D_F_MAX_WRITE_TIME + - Duration of the slowest individual H5D write operation + * - H5D_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_F_VARIANCE_RANK_TIME + - 
The population variance for H5D I/O time of all the ranks + * - H5D_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks at H5D level + * - H5D_FILE_REC_ID + - Darshan file record ID of the file the dataset belongs to + +**Table 6. PNETCDF_FILE module** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - PNETCDF_FILE_CREATES + - PnetCDF file create operation counts + * - PNETCDF_FILE_OPENS + - PnetCDF file open operation counts + * - PNETCDF_FILE_REDEFS + - PnetCDF file re-define operation counts + * - PNETCDF_FILE_INDEP_WAITS + - PnetCDF independent file wait operation counts (for flushing non-blocking I/O) + * - PNETCDF_FILE_COLL_WAITS + - PnetCDF collective file wait operation counts (for flushing non-blocking I/O) + * - PNETCDF_FILE_SYNCS + - PnetCDF file sync operation counts + * - PNETCDF_FILE_BYTES_READ + - PnetCDF total bytes read for all file variables (includes internal library metadata I/O) + * - PNETCDF_FILE_BYTES_WRITTEN + - PnetCDF total bytes written for all file variables (includes internal library metadata I/O) + * - PNETCDF_FILE_WAIT_FAILURES + - PnetCDF file wait operation failure counts (failures indicate that variable-level counters are unreliable) + * - PNETCDF_FILE_F_*_START_TIMESTAMP + - Timestamp that the first PNETCDF file open/close/wait operation began + * - PNETCDF_FILE_F_*_END_TIMESTAMP + - Timestamp that the last PNETCDF file open/close/wait operation ended + * - PNETCDF_FILE_F_META_TIME + - Cumulative time spent in file open/close/sync/redef/enddef metadata operations + * - PNETCDF_FILE_F_WAIT_TIME + - Cumulative time spent in file wait operations (for flushing non-blocking I/O) + +**Table 7. PNETCDF_VAR module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - PNETCDF_VAR_OPENS + - PnetCDF variable define/inquire operation counts + * - PNETCDF_VAR_INDEP_READS + - PnetCDF variable independent read operation counts + * - PNETCDF_VAR_INDEP_WRITES + - PnetCDF variable independent write operation counts + * - PNETCDF_VAR_COLL_READS + - PnetCDF variable collective read operation counts + * - PNETCDF_VAR_COLL_WRITES + - PnetCDF variable collective write operation counts + * - PNETCDF_VAR_NB_READS + - PnetCDF variable nonblocking read operation counts + * - PNETCDF_VAR_NB_WRITES + - PnetCDF variable nonblocking write operation counts + * - PNETCDF_VAR_BYTES_* + - total bytes read and written at PnetCDF variable layer (not including internal library metadata I/O) + * - PNETCDF_VAR_RW_SWITCHES + - number of times access alternated between read and write + * - PNETCDF_VAR_PUT_VAR* + - number of calls to different ncmpi_put_var* APIs (var, var1, vara, vars, varm, varn, vard) + * - PNETCDF_VAR_GET_VAR* + - number of calls to different ncmpi_get_var* APIs (var, var1, vara, vars, varm, varn, vard) + * - PNETCDF_VAR_IPUT_VAR* + - number of calls to different ncmpi_iput_var* APIs (var, var1, vara, vars, varm, varn) + * - PNETCDF_VAR_IGET_VAR* + - number of calls to different ncmpi_iget_var* APIs (var, var1, vara, vars, varm, varn) + * - PNETCDF_VAR_BPUT_VAR* + - number of calls to different ncmpi_bput_var* APIs (var, var1, vara, vars, varm, varn) + * - PNETCDF_VAR_MAX_*_TIME_SIZE + - size of the slowest read and write operations + * - PNETCDF_VAR_SIZE_*_AGG_* + - histogram of PnetCDf total access sizes for read and write operations + * - PNETCDF_VAR_ACCESS*_* + - the four most common total accesses, in terms of size and length/stride (in last 5 dimensions) + * - PNETCDF_VAR_ACCESS*_COUNT + - count of the four most common total access sizes + * - PNETCDF_VAR_NDIMS + - number of dimensions in the variable + * - PNETCDF_VAR_NPOINTS + - number of points in the variable + * - PNETCDF_VAR_DATATYPE_SIZE + - size of each variable element + * - PNETCDF_VAR_*_RANK + - rank of the processes that were the fastest and slowest at I/O (for shared datasets) + * - PNETCDF_VAR_*_RANK_BYTES + - total bytes transferred at PnetCDF layer by the fastest and slowest ranks (for shared datasets) + * - PNETCDF_VAR_F_*_START_TIMESTAMP + - timestamp of first PnetCDF variable open/read/write/close + * - PNETCDF_VAR_F_*_END_TIMESTAMP + - timestamp of last PnetCDF variable open/read/write/close + * - PNETCDF_VAR_F_READ/WRITE/META_TIME + - cumulative time spent in PnetCDF read, write, or metadata operations + * - PNETCDF_VAR_F_MAX_*_TIME + - duration of the slowest PnetCDF read and write operations + * - PNETCDF_VAR_F_*_RANK_TIME + - fastest and slowest I/O time for a single rank (for shared datasets) + * - PNETCDF_VAR_F_VARIANCE_RANK_* + - variance of total I/O time and bytes moved for all ranks (for shared datasets) + * - PNETCDF_VAR_FILE_REC_ID + - Darshan file record ID of the file the variable belongs to + +**Table 8. Lustre module (if enabled, for Lustre file systems)** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - LUSTRE_NUM_COMPONENTS + - number of instrumented components in the Lustre layout + * - LUSTRE_NUM_STRIPES + - number of active stripes in the Lustre layout components + * - LUSTRE_COMP*_STRIPE_SIZE + - stripe size for this file layout component in bytes + * - LUSTRE_COMP*_STRIPE_COUNT + - number of OSTs over which the file layout component is striped + * - LUSTRE_COMP*_STRIPE_PATTERN + - pattern (e.g., raid0, mdt, overstriped) for this file layout component + * - LUSTRE_COMP*_FLAGS + - captured flags (e.g. init, prefwr, stale) for this file layout component + * - LUSTRE_COMP*_EXT_START + - starting file extent for this file layout component + * - LUSTRE_COMP*_EXT_END + - ending file extent for this file layout component (-1 means EOF) + * - LUSTRE_COMP*_MIRROR_ID + - mirror ID for this file layout component, if mirrors are enabled + * - LUSTRE_COMP*_POOL_NAME + - Lustre OST pool used for this file layout component + * - LUSTRE_COMP*\_OST_ID_* + - indices of OSTs over which this file layout component is striped + +**Table 9. DFS (DAOS File System) module (if enabled)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - DFS_OPENS + - DFS file open operation counts + * - DFS_GLOBAL_OPENS + - DFS file global open operation (i.e., ``dfs_obj_global2local()``) counts + * - DFS_LOOKUPS + - DFS file lookup operation counts + * - DFS_DUPS + - DFS file dup operation counts + * - DFS_READS + - DFS file read operation counts + * - DFS_READXS + - DFS non-contiguous file read operation counts + * - DFS_WRITES + - DFS file write operation counts + * - DFS_WRITEXS + - DFS non-contiguous file write operation counts + * - DFS_NB_READS + - DFS non-blocking file read operation counts (included in read/readx counts) + * - DFS_NB_WRITES + - DFS non-blocking file write operation counts (included in write/writex counts) + * - DFS_GET_SIZES + - DFS file get size operation counts + * - DFS_PUNCHES + - DFS file punch operation counts + * - DFS_REMOVES + - DFS file remove operation counts + * - DFS_STATS + - DFS file stat operation counts + * - DFS_BYTES_READ + - Total number of bytes that were read from the DFS file + * - DFS_BYTES_WRITTEN + - Total number of bytes that were written to the DFS file + * - DFS_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive operations + * - DFS_MAX_READ_TIME_SIZE + - Size of the slowest DFS read operation + * - DFS_MAX_WRITE_TIME_SIZE + - Size of the slowest DFS write operation + * - DFS_SIZE_READ_* + - Histogram of read access sizes at DFS level + * - DFS_SIZE_WRITE_* + - Histogram of write access sizes at DFS level + * - DFS_ACCESS[1-4]_ACCESS + - 4 most common DFS access sizes + * - DFS_ACCESS[1-4]_COUNT + - Count of 4 most common DFS access sizes + * - DFS_CHUNK_SIZE + - DFS file chunk size + * - DFS_FASTEST_RANK + - The MPI rank with smallest time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_SLOWEST_RANK + - The MPI rank with largest time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in DFS I/O (cumulative read, write, and meta times) + * - 
DFS_F_*_START_TIMESTAMP + - Timestamp that the first DFS file open/read/write/close operation began + * - DFS_F_*_END_TIMESTAMP + - Timestamp that the last DFS file open/read/write/close operation ended + * - DFS_F_READ_TIME + - Cumulative time spent reading at the DFS level + * - DFS_F_WRITE_TIME + - Cumulative time spent writing at the DFS level + * - DFS_F_META_TIME + - Cumulative time spent in open, dup, lookup, get size, punch, release, remove, and stat at the DFS level + * - DFS_F_MAX_READ_TIME + - Duration of the slowest individual DFS read operation + * - DFS_F_MAX_WRITE_TIME + - Duration of the slowest individual DFS write operation + * - DFS_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in DFS I/O (cumulative read, write, and meta times) + +**Table 10. DAOS module (if enabled)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - DAOS_OBJ_OPENS + - DAOS object open operation counts + * - DAOS_OBJ_FETCHES + - DAOS object fetch operation counts + * - DAOS_OBJ_UPDATES + - DAOS object update operation counts + * - DAOS_OBJ_PUNCHES + - DAOS object punch operation counts + * - DAOS_OBJ_DKEY_PUNCHES + - DAOS object dkey punch operation counts + * - DAOS_OBJ_AKEY_PUNCHES + - DAOS object akey punch operation counts + * - DAOS_OBJ_DKEY_LISTS + - DAOS object dkey list operation counts + * - DAOS_OBJ_AKEY_LISTS + - DAOS object akey list operation counts + * - DAOS_OBJ_RECX_LISTS + - DAOS object recx list operation counts + * - DAOS_ARRAY_OPENS + - DAOS array object open operation counts + * - DAOS_ARRAY_READS + - DAOS array object read operation counts + * - DAOS_ARRAY_WRITES + - DAOS array object write operation counts + * - DAOS_ARRAY_GET_SIZES + - DAOS array object get size operation counts + * - DAOS_ARRAY_SET_SIZES + - DAOS array object set size operation counts + * - DAOS_ARRAY_STATS + - DAOS array object stat operation counts + * - DAOS_ARRAY_PUNCHES + - DAOS array object punch operation counts + * - DAOS_ARRAY_DESTROYS + - DAOS array object destroy operation counts + * - DAOS_KV_OPENS + - DAOS kv object open operation counts + * - DAOS_KV_GETS + - DAOS kv object get operation counts + * - DAOS_KV_PUTS + - DAOS kv object put operation counts + * - DAOS_KV_REMOVES + - DAOS kv object remove operation counts + * - DAOS_KV_LISTS + - DAOS kv object list operation counts + * - DAOS_KV_DESTROYS + - DAOS kv object destroy operation counts + * - DAOS_NB_OPS + - DAOS non-blocking I/O operations (includes reads, writes, and metadata operations) + * - DAOS_BYTES_READ + - Total number of bytes that were read from the DAOS object + * - DAOS_BYTES_WRITTEN + - Total number of bytes that were written to the DAOS object + * - DAOS_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive operations + * - DAOS_MAX_READ_TIME_SIZE + - Size of the slowest DAOS read operation + * - DAOS_MAX_WRITE_TIME_SIZE + - Size of the slowest DAOS write operation + * - DAOS_SIZE_READ_* + - Histogram of read access sizes at DAOS level + * - DAOS_SIZE_WRITE_* + - Histogram of write access sizes at DAOS level + * - DAOS_ACCESS[1-4]_ACCESS + - 4 most common DAOS access sizes + * - DAOS_ACCESS[1-4]_COUNT + - Count of 4 most common DAOS access sizes + * - DAOS_OBJ_OTYPE + - DAOS object otype ID + * - DAOS_ARRAY_CELL_SIZE + - 
For DAOS array objects, the array cell size + * - DAOS_ARRAY_CHUNK_SIZE + - For DAOS array objects, the array chunk size + * - DAOS_FASTEST_RANK + - The MPI rank with smallest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_SLOWEST_RANK + - The MPI rank with largest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_F_*_START_TIMESTAMP + - Timestamp that the first DAOS object open/read/write/close operation began + * - DAOS_F_*_END_TIMESTAMP + - Timestamp that the last DAOS object open/read/write/close operation ended + * - DAOS_F_READ_TIME + - Cumulative time spent reading at the DAOS level + * - DAOS_F_WRITE_TIME + - Cumulative time spent writing at the DAOS level + * - DAOS_F_META_TIME + - Cumulative time spent in open, punch, list, get size, set size, stat, destroy, and remove at the DAOS level + * - DAOS_F_MAX_READ_TIME + - Duration of the slowest individual DAOS read operation + * - DAOS_F_MAX_WRITE_TIME + - Duration of the slowest individual DAOS write operation + * - DAOS_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in DAOS I/O (cumulative read, write, and meta times) + + +Heatmap fields +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each heatmap module record reports a histogram of the number of bytes read or +written, per process, over time, for a given I/O API. It provides a synopsis +of I/O intensity regardless of how many files are accessed. Heatmap records +are never aggregated across ranks. + +The file name field is used to indicate the API that produced the histogram +record. For example, "heatmap:POSIX" indicates that the record is reporting +I/O traffic that passed through the POSIX module. + +The number of BIN fields present in each record may vary depending on the job's +execution time and the configurable maximum number of bins chosen at execution +time. + +**Table 11. HEATMAP module** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - HEATMAP_F_BIN_WIDTH_SECONDS + - time duration of each heatmap bin + * - HEATMAP_READ\|WRITE_BIN_* + - number of bytes read or written within specified heatmap bin + +Additional modules +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Table 12. APXC module header record (if enabled, for Cray XC systems)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - APXC_GROUPS + - total number of groups for the job + * - APXC_CHASSIS + - total number of chassis for the job + * - APXC_BLADES + - total number of blades for the job + * - APXC_MEMORY_MODE + - Intel Xeon memory mode + * - APXC_CLUSTER_MODE + - Intel Xeon NUMA configuration + * - APXC_MEMORY_MODE_CONSISTENT + - Intel Xeon memory mode consistent across all nodes + * - APXC_CLUSTER_MODE_CONSISTENT + - Intel Xeon cluster mode consistent across all nodes + +**Table 13. APXC module per-router record (if enabled, for Cray XC systems)** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - APXC_GROUP + - group this router is on + * - APXC_CHASSIS + - chassis this router is on + * - APXC_BLADE + - blade this router is on + * - APXC_NODE + - node connected to this router + * - APXC_AR_RTR_x_y_INQ_PRF_INCOMING_FLIT_VC[0-7] + - flits on VCs of x y tile for router-router ports + * - APXC_AR_RTR_x_y_INQ_PRF_ROWBUS_STALL_CNT + - stalls on x y tile for router-router ports + * - APXC_AR_RTR_PT_x_y_INQ_PRF_INCOMING_FLIT_VC[0,4] + - flits on VCs of x y tile for router-nic ports + * - APXC_AR_RTR_PT_x_y_INQ_PRF_REQ_ROWBUS_STALL_CNT + - stalls on x y tile for router-nic ports + +**Table 14. APMPI module header record (if enabled, for MPI applications)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - MPI_TOTAL_COMM_TIME_VARIANCE + - variance in total communication time across all the processes + * - MPI_TOTAL_COMM_SYNC_TIME_VARIANCE + - variance in total sync time across all the processes, if enabled + +**Table 15. APMPI module per-process record (if enabled, for MPI applications)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - MPI_PROCESSOR_NAME + - name of the processor used by the MPI process + * - MPI_*_CALL_COUNT + - total call count for an MPI op + * - MPI_*_TOTAL_BYTES + - total bytes (i.e., cumulative across all calls) moved with an MPI op + * - MPI_*\_MSG_SIZE_AGG_* + - histogram of total bytes moved for all the calls of an MPI op + * - MPI_*_TOTAL_TIME + - total time (i.e, cumulative across all calls) of an MPI op + * - MPI_*_MIN_TIME + - minimum time across all calls of an MPI op + * - MPI_*_MAX_TIME + - maximum time across all calls of an MPI op + * - MPI_*_TOTAL_SYNC_TIME + - total sync time (cumulative across all calls of an op) of an MPI op, if enabled + * - MPI_TOTAL_COMM_TIME + - total communication (MPI) time of a process across all the MPI ops + * - MPI_TOTAL_COMM_SYNC_TIME + - total sync time of a process across all the MPI ops, if enabled + + +**Table 16. BG/Q module (if enabled on BG/Q systems)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - BGQ_CSJOBID + - Control system job ID + * - BGQ_NNODES + - Total number of BG/Q compute nodes + * - BGQ_RANKSPERNODE + - Number of MPI ranks per compute node + * - BGQ_DDRPERNODE + - Size of compute node DDR in MiB + * - BGQ_INODES + - Total number of BG/Q I/O nodes + * - BGQ_ANODES + - Dimension of A torus + * - BGQ_BNODES + - Dimension of B torus + * - BGQ_CNODES + - Dimension of C torus + * - BGQ_DNODES + - Dimension of D torus + * - BGQ_ENODES + - Dimension of E torus + * - BGQ_TORUSENABLED + - Bitfield indicating enabled torus dimensions + * - BGQ_F_TIMESTAMP + - Timestamp of when BG/Q data was collected + +.. _Sec Additional summary output: + +Additional summary output +------------------------------------- + +The following sections describe additional parser options that provide +summary I/O characterization data for the given log. + +.. note:: + These options are currently only supported by the POSIX, MPI-IO, and stdio + modules. + +Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Job performance information can be generated using the ``--perf`` command-line +option. + +**Example output** + +.. 
code-block::
+
+   # performance
+   # -----------
+   # total_bytes: 134217728
+   #
+   # I/O timing for unique files (seconds):
+   # ...........................
+   # unique files: slowest_rank_io_time: 0.000000
+   # unique files: slowest_rank_meta_only_time: 0.000000
+   # unique files: slowest_rank: 0
+   #
+   # I/O timing for shared files (seconds):
+   # (multiple estimates shown; time_by_slowest is generally the most accurate)
+   # ...........................
+   # shared files: time_by_cumul_io_only: 0.042264
+   # shared files: time_by_cumul_meta_only: 0.000325
+   # shared files: time_by_open: 0.064986
+   # shared files: time_by_open_lastio: 0.064966
+   # shared files: time_by_slowest: 0.057998
+   #
+   # Aggregate performance, including both shared and unique files (MiB/s):
+   # (multiple estimates shown; agg_perf_by_slowest is generally the most
+   # accurate)
+   # ...........................
+   # agg_perf_by_cumul: 3028.570529
+   # agg_perf_by_open: 1969.648064
+   # agg_perf_by_open_lastio: 1970.255248
+   # agg_perf_by_slowest: 2206.983935
+
+The ``total_bytes`` line shows the total number of bytes transferred
+(read/written) by the job. That is followed by three sections:
+
+**I/O timing for unique files**
+
+This section reports information about any files that were **not** opened by
+every rank in the job. This includes independent files (opened by 1 process)
+and partially shared files (opened by a proper subset of the job's processes).
+The I/O time for this category of file access is reported based on the
+**slowest** rank of all processes that performed this type of file access.
+
+* unique files: slowest_rank_io_time: total I/O time for unique files
+  (including both metadata + data transfer time)
+* unique files: slowest_rank_meta_only_time: metadata time for unique files
+* unique files: slowest_rank: the rank of the slowest process
+
+**I/O timing for shared files**
+
+This section reports information about files that were globally shared (i.e.,
+opened by every rank in the job). This section estimates performance for
+globally shared files using four different methods. The ``time_by_slowest``
+method is generally the most accurate, but it may not be available in some
+older Darshan log files.
+
+* shared files: ``time_by_cumul_*``: adds the cumulative time across all
+  processes and divides by the number of processes (inaccurate when there is
+  high variance among processes).
+
+  + shared files: ``time_by_cumul_io_only``: include metadata AND data transfer
+    time for global shared files
+  + shared files: ``time_by_cumul_meta_only``: metadata time for global shared
+    files
+
+* shared files: ``time_by_open``: difference between timestamp of open and
+  close (inaccurate if file is left open without I/O activity)
+* shared files: ``time_by_open_lastio``: difference between timestamp of open
+  and the timestamp of last I/O (similar to above but fixes case where file is
+  left open after I/O is complete)
+* shared files: ``time_by_slowest``: measures time according to which rank was
+  the slowest to perform both metadata operations and data transfer for each
+  shared file. (most accurate but requires newer log version)
+
+**Aggregate performance**
+
+Performance is calculated by dividing the total bytes by the I/O time (shared
+files and unique files combined) computed using each of the four methods
+described in the previous output section. Note that the total bytes value is
+reported in bytes, while aggregate performance is reported in MiB/s
+(1024*1024 bytes/s).
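+
+For reference, the performance summary shown above can be generated with an
+invocation like the following (reusing the hypothetical log file name from the
+earlier examples):
+
+.. code-block:: bash
+
+   darshan-parser --perf carns_my-app_id114525_7-27-58921_19.darshan.gz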
+
+Files
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Use the ``--file`` option to get totals based on file usage. Each line has 3
+columns. The first column is the count of files for that type of file, the
+second column is the number of bytes for that type, and the third column is
+the maximum offset accessed.
+
+* total: All files
+* read_only: Files that were only read from
+* write_only: Files that were only written to
+* read_write: Files that were both read and written
+* unique: Files that were opened on only one rank
+* shared: Files that were opened by more than one rank
+
+**Example output**
+
+.. code-block::
+
+   #
+   # total: 5 4371499438884 4364699616485
+   # read_only: 2 4370100334589 4364699616485
+   # write_only: 1 1399104295 1399104295
+   # read_write: 0 0 0
+   # unique: 0 0 0
+   # shared: 5 4371499438884 4364699616485
+
+Totals
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Use the ``--total`` option to get all statistics as an aggregate total rather
+than broken down per file. Each field is either summed across files and
+processes (for values such as number of opens), set to global minimums and
+maximums (for values such as open time and close time), or zeroed out (for
+statistics that are nonsensical in aggregate).
+
+**Example output**
+
+.. code-block:: bash
+
+   total_POSIX_OPENS: 1024
+   total_POSIX_READS: 0
+   total_POSIX_WRITES: 16384
+   total_POSIX_SEEKS: 16384
+   total_POSIX_STATS: 1024
+   total_POSIX_MMAPS: 0
+   total_POSIX_FOPENS: 0
+   total_POSIX_FREADS: 0
+   total_POSIX_FWRITES: 0
+   total_POSIX_BYTES_READ: 0
+   total_POSIX_BYTES_WRITTEN: 68719476736
+   total_POSIX_MAX_BYTE_READ: 0
+   total_POSIX_MAX_BYTE_WRITTEN: 67108863
+   ...
+
+darshan-dxt-parser
+=======================================
+
+The ``darshan-dxt-parser`` utility can be used to parse DXT traces out of
+Darshan log files, assuming the corresponding application was executed with the
+DXT modules enabled. The following example parses all DXT trace information out
+of a Darshan log file and stores it in a text file:
+
+.. code-block:: bash
+
+   darshan-dxt-parser shane_ior_id25016_1-31-38066-13864742673678115131_1.darshan > ~/ior-trace.txt
+
+Guide to darshan-dxt-parser output
+=======================================
+
+The preamble to ``darshan-dxt-parser`` output is identical to that of the
+traditional ``darshan-parser`` utility, which is described above.
+
+``darshan-dxt-parser`` displays detailed trace information contained within a
+Darshan log that was generated with DXT instrumentation enabled. Trace data is
+captured from both POSIX and MPI-IO interfaces. Example output is given below:
+
+**Example output**
+
+.. 
code-block:: bash + + # *************************************************** + # DXT_POSIX module data + # *************************************************** + + # DXT, file_id: 16457598720760448348, file_name: /tmp/test/testFile + # DXT, rank: 0, hostname: shane-thinkpad + # DXT, write_count: 4, read_count: 4 + # DXT, mnt_pt: /, fs_type: ext4 + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) + X_POSIX 0 write 0 0 262144 0.0029 0.0032 + X_POSIX 0 write 1 262144 262144 0.0032 0.0035 + X_POSIX 0 write 2 524288 262144 0.0035 0.0038 + X_POSIX 0 write 3 786432 262144 0.0038 0.0040 + X_POSIX 0 read 0 0 262144 0.0048 0.0048 + X_POSIX 0 read 1 262144 262144 0.0049 0.0049 + X_POSIX 0 read 2 524288 262144 0.0049 0.0050 + X_POSIX 0 read 3 786432 262144 0.0050 0.0051 + + # *************************************************** + # DXT_MPIIO module data + # *************************************************** + + # DXT, file_id: 16457598720760448348, file_name: /tmp/test/testFile + # DXT, rank: 0, hostname: shane-thinkpad + # DXT, write_count: 4, read_count: 4 + # DXT, mnt_pt: /, fs_type: ext4 + # Module Rank Wt/Rd Segment Length Start(s) End(s) + X_MPIIO 0 write 0 262144 0.0029 0.0032 + X_MPIIO 0 write 1 262144 0.0032 0.0035 + X_MPIIO 0 write 2 262144 0.0035 0.0038 + X_MPIIO 0 write 3 262144 0.0038 0.0040 + X_MPIIO 0 read 0 262144 0.0048 0.0049 + X_MPIIO 0 read 1 262144 0.0049 0.0049 + X_MPIIO 0 read 2 262144 0.0049 0.0050 + X_MPIIO 0 read 3 262144 0.0050 0.0051 + +DXT POSIX module +------------------------------------- + +This module provides details on each read or write access at the POSIX layer. +The trace output is organized first by file then by process rank. So, for each +file accessed by the application, DXT will provide each process's I/O trace +segments in separate blocks, ordered by increasing process rank. Within each +file/rank block, I/O trace segments are ordered chronologically. + +Before providing details on each I/O operation, DXT provides a short preamble +for each file/rank trace block with the following bits of information: the +Darshan identifier for the file (which is equivalent to the identifiers used by +Darshan in its traditional modules), the full file path, the corresponding MPI +rank the current block of trace data belongs to, the hostname associated with +this process rank, the number of individual POSIX read and write operations by +this process, and the mount point and file system type corresponding to the +traced file. + +The output format for each individual I/O operation segment is: + +.. code-block:: + + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) + +* Module: corresponding DXT module (DXT_POSIX or DXT_MPIIO) +* Rank: process rank responsible for I/O operation +* Wt/Rd: whether the operation was a write or read +* Segment: The operation number for this segment (first operation is segment 0) +* Offset: file offset the I/O operation occurred at +* Length: length of the I/O operation in bytes +* Start: timestamp of the start of the operation (w.r.t. application start time) +* End: timestamp of the end of the operation (w.r.t. application start time) + +DXT MPI-IO module +------------------------------------- + +If the MPI-IO interface is used by an application, this module provides details +on each read or write access at the MPI-IO layer. This data is often useful in +understanding how MPI-IO read or write operations map to underlying POSIX read +or write operations issued to the traced file. 
+
+The output format for the DXT MPI-IO module is essentially identical to the DXT
+POSIX module, except that the offset of file operations is not tracked.
+
+Other darshan-util utilities
+=======================================
+
+The darshan-util package includes a number of other utilities that can be
+summarized briefly as follows:
+
+* ``darshan-convert``: converts an existing log file to the newest log format.
+  If the ``--bzip2`` flag is given, then the output file will be re-compressed
+  in bzip2 format rather than libz format. It also has command line options for
+  anonymizing personal data, adding metadata annotation to the log header, and
+  restricting the output to a specific instrumented file.
+* ``darshan-diff``: provides a text diff of two Darshan log files, comparing both
+  job-level metadata and module data records between the files.
+* ``darshan-analyzer``: walks an entire directory tree of Darshan log files and
+  produces a summary of the types of access methods used in those log files.
+* ``darshan-logutils*``: this is a library rather than an executable, but it
+  provides a C interface for opening and parsing Darshan log files. This is
+  the recommended method for writing custom utilities, as darshan-logutils
+  provides a relatively stable interface across different versions of Darshan
+  and different log formats.
+* ``dxt_analyzer``: plots the read or write activity of a job using data obtained
+  from Darshan's DXT modules (if DXT is enabled).
+
+PyDarshan
+=======================================
+
+PyDarshan is a Python package that provides functionality for analyzing Darshan
+log files, first introduced as part of Darshan 3.3.0. This package provides
+easier-to-use Python interfaces to Darshan log file data (compared to the
+C-based ``darshan-util`` library), enabling Darshan users to develop their own
+custom log file analysis utilities.
+
+PyDarshan has independent documentation outlining how to install and use this
+package, which can be found in :ref:`pydarshantoc`.
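+
+As a quick illustration, once PyDarshan is installed its job summary CLI can
+be invoked directly on a log file, as in the following sketch (the log file
+name is hypothetical):
+
+.. code-block:: bash
+
+   python -m darshan summary example_app_id12345.darshan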
+ diff --git a/darshan-util/pydarshan/CHANGELOG.rst b/darshan-util/pydarshan/CHANGELOG.rst index 61d7e8706..a6bbbeb6d 100644 --- a/darshan-util/pydarshan/CHANGELOG.rst +++ b/darshan-util/pydarshan/CHANGELOG.rst @@ -2,15 +2,18 @@ PyDarshan-3.4.7.0 ================= * Support for extracting and analyzing new DAOS instrumentation module data + - summary CLI tool now integrates DAOS data into reports * New CLI tools for extracting and printing summary data for one or more Darshan logs + - `job_stats` prints high-level statistics on a per-job level - `file_stats` prints high-level statistics on a per-file level - output in either CSV format or using the Rich package (which is now a proper dependency of PyDarshan) * Add DarshanReport object capability to filter module records according to given regular expressions + - users can now pass the following parameters to DarshanReport objects to use this functionality: `filter_patterns` (a list of regex strings to match against) and `filter_mode` (either @@ -33,6 +36,7 @@ PyDarshan-3.4.4.0 PyDarshan-3.4.3.0 ================= * Various job summary tool improvements + - add new module overview table - add new file count summary table - add new plot of POSIX module sequential/consecutive accesses @@ -45,6 +49,7 @@ PyDarshan-3.4.3.0 easier to read * Integrated Python support for darshan-util accumulator API for aggregating file records and calculating derived metrics + - Added backend routine `accumulate_records`, which returns a derived metric structure and a summary record for an input set of records @@ -66,13 +71,16 @@ PyDarshan-3.4.1.0 ================= * Fixed memory leaks in the following backend CFFI bindings (reported by Jesse Hines): + - log_get_modules - log_get_mounts - log_get_record - log_get_name_records - log_lookup_name_records + * Added PnetCDF module information to job summary tool * Testing modifications: + - Switched to use of context managers for log Report objects to avoid test hangs in certain environments - Marked tests requiring lxml package as xfail when not installed diff --git a/darshan-util/pydarshan/Makefile b/darshan-util/pydarshan/Makefile index 30e8af164..13445be7c 100644 --- a/darshan-util/pydarshan/Makefile +++ b/darshan-util/pydarshan/Makefile @@ -63,7 +63,7 @@ coverage: # check code coverage quickly with the default Python docs: clean-docs # generate Sphinx HTML documentation, including API docs - sphinx-apidoc -M -H PyDarshan -o docs/api/pydarshan darshan + sphinx-apidoc -M -H "PyDarshan API Reference" -o docs/api/pydarshan darshan $(MAKE) -C docs clean $(MAKE) -C docs html diff --git a/darshan-util/pydarshan/darshan/discover_darshan.py b/darshan-util/pydarshan/darshan/discover_darshan.py index 5746b773b..00fd9f3f1 100644 --- a/darshan-util/pydarshan/darshan/discover_darshan.py +++ b/darshan-util/pydarshan/darshan/discover_darshan.py @@ -216,9 +216,15 @@ def find_utils(ffi, libdutil): os.chdir(save) except: libdutil = None - - - + + if libdutil is None: + try: + library_path = os.environ.get('DARSHAN_INSTALL_PREFIX') + logger.debug(f"Attempting library_path={library_path} via env variable DARSHAN_INSTALL_PREFIX.") + libdutil = ffi.dlopen(library_path + "/lib/libdarshan-util.so") + except: + libdutil = None + if libdutil is None: raise RuntimeError('Could not find libdarshan-util.so! Is darshan-util installed? 
Please ensure one of the following: 1) export LD_LIBRARY_PATH=, or 2) darshan-parser can be found using the PATH variable, or 3) pkg-config can resolve pkg-config --path darshan-util, or 4) install a wheel that includes darshan-utils via pip.')
diff --git a/darshan-util/pydarshan/docs/conf.py b/darshan-util/pydarshan/docs/conf.py
index db89dd76d..81b0c6d9c 100755
--- a/darshan-util/pydarshan/docs/conf.py
+++ b/darshan-util/pydarshan/docs/conf.py
@@ -63,7 +63,7 @@ master_doc = 'index'
 
 # General information about the project.
-project = u'PyDarshan'
+project = u'PyDarshan API Reference'
 copyright = u"2021, Argonne National Laboratory"
 author = u""
 
@@ -172,8 +172,7 @@
 intersphinx_mapping = {
-    'https://docs.python.org/': None,
-    'http://matplotlib.org': None,
+    "python": ('https://docs.python.org/', None),
+    "matplotlib": ("https://matplotlib.org", None),
 }
-
diff --git a/doc/darshan-modularization.rst b/doc/darshan-modularization.rst
new file mode 100644
index 000000000..649178512
--- /dev/null
+++ b/doc/darshan-modularization.rst
@@ -0,0 +1,683 @@
+********************************************************************
+Modularized I/O characterization using Darshan 3.x
+********************************************************************
+
+Introduction
+==============================================
+
+Darshan is a lightweight toolkit for characterizing the I/O performance of
+instrumented HPC applications.
+
+Starting with version 3.0.0, the Darshan runtime environment and log file
+format have been redesigned such that new "instrumentation modules" can be
+added without breaking existing tools. Developers are given a framework to
+implement arbitrary instrumentation modules, which are responsible for
+gathering I/O data from a specific system component (which could be an I/O
+library, platform-specific data, etc.). Darshan can then manage these modules
+at runtime and create a valid Darshan log regardless of how many or what types
+of modules are used.
+
+Overview of Darshan's modularized architecture
+==============================================
+
+The Darshan source tree is organized into two primary components:
+
+* **darshan-runtime**: Darshan runtime framework necessary for instrumenting MPI
+  applications and generating I/O characterization logs.
+
+* **darshan-util**: Darshan utilities for analyzing the contents of a given
+  Darshan I/O characterization log.
+
+The following subsections provide detailed overviews of each of these
+components to give a better understanding of the architecture of the
+modularized version of Darshan. In :ref:`Sec add instr`, we outline the
+necessary steps for integrating new instrumentation modules into Darshan.
+
+.. _Sec darshan-runtime:
+
+Darshan-runtime
+-------------------------------------
+
+The primary responsibilities of the darshan-runtime component are:
+
+* intercepting I/O functions of interest from a target application;
+
+* extracting statistics, timing information, and other data characterizing the
+  application's I/O workload;
+
+* compressing I/O characterization data and corresponding metadata;
+
+* logging the compressed I/O characterization to file for future evaluation
+
+The first two responsibilities are the burden of module developers, while the
+last two are handled automatically by Darshan.
+
+In general, instrumentation modules are composed of:
+
+* wrapper functions for intercepting I/O functions;
+
+* internal functions for initializing and maintaining internal data structures
+  and module-specific I/O characterization data;
+
+* a set of functions for interfacing with the Darshan runtime environment
+
+A block diagram illustrating the interaction of an example POSIX
+instrumentation module and the Darshan runtime environment is given below in
+Figure 1.
+
+**Figure 1. Darshan runtime environment**
+
+.. image:: darshan-dev-modular-runtime.png
+   :align: center
+   :width: 600
+   :alt: A block diagram of Darshan runtime environment
+
+As shown in Figure 1, the Darshan runtime environment is just a library
+(libdarshan) which intercepts and instruments function calls of interest made
+by an application to existing system libraries. Two primary components of this
+library are ``darshan-core`` and ``darshan-common``. ``darshan-core`` is the
+central component which manages the initialization/shutdown of Darshan,
+coordinates with active instrumentation modules, and writes I/O
+characterization logs to disk, among other things. ``darshan-core`` intercepts
+``MPI_Init()`` to initialize key internal data structures and intercepts
+``MPI_Finalize()`` to initiate Darshan's shutdown process. ``darshan-common``
+simply provides module developers with functionality that is likely to be
+reused across modules to minimize development and maintenance costs.
+Instrumentation modules must utilize ``darshan-core`` to register themselves
+and corresponding I/O records with Darshan so they can be added to the output
+I/O characterization. While not shown in Figure 1, numerous modules can be
+registered with Darshan at any given time and Darshan is capable of correlating
+records between these modules.
+
+In the next three subsections, we describe instrumentation modules, the
+``darshan-core`` component, and the ``darshan-common`` component in more
+detail.
+
+Instrumentation modules
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The new modularized version of Darshan allows for the generation of I/O
+characterizations composed from numerous instrumentation modules, where an
+instrumentation module is simply a Darshan component responsible for capturing
+I/O data from some arbitrary source. For example, distinct instrumentation
+modules may be defined for different I/O interfaces or to gather
+system-specific I/O parameters from a given computing system. Each
+instrumentation module interfaces with the ``darshan-core`` component to
+coordinate its initialization and shutdown and to provide output I/O
+characterization data to be written to log.
+
+In general, there are two different methods an instrumentation module can use
+to initialize itself: static initialization at Darshan startup time or dynamic
+initialization within intercepted function calls during application execution.
+The initialization process should initialize module-specific data structures
+and register the module with the ``darshan-core`` component so it is included
+in the output I/O characterization.
+
+The static initialization approach is useful for modules that do not have
+function calls that can be intercepted and instead can just grab all I/O
+characterization data at Darshan startup or shutdown time. A module can be
+statically initialized at Darshan startup time by adding its initialization
+routine to the ``mod_static_init_fns`` array at the top of the
+``lib/darshan-core.c`` source file.
+
+.. note::
+    Modules may wish to add a corresponding configure option to disable the
+    module from attempting to gather I/O data. The ability to disable a module
+    using a configure option is especially necessary for system-specific
+    modules which cannot be built or used on other systems.
+
+Most instrumentation modules can simply bootstrap themselves within wrapper
+functions during normal application execution. Each of Darshan's current I/O
+library instrumentation modules (POSIX, MPI-IO, stdio, HDF5, PnetCDF) follows
+this approach. Each wrapper function should just include logic to initialize
+data structures and register with ``darshan-core`` if this initialization has
+not already occurred. Darshan intercepts function calls of interest by
+inserting these wrappers at compile time for statically linked executables
+(e.g., using the linker's ``--wrap`` mechanism) and at runtime for dynamically
+linked executables (using ``LD_PRELOAD``).
+
+.. note::
+    Modules should not perform any I/O or communication within wrapper
+    functions. Darshan records I/O data independently on each application
+    process, then merges the data from all processes when the job is shutting
+    down. This defers expensive I/O and communication operations to the
+    shutdown process, minimizing Darshan's impact on application I/O
+    performance.
+
+When the instrumented application terminates and Darshan begins its shutdown
+procedure, it requires a way to interface with any active modules that have
+data to contribute to the output I/O characterization. The following function
+is implemented by each module to finalize (and perhaps reorganize) module
+records before returning the record memory back to darshan-core to be
+compressed and written to file.
+
+.. code-block:: C
+
+    typedef void (*darshan_module_shutdown)(
+        MPI_Comm mod_comm,
+        darshan_record_id *shared_recs,
+        int shared_rec_count,
+        void** mod_buf,
+        int* mod_buf_sz
+    );
+
+This function can be used to run collective MPI operations on module data; for
+instance, Darshan typically tries to reduce file records which are shared
+across all application processes into a single data record (more details on
+the shared record reduction mechanism are given in :ref:`Sec shared record`).
+This function also serves as a final opportunity for modules to clean up and
+free any allocated data structures.
+
+* ``mod_comm`` is the MPI communicator to use for collective communication.
+
+* ``shared_recs`` is a list of Darshan record identifiers that are shared
+  across all application processes.
+
+* ``shared_rec_count`` is the size of the shared record list.
+
+* ``mod_buf`` is a pointer to the buffer address of the module's contiguous
+  set of data records.
+
+* ``mod_buf_sz`` is a pointer to a variable storing the aggregate size of the
+  module's records. On input, the pointed-to value indicates the aggregate
+  size of the module's registered records; on output, the value may be updated
+  if, for instance, certain records are discarded.
+
+darshan-core
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Within darshan-runtime, the darshan-core component manages the initialization
+and shutdown of the Darshan environment, provides an interface for modules to
+register themselves and their data records with Darshan, and manages the
+compression and writing of the resulting I/O characterization. As illustrated
+in Figure 1, the darshan-core runtime environment intercepts ``MPI_Init`` and
+``MPI_Finalize`` routines to initialize and shut down the Darshan runtime
+environment, respectively.
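+
+Before each of these interface functions is described in detail below, the
+following abbreviated sketch shows how the pieces fit together on the module
+side. The ``darshan_core_*`` calls follow the interfaces documented in this
+section, but the module itself -- its ``DARSHAN_FOO_MOD`` identifier, record
+structure, and wrapper -- is invented purely for illustration; real modules
+also serialize initialization with a lock and keep registered records in a
+hash table so each record is registered only once.
+
+.. code-block:: C
+
+    /* hypothetical "FOO" module sketch -- for illustration only */
+    #include "darshan.h"
+
+    struct foo_record
+    {
+        darshan_record_id rec_id; /* invented record layout */
+        double open_time;
+    };
+
+    static int foo_initialized = 0;
+
+    /* shutdown hook invoked by darshan-core when the job ends */
+    static void foo_shutdown(MPI_Comm mod_comm,
+        darshan_record_id *shared_recs, int shared_rec_count,
+        void **mod_buf, int *mod_buf_sz)
+    {
+        /* reduce shared records and free module state here; on return,
+         * *mod_buf / *mod_buf_sz describe the records to be logged */
+        foo_initialized = 0;
+    }
+
+    /* called from each wrapper to bootstrap the module on first use */
+    static void foo_init(void)
+    {
+        int mem_limit = 2 * 1024 * 1024; /* record memory requested (bytes) */
+
+        if(foo_initialized)
+            return;
+        darshan_core_register_module(DARSHAN_FOO_MOD, &foo_shutdown,
+            &mem_limit, NULL, NULL); /* ignore rank and memory alignment */
+        foo_initialized = 1;
+    }
+
+    /* example wrapper body: create and timestamp the record for a path
+     * (a real module would consult its hash table first and register
+     * each record only once) */
+    void foo_wrapper_open(const char *path)
+    {
+        darshan_record_id rec_id;
+        struct foo_record *rec;
+
+        foo_init();
+        rec_id = darshan_core_gen_record_id(path);
+        rec = darshan_core_register_record(rec_id, path, DARSHAN_FOO_MOD,
+            sizeof(*rec), NULL);
+        if(!rec)
+            return; /* no record memory left */
+        rec->rec_id = rec_id;
+        rec->open_time = darshan_core_wtime();
+    }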
+
+The functions provided by ``darshan-core`` to interface with instrumentation
+modules are described in detail below.
+
+.. code-block:: C
+
+    void darshan_core_register_module(
+        darshan_module_id mod_id,
+        darshan_module_shutdown mod_shutdown_func,
+        int *inout_mod_buf_size,
+        int *rank,
+        int *sys_mem_alignment);
+
+The ``darshan_core_register_module`` function registers Darshan
+instrumentation modules with the ``darshan-core`` runtime environment. This
+function needs to be called once for any module that will contribute data to
+Darshan's final I/O characterization.
+
+* ``mod_id`` is a unique identifier for the given module, which is defined in
+  the Darshan log format header file (``darshan-log-format.h``).
+
+* ``mod_shutdown_func`` is the function pointer to the module shutdown
+  function described in the previous section.
+
+* ``inout_mod_buf_size`` is an input/output argument that stores the amount of
+  module memory being requested when calling the function and the amount of
+  memory actually reserved by darshan-core when returning.
+
+* ``rank`` is a pointer to an integer in which to store the calling process's
+  application MPI rank. ``NULL`` may be passed in to ignore this value.
+
+* ``sys_mem_alignment`` is a pointer to an integer which will store the system
+  memory alignment value Darshan was configured with. ``NULL`` may be passed
+  in to ignore this value.
+
+.. code-block:: C
+
+    void darshan_core_unregister_module(darshan_module_id mod_id);
+
+The ``darshan_core_unregister_module`` function disassociates the given module
+from the ``darshan-core`` runtime. Consequently, Darshan does not interface
+with the given module at shutdown time and will not log any I/O data from the
+module. This function should only be used if a module registers itself with
+darshan-core but later decides it does not want to contribute any I/O data.
+Note that, in the current implementation, Darshan cannot reclaim the record
+memory allocated to the calling module and assign it to other modules.
+
+* ``mod_id`` is the unique identifier for the module being unregistered.
+
+.. code-block:: C
+
+    darshan_record_id darshan_core_gen_record_id(const char *name);
+
+The ``darshan_core_gen_record_id`` function simply generates a unique record
+identifier for a given record name. This function is generally called to
+convert a name string to the unique record identifier that is needed to
+register a data record with darshan-core. The generation of IDs is consistent,
+such that modules which reference records with the same names will store these
+records using the same unique IDs, simplifying the correlation of these
+records for analysis.
+
+* ``name`` is the name of the corresponding data record (often this is just a
+  file name).
+
+.. code-block:: C
+
+    void *darshan_core_register_record(
+        darshan_record_id rec_id,
+        const char *name,
+        darshan_module_id mod_id,
+        int rec_len,
+        struct darshan_fs_info *fs_info);
+
+The ``darshan_core_register_record`` function registers a data record with the
+darshan-core runtime, allocating memory for the record so that it is persisted
+in the output log file. This record could reference a POSIX file or perhaps an
+object identifier for an object storage system, for instance. This function
+should only be called once for each record being tracked by a module to avoid
+duplicating record memory. This function returns the address at which the
+record should be stored, or ``NULL`` if there is insufficient memory for
+storing the record.
+
+* ``rec_id`` is a unique integer identifier for this record (generally
+  generated using the ``darshan_core_gen_record_id`` function).
+
+* ``name`` is the string name of the data record, which could be a file path,
+  object ID, etc. If given, darshan-core will associate the given name with
+  the record identifier and store this mapping in the log file so it can be
+  retrieved for analysis. ``NULL`` may be passed in to generate an anonymous
+  (unnamed) record.
+
+* ``mod_id`` is the identifier for the module attempting to register this
+  record.
+
+* ``rec_len`` is the length of the record.
+
+* ``fs_info`` is a pointer to a structure of relevant info for the file system
+  associated with the given record -- this structure is defined in the
+  ``darshan.h`` header. Note that this functionality only works for record
+  names that are absolute file paths, since the file system is determined by
+  matching the file path against the list of mount points Darshan is aware of.
+  ``NULL`` may be passed in to ignore this value.
+
+.. code-block:: C
+
+    double darshan_core_wtime(void);
+
+The ``darshan_core_wtime`` function simply returns a floating point number of
+seconds since Darshan was initialized. This functionality can be used to time
+the duration of application I/O calls or to store timestamps of when functions
+of interest were called.
+
+.. code-block:: C
+
+    int darshan_core_excluded_path(const char *path);
+
+The ``darshan_core_excluded_path`` function checks whether a given file path
+is in Darshan's list of excluded file paths (i.e., paths to/from which I/O is
+not instrumented, such as ``/etc``, ``/dev``, ``/usr``, etc.).
+
+* ``path`` is the absolute file path we are checking.
+
+darshan-common
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``darshan-common`` is a utility component of darshan-runtime, providing module
+developers with general functions that are likely to be reused across multiple
+modules. These functions are distinct from darshan-core functions since they
+do not require access to internal Darshan state.
+
+.. code-block:: C
+
+    char* darshan_clean_file_path(const char* path);
+
+The ``darshan_clean_file_path`` function cleans up the input path string,
+converting relative paths to absolute paths and suppressing any potential
+noise within the string. The address of the new string is returned and should
+be freed by the caller.
+
+* ``path`` is the input path string to be cleaned up.
+
+``darshan-common`` also currently includes functions for maintaining counters
+that store common I/O values (such as common I/O access sizes or strides used
+by an application), as well as functions for calculating the variance of a
+given counter across all processes. As more modules are contributed, it is
+likely that more functionality can be refactored out of module implementations
+and maintained in darshan-common, facilitating code reuse and simplifying
+maintenance.
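+
+As a small illustration of how these helpers combine with the darshan-core
+path functions described earlier, the following sketch normalizes a path and
+skips excluded prefixes inside a hypothetical wrapper. The wrapper itself and
+the assumption that a nonzero return from ``darshan_core_excluded_path()``
+means "excluded" are illustrative only.
+
+.. code-block:: C
+
+    #include <stdlib.h>
+    #include "darshan.h"
+
+    /* hypothetical wrapper fragment: normalize the user's path, skip
+     * excluded prefixes, and only then track the record */
+    void foo_track_path(const char *user_path)
+    {
+        char *newpath = darshan_clean_file_path(user_path);
+        if(!newpath)
+            return;
+        if(!darshan_core_excluded_path(newpath)) /* assumed: nonzero = excluded */
+        {
+            /* generate and register a record for newpath here */
+        }
+        free(newpath); /* the returned string is owned by the caller */
+    }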
+
+Darshan-util
+-------------------------------------
+
+The darshan-util component is composed of a helper library for accessing log
+file data records (``libdarshan-util``) and a set of utilities that use this
+library to analyze application I/O behavior. ``libdarshan-util`` includes a
+generic interface (``darshan-logutils``) for retrieving specific components of
+a given log file. Specifically, this interface allows utilities to retrieve a
+log's header metadata, job details, record ID to name mapping, and any
+module-specific data contained within the log.
+
+``libdarshan-util`` additionally includes the definition of a generic module
+interface (``darshan-mod-logutils``) that may be implemented by modules to
+provide a consistent way for Darshan utilities to interact with module data
+stored in log files. This interface is necessary since each module has records
+of varying size and format, so module-specific code is needed to interact with
+the records in a generic manner. This interface is used by the
+``darshan-parser`` utility, for instance, to extract data records from all
+modules contained in a log file and to print these records in a consistent
+format that is amenable to further analysis by other tools.
+
+darshan-logutils
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Here we define each function in the ``darshan-logutils`` interface, which can
+be used to create new log utilities and to implement module-specific
+interfaces into log files.
+
+.. code-block:: C
+
+    darshan_fd darshan_log_open(const char *name);
+
+Opens the Darshan log file stored at path ``name``. The log file must already
+exist and is opened for reading only. As part of the open routine, the log
+file header is read to set internal file descriptor data structures. Returns a
+Darshan file descriptor on success or ``NULL`` on error.
+
+.. code-block:: C
+
+    darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type, int partial_flag);
+
+Creates a new Darshan log file for writing only at path ``name``.
+``comp_type`` denotes the underlying compression type used on the log file
+(currently either libz or bzip2) and ``partial_flag`` denotes whether the log
+is storing partial data (that is, not all possible application file records
+were tracked by Darshan). Returns a Darshan file descriptor on success or
+``NULL`` on error.
+
+.. code-block:: C
+
+    int darshan_log_get_job(darshan_fd fd, struct darshan_job *job);
+    int darshan_log_put_job(darshan_fd fd, struct darshan_job *job);
+
+Reads/writes the ``job`` structure from/to the log file referenced by
+descriptor ``fd``. The ``darshan_job`` structure is defined in
+``darshan-log-format.h``. Returns ``0`` on success, ``-1`` on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_exe(darshan_fd fd, char *buf);
+    int darshan_log_put_exe(darshan_fd fd, char *buf);
+
+Reads/writes the corresponding executable string (exe name and command line
+arguments) from/to the Darshan log referenced by ``fd``. Returns ``0`` on
+success, ``-1`` on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_mounts(darshan_fd fd, char*** mnt_pts, char*** fs_types, int* count);
+    int darshan_log_put_mounts(darshan_fd fd, char** mnt_pts, char** fs_types, int count);
+
+Reads/writes mounted file system information for the Darshan log referenced by
+``fd``. ``mnt_pts`` points to an array of strings storing mount points,
+``fs_types`` points to an array of strings storing file system types (e.g.,
+ext4, nfs, etc.), and ``count`` points to an integer storing the total number
+of mounted file systems recorded by Darshan. Returns ``0`` on success, ``-1``
+on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_namehash(darshan_fd fd, struct darshan_name_record_ref **hash);
+    int darshan_log_put_namehash(darshan_fd fd, struct darshan_name_record_ref *hash);
+
+Reads/writes the hash table mapping Darshan record identifiers to full names
+for all records contained in the Darshan log referenced by ``fd``. ``hash`` is
+a pointer to the hash table (of type ``struct darshan_name_record_ref *``),
+which should be initialized to ``NULL`` for reading.
+This hash table is defined by the ``uthash`` hash table implementation and
+includes corresponding macros for searching, iterating, and deleting records
+from the hash. For detailed documentation on using this hash table, consult
+the ``uthash`` documentation in
+``darshan-util/uthash-1.9.2/doc/txt/userguide.txt``. The ``darshan-parser``
+utility (for parsing module information out of a Darshan log) provides an
+example of how this hash table may be used. Returns ``0`` on success, ``-1``
+on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int mod_buf_sz);
+    int darshan_log_put_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int mod_buf_sz, int ver);
+
+Reads/writes a chunk of (uncompressed) module data for the module identified
+by ``mod_id`` from/to the Darshan log referenced by ``fd``. ``mod_buf`` is the
+buffer to read data into or write data from, and ``mod_buf_sz`` is the
+corresponding size of the buffer. The ``darshan_log_get_mod`` routine can be
+repeatedly called to retrieve chunks of uncompressed data from a specific
+module region of the log file given by ``fd``. The ``darshan_log_put_mod``
+routine continually appends data to a specific module region in the log file
+given by ``fd`` and accepts an additional ``ver`` parameter indicating the
+version number for the module data records being written. These functions
+return the number of bytes read/written on success, ``-1`` on failure.
+
+.. note::
+    Darshan uses a "reader makes right" conversion strategy to rectify
+    endianness differences between the machine a log was generated on and a
+    machine analyzing the log. Accordingly, module-specific log utility
+    functions will need to check the ``swap_flag`` variable of the Darshan
+    file descriptor to determine whether byte swapping is necessary. 32-bit
+    and 64-bit byte swapping macros (``DARSHAN_BSWAP32``/``DARSHAN_BSWAP64``)
+    are provided in ``darshan-logutils.h``.
+
+.. code-block:: C
+
+    void darshan_log_close(darshan_fd fd);
+
+Closes Darshan file descriptor ``fd``. This routine *must* be called for newly
+created log files, as it flushes pending writes and writes a corresponding log
+file header before closing.
+
+.. note::
+    For newly created Darshan log files, care must be taken to write log file
+    data in the correct order, since the log file write routines essentially
+    append data to the log file. The correct order for writing all log file
+    data to file is: (1) job data, (2) exe string, (3) mount data, (4) record
+    ID -> file name map, (5) each module's data, in increasing order of module
+    identifiers.
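+
+Putting the read-side functions together, a minimal log-reading utility built
+on ``darshan-logutils`` might look like the following sketch. Error handling
+is abbreviated, and the ``nprocs`` field of ``struct darshan_job`` is an
+assumption taken from ``darshan-log-format.h``.
+
+.. code-block:: C
+
+    #include <stdio.h>
+    #include <inttypes.h>
+    #include "darshan-logutils.h"
+
+    int main(int argc, char **argv)
+    {
+        darshan_fd fd;
+        struct darshan_job job;
+
+        if(argc != 2)
+        {
+            fprintf(stderr, "usage: %s <logfile>\n", argv[0]);
+            return 1;
+        }
+
+        /* open an existing log read-only; the header is parsed here */
+        fd = darshan_log_open(argv[1]);
+        if(!fd)
+            return 1;
+
+        /* read the job-level metadata stored in the log */
+        if(darshan_log_get_job(fd, &job) == 0)
+            printf("nprocs: %" PRId64 "\n", job.nprocs);
+
+        darshan_log_close(fd);
+        return 0;
+    }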
+
+darshan-mod-logutils
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``darshan-mod-logutils`` interface provides a convenient way to implement
+new log functionality across all Darshan instrumentation modules, which can
+greatly simplify the development of new Darshan log utilities. These functions
+are defined in the ``darshan_mod_logutil_funcs`` structure in
+``darshan-logutils.h`` -- instrumentation modules simply provide their own
+implementation of each function, and utilities can then leverage this
+functionality using the ``mod_logutils`` array defined in
+``darshan-logutils.c``. Descriptions of some of the currently implemented
+functions are provided below.
+
+.. code-block:: C
+
+    int log_get_record(darshan_fd fd, void **buf);
+    int log_put_record(darshan_fd fd, void *buf);
+
+Reads/writes the module record stored in ``buf`` from/to the log referenced by
+``fd``. Notice that a size parameter is not needed, since the utilities
+calling this interface will likely not know the record size -- the
+module-specific log utility code can determine the corresponding size before
+reading/writing the record from/to file.
+
+.. note::
+    ``log_get_record`` takes a pointer to a buffer address rather than just
+    the buffer address. If the pointed-to address is ``NULL``, then record
+    memory should be allocated instead. This functionality helps optimize
+    memory usage, since utilities often don't know the size of records being
+    accessed but still must provide a buffer to read them into.
+
+.. code-block:: C
+
+    void log_print_record(void *rec, char *name, char *mnt_pt, char *fs_type);
+
+Prints all data associated with the record pointed to by ``rec``. ``name``
+holds the corresponding name string for this record. ``mnt_pt`` and
+``fs_type`` hold the corresponding mount point path and file system type
+strings associated with the record (only valid for records with names that
+are absolute file paths).
+
+.. code-block:: C
+
+    void log_print_description(int ver);
+
+Prints a description of the data stored within records for this module (with
+version number ``ver``).
+
+.. _Sec add instr:
+
+Adding new instrumentation modules
+==============================================
+
+In this section we outline each step necessary for adding a module to Darshan.
+To assist module developers, we have provided the example "NULL" module as
+part of the Darshan source tree (``darshan-null-log-format.h``,
+``darshan-runtime/lib/darshan-null.c``, and
+``darshan-util/darshan-null-logutils.*``). This example can be used as a
+minimal stubbed-out module implementation that is heavily annotated to further
+clarify how modules interact with Darshan and to provide best practices to
+future module developers. For full-fledged module implementation examples,
+developers are encouraged to examine the POSIX and MPI-IO modules.
+
+Log format headers
+-------------------------------------
+
+The following modifications to Darshan log format headers are required for
+defining the module's record structure:
+
+* Add a module identifier to the ``DARSHAN_MODULE_IDS`` macro at the top of
+  the ``darshan-log-format.h`` header. In this macro, the first field is a
+  corresponding enum value that can be used to identify the module, the second
+  field is a string name for the module, the third field is the current
+  version number of the given module's log format, and the fourth field is a
+  corresponding pointer to a Darshan log utility implementation for this
+  module (which can be set to ``NULL`` until the module has its own log
+  utility implementation).
+
+* Add a top-level header that defines an I/O data record structure for the
+  module. Consider the "NULL" module and POSIX module log format headers for
+  examples (``darshan-null-log-format.h`` and ``darshan-posix-log-format.h``,
+  respectively).
+
+These log format headers are defined at the top level of the Darshan source
+tree, since both the darshan-runtime and darshan-util repositories depend on
+their definitions.
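+
+For illustration, a minimal log format header for the hypothetical "FOO"
+module sketched earlier might look like the following. It is modeled loosely
+on ``darshan-null-log-format.h``; all names here are invented, real modules
+typically generate their counter lists with X-macros, and the
+``darshan_base_record`` type is assumed to come from ``darshan-log-format.h``.
+
+.. code-block:: C
+
+    /* darshan-foo-log-format.h -- hypothetical module record definition;
+     * assumes darshan-log-format.h has been included first */
+    #ifndef __DARSHAN_FOO_LOG_FORMAT_H
+    #define __DARSHAN_FOO_LOG_FORMAT_H
+
+    /* current version number of the FOO module's log format */
+    #define DARSHAN_FOO_VER 1
+
+    struct darshan_foo_record
+    {
+        struct darshan_base_record base_rec; /* record id + rank */
+        int64_t counters[2];  /* integer I/O counters */
+        double fcounters[2];  /* floating point counters (timestamps) */
+    };
+
+    #endif /* __DARSHAN_FOO_LOG_FORMAT_H */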
+
+Darshan-runtime
+-------------------------------------
+
+Build modifications
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following modifications to the darshan-runtime build system are necessary
+to integrate new instrumentation modules:
+
+* Necessary linker flags for inserting this module's wrapper functions need to
+  be added to a module-specific file which is used when linking applications
+  with Darshan. For an example, consider
+  ``darshan-runtime/share/ld-opts/darshan-posix-ld-opts``, the required linker
+  options for the POSIX module. The base linker options file for Darshan
+  (``darshan-runtime/share/ld-opts/darshan-base-ld-opts.in``) must also be
+  updated to point to the new module-specific linker options file.
+
+* Targets must be added to ``Makefile.in`` to build static and shared objects
+  for the module's source files, which will be stored in the
+  ``darshan-runtime/lib/`` directory. The prerequisites to building static and
+  dynamic versions of ``libdarshan`` must be updated to include these objects,
+  as well.
+
+  - If the module defines a linker options file, a rule must also be added to
+    install this file with libdarshan.
+
+Instrumentation module implementation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In addition to the development notes from above and the exemplar "NULL" and
+POSIX modules, we provide the following notes to assist module developers:
+
+* Modules only need to include the ``darshan.h`` header to interface with
+  darshan-core.
+
+* The file record identifier given when registering a record with darshan-core
+  should be used to store the record structure in a hash table or some other
+  structure.
+
+  - Subsequent calls that need to modify this record can then use the
+    corresponding record identifier to look up the record in this local hash
+    table.
+  - It may be necessary to maintain a separate hash table for other handles
+    which the module may use to refer to a given record. For instance, the
+    POSIX module may need to look up a file record based on a given file
+    descriptor, rather than a path name.
+
+Darshan-util
+-------------------------------------
+
+Build modifications
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following modifications to the darshan-util build system are necessary to
+integrate new instrumentation modules:
+
+* Update ``Makefile.in`` with new targets necessary for building
+  module-specific logutil source.
+
+  - Make sure to add the module's logutil implementation objects as a
+    prerequisite for building ``libdarshan-util``.
+  - Make sure to update the ``all``, ``clean``, and ``install`` rules to
+    reference these new targets.
+
+Module-specific logutils and utilities
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For a straightforward reference implementation of module-specific log utility
+functions, consider the implementations for the NULL module
+(``darshan-util/darshan-null-logutils.*``) and the POSIX module
+(``darshan-util/darshan-posix-logutils.*``). These module-specific log utility
+implementations are built on top of the ``darshan_log_get_mod()`` and
+``darshan_log_put_mod()`` functions, and are used to read/write complete
+module records from/to file.
+
+Also, consider the ``darshan-parser`` source code for an example of a utility
+which can leverage ``libdarshan-util`` for analyzing the contents of a Darshan
+I/O characterization log with data from arbitrary instrumentation modules.
+
+.. _Sec shared record:
+
+Shared record reductions
+==============================================
+
+Darshan prefers to aggregate data records which are shared across all
+processes into a single data record, so module developers should consider
+implementing this functionality, though it is not strictly required.
+
+Module developers should implement the shared record reduction mechanism
+within the module's ``darshan_module_shutdown()`` function, as it provides an
+MPI communicator for the module to use for collective communication and a
+list of record identifiers which are shared globally by the module (as
+described in :ref:`Sec darshan-runtime`).
+
+In general, implementing a shared record reduction involves the following
+steps, illustrated in the sketch after this list:
+
+* reorganizing shared records into a contiguous region in the buffer of module
+  records;
+
+* allocating a record buffer to store the reduction output on application rank
+  0;
+
+* creating an MPI reduction operation using the ``MPI_Op_create()`` function
+  (see the ``MPI_Op_create`` man page for details);
+
+* reducing all shared records using the created MPI reduction operation and
+  the send and receive buffers described above.
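+
+The following sketch shows those steps for the hypothetical "FOO" module used
+earlier (reusing its ``struct foo_record``). The MPI calls are standard; the
+packing of shared records into ``shared_buf`` and the reduction rule are
+invented for illustration.
+
+.. code-block:: C
+
+    #include <stdlib.h>
+    #include <mpi.h>
+    #include "darshan.h"
+
+    /* user-defined combiner passed to MPI_Op_create(); merges two arrays
+     * of FOO records element by element */
+    static void foo_record_reduce(void *in_v, void *inout_v, int *len,
+        MPI_Datatype *dt)
+    {
+        struct foo_record *in = in_v;
+        struct foo_record *inout = inout_v;
+        int i;
+
+        for(i = 0; i < *len; i++)
+        {
+            /* example rule: keep the earliest recorded open time */
+            if(in[i].open_time < inout[i].open_time)
+                inout[i].open_time = in[i].open_time;
+        }
+    }
+
+    /* called from foo_shutdown() once the shared records have been packed
+     * into the contiguous array shared_buf of length shared_rec_count */
+    static void foo_reduce_shared(MPI_Comm mod_comm,
+        struct foo_record *shared_buf, int shared_rec_count)
+    {
+        struct foo_record *recv_buf = NULL;
+        MPI_Datatype red_type;
+        MPI_Op red_op;
+        int my_rank;
+
+        MPI_Comm_rank(mod_comm, &my_rank);
+        if(my_rank == 0) /* rank 0 receives the combined records */
+            recv_buf = malloc(shared_rec_count * sizeof(*recv_buf));
+
+        MPI_Type_contiguous(sizeof(struct foo_record), MPI_BYTE, &red_type);
+        MPI_Type_commit(&red_type);
+        MPI_Op_create(foo_record_reduce, 1 /* commutative */, &red_op);
+
+        MPI_Reduce(shared_buf, recv_buf, shared_rec_count, red_type,
+            red_op, 0, mod_comm);
+
+        /* rank 0 would now copy recv_buf back over its shared records */
+        MPI_Op_free(&red_op);
+        MPI_Type_free(&red_type);
+        free(recv_buf);
+    }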
+
+For a more in-depth example of how to use the shared record reduction
+mechanism, consider the implementations in the POSIX or MPI-IO modules.
+
+Other resources
+==============================================
+
+* `Darshan GitLab page `_
+* `Darshan project website `_
+* :ref:`TOC Darshan Runtime`
+* :ref:`TOC Darshan Utilities`
diff --git a/index.rst b/index.rst
index a590fdbcd..8563d3298 100644
--- a/index.rst
+++ b/index.rst
@@ -1,7 +1,76 @@
-Welcome to Darshan's documentation!
+Welcome to Darshan's Documentation
 ===================================
 
-.. note::
+The Darshan source tree is divided into two parts:
 
-   This project is under active development.
+* ``darshan-runtime``: to be installed on systems where you intend to
+  instrument MPI applications.
+* ``darshan-util``: to be installed on systems where you intend to analyze log
+  files produced by darshan-runtime.
+
+  + ``darshan-util/pydarshan``: a Python package providing interfaces to
+    Darshan log files.
+
+:ref:`Modularized I/O characterization using Darshan 3.x `
+gives details on the design of the new modularized version of Darshan (3.x)
+and how new instrumentation modules may be developed within Darshan.
+
+Site-specific documentation for facilities that deploy Darshan in production:
+
+* Argonne Leadership Computing Facility (ALCF):
+  `Theta `_,
+  `Cooley `_.
+* National Energy Research Scientific Computing Center
+  (`NERSC `_)
+* National Center for Supercomputing Applications
+  (`NCSA `_)
+* Oak Ridge Leadership Computing Facility (OLCF):
+  `darshan-runtime `_,
+  `darshan-util `_.
+* King Abdullah University of Science and Technology
+  (`KAUST `_)
+* European Centre for Medium-Range Weather Forecasts
+  (`ECMWF `_)
+* Ohio Supercomputer Center
+  (`OSC `_)
+* Julich Supercomputing Centre
+  (`JSC `_)
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Darshan Runtime
+   :name: TOC Darshan Runtime
+
+   darshan-runtime/doc/darshan-runtime
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Darshan Utilities
+   :name: TOC Darshan Utilities
+
+   darshan-util/doc/darshan-util
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Modularized I/O characterization
+   :name: TOC Modularization
+
+   doc/darshan-modularization.rst
+
+.. toctree::
+   :maxdepth: 2
+   :caption: PyDarshan
+   :name: pydarshantoc
+
+   darshan-util/pydarshan/docs/readme
+   darshan-util/pydarshan/docs/install
+   darshan-util/pydarshan/docs/usage
+   darshan-util/pydarshan/docs/api/pydarshan/modules
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/readthedocs/DEVELOPER_NOTES.md b/readthedocs/DEVELOPER_NOTES.md
new file mode 100644
index 000000000..0b019295c
--- /dev/null
+++ b/readthedocs/DEVELOPER_NOTES.md
@@ -0,0 +1,68 @@
+## Notes for Darshan developers working on documents published on readthedocs.io
+
+### Sign up/log into readthedocs.io
+
+* Visit [Read the Docs](https://about.readthedocs.com) to sign up or log into
+  an account. You can choose the "Sign up with GitHub" option.
+* After logging in, the web site leads you to the Read the Docs dashboard,
+  which shows a list of your projects.
+* [Read the Docs tutorial](https://docs.readthedocs.com/platform/latest/tutorial/index.html)
+  contains detailed information about how to sign up for a user account and
+  set up the connection to Darshan's GitHub repo.
+
+### Create Darshan project on Read the Docs
+
+* Click "Add project".
+* In the "Repository name" field, enter the Darshan GitHub repo name,
+  "darshan-hpc/darshan", and then click "Continue".
+* The default settings of Name, Repository URL, Default branch, and Language
+  will be pre-filled. Customize them if necessary. Then click "Next".
+* Because the configuration file `.readthedocs.yaml` is required in the root
+  folder of the GitHub repo, click "This file exists" to let it add a default
+  file.
+* This leads you back to the Read the Docs dashboard, where the very first
+  build should show as triggered and in progress.
+* Note that this also adds a webhook to the Darshan GitHub repo; see it under
+  the repo's Settings, then Webhooks.
+
+### Change settings of the Darshan project on the Read the Docs dashboard
+
+* Visit your dashboard at https://app.readthedocs.org/dashboard/
+* Select the Darshan project.
+* Click "Settings" on the right.
+  + At the bottom of this page, select "Build pull requests for this project"
+    and click "Save". This enables rebuilding the Darshan documents for all
+    pull requests.
+  + Add a new project maintainer
+    * Click "Maintainers" on the left.
+    * Click the "Add maintainer" button.
+  + Enable Analytics
+    * Click "Addons" on the left.
+    * On the "Analytics" tab, select "Analytics enabled" and click "Save".
+  + Environment variables
+    * Environment variables set here are used by Read the Docs; for example,
+      `DARSHAN_INSTALL_PREFIX` is set to the installation location of Darshan.
+    * Changing an existing variable must be done by first deleting it and then
+      adding a new one.
+
+### Configuration files
+
+* File `conf.py` must be stored in the root folder of Darshan's repo.
+* File `.readthedocs.yaml` must be stored in the root folder of Darshan's
+  repo.
+* Darshan's documents require `darshan-util` to be built and installed first,
+  before installing pydarshan. See the settings of `pre_install` in file
+  `.readthedocs.yaml`.
+* Building of `darshan-runtime` is not required.
+* The master file, `index.rst`, must be stored in the root folder.
+* File `index.rst` includes the following documents:
+  + darshan-runtime/doc/darshan-runtime
+  + darshan-util/doc/darshan-util
+  + doc/darshan-modularization.rst
+  + darshan-util/pydarshan/docs/readme
+  + darshan-util/pydarshan/docs/install
+  + darshan-util/pydarshan/docs/usage
+  + darshan-util/pydarshan/docs/api/pydarshan/modules