diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index abd0de459..5edd6333a 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -2,13 +2,30 @@ version: "2"
 build:
   os: "ubuntu-22.04"
+  apt_packages:
+    - autoconf
+    - automake
+    - libtool
+    - libtool-bin
+    - m4
   tools:
     python: "3.10"
+  jobs:
+    pre_install:
+      # configuring and installing darshan-util is required
+      # Note DARSHAN_INSTALL_PREFIX is a readthedocs environment variable which
+      # must be defined in the readthedocs dashboard
+      - cd darshan-util && autoreconf -i
+      - mkdir -p build && cd build && ../darshan-util/configure --prefix=$DARSHAN_INSTALL_PREFIX && make -j4 install
+      # create pydarshan doc files
+      - cd darshan-util/pydarshan && pip install -r requirements_dev.txt && make docs

 python:
   install:
-    - requirements: readthedocs/requirements.txt
+    - method: pip
+      path: darshan-util/pydarshan

 sphinx:
   configuration: conf.py
diff --git a/conf.py b/conf.py
index e09d6c414..efa467581 100644
--- a/conf.py
+++ b/conf.py
@@ -1,29 +1,131 @@
-# Configuration file for the Sphinx documentation builder.
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Documentation build configuration file.
+# This file must be stored at the root directory with name "conf.py".
+#
-# -- Project information
+import darshan
+# -- General configuration ---------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.napoleon',
+]
+
+
+napoleon_google_docstring = True
+napoleon_numpy_docstring = False
+napoleon_include_init_with_doc = True
+napoleon_include_private_with_doc = True
+napoleon_include_special_with_doc = True
+napoleon_use_ivar = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = {'.rst': 'restructuredtext'}
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
 project = u'Darshan'
 copyright = u"2025, Argonne National Laboratory"
 author = u""

-release = '0.1'
-version = '0.1.0'
+# The version info for the project you're documenting, acts as replacement
+# for |version| and |release|, also used in various other places throughout
+# the built documents.
+#
+# The short X.Y version.
+version = darshan.__version__
+# The full version, including alpha/beta/rc tags.
+release = darshan.__version__

-# -- General configuration
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# These patterns also affect html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

-extensions = [
-    'sphinx.ext.duration',
-    'sphinx.ext.doctest',
-    'sphinx.ext.autodoc',
-    'sphinx.ext.autosummary',
-    'sphinx.ext.intersphinx',
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output -------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.
See the documentation for +# a list of builtin themes. +# +#html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a +# theme further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# -- Options for HTMLHelp output --------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'darshandoc' + +# -- Options for LaTeX output ------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass +# [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'darshan.tex', u'Darshan Documentation', u'Argonne National Laboratory', 'manual'), ] +# -- Options for Texinfo output ---------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'darshan', u'Darshan Documentation', author, 'darshan', 'Utilities for Darshan HPC I/O instrumentation.', 'Miscellaneous'), +] + + intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), - 'sphinx': ('https://www.sphinx-doc.org/en/master/', None), + "python": ('https://docs.python.org/', None), + "matplotlib": ("https://matplotlib.org", None), } -intersphinx_disabled_domains = ['std'] - -templates_path = ['_templates'] diff --git a/darshan-runtime/doc/DXT-overhead.jpg b/darshan-runtime/doc/DXT-overhead.jpg new file mode 100644 index 000000000..3ffb18efe Binary files /dev/null and b/darshan-runtime/doc/DXT-overhead.jpg differ diff --git a/darshan-runtime/doc/darshan-runtime.rst b/darshan-runtime/doc/darshan-runtime.rst new file mode 100644 index 000000000..3d14f9164 --- /dev/null +++ b/darshan-runtime/doc/darshan-runtime.rst @@ -0,0 +1,1153 @@ +************************************** +Darshan-runtime installation and usage +************************************** + +Introduction +============================================================================== + +This document describes darshan-runtime, which is the instrumentation portion +of the Darshan characterization tool. It should be installed on the system +where you intend to collect I/O characterization information. + +Darshan instruments applications via either compile time wrappers or dynamic +library preloading. An application that has been instrumented with Darshan +will produce a single log file each time it is executed. This log summarizes +the I/O access patterns used by the application. + +The darshan-runtime instrumentation has traditionally only supported MPI +applications (specifically, those that call ``MPI_Init()`` and +``MPI_Finalize()``), but, as of version 3.2.0, Darshan also supports +instrumentation of non-MPI applications. Regardless of whether MPI is used, +Darshan provides detailed statistics about POSIX level file accesses made by +the application. 
In the case of MPI applications, Darshan additionally
+captures details on MPI-IO and HDF5 level access, as well as limited
+information about PnetCDF access. Note that instrumentation of non-MPI
+applications is currently only supported in Darshan's shared library, which
+applications must ``LD_PRELOAD``.
+
+Starting in version 3.0.0, Darshan also exposes an API that can be used to
+develop and add new instrumentation modules (for other I/O library interfaces
+or to gather system-specific data, for instance), as detailed in
+`Darshan Modularization `_.
+Newly contributed modules include a module for gathering system-specific
+parameters for jobs running on BG/Q systems, a module for gathering Lustre
+striping data for files on Lustre file systems, and a module for instrumenting
+stdio (i.e., stream I/O functions like ``fopen()``, ``fread()``, etc.).
+
+Starting in version 3.1.3, Darshan also allows for full tracing of application
+I/O workloads using the newly developed Darshan eXtended Tracing (DXT)
+instrumentation module. This module can be selectively enabled at runtime to
+provide high-fidelity traces of an application's I/O workload, as opposed to
+the coarse-grained I/O summary data that Darshan has traditionally provided.
+Currently, DXT only traces at the POSIX and MPI-IO layers. The figure below,
+from an example run collected on Cori at NERSC, demonstrates the low overhead
+of DXT tracing, which offers performance comparable to Darshan's traditional
+coarse-grained instrumentation methods.
+
+.. image:: DXT-overhead.jpg
+   :align: center
+   :width: 500
+   :alt: Measured DXT overhead with the IOR benchmark on Cori
+
+Starting in version 3.4.5, Darshan facilitates real-time collection of
+comprehensive application I/O workload traces through the newly integrated
+Darshan LDMS data module, known as the darshanConnector. Leveraging the
+Lightweight Distributed Metric Service (LDMS) streams API, the darshanConnector
+collects, transports, and/or stores traces of application I/O operations
+instrumented by Darshan at runtime. This module can only be enabled if the LDMS
+library is included in the Darshan build process. For more information about
+LDMS or LDMS streams please refer to the official `LDMS documentation
+`_.
+
+This document provides generic installation instructions, but "recipes" for
+several common HPC systems are provided at the end of the document as well.
+
+More information about Darshan can be found at the
+`Darshan web site `_.
+
+Requirements
+==============================================================================
+
+* C compiler (preferably GCC-compatible)
+* zlib development headers and library
+
+Conventional installation
+==============================================================================
+
+Compilation
+----------------------------------------
+
+**Configure and build example (with MPI support)**
+
+  .. code-block:: bash
+
+     tar -xvzf darshan-.tar.gz
+     cd darshan-/
+     ./prepare.sh
+     cd darshan-runtime/
+     ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID CC=mpicc
+     make
+     make install
+
+
+**Configure and build example (without MPI support)**
+
+  ..
code-block:: bash + + tar -xvzf darshan-.tar.gz + cd darshan-/ + ./prepare.sh + cd darshan-runtime/ + ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID --without-mpi CC=gcc + make + make install + +**Explanation of configure arguments:** + +* ``--with-mem-align=NUM``: This value is system-dependent and will be used by + Darshan to determine if the buffer for a read or write operation is + aligned in memory (default is 8). +* ``--with-jobid-env=NAME`` (mandatory): this specifies the environment + variable that Darshan should check to determine the jobid of a job. Common + values are ``PBS_JOBID`` or ``COBALT_JOBID``. If you are not using a + scheduler (or your scheduler does not advertise the job ID) then you can + specify ``NONE`` here. Darshan will fall back to using the pid of the rank 0 + process if the specified environment variable is not set. +* ``--with-username-env=NAME``: this specifies the environment variable that + Darshan should check to determine the username for a job. If not specified, + Darshan will use internal mechanisms to try to determine the username. + + .. note:: + Darshan relies on the ``LOGNAME`` environment variable to determine a + username, but this method isn't always reliable (e.g., on Slurm systems, + ``LOGNAME`` can be wiped when specifying additional environment + variables using the ``--export`` option to ``srun``). This configure + option allows specification of an additional environment variable to + extract a username from (e.g., ``SLURM_JOB_USER``). +* ``--with-log-path=DIR`` (this, or ``--with-log-path-by-env``, is mandatory): + This specifies the parent directory for the directory tree where Darshan logs + will be placed. + + .. note:: + After installation, any user can display the configured path with the + ``darshan-config --log-path`` command. +* ``--with-log-path-by-env=NAME1,NAME2,...``: specifies a comma separated list + of environment variables to check at runtime for log path location before the + one set by ``--with-log-path=DIR`` at configure time. +* ``--with-log-hints=hint1=x;hint2=y,...``: specifies hints to use when writing + the Darshan log file. See ``./configure --help`` for details. +* ``--with-mod-mem=NUM``: specifies the maximum amount of memory (in MiB) that + active Darshan instrumentation modules can collectively consume. +* ``--with-zlib=DIR``: specifies an alternate location for the zlib development + header and library. +* ``--without-mpi``: disables MPI support when building Darshan - MPI support is + assumed if not specified. +* ``--enable-mmap-logs``: enables the use of Darshan's mmap log file mechanism. +* ``--enable-cuserid``: enables use of cuserid() at runtime. +* ``--disable-ld-preload``: disables building of the Darshan ``LD_PRELOAD`` library +* ``--enable-group-readable-logs``: sets Darshan log file permissions to allow + group read access. +* ``--disable-exit-wrapper``: disables wrapping of ``_exit()`` calls as last + ditch shutdown hook for the Darshan library when used in non-MPI mode. +* ``CC=``: specifies the C compiler to use for compilation. 
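+
+For illustration, several of the flags described above might be combined into a
+single configure invocation. This is only a hypothetical sketch; the install
+prefix shown here is a made-up, site-specific value, and the log path and job
+ID variable should match your system:
+
+  .. code-block:: bash
+
+     # example only: adjust prefix, log path, and job ID variable for your site
+     ./configure --prefix=/soft/darshan-install \
+                 --with-log-path=/darshan-logs \
+                 --with-jobid-env=SLURM_JOBID \
+                 CC=mpicc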
+ +**Configure arguments for controlling which Darshan modules to use:** + +* ``--disable-posix-mod``: disables compilation and use of Darshan's POSIX + module (default=enabled) +* ``--disable-mpiio-mod``: disables compilation and use of Darshan's MPI-IO + module (default=enabled) +* ``--disable-stdio-mod``: disables compilation and use of Darshan's STDIO + module (default=enabled) +* ``--disable-dxt-mod``: disables compilation and use of Darshan's DXT module + (default=enabled) +* ``--enable-hdf5-mod``: enables compilation and use of Darshan's HDF5 module + (default=disabled) +* ``--with-hdf5=DIR``: + installation directory for HDF5 + + .. note:: + * Users must call ``--enable-hdf5-mod`` to enable HDF5 modules, + ``--with-hdf5`` is only used to additionally provide an HDF5 install + prefix. + * HDF5 instrumentation only works on HDF5 library versions >= 1.8, and + further requires that the HDF5 library used to build Darshan and the + HDF5 library being linked in either both be version >=1.10 or both be + version < 1.10. + * This option does not work with the profile configuration + instrumentation method described in the "Instrumenting applications" + section :ref:`Sec Use Profile`. +* ``--enable-pnetcdf-mod``: enables compilation and use of Darshan's PnetCDF + module (default=disabled) +* ``--with-pnetcdf=DIR``: + installation directory for PnetCDF + + .. note:: + * Users must call ``--enable-pnetcdf-mod`` to enable PnetCDF modules, + ``--with-pnetcdf`` is only used to additionally provide a PnetCDF + install prefix. + * PnetCDF instrumentation only works on PnetCDF library versions >= 1.8. +* ``--disable-lustre-mod``: disables compilation and use of Darshan's Lustre + module (default=enabled) +* ``--enable-daos-mod``: enables compilation and use of Darshan's DAOS module + (default=disabled) +* ``--with-daos=DIR``: + installation directory for DAOS + + .. note:: + Users must call ``--enable-daos-mod`` to enable DAOS modules, + ``--with-daos`` is only used to additionally provide a DAOS install + prefix. +* ``--enable-mdhim-mod``: enables compilation and use of Darshan's MDHIM module + (default=disabled) +* ``--enable-ldms-mod``: enables compilation and use of Darshan’s LDMS runtime + module (default=disabled) +* ``--with-ldms=DIR``: + installation directory for LDMS + + .. note:: + * Users must use the configuration flags ``--enable-ldms-mod`` and + ``--with-ldms=DIR`` to enable runtime data collection via LDMS. + * To collect runtime I/O information from Darshan, you will need to + configure, initialize, and connect to an LDMS streams daemon. For + detailed instructions please visit + `Running An LDMS Streams Daemon for Darshan `_. + * If LDMS is not installed on the system, please visit “Getting the + Source” and “Building the Source” in the + `LDMS Quick Start Guide `_. + +Environment preparation +---------------------------------------- + +Once darshan-runtime has been installed, you must prepare a location in which +to store the Darshan log files and configure an instrumentation method. + +This step can be safely skipped if you configured darshan-runtime using the +``--with-log-path-by-env`` option. A more typical configuration uses a static +directory hierarchy for Darshan log files. + +The ``darshan-mk-log-dirs.pl`` utility will configure the path specified at +configure time to include subdirectories organized by year, month, and day in +which log files will be placed. 
The deepest subdirectories will have sticky +permissions to enable multiple users to write to the same directory. If the +log directory is shared system-wide across many users then the following script +should be run as root. + + .. code-block:: bash + + darshan-mk-log-dirs.pl + +.. note:: + **A note about finding log paths after installation** - + Regardless of whether a Darshan installation is using the ``--with-log-path`` or + ``--with-log-path-by-env`` option, end users can display the path (and/or + environment variables) at any time by running ``darshan-config --log-path`` + on the command line. + +.. note:: + **A note about log directory permissions** - + All log files written by Darshan have permissions set to only allow + read access by the owner of the file. You can modify this behavior, + however, by specifying the --enable-group-readable-logs option at + configure time. One notable deployment scenario would be to configure + Darshan and the log directories to allow all logs to be readable by both the + end user and a Darshan administrators group. This can be done with the + following steps: + + * set the --enable-group-readable-logs option at configure time + * create the log directories with darshan-mk-log-dirs.pl + * recursively set the group ownership of the log directories to the Darshan + administrators group + * recursively set the setgid bit on the log directories + +Spack installation +============================================================================== + +You can also install Darshan via `Spack `_ as an alternative +to manual download, compilation, and installation. This may be especially +convenient for single-user installs. Darshan is divided into two separate +packages for the command line utilities and runtime instrumentation. You can +install either or both as follows: + + .. code-block:: bash + + spack install darshan-util + spack install darshan-runtime + +.. note:: + Darshan will generally compile and install fine using a variety of + compilers, but we advise using a gcc compiler in Spack to compile Darshan + (regardless of what compiler you will use for your applications) to + ensure maximum runtime compatibility. + +You can use the ``spack info darshan-runtime`` query to view the full list of +variants available for the darshan-runtime Spack package. For example, adding +a ``+slurm`` to the command line (``spack install darshan-runtime+slurm``) will +cause Darshan to be compiled with support for gathering job ID information from +the Slurm scheduler. + +The following commands will load the Darshan packages once they have been +installed: + + .. code-block:: bash + + spack load -r darshan-util + spack load -r darshan-runtime + + +Note that the spack install of darshan-runtime will use an environment variable +named ``$DARSHAN_LOG_DIR_PATH`` to indicate where it should store log files. +This variable is set to the user's home directory by default when the package +is loaded, but it may be overridden. + +On Cray systems, you can also perform an additional step to load a +Cray-specific module file. This will make a module called ``darshan`` available +as described later in this document in the Cray platform recipe. It enables +automatic instrumentation when using the standard Cray compiler wrappers. + + .. code-block:: bash + + module use `spack location -i darshan-runtime`/share/craype-2.x/modulefiles + +Instrumenting applications +============================================================================== + +.. 
note::
+   More specific installation "recipes" are provided later in this document
+   for some platforms. This section of the documentation covers general
+   techniques.
+
+Once Darshan has been installed and a log path has been prepared, the next step
+is to actually instrument applications. The preferred method is to instrument
+applications at compile time.
+
+Option 1: Instrumenting MPI applications at compile time
+--------------------------------------------------------
+
+This method is applicable to C, Fortran, and C++ MPI applications (regardless
+of whether they are static or dynamically linked) and is the most
+straightforward method to apply transparently system-wide. It works by
+injecting additional libraries and options into the linker command line to
+intercept relevant I/O calls.
+
+On Cray platforms you can enable the compile time instrumentation by simply
+loading the Darshan module. It can then be enabled for all users by placing
+that module in the default environment. As of Darshan 3.2.0 this will
+instrument both static and dynamic executables, while in previous versions of
+Darshan this was only sufficient for static executables. See the Cray
+installation recipe for more details.
+
+For other general MPICH-based MPI implementations, you can generate
+Darshan-enabled variants of the standard mpicc/mpicxx/mpif90/mpif77 wrappers
+using the following commands:
+
+  .. code-block:: bash
+
+     darshan-gen-cc.pl `which mpicc` --output mpicc.darshan
+     darshan-gen-cxx.pl `which mpicxx` --output mpicxx.darshan
+     darshan-gen-fortran.pl `which mpif77` --output mpif77.darshan
+     darshan-gen-fortran.pl `which mpif90` --output mpif90.darshan
+
+
+The resulting ``*.darshan`` wrappers will transparently inject Darshan
+instrumentation into the link step without any explicit user intervention.
+They can be renamed and placed in an appropriate PATH to enable automatic
+instrumentation. This method also works correctly for both static and dynamic
+executables as of Darshan 3.2.0.
+
+For other systems you can enable compile-time instrumentation by either
+manually adding the appropriate link options to your command line or modifying
+your default MPI compiler script. The ``darshan-config`` command line tool can
+be used to display the options that you should use:
+
+  .. code-block:: bash
+
+     # Linker options to use for dynamic linking (default on most platforms)
+     # These arguments should go *before* the MPI libraries in the underlying
+     # linker command line to ensure that Darshan can be activated. They should
+     # also ideally go before other libraries that may issue I/O function calls.
+     darshan-config --dyn-ld-flags
+
+     # Linker options to use for static linking
+     # The first set of arguments should go early in the link command line
+     # (before MPI), while the second set should go at the end of the link
+     # command line
+     darshan-config --pre-ld-flags
+     darshan-config --post-ld-flags
+
+.. _Sec Use Profile:
+
+Using a profile configuration
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The MPICH MPI implementation supports the specification of a profiling library
+configuration that can be used to insert Darshan instrumentation without
+modifying the existing MPI compiler script. You can enable a profiling
+configuration using environment variables or command line arguments to the
+compiler scripts:
+
+Example for MPICH 3.1.1 or newer:
+
+  .. code-block:: bash
+
+     export MPICC_PROFILE=$DARSHAN_PREFIX/share/mpi-profile/darshan-cc
+     export MPICXX_PROFILE=$DARSHAN_PREFIX/share/mpi-profile/darshan-cxx
+     export MPIFORT_PROFILE=$DARSHAN_PREFIX/share/mpi-profile/darshan-f
+
+
+Examples for command line use:
+
+  .. code-block:: bash
+
+     mpicc -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-c
+     mpicxx -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-cxx
+     mpif77 -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-f
+     mpif90 -profile=$DARSHAN_PREFIX/share/mpi-profile/darshan-f
+
+
+Note that unlike the previously described methods in this section, this method
+*will not* automatically adapt to static and dynamic linking options. The
+example profile configurations shown above only support dynamic linking.
+
+Example profile configurations are also provided with a "-static" suffix if you
+need examples for static linking.
+
+Option 2: Instrumenting MPI applications at runtime
+--------------------------------------------------------
+
+This method is applicable to pre-compiled dynamically linked executables as
+well as interpreted languages such as Python. You do not need to change your
+compile options in any way. This method works by injecting instrumentation at
+runtime. It will not work for statically linked executables.
+
+To use this mechanism, set the ``LD_PRELOAD`` environment variable to the full
+path to the Darshan shared library. The preferred method of inserting Darshan
+instrumentation in this case is to set the ``LD_PRELOAD`` variable specifically
+for the application of interest. Typically this is possible using command line
+arguments offered by the ``mpirun`` or ``mpiexec`` scripts or by the job
+scheduler:
+
+  .. code-block:: bash
+
+     mpiexec -n 4 -env LD_PRELOAD /home/carns/darshan-install/lib/libdarshan.so mpi-io-test
+
+
+  .. code-block:: bash
+
+     srun -n 4 --export=LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so mpi-io-test
+
+
+For sequential invocations of MPI programs, the following will set
+``LD_PRELOAD`` for the duration of the process only:
+
+  .. code-block:: bash
+
+     env LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so mpi-io-test
+
+
+Other environments may have other specific options for controlling this
+behavior. Please check your local site documentation for details.
+
+It is also possible to simply export ``LD_PRELOAD`` as follows, but this is
+discouraged because it can cause Darshan and MPI symbols to be pulled into
+unrelated binaries:
+
+  .. code-block:: bash
+
+     export LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so
+
+
+.. note::
+   For SGI systems running the MPT environment, it may be necessary to set the
+   ``MPI_SHEPHERD`` environment variable equal to ``true`` to avoid deadlock
+   when preloading the Darshan shared library.
+
+Option 3: Instrumenting non-MPI applications at runtime
+--------------------------------------------------------
+
+Similar to the process described in the previous section, Darshan relies on the
+``LD_PRELOAD`` mechanism for instrumenting dynamically-linked non-MPI
+applications. This allows Darshan to instrument dynamically-linked binaries
+produced by non-MPI compilers (e.g., gcc or clang), extending Darshan
+instrumentation to new contexts (like instrumentation of arbitrary Python
+programs or instrumenting serial file transfer utilities like ``cp`` and
+``scp``).
+
+The only additional step required of Darshan non-MPI users is to also set the
+``DARSHAN_ENABLE_NONMPI`` environment variable to signal to Darshan that
+non-MPI instrumentation is requested:
+
+  .. code-block:: bash
+
+     export DARSHAN_ENABLE_NONMPI=1
+
+
+As described in the previous section, it may be desirable for users to limit
+the scope of Darshan's instrumentation by only enabling ``LD_PRELOAD`` on the
+target executable:
+
+  .. code-block:: bash
+
+     env LD_PRELOAD=/home/carns/darshan-install/lib/libdarshan.so io-test
+
+
+.. note::
+   Recall that Darshan instrumentation of non-MPI applications is only
+   possible with dynamically-linked applications.
+
+Using other profiling tools at the same time as Darshan
+--------------------------------------------------------
+
+As of Darshan version 3.2.0, Darshan does not necessarily interfere with other
+profiling tools (particularly those using the PMPI profiling interface).
+Darshan itself does not use the PMPI interface, and instead uses dynamic linker
+symbol interception or ``--wrap`` function interception for static executables.
+
+As a rule of thumb, most profiling tools should appear in the linker command
+line *before* ``-ldarshan`` if possible.
+
+Using the Darshan eXtended Tracing (DXT) module
+==============================================================================
+
+Darshan's DXT module provides full tracing of MPI-IO and POSIX read/write APIs.
+While the DXT module is able to capture finer-grained details compared to
+traditional Darshan instrumentation, it may exhibit higher runtime and memory
+overheads. For this reason, DXT support is disabled by default in Darshan, but
+users can opt in to DXT instrumentation at runtime by setting their environment
+as follows:
+
+  .. code-block:: bash
+
+     export DXT_ENABLE_IO_TRACE=1
+
+
+DXT will trace each I/O operation to files instrumented by Darshan's MPI-IO and
+POSIX modules, using a default memory limit of 2 MiB for each module (DXT_POSIX
+and DXT_MPIIO). Memory usage and a number of other aspects of DXT tracing can
+be configured as described in section :ref:`Sec Conf Runtime`.
+
+Using AutoPerf instrumentation modules
+==============================================================================
+
+AutoPerf offers two additional Darshan instrumentation modules that may be
+enabled for MPI applications.
+
+* APMPI: Instrumentation of over 70 MPI-3 communication routines, providing
+  operation counts, datatype sizes, and timing information for each application
+  MPI rank.
+* APXC: Instrumentation of Cray XC environments to provide network and compute
+  counters of interest, via PAPI.
+
+Users can request Darshan to build the APMPI and APXC modules by passing
+``--enable-apmpi-mod`` and ``--enable-apxc-mod`` options to configure,
+respectively. Note that these options can be requested independently (i.e., you
+can build Darshan with APMPI support but not APXC support, and vice versa).
+
+The only prerequisite for the APMPI module is that Darshan be configured with
+an MPI-3 compliant compiler. For APXC, the user must obviously be using a Cray
+XC system and must make the PAPI interface available to Darshan (e.g., by
+running ``module load papi`` before building Darshan).
+
+If using the APMPI module, users can additionally specify the
+``--enable-apmpi-coll-sync`` configure option to force Darshan to synchronize
+before calling underlying MPI routines and to capture additional timing
+information on how synchronized processes are.
Users should note that this option imposes additional overhead, but it can
+be useful to help diagnose whether applications are spending a lot of time
+synchronizing as part of collective communication calls. For this reason, we
+do not recommend setting this option for production Darshan deployments.
+
+.. note::
+   The AutoPerf instrumentation modules are provided as Git submodules to
+   Darshan's main repository, so if building Darshan source that has been
+   cloned from Git, it is necessary to first retrieve the AutoPerf submodules
+   by running the following command:
+
+   .. code-block:: bash
+
+      git submodule update --init
+
+
+.. _Sec Conf Runtime:
+
+Configuring Darshan library at runtime
+==============================================================================
+
+To fine-tune Darshan library settings (e.g., internal memory usage,
+instrumentation scope, etc.), Darshan provides a couple of mechanisms:
+
+* user environment variable overrides
+* a configuration file, which users must specify the path to using the
+  ``DARSHAN_CONFIG_PATH`` environment variable
+
+For settings that are specified both via a config file and via an environment
+variable, the environment settings will take precedence.
+
+.. note::
+   Users of facility-provided Darshan installs should be mindful that these
+   installs could define their own default Darshan config file. In this case,
+   users should double check that the ``DARSHAN_CONFIG_PATH`` environment
+   variable is not already set, and if it is, users should consider copying the
+   default config file as a starting point before applying their own settings.
+
+Darshan library config settings
+----------------------------------------
+
+The Darshan library honors the following settings to modify behavior at
+runtime:
+
+**Table 1. Darshan library config settings**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20, 10, 70
+   :class: longtable
+   :align: left
+
+   * - environment variable setting
+     - config file setting
+     - description
+   * - DARSHAN_DISABLE=1
+     - N/A
+     - Disables Darshan instrumentation.
+   * - DARSHAN_ENABLE_NONMPI=1
+     - N/A
+     - Enables Darshan's non-MPI mode, required for applications that do not
+       call MPI_Init and MPI_Finalize.
+   * - DARSHAN_CONFIG_PATH=
+     - N/A
+     - Specifies the path to a Darshan config file to load settings from.
+   * - DARSHAN_DUMP_CONFIG=1
+     - DUMP_CONFIG
+     - Prints the Darshan configuration to stderr at runtime.
+   * - DARSHAN_DISABLE_SHARED_REDUCTION=1
+     - DISABLE_SHARED_REDUCTION
+     - Disables the step in Darshan aggregation in which files that were
+       accessed by all ranks are collapsed into a single cumulative file record
+       at rank 0. This option retains more per-process information at the
+       expense of creating larger log files.
+   * - DARSHAN_INTERNAL_TIMING=1
+     - INTERNAL_TIMING
+     - Enables internal instrumentation that will print the time required to
+       startup and shutdown Darshan to stderr at runtime.
+   * - DARSHAN_MODMEM=
+     - MODMEM
+     - Specifies the amount of memory (in MiB) Darshan instrumentation modules
+       can collectively consume (if not specified, a default 4 MiB quota is
+       used). Overrides any ``--with-mod-mem`` configure argument.
+   * - DARSHAN_NAMEMEM=
+     - NAMEMEM
+     - Specifies the amount of memory (in MiB) Darshan can consume for storing
+       record names (if not specified, a default 1 MiB quota is used).
+       Overrides any ``--with-name-mem`` configure argument.
+   * - DARSHAN_MEMALIGN=
+     - MEMALIGN
+     - Specifies a value for system memory alignment. Overrides any
+       ``--with-mem-align`` configure argument (default is 8 bytes).
+   * - DARSHAN_JOBID=
+     - JOBID
+     - Specifies the name of the environment variable to use for the job
+       identifier, such as PBS_JOBID. Overrides ``--with-jobid-env`` configure
+       argument.
+   * - DARSHAN_LOGHINTS=
+     - LOGHINTS
+     - Specifies the MPI-IO hints to use when storing the Darshan output file.
+       The format is a semicolon-delimited list of key=value pairs, for
+       example: hint1=value1;hint2=value2. Overrides any ``--with-log-hints``
+       configure argument.
+   * - DARSHAN_LOGPATH=
+     - LOGPATH
+     - Specifies the path to write Darshan log files to. Note that this
+       directory needs to be formatted using the darshan-mk-log-dirs script.
+       Overrides any ``--with-log-path`` configure argument.
+   * - DARSHAN_MMAP_LOGPATH=
+     - MMAP_LOGPATH
+     - If Darshan's mmap log file mechanism is enabled, this variable specifies
+       what path the mmap log files should be stored in (if not specified, log
+       files will be stored in ``/tmp``).
+   * - DARSHAN_LOGFILE=
+     - N/A
+     - Specifies the path (directory + Darshan log file name) to write the
+       output Darshan log to. This overrides the default Darshan behavior of
+       automatically generating a log file name and adding it to a log file
+       directory formatted using the darshan-mk-log-dirs script.
+   * - DARSHAN_MOD_DISABLE=
+     - MOD_DISABLE
+     - Specifies a list of comma-separated Darshan module names to disable at
+       runtime.
+   * - DARSHAN_MOD_ENABLE=
+     - MOD_ENABLE
+     - Specifies a list of comma-separated Darshan module names to enable at
+       runtime.
+   * - DARSHAN_APP_EXCLUDE=
+     - APP_EXCLUDE
+     - Specifies a list of comma-separated regexes that match application names
+       that should not be instrumented. This is useful if Darshan is loaded via
+       ``LD_PRELOAD``, in which case logs may be generated for many unintended
+       applications.
+   * - DARSHAN_APP_INCLUDE=
+     - APP_INCLUDE
+     - Specifies a list of comma-separated regexes that match application names
+       that should be instrumented. This setting is used to override any
+       APP_EXCLUDE rules.
+   * - DARSHAN_RANK_EXCLUDE=
+     - RANK_EXCLUDE
+     - Specifies a list of comma-separated ranks (or rank ranges) that should
+       not be instrumented. Rank ranges are formatted like "start:end" (if
+       start or end are not specified, the first or last rank is assumed,
+       respectively). Note that the Darshan library will still run on all
+       processes of an application; this setting just controls whether specific
+       ranks are capturing instrumentation data.
+   * - DARSHAN_RANK_INCLUDE=
+     - RANK_INCLUDE
+     - Specifies a list of comma-separated ranks (or rank ranges) that should
+       be instrumented. This setting is used to override any RANK_EXCLUDE
+       rules.
+   * - DARSHAN_DXT_SMALL_IO_TRIGGER=
+     - DXT_SMALL_IO_TRIGGER
+     - Specifies a floating point percentage (i.e., ".8" would be 80%)
+       indicating a threshold of small I/O operation accesses (defined as
+       accesses smaller than 10 KiB), with DXT trace data being discarded for
+       files that exhibit a percentage of small I/O operations less than this
+       threshold.
+   * - DARSHAN_DXT_UNALIGNED_IO_TRIGGER=
+     - DXT_UNALIGNED_IO_TRIGGER
+     - Specifies a floating point percentage (i.e., ".8" would be 80%)
+       indicating a threshold of unaligned I/O operation accesses (defined as
+       accesses not aligned to the file alignment value determined by Darshan),
+       with DXT trace data being discarded for files that exhibit a percentage
+       of unaligned I/O operations less than this threshold.
+   * - N/A
+     - MAX_RECORDS
+     - Specifies the number of records to pre-allocate for each instrumentation
+       module given in a comma-separated list. Most modules default to tracing
+       1024 file records per-process.
+   * - N/A
+     - NAME_EXCLUDE
+     - Specifies a list of comma-separated regexes that match record names that
+       should not be instrumented for instrumentation modules given in a
+       comma-separated module list.
+   * - N/A
+     - NAME_INCLUDE
+     - Specifies a list of comma-separated regexes that match record names that
+       should be instrumented for instrumentation modules given in a
+       comma-separated module list. This setting is used to override any
+       NAME_EXCLUDE rules.
+   * - DXT_ENABLE_IO_TRACE=1
+     - N/A
+     - (DEPRECATED) Setting this environment variable enables the DXT (Darshan
+       eXtended Tracing) modules at runtime for all files instrumented by
+       Darshan. Replaced by the MOD_ENABLE setting.
+   * - DARSHAN_EXCLUDE_DIRS=
+     - N/A
+     - (DEPRECATED) Specifies a list of comma-separated paths that Darshan will
+       not instrument at runtime (in addition to Darshan's default exclusion
+       list). Replaced by the NAME_EXCLUDE setting.
+   * - DARSHAN_LDMS_ENABLE=
+     - N/A
+     - Switch to initialize LDMS. If not set, no runtime I/O data will be
+       collected. This only needs to be exported (i.e., setting it to a
+       value/string is optional).
+   * - DARSHAN_LDMS_ENABLE_=
+     - N/A
+     - Specifies the module data that will be collected during runtime using
+       the LDMS streams API. These only need to be exported (i.e., setting them
+       to a value/string is optional).
+
+.. note::
+   - Config file settings must be specified one per line, with settings and
+     their parameters separated by any whitespace.
+   - Settings that take a comma-separated list of modules can use "*" as a
+     wildcard to represent all modules.
+   - Some config file settings (specifically, ``MOD_DISABLE``/``ENABLE``,
+     ``APP_EXCLUDE``/``INCLUDE``, ``RANK_EXCLUDE``/``INCLUDE``,
+     ``NAME_EXCLUDE``/``INCLUDE``, and ``MAX_RECORDS``) may be repeated multiple
+     times rather than providing comma-separated values, to ease readability.
+   - Improperly formatted config settings are ignored, with Darshan falling
+     back to its default configuration.
+   - All settings that take regular expressions as input expect them to be
+     formatted according to the POSIX ``regex.h`` interface -- refer to the
+     `regex.h manpage `_
+     for more details on regex syntax.
+
+
+Example Darshan configuration
+----------------------------------------
+
+An example configuration file with annotations is given below (note that
+comments are allowed by prefixing a line with ``#``):
+
+  ..
code-block:: bash
+
+     # enable DXT modules, which are off by default
+     MOD_ENABLE DXT_POSIX,DXT_MPIIO
+
+     # allocate 4096 file records for POSIX and MPI-IO modules
+     # (darshan only allocates 1024 per-module by default)
+     MAX_RECORDS 4096 POSIX,MPI-IO
+
+     # the '*' specifier can be used to apply settings for all modules
+     # in this case, we want all modules to ignore record names
+     # prefixed with "/home" (i.e., stored in our home directory),
+     # with a superseding inclusion for files with a ".out" suffix
+     NAME_EXCLUDE ^/home *
+     NAME_INCLUDE .out$ *
+
+     # bump up Darshan's default memory usage to 8 MiB
+     MODMEM 8
+
+     # avoid generating logs for git and ls binaries
+     APP_EXCLUDE git,ls
+
+     # exclude instrumentation for all ranks first
+     RANK_EXCLUDE 0:
+     # then selectively re-include ranks 0:3 and 12:15
+     RANK_INCLUDE 0:3
+     RANK_INCLUDE 12:15
+
+     # only retain DXT traces for files that were accessed
+     # using small I/O ops 20+% of the time
+     DXT_SMALL_IO_TRIGGER .2
+
+
+This configuration could be similarly set using environment variables, though
+note that both the ``MAX_RECORDS`` and ``NAME_EXCLUDE``/``INCLUDE`` settings do
+not have environment variable counterparts:
+
+  .. code-block:: bash
+
+     export DARSHAN_MOD_ENABLE="DXT_POSIX,DXT_MPIIO"
+     export DARSHAN_MODMEM=8
+     export DARSHAN_APP_EXCLUDE="git,ls"
+     export DARSHAN_RANK_EXCLUDE="0:"
+     export DARSHAN_RANK_INCLUDE="0:3,12:15"
+     export DARSHAN_DXT_SMALL_IO_TRIGGER=.2
+
+
+Darshan installation recipes
+==============================================================================
+
+The following recipes provide examples for prominent HPC systems. These are
+intended to be used as a starting point. You will most likely have to adjust
+paths and options to reflect the specifics of your system.
+
+Cray platforms (XE, XC, or similar)
+----------------------------------------
+
+This section describes how to compile and install Darshan, as well as how to
+use a software module to enable and disable Darshan instrumentation on Cray
+systems.
+
+Building and installing Darshan
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Please set your environment to use the GNU programming environment before
+configuring or compiling Darshan. Although Darshan can be built with a variety
+of compilers, the GNU compiler is recommended because it will produce a Darshan
+library that is interoperable with the widest range of compilers and linkers.
+On most Cray systems you can enable the GNU programming environment with a
+command similar to ``module swap PrgEnv-intel PrgEnv-gnu``. Please see your
+site documentation for information about how to switch programming
+environments.
+
+The following example shows how to configure and build Darshan on a Cray system
+using the GNU programming environment. Adjust the ``--with-log-path`` and
+``--prefix`` arguments to point to the desired log file path and installation
+path, respectively.
+
+  .. code-block:: bash
+
+     module swap PrgEnv-pgi PrgEnv-gnu
+     ./configure \
+        --with-log-path=/shared-file-system/darshan-logs \
+        --prefix=/soft/darshan-3.3.0 \
+        --with-jobid-env=SLURM_JOBID \
+        --with-username-env=SLURM_JOB_USER \
+        CC=cc
+     make install
+     module swap PrgEnv-gnu PrgEnv-pgi
+
+
+Rationale
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+.. note::
+   The job ID is set to ``SLURM_JOBID`` for use with a Slurm-based scheduler.
+   An additional environment variable for querying a job's username
+   (``SLURM_JOB_USER``) is provided as a fallback in case the default
+   environment variable ``LOGNAME`` is not properly set (e.g., as is the case
+   when using Slurm's ``--export`` option to ``srun``). The ``CC`` variable
+   is configured to point to the standard MPI compiler.
+
+If instrumentation of the HDF5 library is desired, additionally load an
+acceptable HDF5 module (e.g., ``module load cray-hdf5-parallel``) prior to
+building and use the ``--enable-hdf5-mod`` configure argument. We additionally
+recommend that you modify Darshan's generated Cray software module to include a
+dependency on the HDF5 software module used -- this is necessary to ensure
+Darshan library dependencies are satisfied at application link and run time.
+
+  .. code-block:: bash
+
+     prereq cray-hdf5-parallel
+
+
+Note that the Darshan-enabled Cray compiler wrappers will always prefer
+user-supplied HDF5 libraries over the library used to build Darshan. However,
+due to ABI changes in the HDF5 library, the two HDF5 libraries used must be
+compatible. Specifically, the HDF5 library versions need to be either both
+greater than or equal to 1.10 or both less than 1.10. If users use an HDF5
+version that is incompatible with Darshan, either link or runtime errors will
+occur and the user will have to switch HDF5 versions or unload the Darshan
+module.
+
+Optional RDTSCP timers for Theta
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.. note::
+   Darshan's default mechanism (``clock_gettime()``) for retrieving timing
+   information may introduce more overhead than expected for statically linked
+   executables on some platforms. The Theta system at the ALCF (as of July
+   2021) is a notable example. It uses static linking by default (which
+   prevents the use of the standard vDSO optimization for ``clock_gettime()``
+   calls), and its CPU architecture exhibits relatively high system call
+   overhead. For Theta and other similar platforms you can explicitly request
+   that Darshan use the ``RDTSCP`` instruction in place of ``clock_gettime()``
+   for timing purposes. ``RDTSCP`` is a non-portable, Intel-specific
+   instruction. It must be enabled explicitly at configure time, and the base
+   clock frequency of the compute node CPU must be specified.
+
+   This mechanism can be activated on Theta by adding
+   ``--enable-rdtscp=1300000000`` to the configure command line (the KNL CPUs
+   on Theta have a base frequency of 1.3 GHz).
+
+   Note that timer overhead is unlikely to be a factor in overall performance
+   unless the application has an edge case workload with frequent sequential
+   I/O operations, such as small I/O accesses to cached data on a single
+   process.
+
+As in any Darshan installation, the darshan-mk-log-dirs.pl script can then be
+used to create the appropriate directory hierarchy for storing Darshan log
+files in the ``--with-log-path`` directory.
+
+Note that Darshan is not currently capable of detecting the stripe size (and
+therefore the Darshan FILE_ALIGNMENT value) on Lustre file systems. If a
+Lustre file system is detected, then Darshan assumes an optimal file alignment
+of 1 MiB.
+
+Enabling Darshan instrumentation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Darshan will automatically install example software module files in the
+following locations (depending on how you specified the ``--prefix`` option in
+the previous section):
+
+  .. code-block:: bash
+
+     /soft/darshan-3.3.0/share/craype-1.x/modulefiles/darshan
+     /soft/darshan-3.3.0/share/craype-2.x/modulefiles/darshan
+
+
+Select the one that is appropriate for your Cray programming environment (see
+the version number of the craype module in ``module list``).
+
+If you are using the Cray Programming Environment version 1.x, then you must
+modify the corresponding modulefile before using it. Please see the comments
+at the end of the file and choose an environment variable method that is
+appropriate for your system. If this is not done, then the compiler may fail
+to link some applications when the Darshan module is loaded.
+
+If you are using the Cray Programming Environment version 2.x then you can
+likely use the modulefile as is. Note that it pulls most of its configuration
+from the lib/pkgconfig/darshan-runtime.pc file installed with Darshan.
+
+The modulefile that you select can be copied to a system location, or the
+install location can be added to your local module path with the following
+command:
+
+  .. code-block:: bash
+
+     module use /soft/darshan-3.3.0/share/craype-/modulefiles
+
+
+From this point, Darshan instrumentation can be enabled for all future
+application compilations by running "module load darshan".
+
+Linux clusters using MPICH
+----------------------------------------
+
+Most MPICH installations produce dynamic executables by default. To configure
+Darshan in this environment you can use the following example. We recommend
+using mpicc with GNU compilers to compile Darshan.
+
+  .. code-block:: bash
+
+     ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID CC=mpicc
+
+
+The ``darshan-gen-*`` scripts described earlier in this document can be used to
+create variants of the standard mpicc/mpicxx/mpif77/mpif90 scripts that are
+Darshan enabled. These scripts will work correctly for both dynamic and
+statically linked executables.
+
+Linux clusters using Intel MPI
+----------------------------------------
+
+Most Intel MPI installations produce dynamic executables by default. To
+configure Darshan in this environment you can use the following example:
+
+.. code-block:: bash
+
+   ./configure --with-log-path=/darshan-logs --with-jobid-env=PBS_JOBID CC=mpicc
+
+
+Rationale
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. note::
+   There is nothing unusual in this configuration except that you should use
+   the underlying GNU compilers rather than the Intel ICC compilers to compile
+   Darshan itself.
+
+You can enable Darshan instrumentation at compile time by adding
+``darshan-config --dyn-ld-flags`` options to your linker command line.
+
+Alternatively, you can use the ``LD_PRELOAD`` runtime instrumentation method to
+instrument executables that have already been compiled.
+
+Linux clusters using Open MPI
+----------------------------------------
+
+Follow the generic instructions provided at the top of this document for
+compilation, and make sure that the ``CC`` used for compilation is based on a
+GNU compiler.
+
+You can enable Darshan instrumentation at compile time by adding
+``darshan-config --dyn-ld-flags`` options to your linker command line.
+
+Alternatively, you can use the ``LD_PRELOAD`` runtime instrumentation method to
+instrument executables that have already been compiled.
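+
+As a minimal sketch of the compile-time approach described above (the program
+name here is hypothetical), the ``darshan-config`` output can be expanded
+directly on the link command line:
+
+  .. code-block:: bash
+
+     # hypothetical example: link options expand before the MPI libraries
+     # that the compiler wrapper appends, matching the ordering guidance above
+     mpicc -o mpi-io-test mpi-io-test.c $(darshan-config --dyn-ld-flags)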
+
+Debugging
+==============================================================================
+
+No log file
+----------------------------------------
+
+In cases where Darshan is not generating a log file for an application, some
+common things to check are:
+
+* Make sure you are looking in the correct place for logs. Confirm the
+  location with the ``darshan-config --log-path`` command.
+
+* Check stderr to ensure Darshan isn't indicating any internal errors (e.g.,
+  an invalid log file path).
+
+For statically linked executables:
+
+* Ensure that Darshan symbols are present in the underlying executable by
+  running ``nm`` on it:
+
+  .. code-block:: bash
+
+     > nm test | grep darshan
+     0000000000772260 b darshan_core
+     0000000000404440 t darshan_core_cleanup
+     00000000004049b0 T darshan_core_initialize
+     000000000076b660 d darshan_core_mutex
+     00000000004070a0 T darshan_core_register_module
+
+For dynamically linked executables:
+
+* Ensure that the Darshan library is present in the list of shared libraries
+  to be used by the application, and that it appears before the MPI library:
+
+  .. code-block:: bash
+
+     > ldd mpi-io-test
+     linux-vdso.so.1 (0x00007ffd83925000)
+     libdarshan.so => /home/carns/working/install/lib/libdarshan.so (0x00007f0f4a7a6000)
+     libmpi.so.12 => /home/carns/working/src/spack/opt/spack/linux-ubuntu19.10-skylake/gcc-9.2.1/mpich-3.3.2-h3dybprufq7i5kt4hcyfoyihnrnbaogk/lib/libmpi.so.12 (0x00007f0f4a44f000)
+     libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f0f4a241000)
+     ...
+
+
+General:
+
+* Ensure that the linker is correctly linking in Darshan's runtime libraries:
+
+  - A common mistake is to explicitly link in the underlying MPI libraries
+    (e.g., ``-lmpich`` or ``-lmpichf90``) in the link command, which can
+    interfere with Darshan's instrumentation
+
+    * These libraries are usually linked in automatically by the compiler
+
+    * MPICH's ``mpicc`` compiler's ``-show`` flag can be used to examine the
+      invoked link command, for instance
+
+  - The linker's ``-y`` option can be used to verify that Darshan is properly
+    intercepting the MPI_Init function (e.g., by setting
+    ``CFLAGS='-Wl,-yMPI_Init'``), which it uses to initialize its runtime
+    structures
+
+    .. code-block:: bash
+
+       /usr/common/software/darshan/3.0.0-pre3/lib/libdarshan.a(darshan-core-init-finalize.o): definition of MPI_Init
+
diff --git a/darshan-test/RELEASE-CHECKLIST.txt b/darshan-test/RELEASE-CHECKLIST.txt
index 86fc62cbc..e2cdc209b 100644
--- a/darshan-test/RELEASE-CHECKLIST.txt
+++ b/darshan-test/RELEASE-CHECKLIST.txt
@@ -45,6 +45,9 @@ Notes on how to release a new version of Darshan
 12) generate web documentation from asciidoc by running make in
     darshan-util/doc/ and darshan-runtime/doc/ directories
     - upload .html files to CELS server (/nfs/pub_html/gce/projects/darshan/docs)
+    - This step is only applicable to releases 3.4.7 and prior. Documentation
+      for later releases has been migrated to readthedocs.io and is
+      automatically regenerated every time a new commit is pushed.
13) update download page on darshan web site: www.mcs.anl.gov/darshan/download 14) update darshan-runtime and darshan-util Spack packages with new release - consider whether new variants should be exposed based on new features diff --git a/darshan-util/doc/darshan-util.rst b/darshan-util/doc/darshan-util.rst new file mode 100644 index 000000000..2ed0bc347 --- /dev/null +++ b/darshan-util/doc/darshan-util.rst @@ -0,0 +1,1443 @@ +################################### +Darshan-util installation and usage +################################### + +********************************** +Introduction +********************************** + +This document describes darshan-util, a collection of tools for parsing and +summarizing log files produced by Darshan instrumentation. The darshan-util +package can be installed and used on any system regardless of where the logs +were originally generated. Darshan log files are platform-independent. + +More information about Darshan can be found at the +`Darshan web site `_. + +********************************** +Requirements +********************************** + +Darshan-util has only been tested in Linux environments, but will likely +work in other Unix-like environments as well. + +**Hard requirements** + +* C compiler +* zlib development headers and library (zlib-dev or similar) + +**Optional requirements** + +* libbz2 development headers and library (libbz2-dev or similar) +* Perl +* pdflatex +* gnuplot 4.2 or later +* epstopdf + +********************************** +Compilation and installation +********************************** + +**Configure and build example** + +.. code-block:: bash + + tar -xvzf darshan-.tar.gz + cd darshan-/ + ./prepare.sh + cd darshan-util/ + ./configure + make + make install + +The darshan-util package is intended to be used on a login node or workstation. +For most use cases this means that you should either leave ``CC`` to its +default setting or specify a local compiler. This is in contrast to the +darshan-runtime documentation, which suggests setting ``CC`` to mpicc because +the runtime library will be used in the compute node environment. + +You can specify ``--prefix`` to install darshan-util in a specific location +(such as in your home directory for non-root installations). See ``./configure +--help`` for additional optional arguments, including how to specify +alternative paths for zlib and libbz2 development libraries. darshan-util also +supports VPATH or "out-of-tree" builds if you prefer that method of +compilation. + +The ``--enable-shared`` argument to configure can be used to enable compilation +of a shared version of the darshan-util library. + +The ``--enable-apmpi-mod`` and ``--enable-apxc-mod`` configure arguments must +be specified to build darshan-util with support for AutoPerf APMPI and APXC +modules, respectively. + +.. note:: + AutoPerf log analysis code is provided as Git submodules to Darshan's main + repository, so if building Darshan source that has been cloned from Git, it + is necessary to first retrieve the AutoPerf submodules by running the + following command: + + .. code-block:: bash + + git submodule update --init + + +********************************** +Analyzing log files +********************************** + +Each time a darshan-instrumented application is executed, it will generate a +single log file summarizing the I/O activity from that application. 
See the +darshan-runtime documentation for more details, but the log file for a given +application will likely be found in a centralized directory, with the path and +log file name in the following format: + +.. code-block:: bash + + ///_____.darshan + + +This is a binary format file that summarizes I/O activity. As of version 2.0.0 +of Darshan, this file is portable and does not have to be analyzed on the same +system that executed the job. Also, note that Darshan logs generated with +Darshan versions preceding version 3.0 will have the extension ``darshan.gz`` +(or ``darshan.bz2`` if compressed using bzip2 format). These logs are not +compatible with Darshan 3.0 utilities, and thus must be analyzed using an +appropriate version (2.x) of the darshan-util package. + +darshan-job-summary.pl +======================================= + +You can generate a graphical summary of the I/O activity for a job by using the +``darshan-job-summary.pl`` graphical summary tool as in the following example: + +.. code-block:: bash + + darshan-job-summary.pl carns_my-app_id114525_7-27-58921_19.darshan.gz + +This utility requires Perl, pdflatex, epstopdf, and gnuplot in order to +generate its summary. By default, the output is written to a multi-page pdf +file based on the name of the input file (in this case it would produce a +``carns_my-app_id114525_7-27-58921_19.pdf`` output file). You can also +manually specify the name of the output file using the ``--output`` argument. + +An example of the output produced by ``darshan-job-summary.pl`` can be found +`HERE `_. + +.. note:: + The darshan-job-summary tool depends on a few LaTeX packages that may not be + available by default on all systems, including: lastpage, subfigure, and + threeparttable. These packages can be found and installed using your + system's package manager. For instance, the packages can be installed on + Debian or Ubuntu systems as follows: ``apt-get install texlive-latex-extra`` + +darshan-summary-per-file.sh +======================================= + +This utility is similar to darshan-job-summary.pl, except that it produces a +separate pdf summary for every file accessed by an application. It can be +executed as follows: + +.. code-block:: bash + + darshan-summary-per-file.sh carns_my-app_id114525_7-27-58921_19.darshan.gz output-dir + +The second argument is the name of a directory (to be created) that will +contain the collection of pdf files. Note that this utility probably is not +appropriate if your application opens a large number of files. + +You can produce a summary for a specific file of interest with the following +commands: + +.. code-block:: bash + + darshan-convert --file HASH carns_my-app_id114525_7-27-58921_19.darshan.gz interesting_file.darshan.gz + darshan-job-summary.pl interesting_file.darshan.gz + +The "HASH" argument is the hash of a file name as reported by darshan-parser. +The ``interesting_file.darshan.gz`` file produced by darshan-convert is like a +normal Darshan log file, but it will only contain instrumentation for the +specified file. + +darshan-parser +======================================= + +You can use the ``darshan-parser`` command line utility to obtain a complete, +human-readable, text-format dump of all information contained in a log file. +The following example converts the contents of the log file into a fully +expanded text file: + +.. 
+
+   darshan-parser carns_my-app_id114525_7-27-58921_19.darshan.gz > ~/job-characterization.txt
+
+The format of this output is described in the following section.
+
+Guide to darshan-parser output
+=======================================
+
+The beginning of the output from darshan-parser displays a summary of overall
+information about the job. Additional job-level summary information can also be
+produced using the ``--perf``, ``--file``, or ``--total`` command line options.
+See Section :ref:`Sec Additional summary output` for more information about
+those options.
+
+The following table defines the meaning of each line in the default header
+section of the output:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30, 70
+   :class: longtable
+   :align: left
+
+   * - output line
+     - description
+   * - "# darshan log version"
+     - internal version number of the Darshan log file
+   * - "# exe"
+     - name of the executable that generated the log file
+   * - "# uid"
+     - user id that the job ran as
+   * - "# jobid"
+     - job id from the scheduler
+   * - "# start_time"
+     - start time of the job, in seconds since the epoch
+   * - "# start_time_asci"
+     - start time of the job, in human readable format
+   * - "# end_time"
+     - end time of the job, in seconds since the epoch
+   * - "# end_time_asci"
+     - end time of the job, in human readable format
+   * - "# nprocs"
+     - number of MPI processes
+   * - "# run time"
+     - run time of the job in seconds
+
+Log file region sizes
+-------------------------------------
+
+The next portion of the parser output displays the size of each region
+contained within the given log file. Each log file will contain the following
+regions:
+
+* header - constant-sized uncompressed header providing data on how to properly
+  access the log
+* job data - job-level metadata (e.g., start/end time and exe name) for the log
+* record table - a table mapping Darshan record identifiers to full file name paths
+* module data - each module (e.g., POSIX, MPI-IO, etc.) stores its I/O
+  characterization data in distinct regions of the log
+
+All regions of the log file are compressed (in libz or bzip2 format), except
+the header.
+
+Table of mounted file systems
+-------------------------------------
+
+The next portion of the output shows a table of all general purpose file
+systems that were mounted while the job was running. Each line uses the
+following format:
+
+.. code-block:: bash
+
+   <mount point> <fs type>
+
+Format of I/O characterization fields
+-------------------------------------
+
+The remainder of the output will show characteristics for each file that was
+opened by the application. Each line uses the following format:
+
+.. code-block:: bash
+
+   <module> <rank> <record id> <counter name> <counter value> <file name> <mount pt> <fs type>
+
+The ``<module>`` column specifies the module responsible for recording this
+piece of I/O characterization data. The ``<rank>`` column indicates the rank of
+the process that opened the file. A rank value of -1 indicates that all
+processes opened the same file. In that case, the value of the counter
+represents an aggregate across all processes. The ``<record id>`` is a 64-bit
+hash of the file path/name that was opened. It is used as a way to uniquely
+differentiate each file. The ``<counter name>`` is the name of the statistic
+that the line is reporting, while the ``<counter value>`` is the value of that
+statistic. A value of -1 indicates that Darshan was unable to collect
+statistics for that particular counter, and the value should be ignored. The
+``<file name>`` field shows the complete file name the record corresponds to.
+The ``<mount pt>`` is the mount point of the file system that this file
+belongs to and ``<fs type>`` is the type of that file system.
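+
+For example, a single record line following this format might look like the
+sketch below (the specific values shown are purely illustrative):
+
+.. code-block::
+
+   POSIX -1 16457598720760448348 POSIX_OPENS 16 /tmp/test/testFile / ext4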
+
+I/O characterization fields
+-------------------------------------
+
+The following tables show a list of integer statistics that are available for
+each of Darshan's current instrumentation modules, along with a description of
+each. Unless otherwise noted, counters include all variants of the call in
+question, such as ``read()``, ``pread()``, and ``readv()`` for POSIX_READS.
+
+**Table 1. POSIX module**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30, 70
+   :class: longtable
+   :align: left
+
+   * - counter name
+     - description
+   * - POSIX_OPENS
+     - Count of how many times the file was opened (INCLUDING ``fileno`` and ``dup`` operations)
+   * - POSIX_FILENOS
+     - Count of POSIX fileno operations
+   * - POSIX_DUPS
+     - Count of POSIX dup operations
+   * - POSIX_READS
+     - Count of POSIX read operations
+   * - POSIX_WRITES
+     - Count of POSIX write operations
+   * - POSIX_SEEKS
+     - Count of POSIX seek operations
+   * - POSIX_STATS
+     - Count of POSIX stat operations
+   * - POSIX_MMAPS
+     - Count of POSIX mmap operations
+   * - POSIX_FSYNCS
+     - Count of POSIX fsync operations
+   * - POSIX_FDSYNCS
+     - Count of POSIX fdatasync operations
+   * - POSIX_RENAME_SOURCES
+     - Number of times this file was the source of a rename operation
+   * - POSIX_RENAME_TARGETS
+     - Number of times this file was the target of a rename operation
+   * - POSIX_RENAMED_FROM
+     - If this file was a rename target, the Darshan record ID of the first rename source
+   * - POSIX_MODE
+     - Mode that the file was last opened in
+   * - POSIX_BYTES_READ
+     - Total number of bytes that were read from the file
+   * - POSIX_BYTES_WRITTEN
+     - Total number of bytes written to the file
+   * - POSIX_MAX_BYTE_READ
+     - Highest offset in the file that was read
+   * - POSIX_MAX_BYTE_WRITTEN
+     - Highest offset in the file that was written
+   * - POSIX_CONSEC_READS
+     - Number of consecutive reads (that were immediately adjacent to the previous access)
+   * - POSIX_CONSEC_WRITES
+     - Number of consecutive writes (that were immediately adjacent to the previous access)
+   * - POSIX_SEQ_READS
+     - Number of sequential reads (at a higher offset than where the previous access left off)
+   * - POSIX_SEQ_WRITES
+     - Number of sequential writes (at a higher offset than where the previous access left off)
+   * - POSIX_RW_SWITCHES
+     - Number of times that access toggled between read and write in consecutive operations
+   * - POSIX_MEM_NOT_ALIGNED
+     - Number of times that a read or write was not aligned in memory
+   * - POSIX_MEM_ALIGNMENT
+     - Memory alignment value (chosen at compile time)
+   * - POSIX_FILE_NOT_ALIGNED
+     - Number of times that a read or write was not aligned in file
+   * - POSIX_FILE_ALIGNMENT
+     - File alignment value. This value is detected at runtime on most file systems. On Lustre, however, Darshan assumes a default value of 1 MiB for optimal file alignment.
+ * - POSIX_MAX_READ_TIME_SIZE + - Size of the slowest POSIX read operation + * - POSIX_MAX_WRITE_TIME_SIZE + - Size of the slowest POSIX write operation + * - POSIX_SIZE_READ_* + - Histogram of read access sizes at POSIX level + * - POSIX_SIZE_WRITE_* + - Histogram of write access sizes at POSIX level + * - POSIX_STRIDE[1-4]_STRIDE + - Size of 4 most common stride patterns + * - POSIX_STRIDE[1-4]_COUNT + - Count of 4 most common stride patterns + * - POSIX_ACCESS[1-4]_ACCESS + - 4 most common POSIX access sizes + * - POSIX_ACCESS[1-4]_COUNT + - Count of 4 most common POSIX access sizes + * - POSIX_FASTEST_RANK + - The MPI rank with smallest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_SLOWEST_RANK + - The MPI rank with largest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_F_*_START_TIMESTAMP + - Timestamp that the first POSIX file open/read/write/close operation began + * - POSIX_F_*_END_TIMESTAMP + - Timestamp that the last POSIX file open/read/write/close operation ended + * - POSIX_F_READ_TIME + - Cumulative time spent reading at the POSIX level + * - POSIX_F_WRITE_TIME + - Cumulative time spent in write, fsync, and fdatasync at the POSIX level + * - POSIX_F_META_TIME + - Cumulative time spent in open, close, stat, and seek at the POSIX level + * - POSIX_F_MAX_READ_TIME + - Duration of the slowest individual POSIX read operation + * - POSIX_F_MAX_WRITE_TIME + - Duration of the slowest individual POSIX write operation + * - POSIX_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in POSIX I/O (cumulative read, write, and meta times) + * - POSIX_F_VARIANCE_RANK_TIME + - The population variance for POSIX I/O time of all the ranks + * - POSIX_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks + +**Table 2. MPI-IO module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - MPIIO_INDEP_OPENS + - Count of non-collective MPI opens + * - MPIIO_COLL_OPENS + - Count of collective MPI opens + * - MPIIO_INDEP_READS + - Count of non-collective MPI reads + * - MPIIO_INDEP_WRITES + - Count of non-collective MPI writes + * - MPIIO_COLL_READS + - Count of collective MPI reads + * - MPIIO_COLL_WRITES + - Count of collective MPI writes + * - MPIIO_SPLIT_READS + - Count of MPI split collective reads + * - MPIIO_SPLIT_WRITES + - Count of MPI split collective writes + * - MPIIO_NB_READS + - Count of MPI non-blocking reads + * - MPIIO_NB_WRITES + - Count of MPI non-blocking writes + * - MPIIO_SYNCS + - Count of MPI file syncs + * - MPIIO_HINTS + - Count of MPI file hints used + * - MPIIO_VIEWS + - Count of MPI file views used + * - MPIIO_MODE + - MPI mode that the file was last opened in + * - MPIIO_BYTES_READ + - Total number of bytes that were read from the file at MPI level + * - MPIIO_BYTES_WRITTEN + - Total number of bytes written to the file at MPI level + * - MPIIO_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive MPI operations + * - MPIIO_MAX_READ_TIME_SIZE + - Size of the slowest MPI read operation + * - MPIIO_MAX_WRITE_TIME_SIZE + - Size of the slowest MPI write operation + * - MPIIO_SIZE_READ_AGG_* + - Histogram of total size of read accesses at MPI level, even if access is noncontiguous + * - MPIIO_SIZE_WRITE_AGG_* + - Histogram of total size of write accesses at MPI level, even if access is noncontiguous + * - MPIIO_ACCESS[1-4]_ACCESS + - 4 most common MPI aggregate access sizes + * - MPIIO_ACCESS[1-4]_COUNT + - Count of 4 most common MPI aggregate access sizes + * - MPIIO_FASTEST_RANK + - The MPI rank with smallest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_SLOWEST_RANK + - The MPI rank with largest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_F_*_START_TIMESTAMP + - Timestamp that the first MPIIO file open/read/write/close operation began + * - MPIIO_F_*_END_TIMESTAMP + - Timestamp that the last MPIIO file open/read/write/close operation ended + * - MPIIO_F_READ_TIME + - Cumulative time spent reading at MPI level + * - MPIIO_F_WRITE_TIME + - Cumulative time spent write and sync at MPI level + * - MPIIO_F_META_TIME + - Cumulative time spent in open and close at MPI level + * - MPIIO_F_MAX_READ_TIME + - Duration of the slowest individual MPI read operation + * - MPIIO_F_MAX_WRITE_TIME + - Duration of the slowest individual MPI write operation + * - MPIIO_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in MPI I/O (cumulative read, write, and meta times) + * - MPIIO_F_VARIANCE_RANK_TIME + - The population variance for MPI I/O time of all the ranks + * - MPIIO_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks at MPI level + +**Table 3. STDIO module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - STDIO_OPENS + - Count of stdio file open operations (INCLUDING ``fdopen`` operations) + * - STDIO_FDOPENS + - Count of stdio fdopen operations + * - STDIO_READS + - Count of stdio read operations + * - STDIO_WRITES + - Count of stdio write operations + * - STDIO_SEEKS + - Count of stdio seek operations + * - STDIO_FLUSHES + - Count of stdio flush operations + * - STDIO_BYTES_WRITTEN + - Total number of bytes written to the file using stdio operations + * - STDIO_BYTES_READ + - Total number of bytes read from the file using stdio operations + * - STDIO_MAX_BYTE_READ + - Highest offset in the file that was read + * - STDIO_MAX_BYTE_WRITTEN + - Highest offset in the file that was written + * - STDIO_FASTEST_RANK + - The MPI rank with the smallest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with the smallest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_SLOWEST_RANK + - The MPI rank with the largest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in stdio operations (cumulative read, write, and meta times) + * - STDIO_F_META_TIME + - Cumulative time spent in stdio open/close/seek operations + * - STDIO_F_WRITE_TIME + - Cumulative time spent in stdio write operations + * - STDIO_F_READ_TIME + - Cumulative time spent in stdio read operations + * - STDIO_F_*_START_TIMESTAMP + - Timestamp that the first stdio file open/read/write/close operation began + * - STDIO_F_*_END_TIMESTAMP + - Timestamp that the last stdio file open/read/write/close operation ended + * - STDIO_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest time spent in stdio I/O (cumulative read, write, and meta times) + * - STDIO_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest time spent in stdio I/O (cumulative read, write, and meta times) + * - STDIO_F_VARIANCE_RANK_TIME + - The population variance for stdio I/O time of all the ranks + * - STDIO_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks + +**Table 4. H5F module** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - H5F_OPENS + - Count of H5F opens + * - H5F_FLUSHES + - Count of H5F flushes + * - H5F_USE_MPIIO + - Flag indicating whether MPI-IO is used for accessing the file + * - H5F_F_*_START_TIMESTAMP + - Timestamp that the first H5F open/close operation began + * - H5F_F_*_END_TIMESTAMP + - Timestamp that the last H5F open/close operation ended + * - H5F_F_META_TIME + - Cumulative time spent in H5F open/close/flush operations + +**Table 5. H5D module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - H5D_OPENS + - Count of H5D opens + * - H5D_READS + - Count of H5D reads + * - H5D_WRITES + - Count of H5D writes + * - H5D_FLUSHES + - Count of H5D flushes + * - H5D_BYTES_READ + - Total number of bytes read from the dataset using H5D + * - H5D_BYTES_WRITTEN + - Total number of bytes written to the dataset using H5D + * - H5D_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive H5D operations + * - H5D_REGULAR_HYPERSLAB_SELECTS + - Number of H5D read/write ops with regular hyperslab selections + * - H5D_IRREGULAR_HYPERSLAB_SELECTS + - Number of H5D read/write ops with irregular hyperslab selections + * - H5D_POINT_SELECTS + - Number of read/write ops with point selections + * - H5D_MAX_READ_TIME_SIZE + - Size of the slowest H5D read operation + * - H5D_MAX_WRITE_TIME_SIZE + - Size of the slowest H5D write operation + * - H5D_SIZE_READ_AGG_* + - Histogram of total size of read accesses at H5D level + * - H5D_SIZE_WRITE_AGG_* + - Histogram of total size of write accesses at H5D level + * - H5D_ACCESS[1-4]_ACCESS + - Sizes of 4 most common H5D accesses + * - H5D_ACCESS[1-4]_LENGTH_D[1-5] + - Access lengths along last 5 dimensions (D5 is fastest changing) of 4 most common H5D accesses + * - H5D_ACCESS[1-4]_STRIDE_D[1-5] + - Access strides along last 5 dimensions (D5 is fastest changing) of 4 most common H5D accesses + * - H5D_ACCESS[1-4]_COUNT + - Count of 4 most common H5D aggregate access sizes + * - H5D_DATASPACE_NDIMS + - Number of dimensions in dataset's dataspace + * - H5D_DATASPACE_NPOINTS + - Number of points in dataset's dataspace + * - H5D_DATATYPE_SIZE + - Total size of dataset elements in bytes + * - H5D_CHUNK_SIZE_D[1-5] + - Chunk sizes in the last 5 dimensions of the dataset (D5 is the fastest changing dimension) + * - H5D_USE_MPIIO_COLLECTIVE + - Flag indicating use of MPI-IO collectives + * - H5D_USE_DEPRECATED + - Flag indicating whether deprecated create/open calls were used + * - H5D_FASTEST_RANK + - The MPI rank with smallest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_SLOWEST_RANK + - The MPI rank with largest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_F_*_START_TIMESTAMP + - Timestamp that the first H5D open/read/write/close operation began + * - H5D_F_*_END_TIMESTAMP + - Timestamp that the last H5D open/read/write/close operation ended + * - H5D_F_READ_TIME + - Cumulative time spent reading at H5D level + * - H5D_F_WRITE_TIME + - Cumulative time spent writing at H5D level + * - H5D_F_META_TIME + - Cumulative time spent in open/close/flush at H5D level + * - H5D_F_MAX_READ_TIME + - Duration of the slowest individual H5D read operation + * - H5D_F_MAX_WRITE_TIME + - Duration of the slowest individual H5D write operation + * - H5D_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in H5D I/O (cumulative read, write, and meta times) + * - H5D_F_VARIANCE_RANK_TIME + - 
The population variance for H5D I/O time of all the ranks + * - H5D_F_VARIANCE_RANK_BYTES + - The population variance for bytes transferred of all the ranks at H5D level + * - H5D_FILE_REC_ID + - Darshan file record ID of the file the dataset belongs to + +**Table 6. PNETCDF_FILE module** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - PNETCDF_FILE_CREATES + - PnetCDF file create operation counts + * - PNETCDF_FILE_OPENS + - PnetCDF file open operation counts + * - PNETCDF_FILE_REDEFS + - PnetCDF file re-define operation counts + * - PNETCDF_FILE_INDEP_WAITS + - PnetCDF independent file wait operation counts (for flushing non-blocking I/O) + * - PNETCDF_FILE_COLL_WAITS + - PnetCDF collective file wait operation counts (for flushing non-blocking I/O) + * - PNETCDF_FILE_SYNCS + - PnetCDF file sync operation counts + * - PNETCDF_FILE_BYTES_READ + - PnetCDF total bytes read for all file variables (includes internal library metadata I/O) + * - PNETCDF_FILE_BYTES_WRITTEN + - PnetCDF total bytes written for all file variables (includes internal library metadata I/O) + * - PNETCDF_FILE_WAIT_FAILURES + - PnetCDF file wait operation failure counts (failures indicate that variable-level counters are unreliable) + * - PNETCDF_FILE_F_*_START_TIMESTAMP + - Timestamp that the first PNETCDF file open/close/wait operation began + * - PNETCDF_FILE_F_*_END_TIMESTAMP + - Timestamp that the last PNETCDF file open/close/wait operation ended + * - PNETCDF_FILE_F_META_TIME + - Cumulative time spent in file open/close/sync/redef/enddef metadata operations + * - PNETCDF_FILE_F_WAIT_TIME + - Cumulative time spent in file wait operations (for flushing non-blocking I/O) + +**Table 7. PNETCDF_VAR module** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - PNETCDF_VAR_OPENS + - PnetCDF variable define/inquire operation counts + * - PNETCDF_VAR_INDEP_READS + - PnetCDF variable independent read operation counts + * - PNETCDF_VAR_INDEP_WRITES + - PnetCDF variable independent write operation counts + * - PNETCDF_VAR_COLL_READS + - PnetCDF variable collective read operation counts + * - PNETCDF_VAR_COLL_WRITES + - PnetCDF variable collective write operation counts + * - PNETCDF_VAR_NB_READS + - PnetCDF variable nonblocking read operation counts + * - PNETCDF_VAR_NB_WRITES + - PnetCDF variable nonblocking write operation counts + * - PNETCDF_VAR_BYTES_* + - total bytes read and written at PnetCDF variable layer (not including internal library metadata I/O) + * - PNETCDF_VAR_RW_SWITCHES + - number of times access alternated between read and write + * - PNETCDF_VAR_PUT_VAR* + - number of calls to different ncmpi_put_var* APIs (var, var1, vara, vars, varm, varn, vard) + * - PNETCDF_VAR_GET_VAR* + - number of calls to different ncmpi_get_var* APIs (var, var1, vara, vars, varm, varn, vard) + * - PNETCDF_VAR_IPUT_VAR* + - number of calls to different ncmpi_iput_var* APIs (var, var1, vara, vars, varm, varn) + * - PNETCDF_VAR_IGET_VAR* + - number of calls to different ncmpi_iget_var* APIs (var, var1, vara, vars, varm, varn) + * - PNETCDF_VAR_BPUT_VAR* + - number of calls to different ncmpi_bput_var* APIs (var, var1, vara, vars, varm, varn) + * - PNETCDF_VAR_MAX_*_TIME_SIZE + - size of the slowest read and write operations + * - PNETCDF_VAR_SIZE_*_AGG_* + - histogram of PnetCDf total access sizes for read and write operations + * - PNETCDF_VAR_ACCESS*_* + - the four most common total accesses, in terms of size and length/stride (in last 5 dimensions) + * - PNETCDF_VAR_ACCESS*_COUNT + - count of the four most common total access sizes + * - PNETCDF_VAR_NDIMS + - number of dimensions in the variable + * - PNETCDF_VAR_NPOINTS + - number of points in the variable + * - PNETCDF_VAR_DATATYPE_SIZE + - size of each variable element + * - PNETCDF_VAR_*_RANK + - rank of the processes that were the fastest and slowest at I/O (for shared datasets) + * - PNETCDF_VAR_*_RANK_BYTES + - total bytes transferred at PnetCDF layer by the fastest and slowest ranks (for shared datasets) + * - PNETCDF_VAR_F_*_START_TIMESTAMP + - timestamp of first PnetCDF variable open/read/write/close + * - PNETCDF_VAR_F_*_END_TIMESTAMP + - timestamp of last PnetCDF variable open/read/write/close + * - PNETCDF_VAR_F_READ/WRITE/META_TIME + - cumulative time spent in PnetCDF read, write, or metadata operations + * - PNETCDF_VAR_F_MAX_*_TIME + - duration of the slowest PnetCDF read and write operations + * - PNETCDF_VAR_F_*_RANK_TIME + - fastest and slowest I/O time for a single rank (for shared datasets) + * - PNETCDF_VAR_F_VARIANCE_RANK_* + - variance of total I/O time and bytes moved for all ranks (for shared datasets) + * - PNETCDF_VAR_FILE_REC_ID + - Darshan file record ID of the file the variable belongs to + +**Table 8. Lustre module (if enabled, for Lustre file systems)** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - LUSTRE_NUM_COMPONENTS + - number of instrumented components in the Lustre layout + * - LUSTRE_NUM_STRIPES + - number of active stripes in the Lustre layout components + * - LUSTRE_COMP*_STRIPE_SIZE + - stripe size for this file layout component in bytes + * - LUSTRE_COMP*_STRIPE_COUNT + - number of OSTs over which the file layout component is striped + * - LUSTRE_COMP*_STRIPE_PATTERN + - pattern (e.g., raid0, mdt, overstriped) for this file layout component + * - LUSTRE_COMP*_FLAGS + - captured flags (e.g. init, prefwr, stale) for this file layout component + * - LUSTRE_COMP*_EXT_START + - starting file extent for this file layout component + * - LUSTRE_COMP*_EXT_END + - ending file extent for this file layout component (-1 means EOF) + * - LUSTRE_COMP*_MIRROR_ID + - mirror ID for this file layout component, if mirrors are enabled + * - LUSTRE_COMP*_POOL_NAME + - Lustre OST pool used for this file layout component + * - LUSTRE_COMP*\_OST_ID_* + - indices of OSTs over which this file layout component is striped + +**Table 9. DFS (DAOS File System) module (if enabled)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - DFS_OPENS + - DFS file open operation counts + * - DFS_GLOBAL_OPENS + - DFS file global open operation (i.e., ``dfs_obj_global2local()``) counts + * - DFS_LOOKUPS + - DFS file lookup operation counts + * - DFS_DUPS + - DFS file dup operation counts + * - DFS_READS + - DFS file read operation counts + * - DFS_READXS + - DFS non-contiguous file read operation counts + * - DFS_WRITES + - DFS file write operation counts + * - DFS_WRITEXS + - DFS non-contiguous file write operation counts + * - DFS_NB_READS + - DFS non-blocking file read operation counts (included in read/readx counts) + * - DFS_NB_WRITES + - DFS non-blocking file write operation counts (included in write/writex counts) + * - DFS_GET_SIZES + - DFS file get size operation counts + * - DFS_PUNCHES + - DFS file punch operation counts + * - DFS_REMOVES + - DFS file remove operation counts + * - DFS_STATS + - DFS file stat operation counts + * - DFS_BYTES_READ + - Total number of bytes that were read from the DFS file + * - DFS_BYTES_WRITTEN + - Total number of bytes that were written to the DFS file + * - DFS_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive operations + * - DFS_MAX_READ_TIME_SIZE + - Size of the slowest DFS read operation + * - DFS_MAX_WRITE_TIME_SIZE + - Size of the slowest DFS write operation + * - DFS_SIZE_READ_* + - Histogram of read access sizes at DFS level + * - DFS_SIZE_WRITE_* + - Histogram of write access sizes at DFS level + * - DFS_ACCESS[1-4]_ACCESS + - 4 most common DFS access sizes + * - DFS_ACCESS[1-4]_COUNT + - Count of 4 most common DFS access sizes + * - DFS_CHUNK_SIZE + - DFS file chunk size + * - DFS_FASTEST_RANK + - The MPI rank with smallest time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_SLOWEST_RANK + - The MPI rank with largest time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in DFS I/O (cumulative read, write, and meta times) + * - 
DFS_F_*_START_TIMESTAMP + - Timestamp that the first DFS file open/read/write/close operation began + * - DFS_F_*_END_TIMESTAMP + - Timestamp that the last DFS file open/read/write/close operation ended + * - DFS_F_READ_TIME + - Cumulative time spent reading at the DFS level + * - DFS_F_WRITE_TIME + - Cumulative time spent writing at the DFS level + * - DFS_F_META_TIME + - Cumulative time spent in open, dup, lookup, get size, punch, release, remove, and stat at the DFS level + * - DFS_F_MAX_READ_TIME + - Duration of the slowest individual DFS read operation + * - DFS_F_MAX_WRITE_TIME + - Duration of the slowest individual DFS write operation + * - DFS_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in DFS I/O (cumulative read, write, and meta times) + * - DFS_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in DFS I/O (cumulative read, write, and meta times) + +**Table 10. DAOS module (if enabled)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - DAOS_OBJ_OPENS + - DAOS object open operation counts + * - DAOS_OBJ_FETCHES + - DAOS object fetch operation counts + * - DAOS_OBJ_UPDATES + - DAOS object update operation counts + * - DAOS_OBJ_PUNCHES + - DAOS object punch operation counts + * - DAOS_OBJ_DKEY_PUNCHES + - DAOS object dkey punch operation counts + * - DAOS_OBJ_AKEY_PUNCHES + - DAOS object akey punch operation counts + * - DAOS_OBJ_DKEY_LISTS + - DAOS object dkey list operation counts + * - DAOS_OBJ_AKEY_LISTS + - DAOS object akey list operation counts + * - DAOS_OBJ_RECX_LISTS + - DAOS object recx list operation counts + * - DAOS_ARRAY_OPENS + - DAOS array object open operation counts + * - DAOS_ARRAY_READS + - DAOS array object read operation counts + * - DAOS_ARRAY_WRITES + - DAOS array object write operation counts + * - DAOS_ARRAY_GET_SIZES + - DAOS array object get size operation counts + * - DAOS_ARRAY_SET_SIZES + - DAOS array object set size operation counts + * - DAOS_ARRAY_STATS + - DAOS array object stat operation counts + * - DAOS_ARRAY_PUNCHES + - DAOS array object punch operation counts + * - DAOS_ARRAY_DESTROYS + - DAOS array object destroy operation counts + * - DAOS_KV_OPENS + - DAOS kv object open operation counts + * - DAOS_KV_GETS + - DAOS kv object get operation counts + * - DAOS_KV_PUTS + - DAOS kv object put operation counts + * - DAOS_KV_REMOVES + - DAOS kv object remove operation counts + * - DAOS_KV_LISTS + - DAOS kv object list operation counts + * - DAOS_KV_DESTROYS + - DAOS kv object destroy operation counts + * - DAOS_NB_OPS + - DAOS non-blocking I/O operations (includes reads, writes, and metadata operations) + * - DAOS_BYTES_READ + - Total number of bytes that were read from the DAOS object + * - DAOS_BYTES_WRITTEN + - Total number of bytes that were written to the DAOS object + * - DAOS_RW_SWITCHES + - Number of times that access toggled between read and write in consecutive operations + * - DAOS_MAX_READ_TIME_SIZE + - Size of the slowest DAOS read operation + * - DAOS_MAX_WRITE_TIME_SIZE + - Size of the slowest DAOS write operation + * - DAOS_SIZE_READ_* + - Histogram of read access sizes at DAOS level + * - DAOS_SIZE_WRITE_* + - Histogram of write access sizes at DAOS level + * - DAOS_ACCESS[1-4]_ACCESS + - 4 most common DAOS access sizes + * - DAOS_ACCESS[1-4]_COUNT + - Count of 4 most common DAOS access sizes + * - DAOS_OBJ_OTYPE + - DAOS object otype ID + * - DAOS_ARRAY_CELL_SIZE + - 
For DAOS array objects, the array cell size + * - DAOS_ARRAY_CHUNK_SIZE + - For DAOS array objects, the array chunk size + * - DAOS_FASTEST_RANK + - The MPI rank with smallest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_FASTEST_RANK_BYTES + - The number of bytes transferred by the rank with smallest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_SLOWEST_RANK + - The MPI rank with largest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_SLOWEST_RANK_BYTES + - The number of bytes transferred by the rank with the largest time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_F_*_START_TIMESTAMP + - Timestamp that the first DAOS object open/read/write/close operation began + * - DAOS_F_*_END_TIMESTAMP + - Timestamp that the last DAOS object open/read/write/close operation ended + * - DAOS_F_READ_TIME + - Cumulative time spent reading at the DAOS level + * - DAOS_F_WRITE_TIME + - Cumulative time spent writing at the DAOS level + * - DAOS_F_META_TIME + - Cumulative time spent in open, punch, list, get size, set size, stat, destroy, and remove at the DAOS level + * - DAOS_F_MAX_READ_TIME + - Duration of the slowest individual DAOS read operation + * - DAOS_F_MAX_WRITE_TIME + - Duration of the slowest individual DAOS write operation + * - DAOS_F_FASTEST_RANK_TIME + - The time of the rank which had the smallest amount of time spent in DAOS I/O (cumulative read, write, and meta times) + * - DAOS_F_SLOWEST_RANK_TIME + - The time of the rank which had the largest amount of time spent in DAOS I/O (cumulative read, write, and meta times) + + +Heatmap fields +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each heatmap module record reports a histogram of the number of bytes read or +written, per process, over time, for a given I/O API. It provides a synopsis +of I/O intensity regardless of how many files are accessed. Heatmap records +are never aggregated across ranks. + +The file name field is used to indicate the API that produced the histogram +record. For example, "heatmap:POSIX" indicates that the record is reporting +I/O traffic that passed through the POSIX module. + +The number of BIN fields present in each record may vary depending on the job's +execution time and the configurable maximum number of bins chosen at execution +time. + +**Table 11. HEATMAP module** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - HEATMAP_F_BIN_WIDTH_SECONDS + - time duration of each heatmap bin + * - HEATMAP_READ\|WRITE_BIN_* + - number of bytes read or written within specified heatmap bin + +Additional modules +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Table 12. APXC module header record (if enabled, for Cray XC systems)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - APXC_GROUPS + - total number of groups for the job + * - APXC_CHASSIS + - total number of chassis for the job + * - APXC_BLADES + - total number of blades for the job + * - APXC_MEMORY_MODE + - Intel Xeon memory mode + * - APXC_CLUSTER_MODE + - Intel Xeon NUMA configuration + * - APXC_MEMORY_MODE_CONSISTENT + - Intel Xeon memory mode consistent across all nodes + * - APXC_CLUSTER_MODE_CONSISTENT + - Intel Xeon cluster mode consistent across all nodes + +**Table 13. APXC module per-router record (if enabled, for Cray XC systems)** + +.. 
list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - APXC_GROUP + - group this router is on + * - APXC_CHASSIS + - chassis this router is on + * - APXC_BLADE + - blade this router is on + * - APXC_NODE + - node connected to this router + * - APXC_AR_RTR_x_y_INQ_PRF_INCOMING_FLIT_VC[0-7] + - flits on VCs of x y tile for router-router ports + * - APXC_AR_RTR_x_y_INQ_PRF_ROWBUS_STALL_CNT + - stalls on x y tile for router-router ports + * - APXC_AR_RTR_PT_x_y_INQ_PRF_INCOMING_FLIT_VC[0,4] + - flits on VCs of x y tile for router-nic ports + * - APXC_AR_RTR_PT_x_y_INQ_PRF_REQ_ROWBUS_STALL_CNT + - stalls on x y tile for router-nic ports + +**Table 14. APMPI module header record (if enabled, for MPI applications)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - MPI_TOTAL_COMM_TIME_VARIANCE + - variance in total communication time across all the processes + * - MPI_TOTAL_COMM_SYNC_TIME_VARIANCE + - variance in total sync time across all the processes, if enabled + +**Table 15. APMPI module per-process record (if enabled, for MPI applications)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - MPI_PROCESSOR_NAME + - name of the processor used by the MPI process + * - MPI_*_CALL_COUNT + - total call count for an MPI op + * - MPI_*_TOTAL_BYTES + - total bytes (i.e., cumulative across all calls) moved with an MPI op + * - MPI_*\_MSG_SIZE_AGG_* + - histogram of total bytes moved for all the calls of an MPI op + * - MPI_*_TOTAL_TIME + - total time (i.e, cumulative across all calls) of an MPI op + * - MPI_*_MIN_TIME + - minimum time across all calls of an MPI op + * - MPI_*_MAX_TIME + - maximum time across all calls of an MPI op + * - MPI_*_TOTAL_SYNC_TIME + - total sync time (cumulative across all calls of an op) of an MPI op, if enabled + * - MPI_TOTAL_COMM_TIME + - total communication (MPI) time of a process across all the MPI ops + * - MPI_TOTAL_COMM_SYNC_TIME + - total sync time of a process across all the MPI ops, if enabled + + +**Table 16. BG/Q module (if enabled on BG/Q systems)** + +.. list-table:: + :header-rows: 1 + :widths: 30, 70 + :class: longtable + :align: left + + * - counter name + - description + * - BGQ_CSJOBID + - Control system job ID + * - BGQ_NNODES + - Total number of BG/Q compute nodes + * - BGQ_RANKSPERNODE + - Number of MPI ranks per compute node + * - BGQ_DDRPERNODE + - Size of compute node DDR in MiB + * - BGQ_INODES + - Total number of BG/Q I/O nodes + * - BGQ_ANODES + - Dimension of A torus + * - BGQ_BNODES + - Dimension of B torus + * - BGQ_CNODES + - Dimension of C torus + * - BGQ_DNODES + - Dimension of D torus + * - BGQ_ENODES + - Dimension of E torus + * - BGQ_TORUSENABLED + - Bitfield indicating enabled torus dimensions + * - BGQ_F_TIMESTAMP + - Timestamp of when BG/Q data was collected + +.. _Sec Additional summary output: + +Additional summary output +------------------------------------- + +The following sections describe additional parser options that provide +summary I/O characterization data for the given log. + +.. note:: + These options are currently only supported by the POSIX, MPI-IO, and stdio + modules. + +Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Job performance information can be generated using the ``--perf`` command-line +option. + +**Example output** + +.. 
code-block::
+
+   # performance
+   # -----------
+   # total_bytes: 134217728
+   #
+   # I/O timing for unique files (seconds):
+   # ...........................
+   # unique files: slowest_rank_io_time: 0.000000
+   # unique files: slowest_rank_meta_only_time: 0.000000
+   # unique files: slowest_rank: 0
+   #
+   # I/O timing for shared files (seconds):
+   # (multiple estimates shown; time_by_slowest is generally the most accurate)
+   # ...........................
+   # shared files: time_by_cumul_io_only: 0.042264
+   # shared files: time_by_cumul_meta_only: 0.000325
+   # shared files: time_by_open: 0.064986
+   # shared files: time_by_open_lastio: 0.064966
+   # shared files: time_by_slowest: 0.057998
+   #
+   # Aggregate performance, including both shared and unique files (MiB/s):
+   # (multiple estimates shown; agg_perf_by_slowest is generally the most
+   # accurate)
+   # ...........................
+   # agg_perf_by_cumul: 3028.570529
+   # agg_perf_by_open: 1969.648064
+   # agg_perf_by_open_lastio: 1970.255248
+   # agg_perf_by_slowest: 2206.983935
+
+The ``total_bytes`` line shows the total number of bytes transferred
+(read/written) by the job. That is followed by three sections:
+
+**I/O timing for unique files**
+
+This section reports information about any files that were **not** opened by
+every rank in the job. This includes independent files (opened by 1 process)
+and partially shared files (opened by a proper subset of the job's processes).
+The I/O time for this category of file access is reported based on the
+**slowest** rank of all processes that performed this type of file access.
+
+* unique files: slowest_rank_io_time: total I/O time for unique files
+  (including both metadata + data transfer time)
+* unique files: slowest_rank_meta_only_time: metadata time for unique files
+* unique files: slowest_rank: the rank of the slowest process
+
+**I/O timing for shared files**
+
+This section reports information about files that were globally shared (i.e.,
+opened by every rank in the job). This section estimates performance for
+globally shared files using four different methods. The ``time_by_slowest``
+method is generally the most accurate, but it may not be available in some
+older Darshan log files.
+
+* shared files: ``time_by_cumul_*``: adds the cumulative time across all
+  processes and divides by the number of processes (inaccurate when there is
+  high variance among processes).
+
+  + shared files: ``time_by_cumul_io_only``: include metadata AND data transfer
+    time for global shared files
+  + shared files: ``time_by_cumul_meta_only``: metadata time for global shared
+    files
+
+* shared files: ``time_by_open``: difference between timestamp of open and
+  close (inaccurate if file is left open without I/O activity)
+* shared files: ``time_by_open_lastio``: difference between timestamp of open
+  and the timestamp of last I/O (similar to above but fixes case where file is
+  left open after I/O is complete)
+* shared files: ``time_by_slowest``: measures time according to which rank was
+  the slowest to perform both metadata operations and data transfer for each
+  shared file. (most accurate but requires newer log version)
+
+**Aggregate performance**
+
+Performance is calculated by dividing the total bytes by the I/O time (shared
+files and unique files combined) computed using each of the four methods
+described in the previous output section. Note that the total bytes value is
+reported in bytes, while aggregate performance is reported in MiB/s
+(1024*1024 bytes/s).
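+
+For reference, the performance summary shown above can be generated with an
+invocation like the following (reusing the hypothetical log file name from the
+earlier examples):
+
+.. code-block:: bash
+
+   darshan-parser --perf carns_my-app_id114525_7-27-58921_19.darshan.gz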
+
+Files
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Use the ``--file`` option to get totals based on file usage. Each line has 3
+columns. The first column is the count of files for that type of file, the
+second column is the number of bytes for that type, and the third column is
+the maximum offset accessed.
+
+* total: All files
+* read_only: Files that were only read from
+* write_only: Files that were only written to
+* read_write: Files that were both read and written
+* unique: Files that were opened on only one rank
+* shared: Files that were opened by more than one rank
+
+**Example output**
+
+.. code-block::
+
+   #
+   # total: 5 4371499438884 4364699616485
+   # read_only: 2 4370100334589 4364699616485
+   # write_only: 1 1399104295 1399104295
+   # read_write: 0 0 0
+   # unique: 0 0 0
+   # shared: 5 4371499438884 4364699616485
+
+Totals
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Use the ``--total`` option to get all statistics as an aggregate total rather
+than broken down per file. Each field is either summed across files and
+processes (for values such as number of opens), set to global minimums and
+maximums (for values such as open time and close time), or zeroed out (for
+statistics that are nonsensical in aggregate).
+
+**Example output**
+
+.. code-block:: bash
+
+   total_POSIX_OPENS: 1024
+   total_POSIX_READS: 0
+   total_POSIX_WRITES: 16384
+   total_POSIX_SEEKS: 16384
+   total_POSIX_STATS: 1024
+   total_POSIX_MMAPS: 0
+   total_POSIX_FOPENS: 0
+   total_POSIX_FREADS: 0
+   total_POSIX_FWRITES: 0
+   total_POSIX_BYTES_READ: 0
+   total_POSIX_BYTES_WRITTEN: 68719476736
+   total_POSIX_MAX_BYTE_READ: 0
+   total_POSIX_MAX_BYTE_WRITTEN: 67108863
+   ...
+
+darshan-dxt-parser
+=======================================
+
+The ``darshan-dxt-parser`` utility can be used to parse DXT traces out of
+Darshan log files, assuming the corresponding application was executed with the
+DXT modules enabled. The following example parses all DXT trace information out
+of a Darshan log file and stores it in a text file:
+
+.. code-block:: bash
+
+   darshan-dxt-parser shane_ior_id25016_1-31-38066-13864742673678115131_1.darshan > ~/ior-trace.txt
+
+Guide to darshan-dxt-parser output
+=======================================
+
+The preamble to ``darshan-dxt-parser`` output is identical to that of the
+traditional ``darshan-parser`` utility, which is described above.
+
+``darshan-dxt-parser`` displays detailed trace information contained within a
+Darshan log that was generated with DXT instrumentation enabled. Trace data is
+captured from both POSIX and MPI-IO interfaces. Example output is given below:
+
+**Example output**
+
+.. 
code-block:: bash + + # *************************************************** + # DXT_POSIX module data + # *************************************************** + + # DXT, file_id: 16457598720760448348, file_name: /tmp/test/testFile + # DXT, rank: 0, hostname: shane-thinkpad + # DXT, write_count: 4, read_count: 4 + # DXT, mnt_pt: /, fs_type: ext4 + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) + X_POSIX 0 write 0 0 262144 0.0029 0.0032 + X_POSIX 0 write 1 262144 262144 0.0032 0.0035 + X_POSIX 0 write 2 524288 262144 0.0035 0.0038 + X_POSIX 0 write 3 786432 262144 0.0038 0.0040 + X_POSIX 0 read 0 0 262144 0.0048 0.0048 + X_POSIX 0 read 1 262144 262144 0.0049 0.0049 + X_POSIX 0 read 2 524288 262144 0.0049 0.0050 + X_POSIX 0 read 3 786432 262144 0.0050 0.0051 + + # *************************************************** + # DXT_MPIIO module data + # *************************************************** + + # DXT, file_id: 16457598720760448348, file_name: /tmp/test/testFile + # DXT, rank: 0, hostname: shane-thinkpad + # DXT, write_count: 4, read_count: 4 + # DXT, mnt_pt: /, fs_type: ext4 + # Module Rank Wt/Rd Segment Length Start(s) End(s) + X_MPIIO 0 write 0 262144 0.0029 0.0032 + X_MPIIO 0 write 1 262144 0.0032 0.0035 + X_MPIIO 0 write 2 262144 0.0035 0.0038 + X_MPIIO 0 write 3 262144 0.0038 0.0040 + X_MPIIO 0 read 0 262144 0.0048 0.0049 + X_MPIIO 0 read 1 262144 0.0049 0.0049 + X_MPIIO 0 read 2 262144 0.0049 0.0050 + X_MPIIO 0 read 3 262144 0.0050 0.0051 + +DXT POSIX module +------------------------------------- + +This module provides details on each read or write access at the POSIX layer. +The trace output is organized first by file then by process rank. So, for each +file accessed by the application, DXT will provide each process's I/O trace +segments in separate blocks, ordered by increasing process rank. Within each +file/rank block, I/O trace segments are ordered chronologically. + +Before providing details on each I/O operation, DXT provides a short preamble +for each file/rank trace block with the following bits of information: the +Darshan identifier for the file (which is equivalent to the identifiers used by +Darshan in its traditional modules), the full file path, the corresponding MPI +rank the current block of trace data belongs to, the hostname associated with +this process rank, the number of individual POSIX read and write operations by +this process, and the mount point and file system type corresponding to the +traced file. + +The output format for each individual I/O operation segment is: + +.. code-block:: + + # Module Rank Wt/Rd Segment Offset Length Start(s) End(s) + +* Module: corresponding DXT module (DXT_POSIX or DXT_MPIIO) +* Rank: process rank responsible for I/O operation +* Wt/Rd: whether the operation was a write or read +* Segment: The operation number for this segment (first operation is segment 0) +* Offset: file offset the I/O operation occurred at +* Length: length of the I/O operation in bytes +* Start: timestamp of the start of the operation (w.r.t. application start time) +* End: timestamp of the end of the operation (w.r.t. application start time) + +DXT MPI-IO module +------------------------------------- + +If the MPI-IO interface is used by an application, this module provides details +on each read or write access at the MPI-IO layer. This data is often useful in +understanding how MPI-IO read or write operations map to underlying POSIX read +or write operations issued to the traced file. 
+
+The output format for the DXT MPI-IO module is essentially identical to the DXT
+POSIX module, except that the offset of file operations is not tracked.
+
+Other darshan-util utilities
+=======================================
+
+The darshan-util package includes a number of other utilities that can be
+summarized briefly as follows:
+
+* ``darshan-convert``: converts an existing log file to the newest log format.
+  If the ``--bzip2`` flag is given, then the output file will be re-compressed
+  in bzip2 format rather than libz format. It also has command line options for
+  anonymizing personal data, adding metadata annotation to the log header, and
+  restricting the output to a specific instrumented file.
+* ``darshan-diff``: provides a text diff of two Darshan log files, comparing both
+  job-level metadata and module data records between the files.
+* ``darshan-analyzer``: walks an entire directory tree of Darshan log files and
+  produces a summary of the types of access methods used in those log files.
+* ``darshan-logutils*``: this is a library rather than an executable, but it
+  provides a C interface for opening and parsing Darshan log files. This is
+  the recommended method for writing custom utilities, as darshan-logutils
+  provides a relatively stable interface across different versions of Darshan
+  and different log formats.
+* ``dxt_analyzer``: plots the read or write activity of a job using data obtained
+  from Darshan's DXT modules (if DXT is enabled).
+
+PyDarshan
+=======================================
+
+PyDarshan is a Python package that provides functionality for analyzing Darshan
+log files, first introduced as part of Darshan 3.3.0. This package provides
+easier-to-use Python interfaces to Darshan log file data (compared to the
+C-based ``darshan-util`` library), enabling Darshan users to develop their own
+custom log file analysis utilities.
+
+PyDarshan has independent documentation outlining how to install and use this
+package, which can be found in :ref:`pydarshantoc`.
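+
+As a quick illustration, once PyDarshan is installed its job summary CLI can
+be invoked directly on a log file, as in the following sketch (the log file
+name is hypothetical):
+
+.. code-block:: bash
+
+   python -m darshan summary example_app_id12345.darshan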
+ diff --git a/darshan-util/pydarshan/CHANGELOG.rst b/darshan-util/pydarshan/CHANGELOG.rst index 61d7e8706..a6bbbeb6d 100644 --- a/darshan-util/pydarshan/CHANGELOG.rst +++ b/darshan-util/pydarshan/CHANGELOG.rst @@ -2,15 +2,18 @@ PyDarshan-3.4.7.0 ================= * Support for extracting and analyzing new DAOS instrumentation module data + - summary CLI tool now integrates DAOS data into reports * New CLI tools for extracting and printing summary data for one or more Darshan logs + - `job_stats` prints high-level statistics on a per-job level - `file_stats` prints high-level statistics on a per-file level - output in either CSV format or using the Rich package (which is now a proper dependency of PyDarshan) * Add DarshanReport object capability to filter module records according to given regular expressions + - users can now pass the following parameters to DarshanReport objects to use this functionality: `filter_patterns` (a list of regex strings to match against) and `filter_mode` (either @@ -33,6 +36,7 @@ PyDarshan-3.4.4.0 PyDarshan-3.4.3.0 ================= * Various job summary tool improvements + - add new module overview table - add new file count summary table - add new plot of POSIX module sequential/consecutive accesses @@ -45,6 +49,7 @@ PyDarshan-3.4.3.0 easier to read * Integrated Python support for darshan-util accumulator API for aggregating file records and calculating derived metrics + - Added backend routine `accumulate_records`, which returns a derived metric structure and a summary record for an input set of records @@ -66,13 +71,16 @@ PyDarshan-3.4.1.0 ================= * Fixed memory leaks in the following backend CFFI bindings (reported by Jesse Hines): + - log_get_modules - log_get_mounts - log_get_record - log_get_name_records - log_lookup_name_records + * Added PnetCDF module information to job summary tool * Testing modifications: + - Switched to use of context managers for log Report objects to avoid test hangs in certain environments - Marked tests requiring lxml package as xfail when not installed diff --git a/darshan-util/pydarshan/Makefile b/darshan-util/pydarshan/Makefile index 30e8af164..13445be7c 100644 --- a/darshan-util/pydarshan/Makefile +++ b/darshan-util/pydarshan/Makefile @@ -63,7 +63,7 @@ coverage: # check code coverage quickly with the default Python docs: clean-docs # generate Sphinx HTML documentation, including API docs - sphinx-apidoc -M -H PyDarshan -o docs/api/pydarshan darshan + sphinx-apidoc -M -H "PyDarshan API Reference" -o docs/api/pydarshan darshan $(MAKE) -C docs clean $(MAKE) -C docs html diff --git a/darshan-util/pydarshan/darshan/discover_darshan.py b/darshan-util/pydarshan/darshan/discover_darshan.py index 5746b773b..00fd9f3f1 100644 --- a/darshan-util/pydarshan/darshan/discover_darshan.py +++ b/darshan-util/pydarshan/darshan/discover_darshan.py @@ -216,9 +216,15 @@ def find_utils(ffi, libdutil): os.chdir(save) except: libdutil = None - - - + + if libdutil is None: + try: + library_path = os.environ.get('DARSHAN_INSTALL_PREFIX') + logger.debug(f"Attempting library_path={library_path} via env variable DARSHAN_INSTALL_PREFIX.") + libdutil = ffi.dlopen(library_path + "/lib/libdarshan-util.so") + except: + libdutil = None + if libdutil is None: raise RuntimeError('Could not find libdarshan-util.so! Is darshan-util installed? 
Please ensure one of the following: 1) export LD_LIBRARY_PATH=, or 2) darshan-parser can be found using the PATH variable, or 3) pkg-config can resolve pkg-config --path darshan-util, or 4) install a wheel that includes darshan-utils via pip.')
diff --git a/darshan-util/pydarshan/docs/conf.py b/darshan-util/pydarshan/docs/conf.py
index db89dd76d..81b0c6d9c 100755
--- a/darshan-util/pydarshan/docs/conf.py
+++ b/darshan-util/pydarshan/docs/conf.py
@@ -63,7 +63,7 @@ master_doc = 'index'
 
 # General information about the project.
-project = u'PyDarshan'
+project = u'PyDarshan API Reference'
 copyright = u"2021, Argonne National Laboratory"
 author = u""
 
@@ -172,8 +172,7 @@
 intersphinx_mapping = {
-    'https://docs.python.org/': None,
-    'http://matplotlib.org': None,
+    "python": ('https://docs.python.org/', None),
+    "matplotlib": ("https://matplotlib.org", None),
 }
-
diff --git a/doc/darshan-modularization.rst b/doc/darshan-modularization.rst
new file mode 100644
index 000000000..649178512
--- /dev/null
+++ b/doc/darshan-modularization.rst
@@ -0,0 +1,683 @@
+********************************************************************
+Modularized I/O characterization using Darshan 3.x
+********************************************************************
+
+Introduction
+==============================================
+
+Darshan is a lightweight toolkit for characterizing the I/O performance of
+instrumented HPC applications.
+
+Starting with version 3.0.0, the Darshan runtime environment and log file
+format have been redesigned such that new "instrumentation modules" can be
+added without breaking existing tools. Developers are given a framework to
+implement arbitrary instrumentation modules, which are responsible for
+gathering I/O data from a specific system component (which could be an I/O
+library, platform-specific data, etc.). Darshan can then manage these modules
+at runtime and create a valid Darshan log regardless of how many or what types
+of modules are used.
+
+Overview of Darshan's modularized architecture
+==============================================
+
+The Darshan source tree is organized into two primary components:
+
+* **darshan-runtime**: Darshan runtime framework necessary for instrumenting MPI
+  applications and generating I/O characterization logs.
+
+* **darshan-util**: Darshan utilities for analyzing the contents of a given
+  Darshan I/O characterization log.
+
+The following subsections provide detailed overviews of each of these
+components to give a better understanding of the architecture of the
+modularized version of Darshan. In :ref:`Sec add instr`, we outline the
+necessary steps for integrating new instrumentation modules into Darshan.
+
+.. _Sec darshan-runtime:
+
+Darshan-runtime
+-------------------------------------
+
+The primary responsibilities of the darshan-runtime component are:
+
+* intercepting I/O functions of interest from a target application;
+
+* extracting statistics, timing information, and other data characterizing the
+  application's I/O workload;
+
+* compressing I/O characterization data and corresponding metadata;
+
+* logging the compressed I/O characterization to file for future evaluation
+
+The first two responsibilities are the burden of module developers, while the
+last two are handled automatically by Darshan.
+
+In general, instrumentation modules are composed of:
+
+* wrapper functions for intercepting I/O functions;
+
+* internal functions for initializing and maintaining internal data structures
+  and module-specific I/O characterization data;
+
+* a set of functions for interfacing with the Darshan runtime environment
+
+A block diagram illustrating the interaction of an example POSIX
+instrumentation module and the Darshan runtime environment is given below in
+Figure 1.
+
+**Figure 1. Darshan runtime environment**
+
+.. image:: darshan-dev-modular-runtime.png
+   :align: center
+   :width: 600
+   :alt: A block diagram of Darshan runtime environment
+
+As shown in Figure 1, the Darshan runtime environment is just a library
+(libdarshan) which intercepts and instruments function calls of interest made
+by an application to existing system libraries. Two primary components of this
+library are ``darshan-core`` and ``darshan-common``. ``darshan-core`` is the
+central component which manages the initialization/shutdown of Darshan,
+coordinates with active instrumentation modules, and writes I/O
+characterization logs to disk, among other things. ``darshan-core`` intercepts
+``MPI_Init()`` to initialize key internal data structures and intercepts
+``MPI_Finalize()`` to initiate Darshan's shutdown process. ``darshan-common``
+simply provides module developers with functionality that is likely to be
+reused across modules to minimize development and maintenance costs.
+Instrumentation modules must utilize ``darshan-core`` to register themselves
+and corresponding I/O records with Darshan so they can be added to the output
+I/O characterization. While not shown in Figure 1, numerous modules can be
+registered with Darshan at any given time and Darshan is capable of correlating
+records between these modules.
+
+In the next three subsections, we describe instrumentation modules, the
+``darshan-core`` component, and the ``darshan-common`` component in more
+detail.
+
+Instrumentation modules
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The new modularized version of Darshan allows for the generation of I/O
+characterizations composed from numerous instrumentation modules, where an
+instrumentation module is simply a Darshan component responsible for capturing
+I/O data from some arbitrary source. For example, distinct instrumentation
+modules may be defined for different I/O interfaces or to gather
+system-specific I/O parameters from a given computing system. Each
+instrumentation module interfaces with the ``darshan-core`` component to
+coordinate its initialization and shutdown and to provide output I/O
+characterization data to be written to log.
+
+In general, there are two different methods an instrumentation module can use
+to initialize itself: static initialization at Darshan startup time or dynamic
+initialization within intercepted function calls during application execution.
+The initialization process should initialize module-specific data structures
+and register the module with the ``darshan-core`` component so it is included
+in the output I/O characterization.
+
+The static initialization approach is useful for modules that do not have
+function calls that can be intercepted and instead can just grab all I/O
+characterization data at Darshan startup or shutdown time. A module can be
+statically initialized at Darshan startup time by adding its initialization
+routine to the ``mod_static_init_fns`` array at the top of the
+``lib/darshan-core.c`` source file.
+
+.. note::
+    Modules may wish to add a corresponding configure option to disable the
+    module from attempting to gather I/O data. The ability to disable a module
+    using a configure option is especially necessary for system-specific
+    modules which cannot be built or used on other systems.
+
+Most instrumentation modules can simply bootstrap themselves within wrapper
+functions during normal application execution. Each of Darshan's current I/O
+library instrumentation modules (POSIX, MPI-IO, stdio, HDF5, PnetCDF) follows
+this approach. Each wrapper function should just include logic to initialize
+data structures and register with ``darshan-core`` if this initialization has
+not already occurred. Darshan intercepts function calls of interest by
+inserting these wrappers at compile time for statically linked executables
+(e.g., using the linker's ``--wrap`` mechanism) and at runtime for dynamically
+linked executables (using ``LD_PRELOAD``).
+
+.. note::
+    Modules should not perform any I/O or communication within wrapper
+    functions. Darshan records I/O data independently on each application
+    process, then merges the data from all processes when the job is shutting
+    down. This defers expensive I/O and communication operations to the
+    shutdown process, minimizing Darshan's impact on application I/O
+    performance.
+
+When the instrumented application terminates and Darshan begins its shutdown
+procedure, it requires a way to interface with any active modules that have
+data to contribute to the output I/O characterization. The following function
+is implemented by each module to finalize (and perhaps reorganize) module
+records before returning the record memory back to darshan-core to be
+compressed and written to file.
+
+.. code-block:: C
+
+    typedef void (*darshan_module_shutdown)(
+        MPI_Comm mod_comm,
+        darshan_record_id *shared_recs,
+        int shared_rec_count,
+        void** mod_buf,
+        int* mod_buf_sz
+    );
+
+This function can be used to run collective MPI operations on module data; for
+instance, Darshan typically tries to reduce file records which are shared
+across all application processes into a single data record (more details on
+the shared record reduction mechanism are given in :ref:`Sec shared record`).
+This function also serves as a final opportunity for modules to clean up and
+free any allocated data structures.
+
+* ``mod_comm`` is the MPI communicator to use for collective communication.
+
+* ``shared_recs`` is a list of Darshan record identifiers that are shared
+  across all application processes.
+
+* ``shared_rec_count`` is the size of the shared record list.
+
+* ``mod_buf`` is a pointer to the buffer address of the module's contiguous
+  set of data records.
+
+* ``mod_buf_sz`` is a pointer to a variable storing the aggregate size of the
+  module's records. On input, the pointed-to value indicates the aggregate
+  size of the module's registered records; on output, the value may be updated
+  if, for instance, certain records are discarded.
+
+darshan-core
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Within darshan-runtime, the darshan-core component manages the initialization
+and shutdown of the Darshan environment, provides an interface for modules to
+register themselves and their data records with Darshan, and manages the
+compression and writing of the resulting I/O characterization. As illustrated
+in Figure 1, the darshan-core runtime environment intercepts ``MPI_Init`` and
+``MPI_Finalize`` routines to initialize and shut down the Darshan runtime
+environment, respectively.
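+
+Before each of these interface functions is described in detail below, the
+following abbreviated sketch shows how the pieces fit together on the module
+side. The ``darshan_core_*`` calls follow the interfaces documented in this
+section, but the module itself -- its ``DARSHAN_FOO_MOD`` identifier, record
+structure, and wrapper -- is invented purely for illustration; real modules
+also serialize initialization with a lock and keep registered records in a
+hash table so each record is registered only once.
+
+.. code-block:: C
+
+    /* hypothetical "FOO" module sketch -- for illustration only */
+    #include "darshan.h"
+
+    struct foo_record
+    {
+        darshan_record_id rec_id; /* invented record layout */
+        double open_time;
+    };
+
+    static int foo_initialized = 0;
+
+    /* shutdown hook invoked by darshan-core when the job ends */
+    static void foo_shutdown(MPI_Comm mod_comm,
+        darshan_record_id *shared_recs, int shared_rec_count,
+        void **mod_buf, int *mod_buf_sz)
+    {
+        /* reduce shared records and free module state here; on return,
+         * *mod_buf / *mod_buf_sz describe the records to be logged */
+        foo_initialized = 0;
+    }
+
+    /* called from each wrapper to bootstrap the module on first use */
+    static void foo_init(void)
+    {
+        int mem_limit = 2 * 1024 * 1024; /* record memory requested (bytes) */
+
+        if(foo_initialized)
+            return;
+        darshan_core_register_module(DARSHAN_FOO_MOD, &foo_shutdown,
+            &mem_limit, NULL, NULL); /* ignore rank and memory alignment */
+        foo_initialized = 1;
+    }
+
+    /* example wrapper body: create and timestamp the record for a path
+     * (a real module would consult its hash table first and register
+     * each record only once) */
+    void foo_wrapper_open(const char *path)
+    {
+        darshan_record_id rec_id;
+        struct foo_record *rec;
+
+        foo_init();
+        rec_id = darshan_core_gen_record_id(path);
+        rec = darshan_core_register_record(rec_id, path, DARSHAN_FOO_MOD,
+            sizeof(*rec), NULL);
+        if(!rec)
+            return; /* no record memory left */
+        rec->rec_id = rec_id;
+        rec->open_time = darshan_core_wtime();
+    }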
+
+The functions provided by ``darshan-core`` to interface with instrumentation
+modules are described in detail below.
+
+.. code-block:: C
+
+    void darshan_core_register_module(
+        darshan_module_id mod_id,
+        darshan_module_shutdown mod_shutdown_func,
+        int *inout_mod_buf_size,
+        int *rank,
+        int *sys_mem_alignment);
+
+The ``darshan_core_register_module`` function registers Darshan
+instrumentation modules with the ``darshan-core`` runtime environment. This
+function needs to be called once for any module that will contribute data to
+Darshan's final I/O characterization.
+
+* ``mod_id`` is a unique identifier for the given module, which is defined in
+  the Darshan log format header file (``darshan-log-format.h``).
+
+* ``mod_shutdown_func`` is the function pointer to the module shutdown
+  function described in the previous section.
+
+* ``inout_mod_buf_size`` is an input/output argument that stores the amount of
+  module memory being requested when calling the function and the amount of
+  memory actually reserved by darshan-core when returning.
+
+* ``rank`` is a pointer to an integer in which to store the calling process's
+  application MPI rank. ``NULL`` may be passed in to ignore this value.
+
+* ``sys_mem_alignment`` is a pointer to an integer which will store the system
+  memory alignment value Darshan was configured with. ``NULL`` may be passed
+  in to ignore this value.
+
+.. code-block:: C
+
+    void darshan_core_unregister_module(darshan_module_id mod_id);
+
+The ``darshan_core_unregister_module`` function disassociates the given module
+from the ``darshan-core`` runtime. Consequently, Darshan does not interface
+with the given module at shutdown time and will not log any I/O data from the
+module. This function should only be used if a module registers itself with
+darshan-core but later decides it does not want to contribute any I/O data.
+Note that, in the current implementation, Darshan cannot reclaim the record
+memory allocated to the calling module and assign it to other modules.
+
+* ``mod_id`` is the unique identifier for the module being unregistered.
+
+.. code-block:: C
+
+    darshan_record_id darshan_core_gen_record_id(const char *name);
+
+The ``darshan_core_gen_record_id`` function simply generates a unique record
+identifier for a given record name. This function is generally called to
+convert a name string to the unique record identifier that is needed to
+register a data record with darshan-core. The generation of IDs is consistent,
+such that modules which reference records with the same names will store these
+records using the same unique IDs, simplifying the correlation of these
+records for analysis.
+
+* ``name`` is the name of the corresponding data record (often this is just a
+  file name).
+
+.. code-block:: C
+
+    void *darshan_core_register_record(
+        darshan_record_id rec_id,
+        const char *name,
+        darshan_module_id mod_id,
+        int rec_len,
+        struct darshan_fs_info *fs_info);
+
+The ``darshan_core_register_record`` function registers a data record with the
+darshan-core runtime, allocating memory for the record so that it is persisted
+in the output log file. This record could reference a POSIX file or perhaps an
+object identifier for an object storage system, for instance. This function
+should only be called once for each record being tracked by a module to avoid
+duplicating record memory. This function returns the address at which the
+record should be stored, or ``NULL`` if there is insufficient memory for
+storing the record.
+
+* ``rec_id`` is a unique integer identifier for this record (generally
+  generated using the ``darshan_core_gen_record_id`` function).
+
+* ``name`` is the string name of the data record, which could be a file path,
+  object ID, etc. If given, darshan-core will associate the given name with
+  the record identifier and store this mapping in the log file so it can be
+  retrieved for analysis. ``NULL`` may be passed in to generate an anonymous
+  (unnamed) record.
+
+* ``mod_id`` is the identifier for the module attempting to register this
+  record.
+
+* ``rec_len`` is the length of the record.
+
+* ``fs_info`` is a pointer to a structure of relevant info for the file system
+  associated with the given record -- this structure is defined in the
+  ``darshan.h`` header. Note that this functionality only works for record
+  names that are absolute file paths, since the file system is determined by
+  matching the file path against the list of mount points Darshan is aware of.
+  ``NULL`` may be passed in to ignore this value.
+
+.. code-block:: C
+
+    double darshan_core_wtime(void);
+
+The ``darshan_core_wtime`` function simply returns a floating point number of
+seconds since Darshan was initialized. This functionality can be used to time
+the duration of application I/O calls or to store timestamps of when functions
+of interest were called.
+
+.. code-block:: C
+
+    int darshan_core_excluded_path(const char *path);
+
+The ``darshan_core_excluded_path`` function checks whether a given file path
+is in Darshan's list of excluded file paths (i.e., paths to/from which I/O is
+not instrumented, such as ``/etc``, ``/dev``, ``/usr``, etc.).
+
+* ``path`` is the absolute file path we are checking.
+
+darshan-common
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``darshan-common`` is a utility component of darshan-runtime, providing module
+developers with general functions that are likely to be reused across multiple
+modules. These functions are distinct from darshan-core functions since they
+do not require access to internal Darshan state.
+
+.. code-block:: C
+
+    char* darshan_clean_file_path(const char* path);
+
+The ``darshan_clean_file_path`` function cleans up the input path string,
+converting relative paths to absolute paths and suppressing any potential
+noise within the string. The address of the new string is returned and should
+be freed by the caller.
+
+* ``path`` is the input path string to be cleaned up.
+
+``darshan-common`` also currently includes functions for maintaining counters
+that store common I/O values (such as common I/O access sizes or strides used
+by an application), as well as functions for calculating the variance of a
+given counter across all processes. As more modules are contributed, it is
+likely that more functionality can be refactored out of module implementations
+and maintained in darshan-common, facilitating code reuse and simplifying
+maintenance.
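+
+As a small illustration of how these helpers combine with the darshan-core
+path functions described earlier, the following sketch normalizes a path and
+skips excluded prefixes inside a hypothetical wrapper. The wrapper itself and
+the assumption that a nonzero return from ``darshan_core_excluded_path()``
+means "excluded" are illustrative only.
+
+.. code-block:: C
+
+    #include <stdlib.h>
+    #include "darshan.h"
+
+    /* hypothetical wrapper fragment: normalize the user's path, skip
+     * excluded prefixes, and only then track the record */
+    void foo_track_path(const char *user_path)
+    {
+        char *newpath = darshan_clean_file_path(user_path);
+        if(!newpath)
+            return;
+        if(!darshan_core_excluded_path(newpath)) /* assumed: nonzero = excluded */
+        {
+            /* generate and register a record for newpath here */
+        }
+        free(newpath); /* the returned string is owned by the caller */
+    }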
+
+Darshan-util
+-------------------------------------
+
+The darshan-util component is composed of a helper library for accessing log
+file data records (``libdarshan-util``) and a set of utilities that use this
+library to analyze application I/O behavior. ``libdarshan-util`` includes a
+generic interface (``darshan-logutils``) for retrieving specific components of
+a given log file. Specifically, this interface allows utilities to retrieve a
+log's header metadata, job details, record ID to name mapping, and any
+module-specific data contained within the log.
+
+``libdarshan-util`` additionally includes the definition of a generic module
+interface (``darshan-mod-logutils``) that may be implemented by modules to
+provide a consistent way for Darshan utilities to interact with module data
+stored in log files. This interface is necessary since each module has records
+of varying size and format, so module-specific code is needed to interact with
+the records in a generic manner. This interface is used by the
+``darshan-parser`` utility, for instance, to extract data records from all
+modules contained in a log file and to print these records in a consistent
+format that is amenable to further analysis by other tools.
+
+darshan-logutils
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Here we define each function in the ``darshan-logutils`` interface, which can
+be used to create new log utilities and to implement module-specific
+interfaces into log files.
+
+.. code-block:: C
+
+    darshan_fd darshan_log_open(const char *name);
+
+Opens the Darshan log file stored at path ``name``. The log file must already
+exist and is opened for reading only. As part of the open routine, the log
+file header is read to set internal file descriptor data structures. Returns a
+Darshan file descriptor on success or ``NULL`` on error.
+
+.. code-block:: C
+
+    darshan_fd darshan_log_create(const char *name, enum darshan_comp_type comp_type, int partial_flag);
+
+Creates a new Darshan log file for writing only at path ``name``.
+``comp_type`` denotes the underlying compression type used on the log file
+(currently either libz or bzip2) and ``partial_flag`` denotes whether the log
+is storing partial data (that is, not all possible application file records
+were tracked by Darshan). Returns a Darshan file descriptor on success or
+``NULL`` on error.
+
+.. code-block:: C
+
+    int darshan_log_get_job(darshan_fd fd, struct darshan_job *job);
+    int darshan_log_put_job(darshan_fd fd, struct darshan_job *job);
+
+Reads/writes the ``job`` structure from/to the log file referenced by
+descriptor ``fd``. The ``darshan_job`` structure is defined in
+``darshan-log-format.h``. Returns ``0`` on success, ``-1`` on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_exe(darshan_fd fd, char *buf);
+    int darshan_log_put_exe(darshan_fd fd, char *buf);
+
+Reads/writes the corresponding executable string (exe name and command line
+arguments) from/to the Darshan log referenced by ``fd``. Returns ``0`` on
+success, ``-1`` on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_mounts(darshan_fd fd, char*** mnt_pts, char*** fs_types, int* count);
+    int darshan_log_put_mounts(darshan_fd fd, char** mnt_pts, char** fs_types, int count);
+
+Reads/writes mounted file system information for the Darshan log referenced by
+``fd``. ``mnt_pts`` points to an array of strings storing mount points,
+``fs_types`` points to an array of strings storing file system types (e.g.,
+ext4, nfs, etc.), and ``count`` points to an integer storing the total number
+of mounted file systems recorded by Darshan. Returns ``0`` on success, ``-1``
+on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_namehash(darshan_fd fd, struct darshan_name_record_ref **hash);
+    int darshan_log_put_namehash(darshan_fd fd, struct darshan_name_record_ref *hash);
+
+Reads/writes the hash table mapping Darshan record identifiers to full names
+for all records contained in the Darshan log referenced by ``fd``. ``hash`` is
+a pointer to the hash table (of type ``struct darshan_name_record_ref *``),
+which should be initialized to ``NULL`` for reading.
+This hash table is defined by the ``uthash`` hash table implementation and
+includes corresponding macros for searching, iterating, and deleting records
+from the hash. For detailed documentation on using this hash table, consult
+the ``uthash`` documentation in
+``darshan-util/uthash-1.9.2/doc/txt/userguide.txt``. The ``darshan-parser``
+utility (for parsing module information out of a Darshan log) provides an
+example of how this hash table may be used. Returns ``0`` on success, ``-1``
+on failure.
+
+.. code-block:: C
+
+    int darshan_log_get_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int mod_buf_sz);
+    int darshan_log_put_mod(darshan_fd fd, darshan_module_id mod_id, void *mod_buf, int mod_buf_sz, int ver);
+
+Reads/writes a chunk of (uncompressed) module data for the module identified
+by ``mod_id`` from/to the Darshan log referenced by ``fd``. ``mod_buf`` is the
+buffer to read data into or write data from, and ``mod_buf_sz`` is the
+corresponding size of the buffer. The ``darshan_log_get_mod`` routine can be
+repeatedly called to retrieve chunks of uncompressed data from a specific
+module region of the log file given by ``fd``. The ``darshan_log_put_mod``
+routine continually appends data to a specific module region in the log file
+given by ``fd`` and accepts an additional ``ver`` parameter indicating the
+version number for the module data records being written. These functions
+return the number of bytes read/written on success, ``-1`` on failure.
+
+.. note::
+    Darshan uses a "reader makes right" conversion strategy to rectify
+    endianness differences between the machine a log was generated on and a
+    machine analyzing the log. Accordingly, module-specific log utility
+    functions will need to check the ``swap_flag`` variable of the Darshan
+    file descriptor to determine whether byte swapping is necessary. 32-bit
+    and 64-bit byte swapping macros (``DARSHAN_BSWAP32``/``DARSHAN_BSWAP64``)
+    are provided in ``darshan-logutils.h``.
+
+.. code-block:: C
+
+    void darshan_log_close(darshan_fd fd);
+
+Closes Darshan file descriptor ``fd``. This routine *must* be called for newly
+created log files, as it flushes pending writes and writes a corresponding log
+file header before closing.
+
+.. note::
+    For newly created Darshan log files, care must be taken to write log file
+    data in the correct order, since the log file write routines essentially
+    append data to the log file. The correct order for writing all log file
+    data to file is: (1) job data, (2) exe string, (3) mount data, (4) record
+    ID -> file name map, (5) each module's data, in increasing order of module
+    identifiers.
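+
+Putting the read-side functions together, a minimal log-reading utility built
+on ``darshan-logutils`` might look like the following sketch. Error handling
+is abbreviated, and the ``nprocs`` field of ``struct darshan_job`` is an
+assumption taken from ``darshan-log-format.h``.
+
+.. code-block:: C
+
+    #include <stdio.h>
+    #include <inttypes.h>
+    #include "darshan-logutils.h"
+
+    int main(int argc, char **argv)
+    {
+        darshan_fd fd;
+        struct darshan_job job;
+
+        if(argc != 2)
+        {
+            fprintf(stderr, "usage: %s <logfile>\n", argv[0]);
+            return 1;
+        }
+
+        /* open an existing log read-only; the header is parsed here */
+        fd = darshan_log_open(argv[1]);
+        if(!fd)
+            return 1;
+
+        /* read the job-level metadata stored in the log */
+        if(darshan_log_get_job(fd, &job) == 0)
+            printf("nprocs: %" PRId64 "\n", job.nprocs);
+
+        darshan_log_close(fd);
+        return 0;
+    }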
+
+darshan-mod-logutils
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``darshan-mod-logutils`` interface provides a convenient way to implement
+new log functionality across all Darshan instrumentation modules, which can
+greatly simplify the development of new Darshan log utilities. These functions
+are defined in the ``darshan_mod_logutil_funcs`` structure in
+``darshan-logutils.h`` -- instrumentation modules simply provide their own
+implementation of each function, and utilities can then leverage this
+functionality using the ``mod_logutils`` array defined in
+``darshan-logutils.c``. Descriptions of some of the currently implemented
+functions are provided below.
+
+.. code-block:: C
+
+    int log_get_record(darshan_fd fd, void **buf);
+    int log_put_record(darshan_fd fd, void *buf);
+
+Reads/writes the module record stored in ``buf`` from/to the log referenced by
+``fd``. Notice that a size parameter is not needed, since the utilities
+calling this interface will likely not know the record size -- the
+module-specific log utility code can determine the corresponding size before
+reading/writing the record from/to file.
+
+.. note::
+    ``log_get_record`` takes a pointer to a buffer address rather than just
+    the buffer address. If the pointed-to address is ``NULL``, then record
+    memory should be allocated instead. This functionality helps optimize
+    memory usage, since utilities often don't know the size of records being
+    accessed but still must provide a buffer to read them into.
+
+.. code-block:: C
+
+    void log_print_record(void *rec, char *name, char *mnt_pt, char *fs_type);
+
+Prints all data associated with the record pointed to by ``rec``. ``name``
+holds the corresponding name string for this record. ``mnt_pt`` and
+``fs_type`` hold the corresponding mount point path and file system type
+strings associated with the record (only valid for records with names that
+are absolute file paths).
+
+.. code-block:: C
+
+    void log_print_description(int ver);
+
+Prints a description of the data stored within records for this module (with
+version number ``ver``).
+
+.. _Sec add instr:
+
+Adding new instrumentation modules
+==============================================
+
+In this section we outline each step necessary for adding a module to Darshan.
+To assist module developers, we have provided the example "NULL" module as
+part of the Darshan source tree (``darshan-null-log-format.h``,
+``darshan-runtime/lib/darshan-null.c``, and
+``darshan-util/darshan-null-logutils.*``). This example can be used as a
+minimal stubbed-out module implementation that is heavily annotated to further
+clarify how modules interact with Darshan and to provide best practices to
+future module developers. For full-fledged module implementation examples,
+developers are encouraged to examine the POSIX and MPI-IO modules.
+
+Log format headers
+-------------------------------------
+
+The following modifications to Darshan log format headers are required for
+defining the module's record structure:
+
+* Add a module identifier to the ``DARSHAN_MODULE_IDS`` macro at the top of
+  the ``darshan-log-format.h`` header. In this macro, the first field is a
+  corresponding enum value that can be used to identify the module, the second
+  field is a string name for the module, the third field is the current
+  version number of the given module's log format, and the fourth field is a
+  corresponding pointer to a Darshan log utility implementation for this
+  module (which can be set to ``NULL`` until the module has its own log
+  utility implementation).
+
+* Add a top-level header that defines an I/O data record structure for the
+  module. Consider the "NULL" module and POSIX module log format headers for
+  examples (``darshan-null-log-format.h`` and ``darshan-posix-log-format.h``,
+  respectively).
+
+These log format headers are defined at the top level of the Darshan source
+tree, since both the darshan-runtime and darshan-util repositories depend on
+their definitions.
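+
+For illustration, a minimal log format header for the hypothetical "FOO"
+module sketched earlier might look like the following. It is modeled loosely
+on ``darshan-null-log-format.h``; all names here are invented, real modules
+typically generate their counter lists with X-macros, and the
+``darshan_base_record`` type is assumed to come from ``darshan-log-format.h``.
+
+.. code-block:: C
+
+    /* darshan-foo-log-format.h -- hypothetical module record definition;
+     * assumes darshan-log-format.h has been included first */
+    #ifndef __DARSHAN_FOO_LOG_FORMAT_H
+    #define __DARSHAN_FOO_LOG_FORMAT_H
+
+    /* current version number of the FOO module's log format */
+    #define DARSHAN_FOO_VER 1
+
+    struct darshan_foo_record
+    {
+        struct darshan_base_record base_rec; /* record id + rank */
+        int64_t counters[2];  /* integer I/O counters */
+        double fcounters[2];  /* floating point counters (timestamps) */
+    };
+
+    #endif /* __DARSHAN_FOO_LOG_FORMAT_H */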
+
+Darshan-runtime
+-------------------------------------
+
+Build modifications
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following modifications to the darshan-runtime build system are necessary
+to integrate new instrumentation modules:
+
+* Necessary linker flags for inserting this module's wrapper functions need to
+  be added to a module-specific file which is used when linking applications
+  with Darshan. For an example, consider
+  ``darshan-runtime/share/ld-opts/darshan-posix-ld-opts``, the required linker
+  options for the POSIX module. The base linker options file for Darshan
+  (``darshan-runtime/share/ld-opts/darshan-base-ld-opts.in``) must also be
+  updated to point to the new module-specific linker options file.
+
+* Targets must be added to ``Makefile.in`` to build static and shared objects
+  for the module's source files, which will be stored in the
+  ``darshan-runtime/lib/`` directory. The prerequisites to building static and
+  dynamic versions of ``libdarshan`` must be updated to include these objects,
+  as well.
+
+  - If the module defines a linker options file, a rule must also be added to
+    install this file with libdarshan.
+
+Instrumentation module implementation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In addition to the development notes from above and the exemplar "NULL" and
+POSIX modules, we provide the following notes to assist module developers:
+
+* Modules only need to include the ``darshan.h`` header to interface with
+  darshan-core.
+
+* The file record identifier given when registering a record with darshan-core
+  should be used to store the record structure in a hash table or some other
+  structure.
+
+  - Subsequent calls that need to modify this record can then use the
+    corresponding record identifier to look up the record in this local hash
+    table.
+  - It may be necessary to maintain a separate hash table for other handles
+    which the module may use to refer to a given record. For instance, the
+    POSIX module may need to look up a file record based on a given file
+    descriptor, rather than a path name.
+
+Darshan-util
+-------------------------------------
+
+Build modifications
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following modifications to the darshan-util build system are necessary to
+integrate new instrumentation modules:
+
+* Update ``Makefile.in`` with new targets necessary for building
+  module-specific logutil source.
+
+  - Make sure to add the module's logutil implementation objects as a
+    prerequisite for building ``libdarshan-util``.
+  - Make sure to update the ``all``, ``clean``, and ``install`` rules to
+    reference these new targets.
+
+Module-specific logutils and utilities
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For a straightforward reference implementation of module-specific log utility
+functions, consider the implementations for the NULL module
+(``darshan-util/darshan-null-logutils.*``) and the POSIX module
+(``darshan-util/darshan-posix-logutils.*``). These module-specific log utility
+implementations are built on top of the ``darshan_log_get_mod()`` and
+``darshan_log_put_mod()`` functions, and are used to read/write complete
+module records from/to file.
+
+Also, consider the ``darshan-parser`` source code for an example of a utility
+which can leverage ``libdarshan-util`` for analyzing the contents of a Darshan
+I/O characterization log with data from arbitrary instrumentation modules.
+
+.. _Sec shared record:
+
+Shared record reductions
+==============================================
+
+Darshan prefers to aggregate data records which are shared across all
+processes into a single data record, so module developers should consider
+implementing this functionality, though it is not strictly required.
+
+Module developers should implement the shared record reduction mechanism
+within the module's ``darshan_module_shutdown()`` function, as it provides an
+MPI communicator for the module to use for collective communication and a
+list of record identifiers which are shared globally by the module (as
+described in :ref:`Sec darshan-runtime`).
+
+In general, implementing a shared record reduction involves the following
+steps, illustrated in the sketch after this list:
+
+* reorganizing shared records into a contiguous region in the buffer of module
+  records;
+
+* allocating a record buffer to store the reduction output on application rank
+  0;
+
+* creating an MPI reduction operation using the ``MPI_Op_create()`` function
+  (see the ``MPI_Op_create`` man page for details);
+
+* reducing all shared records using the created MPI reduction operation and
+  the send and receive buffers described above.
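+
+The following sketch shows those steps for the hypothetical "FOO" module used
+earlier (reusing its ``struct foo_record``). The MPI calls are standard; the
+packing of shared records into ``shared_buf`` and the reduction rule are
+invented for illustration.
+
+.. code-block:: C
+
+    #include <stdlib.h>
+    #include <mpi.h>
+    #include "darshan.h"
+
+    /* user-defined combiner passed to MPI_Op_create(); merges two arrays
+     * of FOO records element by element */
+    static void foo_record_reduce(void *in_v, void *inout_v, int *len,
+        MPI_Datatype *dt)
+    {
+        struct foo_record *in = in_v;
+        struct foo_record *inout = inout_v;
+        int i;
+
+        for(i = 0; i < *len; i++)
+        {
+            /* example rule: keep the earliest recorded open time */
+            if(in[i].open_time < inout[i].open_time)
+                inout[i].open_time = in[i].open_time;
+        }
+    }
+
+    /* called from foo_shutdown() once the shared records have been packed
+     * into the contiguous array shared_buf of length shared_rec_count */
+    static void foo_reduce_shared(MPI_Comm mod_comm,
+        struct foo_record *shared_buf, int shared_rec_count)
+    {
+        struct foo_record *recv_buf = NULL;
+        MPI_Datatype red_type;
+        MPI_Op red_op;
+        int my_rank;
+
+        MPI_Comm_rank(mod_comm, &my_rank);
+        if(my_rank == 0) /* rank 0 receives the combined records */
+            recv_buf = malloc(shared_rec_count * sizeof(*recv_buf));
+
+        MPI_Type_contiguous(sizeof(struct foo_record), MPI_BYTE, &red_type);
+        MPI_Type_commit(&red_type);
+        MPI_Op_create(foo_record_reduce, 1 /* commutative */, &red_op);
+
+        MPI_Reduce(shared_buf, recv_buf, shared_rec_count, red_type,
+            red_op, 0, mod_comm);
+
+        /* rank 0 would now copy recv_buf back over its shared records */
+        MPI_Op_free(&red_op);
+        MPI_Type_free(&red_type);
+        free(recv_buf);
+    }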
+
+For a more in-depth example of how to use the shared record reduction
+mechanism, consider the implementations in the POSIX or MPI-IO modules.
+
+Other resources
+==============================================
+
+* `Darshan GitLab page `_
+* `Darshan project website `_
+* :ref:`TOC Darshan Runtime`
+* :ref:`TOC Darshan Utilities`
diff --git a/index.rst b/index.rst
index a590fdbcd..8563d3298 100644
--- a/index.rst
+++ b/index.rst
@@ -1,7 +1,76 @@
-Welcome to Darshan's documentation!
+Welcome to Darshan's Documentation
 ===================================
 
-.. note::
+The Darshan source tree is divided into two parts:
 
-   This project is under active development.
+* ``darshan-runtime``: to be installed on systems where you intend to
+  instrument MPI applications.
+* ``darshan-util``: to be installed on systems where you intend to analyze log
+  files produced by darshan-runtime.
+
+  + ``darshan-util/pydarshan``: a Python package providing interfaces to
+    Darshan log files.
+
+:ref:`Modularized I/O characterization using Darshan 3.x `
+gives details on the design of the new modularized version of Darshan (3.x)
+and how new instrumentation modules may be developed within Darshan.
+
+Site-specific documentation for facilities that deploy Darshan in production:
+
+* Argonne Leadership Computing Facility (ALCF):
+  `Theta `_,
+  `Cooley `_.
+* National Energy Research Scientific Computing Center
+  (`NERSC `_)
+* National Center for Supercomputing Applications
+  (`NCSA `_)
+* Oak Ridge Leadership Computing Facility (OLCF):
+  `darshan-runtime `_,
+  `darshan-util `_.
+* King Abdullah University of Science and Technology
+  (`KAUST `_)
+* European Centre for Medium-Range Weather Forecasts
+  (`ECMWF `_)
+* Ohio Supercomputer Center
+  (`OSC `_)
+* Julich Supercomputing Centre
+  (`JSC `_)
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Darshan Runtime
+   :name: TOC Darshan Runtime
+
+   darshan-runtime/doc/darshan-runtime
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Darshan Utilities
+   :name: TOC Darshan Utilities
+
+   darshan-util/doc/darshan-util
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Modularized I/O characterization
+   :name: TOC Modularization
+
+   doc/darshan-modularization.rst
+
+.. toctree::
+   :maxdepth: 2
+   :caption: PyDarshan
+   :name: pydarshantoc
+
+   darshan-util/pydarshan/docs/readme
+   darshan-util/pydarshan/docs/install
+   darshan-util/pydarshan/docs/usage
+   darshan-util/pydarshan/docs/api/pydarshan/modules
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/readthedocs/DEVELOPER_NOTES.md b/readthedocs/DEVELOPER_NOTES.md
new file mode 100644
index 000000000..0b019295c
--- /dev/null
+++ b/readthedocs/DEVELOPER_NOTES.md
@@ -0,0 +1,68 @@
+## Notes for Darshan developers working on documents published on readthedocs.io
+
+### Sign up/log into readthedocs.io
+
+* Visit [Read the Docs](https://about.readthedocs.com) to sign up or log into
+  an account. You can choose the "Sign up with GitHub" option.
+* After logging in, the web site leads you to the Read the Docs dashboard,
+  which shows a list of your projects.
+* [Read the Docs tutorial](https://docs.readthedocs.com/platform/latest/tutorial/index.html)
+  contains detailed information about how to sign up for a user account and
+  set up the connection to Darshan's GitHub repo.
+
+### Create Darshan project on Read the Docs
+
+* Click "Add project".
+* In the "Repository name" field, enter the Darshan GitHub repo name,
+  "darshan-hpc/darshan", and then click "Continue".
+* The default settings of Name, Repository URL, Default branch, and Language
+  will be pre-filled. Customize them if necessary. Then click "Next".
+* Because the configuration file `.readthedocs.yaml` is required in the root
+  folder of the GitHub repo, click "This file exists" to let it add a default
+  file.
+* This leads you back to the Read the Docs dashboard, where the very first
+  build should show as triggered and in progress.
+* Note that this also adds a webhook to the Darshan GitHub repo; see it under
+  the repo's Settings, then Webhooks.
+
+### Change settings of the Darshan project on the Read the Docs dashboard
+
+* Visit your dashboard at https://app.readthedocs.org/dashboard/
+* Select the Darshan project.
+* Click "Settings" on the right.
+  + At the bottom of this page, select "Build pull requests for this project"
+    and click "Save". This enables rebuilding the Darshan documents for all
+    pull requests.
+  + Add a new project maintainer
+    * Click "Maintainers" on the left.
+    * Click the "Add maintainer" button.
+  + Enable Analytics
+    * Click "Addons" on the left.
+    * On the "Analytics" tab, select "Analytics enabled" and click "Save".
+  + Environment variables
+    * Environment variables set here are used by Read the Docs; for example,
+      `DARSHAN_INSTALL_PREFIX` is set to the installation location of Darshan.
+    * Changing an existing variable must be done by first deleting it and then
+      adding a new one.
+
+### Configuration files
+
+* File `conf.py` must be stored in the root folder of Darshan's repo.
+* File `.readthedocs.yaml` must be stored in the root folder of Darshan's
+  repo.
+* Darshan's documents require `darshan-util` to be built and installed first,
+  before installing pydarshan. See the settings of `pre_install` in file
+  `.readthedocs.yaml`.
+* Building of `darshan-runtime` is not required.
+* The master file, `index.rst`, must be stored in the root folder.
+* File `index.rst` includes the following documents:
+  + darshan-runtime/doc/darshan-runtime
+  + darshan-util/doc/darshan-util
+  + doc/darshan-modularization.rst
+  + darshan-util/pydarshan/docs/readme
+  + darshan-util/pydarshan/docs/install
+  + darshan-util/pydarshan/docs/usage
+  + darshan-util/pydarshan/docs/api/pydarshan/modules