diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b4f81a1..c35775f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,3 +43,40 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml flags: unittests + + docs: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools wheel + python -m pip install -q --no-cache-dir --use-feature=2020-resolver -e .[docs] + python -m pip list + sudo apt-get update + sudo apt-get -qq install pandoc + - name: Test and build docs + run: | + python setup.py build_sphinx + touch docs/_build/html/.nojekyll + - name: Deploy docs to GitHub Pages + if: success() && github.event_name == 'push' && github.ref == 'refs/heads/master' + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/_build/html + force_orphan: true + user_name: 'github-actions[bot]' + user_email: 'github-actions[bot]@users.noreply.github.com' + commit_message: Deploy to GitHub pages diff --git a/README.md b/README.md index aab5333..0ec1867 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ Options: ## Authors -`pyhf-benchmark` is openly developed by [Bo Zheng](https://iris-hep.org/fellows/BoZheng.html) and the [`pyhf` dev team](https://scikit-hep.org/pyhf/#authors). +`pyhf-benchmark` is openly developed by [Bo Zheng](https://iris-hep.org/fellows/BoZheng.html) and the [pyhf dev team](https://scikit-hep.org/pyhf/#authors). 
Please check the [contribution statistics for a list of contributors.](https://github.com/pyhf/pyhf-benchmark/graphs/contributors) diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..0099d02 --- /dev/null +++ b/README.rst @@ -0,0 +1,82 @@ +pyhf-benchmark +============== + +|GitHub Project| |GitHub Actions Status: CI| |Code Coverage| |Code +style: black| + +Benchmarking of hardware acceleration of ``pyhf`` + +Environment +----------- + +For the time being, until a library can be created, use the +``requirements.txt`` to also serve setup duty in your virtual +environment in addition to providing a reproducible benchmarking +environment. + +:: + + (pyhf-benchmark) $ python -m pip install -r requirements.txt + +Usage +----- + +:: + + $ pyhf-benchmark run --help + Usage: pyhf-benchmark run [OPTIONS] + + Automatic process of taking pyhf computation. + + Usage: + + $ pyhf-benchmark run -c [-b] [-p] [-u] [-m] [-n] [-mm] + + Examples: + + $ pyhf-benchmark run -c mle -b numpy -u https://www.hepdata.net/record/resource/1267798?view=true -m [750,100] | + $ pyhf-benchmark run -c mle -u https://www.hepdata.net/record/resource/1267798?view=true -m [750,100] | + $ pyhf-benchmark run -c mle -b numpy -p 1Lbb-likelihoods-hepdata -m [750,100] | + $ pyhf-benchmark run -c interpolation -b jax -n 0 -mm fast | + $ pyhf-benchmark run -c interpolation -b numpy -n 0 -mm slow | + + More information: + + https://github.com/pyhf/pyhf-benchmark + + + + Options: + -c, -computation TEXT Type of computation [required] + -b, --backend TEXT Name of the pyhf backend to run with. + -p, --path TEXT Local path of workspace. + -u, --url TEXT Online data link. + -m, --model-point TEXT Model point. + -n, --number TEXT Number. + -mm, --mode TEXT Mode. + -h, --help Show this message and exit. + +Authors +------- + +``pyhf-benchmark`` is openly developed by `Bo +Zheng <https://iris-hep.org/fellows/BoZheng.html>`__ and the `pyhf dev +team <https://scikit-hep.org/pyhf/#authors>`__. + +Please check the `contribution statistics for a list of +contributors. 
<https://github.com/pyhf/pyhf-benchmark/graphs/contributors>`__ + +Acknowledgements +---------------- + +Bo Zheng was awarded an `IRIS-HEP +Fellowship `__ for this work. + +.. |GitHub Project| image:: https://img.shields.io/badge/GitHub--blue?style=social&logo=GitHub + :target: https://github.com/pyhf/pyhf-benchmark +.. |GitHub Actions Status: CI| image:: https://github.com/pyhf/pyhf-benchmark/workflows/CI/badge.svg?branch=master + :target: https://github.com/pyhf/pyhf-benchmark/actions?query=workflow%3ACI+branch%3Amaster +.. |Code Coverage| image:: https://codecov.io/gh/pyhf/pyhf-benchmark/graph/badge.svg?branch=master + :target: https://codecov.io/gh/pyhf/pyhf-benchmark?branch=master +.. |Code style: black| image:: https://img.shields.io/badge/code%20style-black-000000.svg + :target: https://github.com/psf/black diff --git a/docs/.ackrc b/docs/.ackrc new file mode 100644 index 0000000..3796f7d --- /dev/null +++ b/docs/.ackrc @@ -0,0 +1 @@ +--ignore-directory=docs/_generated \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d14e46f --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,25 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build +GENERATEDDIR = _generated + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: clean +clean: + rm -rf $(GENERATEDDIR)/* \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..6247f7e --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/_templates/modifierclass.rst b/docs/source/_templates/modifierclass.rst new file mode 100644 index 0000000..8e46efd --- /dev/null +++ b/docs/source/_templates/modifierclass.rst @@ -0,0 +1,30 @@ +:github_url: https://github.com/pyhf/pyhf-benchmark/blob/master/{{module | replace(".", "/") }} + +{{ name | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ name }} + :show-inheritance: + + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + + {% for item in attributes %} + .. autoattribute:: {{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block methods %} + {% if methods %} + .. rubric:: Methods + + {% for item in methods %} + {% if item not in inherited_members %} + .. 
automethod:: {{ name }}.{{ item }} + {% endif %} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..6b72a9b --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,98 @@ +Python API +========== + +Run +----------------------------------------- + +.. currentmodule:: pyhf_benchmark.run + +.. autosummary:: + :toctree: _generated/ + + run + +Load +----------------------------------------- + +.. currentmodule:: pyhf_benchmark.load + +.. autosummary:: + :toctree: _generated/ + + download + open_local_file + delete_downloaded_file + +Maximum Likelihood Computation +----------------------------------------- + +.. currentmodule:: pyhf_benchmark.mle + +.. autosummary:: + :toctree: _generated/ + + get_bkg_and_signal + calculate_CLs + +Plot +-------- + +.. currentmodule:: pyhf_benchmark.plot + +.. autosummary:: + :toctree: _generated/ + + load + load_all + plot + subplot + plot_comb + subplot_comb + +Manager +---------- + +.. currentmodule:: pyhf_benchmark.manager + +.. autosummary:: + :toctree: _generated/ + :nosignatures: + :template: modifierclass.rst + + RunManager + +SystemStats +------------- + +.. currentmodule:: pyhf_benchmark.stats + +.. autosummary:: + :toctree: _generated/ + :nosignatures: + :template: modifierclass.rst + + SystemStats + gpu_in_use_by_this_process + +JsonlEventsFile +----------------- + +.. currentmodule:: pyhf_benchmark.jsonlfile + +.. autosummary:: + :toctree: _generated/ + :nosignatures: + :template: modifierclass.rst + + JsonlEventsFile + + +Utilities +--------- + +.. currentmodule:: pyhf_benchmark.util + +.. autosummary:: + :toctree: _generated/ + + random_histosets_alphasets_pair diff --git a/docs/source/cli.rst b/docs/source/cli.rst new file mode 100644 index 0000000..4775d38 --- /dev/null +++ b/docs/source/cli.rst @@ -0,0 +1,6 @@ +Command Line API +================ + +.. 
click:: pyhf_benchmark.cli.cli:pyhf_benchmark + :prog: pyhf-benchmark + :show-nested: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..1786222 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,100 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) +from pathlib import Path +import sys + +# Sphinx runs conf.py from docs/source, so the package sources are two levels up. +sys.path.insert(0, str(Path("../../src").resolve())) +sys.path.insert(1, str(Path("./exts").resolve())) + + +def setup(app): + app.add_css_file( + "https://cdnjs.cloudflare.com/ajax/libs/github-fork-ribbon-css/0.2.2/gh-fork-ribbon.min.css" + ) + + +# -- Project information ----------------------------------------------------- + +project = "pyhf-benchmark" +copyright = "2020, Bo Zheng" +author = "Bo Zheng" + +# The full version, including alpha/beta/rc tags +release = "0.0.1" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", + "sphinx.ext.githubpages", + "sphinxcontrib.bibtex", + "sphinx.ext.napoleon", + "sphinx_click.ext", + "nbsphinx", + "sphinx_issues", + "sphinx_copybutton", + "xref", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# external links +xref_links = {"arXiv:1007.1727": ("[1007.1727]", "https://arxiv.org/abs/1007.1727")} + +# Github repo +issues_github_path = "pyhf/pyhf-benchmark" + +# Generate the API documentation when building +autosummary_generate = True +numpydoc_show_class_members = False + +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ["_static"] + +source_suffix = [".rst", ".md"] diff --git a/docs/source/development.rst b/docs/source/development.rst new file mode 100644 index 0000000..0b330ac --- /dev/null +++ b/docs/source/development.rst @@ -0,0 +1,2 @@ +Testing +------- \ No newline at end of file diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 0000000..a4150cb --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,15 @@ +Examples +======== + +Try out in Binder! |Binder| + +.. |Binder| image:: https://mybinder.org/badge_logo.svg + :target: examples/notebooks/demo.ipynb + +Notebooks: + +.. toctree:: + :maxdepth: 2 + :glob: + + examples/notebooks/* diff --git a/docs/source/examples/notebooks/demo.ipynb b/docs/source/examples/notebooks/demo.ipynb new file mode 100644 index 0000000..008d7aa --- /dev/null +++ b/docs/source/examples/notebooks/demo.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Check pyhf-benchmark version" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pyhf-benchmark, version 0.0.1\n" + ] + } + ], + "source": [ + "pyhf-benchmark --version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test performance of CPU for maximum likelihood computation using numpy backend" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset: 1Lbb-likelihoods-hepdata\n", + "Backend set to: numpy\n", + "\n", + "Starting fit\n", + "\n", + "fit 1Lbb-likelihoods-hepdata in 0:00:22.935410 seconds\n", + "\n", + "CLs_obs: 0.06628434618085545\n", + 
"CLs_exp: [9.517591120621866e-05, 0.0010228903813534387, 0.00946311787745848, 0.06581424960195661, 0.28239514754522566]\n" + ] + } + ], + "source": [ + "pyhf-benchmark run -c mle -b numpy -u https://www.hepdata.net/record/resource/1267798?view=true -m [750,100]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "run_20200817_1597677709\n" + ] + } + ], + "source": [ + "cd output/\n", + "ls" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1Lbb-likelihoods-hepdata_mle_numpy\n" + ] + } + ], + "source": [ + "cd run_20200817_1597677709\n", + "ls" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU_Threads.png\t\t\tProc_Memory_MB.png\n", + "CPU_Utilization.png\t\tProc_Memory_Percent.png\n", + "Disk_IO_Utilization.png\t\tProc_Memory_available.png\n", + "Memory_Utilization.png\t\tevents.jsonl\n", + "Network_Traffic.png\n" + ] + } + ], + "source": [ + "cd 1Lbb-likelihoods-hepdata_mle_numpy\n", + "ls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test performance of CPU for maximum likelihood computation using numpy, jax, tensorflow and pytorch backends" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset: 1Lbb-likelihoods-hepdata\n", + "Backend set to: numpy\n", + "\n", + "Starting fit\n", + "\n", + "fit 1Lbb-likelihoods-hepdata in 0:00:22.913646 seconds\n", + "\n", + "CLs_obs: 0.06628434618085545\n", + "CLs_exp: [9.517591120621866e-05, 0.0010228903813534387, 0.00946311787745848, 0.06581424960195661, 0.28239514754522566]\n", + "Backend set to: jax\n", + "\n", + 
"Starting fit\n", + "\n", + "fit 1Lbb-likelihoods-hepdata in 0:00:21.528265 seconds\n", + "\n", + "CLs_obs: 0.0662840218994314\n", + "CLs_exp: [9.516724344736203e-05, 0.0010228157686477024, 0.009462595024861676, 0.06581172517487545, 0.2823886518706529]\n", + "Backend set to: tensorflow\n", + "\n", + "Starting fit\n", + "\n", + "WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/array_grad.py:644: _EagerTensorBase.cpu (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.identity instead.\n", + "WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:297: setdiff1d (from tensorflow.python.ops.array_ops) is deprecated and will be removed after 2018-11-30.\n", + "Instructions for updating:\n", + "This op will be removed after the deprecation date. Please switch to tf.sets.difference().\n", + "fit 1Lbb-likelihoods-hepdata in 0:00:12.043619 seconds\n", + "\n", + "CLs_obs: 0.0662931278347969\n", + "CLs_exp: [9.5105112e-05 1.0222825e-03 9.4588548e-03 6.5793678e-02 2.8234217e-01]\n", + "Backend set to: pytorch\n", + "\n", + "Starting fit\n", + "\n", + "fit 1Lbb-likelihoods-hepdata in 0:00:05.200849 seconds\n", + "\n", + "CLs_obs: 0.0662882849574089\n", + "CLs_exp: [9.481733286520466e-05, 0.001019802875816822, 0.009441475383937359, 0.06570970267057419, 0.2821260690689087]\n" + ] + } + ], + "source": [ + "pyhf-benchmark run -c mle -b [numpy,jax,tensorflow,pytorch] -u https://www.hepdata.net/record/resource/1267798?view=true -m [750,100]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "run_20200817_1597677709\trun_20200817_1597678017\n" + ] + } + ], + "source": [ + "cd output/\n", + "ls" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "1Lbb-likelihoods-hepdata_mle_jax\tDisk_IO_Utilization.png\n", + "1Lbb-likelihoods-hepdata_mle_numpy\tMemory_Utilization.png\n", + "1Lbb-likelihoods-hepdata_mle_pytorch\tNetwork_Traffic.png\n", + "1Lbb-likelihoods-hepdata_mle_tensorflow\tProc_Memory_MB.png\n", + "CPU_Threads.png\t\t\t\tProc_Memory_Percent.png\n", + "CPU_Utilization.png\t\t\tProc_Memory_available.png\n" + ] + } + ], + "source": [ + "cd run_20200817_1597678017\n", + "ls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test performance of CPU for interpolation computation using numpy backend" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "pyhf-benchmark run -c interpolation -b numpy -n 0 -mm slow" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "run_20200817_1597677709\trun_20200817_1597678017\trun_20200817_1597678299\n" + ] + } + ], + "source": [ + "cd output/\n", + "ls" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random_interpolation_numpy\n" + ] + } + ], + "source": [ + "cd run_20200817_1597678299\n", + "ls" + ] + } + ], + "metadata": { + "kernelspec": { + "name": "python3", + "language": "python", + "display_name": "Python 3" + }, + "language_info": { + "codemirror_mode": "shell", + "file_extension": ".sh", + "mimetype": "text/x-sh", + "name": "bash" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [ + "Demo\n", + "==============" + ], + "metadata": { + "collapsed": false + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/docs/source/exts/xref.py b/docs/source/exts/xref.py new file mode 100644 index 0000000..8c92629 --- 
/dev/null +++ b/docs/source/exts/xref.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- + +from docutils import nodes + +from sphinx.util import caption_ref_re + + +def xref(typ, rawtext, text, lineno, inliner, options=None, content=None): + """Docutils role function: turn ``text`` into a reference using its ``xref_links`` entry.""" + # avoid mutable defaults + _ = {} if options is None else options + _ = [] if content is None else content + + title = target = text + # titleistarget = True + # look if explicit title and target are given with `foo <bar>` syntax + brace = text.find("<") + if brace != -1: + # titleistarget = False + m = caption_ref_re.match(text) + if m: + target = m.group(2) + title = m.group(1) + else: + # fallback: everything after '<' is the target + target = text[brace + 1 :] + title = text[:brace] + + link = xref.links[target] + + if brace != -1: + pnode = nodes.reference(target, title, refuri=link[1]) + else: + pnode = nodes.reference(target, link[0], refuri=link[1]) + + return [pnode], [] + + +def get_refs(app): + + xref.links = app.config.xref_links + + +def setup(app): + + app.add_config_value("xref_links", {}, True) + app.add_role("xref", xref) + app.connect("builder-inited", get_refs) diff --git a/docs/source/faq.rst b/docs/source/faq.rst new file mode 100644 index 0000000..cde88d8 --- /dev/null +++ b/docs/source/faq.rst @@ -0,0 +1,2 @@ +FAQ +=== diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..723a9ed --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,30 @@ +.. pyhf-benchmark documentation master file, created by + sphinx-quickstart on Mon Jul 27 21:00:57 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. toctree:: + :hidden: + + intro + examples + installation + development + faq + cli + api + +.. raw:: html + + <a class="github-fork-ribbon right-top fixed" href="https://github.com/pyhf/pyhf-benchmark/" data-ribbon="Fork me on GitHub" title="Fork me on GitHub">Fork me on GitHub</a> + +.. 
include:: ../../README.rst + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 0000000..11e4437 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,2 @@ +Installation +============ diff --git a/docs/source/intro.rst b/docs/source/intro.rst new file mode 100644 index 0000000..e8b5eb2 --- /dev/null +++ b/docs/source/intro.rst @@ -0,0 +1,3 @@ +Introduction +============ + diff --git a/setup.cfg b/setup.cfg index 9a4f96b..9ce98f2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,8 +2,8 @@ name = pyhf_benchmark version = 0.0.1 description = Benchmarking of hardware acceleration of pyhf -long_description = file: README.md -long_description_content_type = text/markdown +long_description = file: README.rst +long_description_content_type = text/x-rst url = https://github.com/pyhf/pyhf-benchmark author = Bo Zheng, Lukas Heinrich, Matthew Feickert, Giordon Stark author_email = bozheng96@gmail.com, lukas.heinrich@cern.ch, matthew.feickert@cern.ch, gstark@cern.ch diff --git a/setup.py b/setup.py index c8df5b1..1d944ad 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,8 @@ + ["check-manifest", "bumpversion~=0.5", "pre-commit", "twine",] ) ) +# Every extension and the HTML theme loaded by docs/source/conf.py must be installable via ".[docs]". +extras_require["docs"] = sorted(set(["sphinx", "sphinxcontrib-bibtex~=1.0", "sphinx-click", "sphinx_rtd_theme", "nbsphinx", "sphinx-issues", "sphinx-copybutton"])) extras_require["complete"] = sorted(set(sum(extras_require.values(), []))) setuptools.setup(extras_require=extras_require,) diff --git a/src/pyhf_benchmark/jsonlfile.py b/src/pyhf_benchmark/jsonlfile.py index e9f6d8f..e6b060b 100644 --- a/src/pyhf_benchmark/jsonlfile.py +++ b/src/pyhf_benchmark/jsonlfile.py @@ -5,9 +5,15 @@ class JsonlEventsFile(object): - """Used to store events during a run. 
""" + """Used to store information of CPU and GPU work status during a run.""" def __init__(self, start_time, fname, out_dir="."): + """ + Args: + start_time: Start time of a run + fname: output JSON filename + out_dir: output directory + """ self._start_time = start_time self.fname = out_dir / fname self.buffer = [] @@ -21,6 +27,9 @@ def __init__(self, start_time, fname, out_dir="."): self.load() def load(self): + """ + Load output JSON file and set file pointer to the end of the file. + """ last_row = {} with self.fname.open("r+") as f: for line in f: @@ -33,6 +42,12 @@ def load(self): self._start_time -= last_row["_runtime"] def flatten(self, dictionary): + """ + Flatten nested dictionary. + + Args: + dictionary: CPU and GPU work status contents + """ if isinstance(dictionary, dict): for k, v in list(dictionary.items()): if isinstance(v, dict): @@ -42,7 +57,14 @@ def flatten(self, dictionary): dictionary[k + "." + k2] = v2 def track(self, event, properties, timestamp=None): + """ + Flush work status back to output file. + Args: + event: Event name + properties: CPU and GPU work status contents + timestamp: Time stamp + """ self.lock.acquire() try: row = {} @@ -58,6 +80,9 @@ def track(self, event, properties, timestamp=None): os.fsync(self._file.fileno()) def close(self): + """ + Close output file and release the lock. + """ self.lock.acquire() try: if self._file: diff --git a/src/pyhf_benchmark/load.py b/src/pyhf_benchmark/load.py index cbd35c7..93db59c 100644 --- a/src/pyhf_benchmark/load.py +++ b/src/pyhf_benchmark/load.py @@ -6,7 +6,15 @@ def download(url): - """Download online data""" + """ + Download online data. 
+ + Args: + url: URL for downloaded dataset + + Returns: + directory_name: directory for downloaded dataset + """ response = requests.get(url, stream=True) assert response.status_code == 200 @@ -25,10 +33,24 @@ def download(url): def open_local_file(file_path): - """Open local source files""" + """ + Build the path of a local dataset under ``../data/``. + + Args: + file_path: Dataset path relative to ``../data/`` + + Returns: + directory_name: directory for local dataset + """ directory_name = Path("../data/" + file_path) return directory_name def delete_downloaded_file(directory_name): + """ + Delete downloaded dataset. + + Args: + directory_name: directory for downloaded dataset + """ shutil.rmtree(directory_name) diff --git a/src/pyhf_benchmark/manager.py b/src/pyhf_benchmark/manager.py index 09fe2a7..19b5bab 100644 --- a/src/pyhf_benchmark/manager.py +++ b/src/pyhf_benchmark/manager.py @@ -5,7 +5,13 @@ class RunManager(object): + """Manage a run's process and plot the running results.""" + def __init__(self, meta=None): + """ + Args: + meta: Meta information for a run + """ self._stat = [] self._meta = meta self._start_time = time.time() @@ -15,16 +21,28 @@ def __init__(self, meta=None): ) def start(self, meta=None): + """ + Start a new run. + + Args: + meta: Meta information for a run + """ system = SystemStats(meta, self.directory) self.times += 1 self._stat.append(system) system.start() def close(self): + """ + End the oldest started run and plot its results. + """ system = self._stat.pop(0) system.shutdown() plot(system.dir) def shutdown(self): + """ + Plot the combined results if more than one run was started. 
+ """ if self.times > 1: plot_comb(self.directory) diff --git a/src/pyhf_benchmark/mle.py b/src/pyhf_benchmark/mle.py index a679ac6..e590197 100644 --- a/src/pyhf_benchmark/mle.py +++ b/src/pyhf_benchmark/mle.py @@ -4,7 +4,17 @@ def get_bkg_and_signal(directory_name, model_point): - """Load background and signal""" + """ + Load background and signal + + Args: + directory_name: directory name for Background and PatchSet files + model_point: Model point + + Returns: + background_only: The JSON for the background only model + signal_patch_json: The JSON Patch for the signal model + """ bkgonly_path = directory_name / Path("BkgOnly.json") signal_path = directory_name / Path("patchset.json") @@ -33,9 +43,11 @@ def calculate_CLs(bkgonly_json, signal_patch_json): """ Calculate the observed CLs and the expected CLs band from a background only and signal patch. + Args: bkgonly_json: The JSON for the background only model signal_patch_json: The JSON Patch for the signal model + Returns: CLs_obs: The observed CLs value CLs_exp: List of the expected CLs value band diff --git a/src/pyhf_benchmark/plot.py b/src/pyhf_benchmark/plot.py index 14a2c2a..32bdd47 100644 --- a/src/pyhf_benchmark/plot.py +++ b/src/pyhf_benchmark/plot.py @@ -48,6 +48,15 @@ def load(directory_name): + """ + Load output JSON file. + + Args: + directory_name: Local path for output JSON file + + Returns: + CPU and GPU work status in Pandas DataFrame format + """ path = directory_name / "events.jsonl" output_dic = {} @@ -69,6 +78,16 @@ def load(directory_name): def load_all(directory_name): + """ + Load output JSON file if one run contains multiple running results. 
+ + Args: + directory_name: Local path for output JSON file + + Returns: + contents: Storage for work status + backends: Storage for backends information + """ list_of_paths = directory_name.glob("*") contents = [] backends = [] @@ -80,6 +99,16 @@ def load_all(directory_name): def subplot(y_label, column, output, directory, filename): + """ + Plot one metric of the running results. + + Args: + y_label: Y axis name + column: Value for Y axis + output: CPU and GPU work status contents + directory: Directory for plot file + filename: Filename for plot file + """ fig, ax = plt.subplots() x_value = output["_runtime"] if y_label == "Network Traffic (bytes)": @@ -98,6 +127,17 @@ def subplot(y_label, column, output, directory, filename): def subplot_comb(y_label, column, outputs, backends, directory, filename): + """ + Plot one metric of the running results with multiple lines in one graph. + + Args: + y_label: Y axis name + column: Value for Y axis + outputs: CPU and GPU work status contents + backends: Backends information + directory: Directory for plot file + filename: Filename for plot file + """ fig, ax = plt.subplots() ax.set_xlabel("Time (minutes)") ax.set_ylabel(y_label) @@ -117,6 +157,12 @@ def subplot_comb(y_label, column, outputs, backends, directory, filename): def plot(directory): + """ + Plot CPU and GPU metrics for a run. + + Args: + directory: Directory for plot file + """ output = load(directory) idx = 0 while idx < len(ylabels): @@ -127,6 +173,12 @@ def plot(directory): def plot_comb(directory): + """ + Plot CPU and GPU metrics for a run if multiple lines are plotted in one graph. 
+ + Args: + directory: Directory for plot file + """ outputs, backends = load_all(directory) idx = 0 while idx < len(ylabels): diff --git a/src/pyhf_benchmark/run.py b/src/pyhf_benchmark/run.py index 9fedb8d..f2e8935 100644 --- a/src/pyhf_benchmark/run.py +++ b/src/pyhf_benchmark/run.py @@ -37,14 +37,14 @@ @click.option("-n", "--number", "number", help="Number.", default="0", required=False) @click.option("-mm", "--mode", "mode", help="Mode.", default="fast", required=False) def run(computation, backend, path, url, model_point, number, mode): - """ + r""" Automatic process of taking pyhf computation. Usage: $ pyhf-benchmark run -c [-b] [-p] [-u] [-m] [-n] [-mm] - Examples: + Examples: $ pyhf-benchmark run -c mle -b numpy -u https://www.hepdata.net/record/resource/1267798?view=true -m [750,100] $ pyhf-benchmark run -c mle -u https://www.hepdata.net/record/resource/1267798?view=true -m [750,100] @@ -53,7 +53,7 @@ def run(computation, backend, path, url, model_point, number, mode): $ pyhf-benchmark run -c interpolation -b numpy -n 0 -mm slow - More information: + More information: https://github.com/pyhf/pyhf-benchmark diff --git a/src/pyhf_benchmark/stats.py b/src/pyhf_benchmark/stats.py index 6531a89..9c84990 100644 --- a/src/pyhf_benchmark/stats.py +++ b/src/pyhf_benchmark/stats.py @@ -11,6 +11,15 @@ def gpu_in_use_by_this_process(gpu_handle): + """ + Check if there is a GPU used by a process. + + Args: + gpu_handle: NVML Device handle + + Returns: + True or False + """ if not psutil: return False @@ -40,7 +49,14 @@ def gpu_in_use_by_this_process(gpu_handle): class SystemStats(object): + """CPU and GPU work status for each time stamp.""" + def __init__(self, meta=None, directory="."): + """ + Args: + meta: Meta data for a run. + directory: Output file Directory. 
+ """ try: pynvml.nvmlInit() self.gpu_count = pynvml.nvmlDeviceGetCount() @@ -75,6 +91,9 @@ def __init__(self, meta=None, directory="."): ) def start(self): + """ + Start a thread to record CPU and GPU work status. + """ self._thread.start() @property @@ -87,15 +106,22 @@ def dir(self): @property def sample_rate_seconds(self): - """Sample system stats every this many seconds, default to 2""" + """ + Sample system stats every this many seconds, default to 2 + """ return self._sample_rate_seconds @property def samples_to_average(self): - """The number of samples to average before pushing, default to 3""" + """ + The number of samples to average before pushing, default to 3 + """ return self._samples_to_average def _thread_body(self): + """ + Control the recording and flushing of CPU and GPU work status. + """ while True: stats = self.stats() for stat, value in stats.items(): @@ -117,6 +143,9 @@ def _thread_body(self): break def shutdown(self): + """ + End the thread. + """ self._shutdown = True try: self._thread.join() @@ -125,6 +154,9 @@ def shutdown(self): pass def flush(self): + """ + Flush CPU and GPU work status to the output file after a time interval. + """ stats = self.stats() for stat, value in stats.items(): if isinstance(value, Number): @@ -135,6 +167,12 @@ def flush(self): self.sampler = {} def stats(self): + """ + Record CPU and GPU work status after a time interval. + + Returns: + stats: CPU and GPU work status + """ stats = {} for i in range(0, self.gpu_count): handle = pynvml.nvmlDeviceGetHandleByIndex(i) diff --git a/src/pyhf_benchmark/util.py b/src/pyhf_benchmark/util.py index a67a57c..b7a9926 100644 --- a/src/pyhf_benchmark/util.py +++ b/src/pyhf_benchmark/util.py @@ -5,6 +5,14 @@ def random_histosets_alphasets_pair(): + """ + Generate random dataset for interpolation computation. 
+ + Returns: + h: Histogram data + a: Alpha data + """ + def generate_shapes(histogramssets, alphasets): h_shape = [len(histogramssets), 0, 0, 0] a_shape = (len(alphasets), max(map(len, alphasets)))