diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
new file mode 100644
index 00000000..9b67ba73
--- /dev/null
+++ b/.github/workflows/coverage.yml
@@ -0,0 +1,38 @@
+name: Coverage
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+jobs:
+ coverage:
+ runs-on: ubuntu-latest
+ env:
+ FORCE_COLOR: true
+ UV_SYSTEM_PYTHON: 1
+ steps:
+ - uses: actions/checkout@v6
+
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version-file: "pyproject.toml"
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+
+ - name: Install dependencies
+ run: |
+ uv pip install --group=test . "scikit-learn<1.8.0"
+
+ - name: Run tests with coverage
+ run: |
+ pytest --cov --cov-report=xml --verbose
+
+ - name: Upload coverage to Codecov
+ uses: codecov/codecov-action@v4
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }}
+ file: ./coverage.xml
+ fail_ci_if_error: true
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
new file mode 100644
index 00000000..3a1e33e1
--- /dev/null
+++ b/.github/workflows/doc.yml
@@ -0,0 +1,38 @@
+name: Documentation
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+jobs:
+ docs:
+ runs-on: ubuntu-latest
+ env:
+ FORCE_COLOR: true
+ UV_SYSTEM_PYTHON: 1
+ steps:
+      - uses: actions/checkout@v6  # same major version as the other workflows
+
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version-file: "pyproject.toml"
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+
+ - name: Install dependencies
+ run: |
+ uv pip install --group=docs --editable .
+
+ - name: Build documentation
+ run: |
+ make -C docs clean
+ make -C docs html
+
+ - name: Upload documentation artifacts
+ uses: actions/upload-artifact@v6
+ with:
+ name: docs-artifact
+ path: docs/build/html
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 00000000..1e756904
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,119 @@
+name: PyTest
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+env:
+ FORCE_COLOR: 1
+ UV_SYSTEM_PYTHON: 1
+
+jobs:
+ test-core:
+ strategy:
+ fail-fast: false
+ matrix:
+ platform: [ubuntu-latest, macos-latest, windows-latest]
+ python-version: ["3.10", "3.12", "3.14"]
+ runs-on: ${{ matrix.platform }}
+ steps:
+ - uses: actions/checkout@v6
+
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+
+ - name: Install dependencies
+ run: |
+ uv pip install --group=test . "scikit-learn<1.8.0"
+
+      - name: Test with pytest
+        run: pytest --verbose  # direct call: "uv run" would sync a separate project env
+
+ # Test against different dependency versions
+ test-dependency-combinations:
+ strategy:
+ fail-fast: false
+ matrix:
+ platform: [ubuntu-latest, windows-latest]
+ deps:
+ # Minimal supported versions and increasing gradually
+ - python-version: "3.10"
+ scipy: "scipy==1.8.0"
+ numpy: "numpy==1.22.4"
+ pandas: "pandas==2.1.1"
+ statsmodels: "statsmodels==0.14.1"
+ scikit-learn: "scikit-learn==1.2.2"
+ label: "really-old-versions"
+
+ - python-version: "3.11"
+ scipy: "scipy==1.9.3"
+ numpy: "numpy==1.25.2"
+ pandas: "pandas==2.2.0"
+ statsmodels: "statsmodels==0.14.3"
+ scikit-learn: "scikit-learn==1.4.2"
+ label: "old-versions"
+
+ - python-version: "3.12"
+ scipy: "scipy==1.14.1"
+ numpy: "numpy==2.1.3"
+ pandas: "pandas==2.3.3"
+ statsmodels: "statsmodels==0.14.4"
+ scikit-learn: "scikit-learn==1.5.2"
+ label: "mid-versions"
+
+ - python-version: "3.13"
+ scipy: "scipy==1.17.0"
+ numpy: "numpy==2.4.0"
+ pandas: "pandas==3.0.0"
+ statsmodels: "statsmodels==0.14.6"
+ scikit-learn: "scikit-learn==1.7.2"
+ label: "recent-versions"
+
+ - python-version: "3.14"
+ scipy: "scipy"
+ numpy: "numpy"
+ pandas: "pandas"
+ statsmodels: "statsmodels"
+ scikit-learn: "scikit-learn==1.7.2"
+ label: "latest-versions"
+
+ runs-on: ${{ matrix.platform }}
+ steps:
+ - uses: actions/checkout@v6
+
+      - name: Set up Python ${{ matrix.deps.python-version }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.deps.python-version }}
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+
+ - name: Install numpy (${{ matrix.deps.label }})
+ run: uv pip install "${{ matrix.deps.numpy }}"
+
+ - name: Install dependencies (${{ matrix.deps.label }})
+ run: >-
+ uv pip install --group=test .
+ "${{ matrix.deps.scipy }}"
+ "${{ matrix.deps.pandas }}"
+ "${{ matrix.deps.statsmodels }}"
+ "${{ matrix.deps.scikit-learn }}"
+
+ - name: Display installed versions
+ run: |
+ python -c "import scipy; print(f'scipy: {scipy.__version__}')"
+ python -c "import numpy; print(f'numpy: {numpy.__version__}')"
+ python -c "import pandas; print(f'pandas: {pandas.__version__}')"
+ python -c "import statsmodels; print(f'statsmodels: {statsmodels.__version__}')"
+ python -c "import sklearn; print(f'scikit-learn: {sklearn.__version__}')"
+
+ - name: Test with pytest
+ run: pytest --verbose
diff --git a/.github/workflows/pytest_prerelease.yml b/.github/workflows/pytest_prerelease.yml
new file mode 100644
index 00000000..b1093f6a
--- /dev/null
+++ b/.github/workflows/pytest_prerelease.yml
@@ -0,0 +1,69 @@
+name: PyTest Pre-Release
+
+# Weekly + manual only: the job's `if:` skips every other event anyway.
+on:
+  schedule:
+    - cron: '0 3 * * SUN'
+  workflow_dispatch:
+
+env:
+ FORCE_COLOR: 1
+ UV_SYSTEM_PYTHON: 1
+
+jobs:
+ test-prerelease:
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+ strategy:
+ fail-fast: false
+ matrix:
+ platform: [ubuntu-latest, windows-latest]
+ python-version: ["3.10", "3.12", "3.14"]
+ runs-on: ${{ matrix.platform }}
+ continue-on-error: true
+ steps:
+ - uses: actions/checkout@v6
+
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+
+ - name: Install dependencies
+ run: |
+ uv pip install --group=test .
+
+ - name: Install scipy prerelease
+ run: |
+ uv pip uninstall scipy
+ uv pip install -U --pre scipy
+
+ - name: Install numpy prerelease
+ run: |
+ uv pip uninstall numpy
+ uv pip install -U --pre numpy
+
+ - name: Install pandas prerelease
+ run: |
+ uv pip uninstall pandas
+ uv pip install -U --pre pandas
+
+ - name: Install statsmodels prerelease
+ run: |
+ uv pip uninstall statsmodels
+ uv pip install -U --pre statsmodels
+
+ - name: Install scikit-learn prerelease
+ run: |
+ uv pip uninstall scikit-learn
+ uv pip install -U --pre scikit-learn
+
+ - name: Install seaborn prerelease
+ run: |
+ uv pip uninstall seaborn
+ uv pip install -U --pre seaborn
+
+      - name: Test with pytest
+        run: pytest --verbose  # direct call so the pre-release packages installed above are used
diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml
deleted file mode 100644
index eba3c3ec..00000000
--- a/.github/workflows/python_tests.yml
+++ /dev/null
@@ -1,58 +0,0 @@
-name: Python tests
-
-on:
- push:
- branches: [main]
- pull_request:
- branches: [main]
-
-jobs:
- build:
- strategy:
- fail-fast: false
- matrix:
- platform: [ubuntu-latest, macos-latest, windows-latest]
- python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
-
- runs-on: ${{ matrix.platform }}
-
- env:
- FORCE_COLOR: true
-
- steps:
- - uses: actions/checkout@v6
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v6
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install .[test] "scikit-learn<1.8.0"
-
- - name: Test with pytest
- run: |
- pytest --cov --cov-report=xml --verbose
-
- - name: Build docs
- if: ${{ matrix.platform == 'ubuntu-latest' && matrix.python-version == 3.10 }}
- run: |
- pip install .[docs]
- make -C docs clean
- make -C docs html
-
- - name: Upload doc build artifacts
- if: ${{ matrix.platform == 'ubuntu-latest' && matrix.python-version == 3.10 }}
- uses: actions/upload-artifact@v4
- with:
- name: docs-artifact
- path: docs/build/html
-
- - name: Upload coverage report
- if: ${{ matrix.platform == 'ubuntu-latest' && matrix.python-version == 3.10 }}
- uses: codecov/codecov-action@v4
- with:
- token: c6ed6ca6-a040-4f23-9ebf-8c474c998097
- file: ./coverage.xml
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
index 7823a768..bb28efa5 100644
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -4,10 +4,9 @@ jobs:
ruff:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
+ - uses: astral-sh/ruff-action@v3
- name: "Linting"
- uses: astral-sh/ruff-action@v3
+ run: ruff check
- name: "Formatting"
- uses: astral-sh/ruff-action@v3
- with:
- args: "format --check"
+ run: ruff format --check
diff --git a/README.rst b/README.rst
index 5a246f4f..375bd9b8 100644
--- a/README.rst
+++ b/README.rst
@@ -76,22 +76,23 @@ Installation
Dependencies
------------
-The main dependencies of Pingouin are :
+The main dependencies of Pingouin are:
-* `NumPy `_
-* `SciPy `_
-* `Pandas `_
+* `NumPy `_ >= 1.22.4
+* `SciPy `_ >= 1.8.0
+* `Pandas `_ >= 2.1.1
* `Pandas-flavor `_
-* `Statsmodels `_
+* `Statsmodels `_ >= 0.14.1
* `Matplotlib `_
* `Seaborn `_
+* `Scikit-learn `_ >= 1.2.2
+* `Tabulate `_
-In addition, some functions require :
+Some functions additionally require:
-* `Scikit-learn `_
* `Mpmath `_
-Pingouin is a Python 3 package and is currently tested for Python 3.8-3.11.
+Pingouin is a Python 3 package and is currently tested for Python 3.10+.
User installation
-----------------
@@ -122,10 +123,18 @@ To build and install from source, clone this repository or download the source a
.. code-block:: shell
cd pingouin
- python -m build # optional, build a wheel and sdist
- pip install . # install the package
- pip install --editable . # or editable install
- pytest # test the package
+
+ # optional, build a wheel and sdist
+ python -m build
+
+ # install the package
+ pip install .
+
+ # or editable install with dev dependencies
+ pip install --group test --group docs --editable .
+
+ # test the package
+ pytest
Quick start
============
diff --git a/docs/conf.py b/docs/conf.py
index 7868ed58..2b9bfca9 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -7,11 +7,20 @@
# -- Path setup --------------------------------------------------------------
+import inspect
import os
import sys
import time
+from pathlib import Path
+
import pingouin
+# Configure for source links
+GITHUB_USER = "raphaelvallat"
+GITHUB_REPO = "pingouin"
+GITHUB_BRANCH = "main"
+REPO_ROOT = Path(__file__).resolve().parents[1]
+
sys.path.insert(0, os.path.abspath("sphinxext"))
@@ -36,7 +45,7 @@
extensions = [
"sphinx.ext.mathjax",
"sphinx.ext.doctest",
- "sphinx.ext.viewcode",
+ "sphinx.ext.linkcode",
"sphinx.ext.githubpages",
"sphinx.ext.autosummary",
"sphinx.ext.autodoc",
@@ -108,7 +117,7 @@
"icon": "fa-brands fa-github",
},
],
- "use_edit_page_button": True,
+ "use_edit_page_button": False,
"pygments_light_style": "vs",
"pygments_dark_style": "monokai",
}
@@ -122,12 +131,50 @@
"index": [],
}
-html_context = {
- "github_user": "raphaelvallat",
- "github_repo": "pingouin",
- "github_version": "main",
- "doc_path": "docs",
-}
+# -- Linkcode ------------------------------------------------
+
+
+def linkcode_resolve(domain, info):
+    """
+    Resolve source code links to GitHub for Python objects.
+
+    Called by ``sphinx.ext.linkcode`` for each documented object. Returns a
+    GitHub URL with a line range, or None (no link is rendered) when the
+    object's source cannot be located inside the repository checkout.
+    """
+    module = sys.modules.get(info.get("module") or "")
+    full_name = info.get("fullname")
+    if domain != "py" or module is None or not full_name:
+        return None
+
+    # Resolve the object
+    obj = module
+    for part in full_name.split("."):
+        try:
+            obj = inspect.getattr_static(obj, part)
+        except AttributeError:
+            return None
+
+    # Descriptors fetched statically (property, staticmethod, classmethod)
+    # carry no source of their own; look at the function they wrap.
+    obj = getattr(obj, "fget", None) or getattr(obj, "__func__", obj)
+    try:
+        # Unwrap decorators (important for @wraps, dataclasses, etc.)
+        obj = inspect.unwrap(obj)
+        source_file = inspect.getsourcefile(obj) or inspect.getfile(obj)
+        source_lines, start_line = inspect.getsourcelines(obj)
+        # ValueError here means pingouin was imported from outside the repo
+        # (e.g. a non-editable install), so there is no file to link to.
+        relative_path = Path(source_file).resolve().relative_to(REPO_ROOT)
+    except (TypeError, OSError, ValueError):
+        return None
+
+    end_line = start_line + len(source_lines) - 1
+    return (
+        f"https://github.com/{GITHUB_USER}/{GITHUB_REPO}"
+        f"/blob/{GITHUB_BRANCH}/{relative_path.as_posix()}"
+        f"#L{start_line}-L{end_line}"
+    )
# -- Intersphinx ------------------------------------------------
diff --git a/docs/contributing.rst b/docs/contributing.rst
index 83081e4c..1b15772e 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -12,11 +12,13 @@ Code guidelines
*Before starting new code*, we highly recommend opening an issue on `GitHub `_ to discuss potential changes.
-* Please use standard `pep8 `_ and `flake8 `_ Python style guidelines. Pingouin uses `ruff `_ for code formatting. Before submitting a PR, please make sure to run the following command in the root folder of Pingouin:
+* Please follow `PEP 8 `_ Python style guidelines. Pingouin uses `Ruff `_ for linting and formatting. Before submitting a PR, please run the following commands from the root folder of Pingouin to sort imports and format code:
.. code-block:: bash
- $ ruff format --line-length=100
+ $ ruff check --select I --fix
+
+ $ ruff format
* Use `NumPy style `_ for docstrings. Follow existing examples for simplest guidance.
@@ -30,28 +32,56 @@ Code guidelines
.. code-block:: bash
- $ pytest --doctest-modules
+ $ pytest --verbose
+
+Setting up a development environment
+-------------------------------------
+
+Pingouin uses `uv `_ for fast dependency management. To set up a local development environment, first clone the repository and then install the package in editable mode with the test dependencies:
+
+.. code-block:: bash
+
+ $ git clone https://github.com/raphaelvallat/pingouin.git
+ $ cd pingouin
+ $ uv pip install --group=test --editable .
+
+To also install the development tools (Ruff), add the ``dev`` group:
+
+.. code-block:: bash
+
+ $ uv pip install --group=dev --group=test --editable .
+
+Continuous Integration
+-----------------------
+
+Pingouin uses `GitHub Actions `_ for continuous integration. The following workflows run automatically on every push and pull request to the ``main`` branch:
+
+* **PyTest** — runs the test suite on Ubuntu, macOS and Windows across Python 3.10, 3.12 and 3.14, as well as against a range of historical dependency versions (from minimum supported to latest).
+* **Coverage** — measures test coverage and uploads the report to `Codecov `_.
+* **Ruff** — checks code style and formatting.
+* **Documentation** — builds the Sphinx documentation and uploads the result as a downloadable artifact.
+
+A separate **PyTest Pre-Release** workflow runs weekly against pre-release versions of all major dependencies to catch compatibility issues early.
Checking and building documentation
------------------------------------
+------------------------------------
-Pingouin's documentation (including docstring in code) uses ReStructuredText format,
+Pingouin's documentation (including docstrings in code) uses ReStructuredText format,
see `Sphinx documentation `_ to learn more about editing them. The code
follows the `NumPy docstring standard `_.
-
All changes to the codebase must be properly documented. To ensure that documentation is rendered correctly, the best bet is to follow the existing examples for function docstrings.
Build locally
^^^^^^^^^^^^^
-If you want to test the documentation locally, you will need to install additional dependencies. They can be installed with the docs extra:
+If you want to test the documentation locally, install the package with the ``docs`` dependency group:
.. code-block:: bash
- $ pip install --upgrade pingouin[docs]
+ $ uv pip install --group=docs --editable .
-and then within the ``pingouin/docs`` directory do:
+Then, within the ``pingouin/docs`` directory, run:
.. code-block:: bash
@@ -68,28 +98,13 @@ and then come back after executing the ``html`` recipe.
Inspect on GitHub
^^^^^^^^^^^^^^^^^
-Thanks to the `GitHub Actions `_ continuous integration service,
-the documentation is also built on GitHub servers after every commit you make as part of a Pull Request.
-To inspect these build artifacts, follow these steps:
+The documentation is also built automatically on GitHub after every commit you make as part of a Pull Request.
+To inspect the rendered documentation, follow these steps:
* Click on the "Show all checks" dropdown menu at the end of the Pull Request user interface
-
-.. figure:: /pictures/github_checks.png
- :align: center
- :alt: GitHub checks dropdown menu
-
- Screenshot of the GitHub checks dropdown menu
-
-* Click on the check that starts with ``Python tests / build (ubuntu-latest, 3.9)``
-* Now in the top right corner of the opening window, you will see a small dropdown menu called "Artifacts"
-
-.. figure:: /pictures/github_build_artifacts.png
- :align: center
- :alt: GitHub build artifacts dropdown menu
-
- Screenshot of the GitHub build artifacts dropdown menu
-
-* Click on that drowndown menu and download the ``docs-artifact`` zip file
+* Click on the check named **Documentation / docs**
+* In the top-right corner of the opening window, click the **Artifacts** dropdown menu
+* Download the ``docs-artifact`` zip file
You can then unpack that zip file on your computer, enter the directory, and open the ``index.html`` file that you will find there.
That should open the Pingouin documentation based on the changes from your Pull Request.
diff --git a/docs/faq.rst b/docs/faq.rst
index 093480ee..389d0afe 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -15,7 +15,7 @@ To install Pingouin, open a command prompt (or Terminal or Anaconda Prompt) and
.. code-block:: bash
- pip install pingouin --upgrade
+ pip install --upgrade pingouin
You should now be able to use Pingouin. To try it, you need to open an interactive Python console (either `IPython `_ or `Jupyter `_). For example, type the following command in a command prompt:
diff --git a/docs/index.rst b/docs/index.rst
index 63df0995..04057469 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -63,21 +63,22 @@ the :code:`ttest` function of Pingouin returns the T-value, the p-value, the deg
Installation
============
-Pingouin is a Python 3 package and is currently tested for Python 3.8-3.11.
+Pingouin is a Python 3 package and is currently tested for Python 3.10+.
-The main dependencies of Pingouin are :
+The main dependencies of Pingouin are:
-* `NumPy `_
-* `SciPy `_
-* `Pandas `_
+* `NumPy `_ >= 1.22.4
+* `SciPy `_ >= 1.8.0
+* `Pandas `_ >= 2.1.1
* `Pandas-flavor `_
-* `Statsmodels `_
+* `Statsmodels `_ >= 0.14.1
* `Matplotlib `_
* `Seaborn `_
+* `Scikit-learn `_ >= 1.2.2
+* `Tabulate `_
-In addition, some functions require :
+Some functions additionally require:
-* `Scikit-learn `_
* `Mpmath `_
Pingouin can be easily installed using pip
@@ -549,7 +550,7 @@ Several functions of Pingouin were inspired from R or Matlab toolboxes, includin
Functions
Guidelines
- FAQ
+ FAQ
Changelog
Contribute
Cite
\ No newline at end of file
diff --git a/docs/pictures/github_build_artifacts.png b/docs/pictures/github_build_artifacts.png
deleted file mode 100644
index 123cdcb1..00000000
Binary files a/docs/pictures/github_build_artifacts.png and /dev/null differ
diff --git a/docs/pictures/github_checks.png b/docs/pictures/github_checks.png
deleted file mode 100644
index 154d0d0c..00000000
Binary files a/docs/pictures/github_checks.png and /dev/null differ
diff --git a/pyproject.toml b/pyproject.toml
index feef0a7c..1e472cba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
[build-system]
-requires = ["setuptools>=61.0", "wheel"]
+requires = ["setuptools>=80.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
@@ -29,13 +29,13 @@ dynamic = ["version"]
requires-python = ">=3.10"
dependencies = [
"matplotlib",
- "numpy",
- "pandas>=1.5",
+ "numpy>=1.22.4",
+ "pandas>=2.1.1",
"pandas_flavor",
- "scikit-learn>=1.2",
- "scipy",
+ "scikit-learn>=1.2.2",
+ "scipy>=1.8.0",
"seaborn",
- "statsmodels",
+ "statsmodels>=0.14.1",
"tabulate",
]
@@ -43,6 +43,11 @@ dependencies = [
extras = [
"mpmath",
]
+
+[dependency-groups]
+dev = [
+ "ruff>=0.15.0",
+]
test = [
"pytest>=6",
"pytest-cov",
@@ -59,9 +64,12 @@ docs = [
"sphinx-notfound-page",
]
+
[project.urls]
Homepage = "https://pingouin-stats.org/index.html"
Downloads = "https://github.com/raphaelvallat/pingouin/"
+Issues = "https://github.com/raphaelvallat/pingouin/issues"
+Changelog = "https://pingouin-stats.org/build/html/changelog.html"
[tool.setuptools]
py-modules = ["pingouin"]
@@ -115,11 +123,30 @@ exclude = [
"notebooks", # Skip jupyter notebook examples
]
+[tool.ruff.format]
+docstring-code-format = true
+
+[tool.ruff.lint.flake8-import-conventions.aliases]
+"matplotlib.pyplot" = "plt"
+numpy = "np"
+"numpy.typing" = "npt"
+pandas = "pd"
+seaborn = "sns"
+scipy = "sp"
+
[tool.ruff.lint]
select = [
"E4", # Subset of pycodestyle rules
"E7", # Subset of pycodestyle rules
"E9", # Subset of pycodestyle rules
"F", # All Pyflakes rules
- "NPY201",
+ "NPY", # numpy
+ "W",
+ "I",
+ #"PD", # pandas imports
+    #"UP"  # pyupgrade (modernize syntax for newer Python versions)
]
+
+ignore = [
+    "NPY002",  # exclude numpy-legacy-random (np.random.* calls)
+]
diff --git a/src/pingouin/bayesian.py b/src/pingouin/bayesian.py
index 9635012a..6b775134 100644
--- a/src/pingouin/bayesian.py
+++ b/src/pingouin/bayesian.py
@@ -1,9 +1,10 @@
"""Bayesian functions."""
import warnings
+from math import exp, lgamma, log, pi
+
import numpy as np
from scipy.integrate import quad
-from math import pi, exp, log, lgamma
__all__ = ["bayesfactor_ttest", "bayesfactor_pearson", "bayesfactor_binom"]
@@ -237,18 +238,18 @@ def bayesfactor_pearson(r, n, alternative="two-sided", method="ly", kappa=1.0):
Compare to Wetzels method:
- >>> bf = bayesfactor_pearson(r, n, method='wetzels')
+ >>> bf = bayesfactor_pearson(r, n, method="wetzels")
>>> print("Bayes Factor: %.3f" % bf)
Bayes Factor: 8.221
One-sided test
- >>> bf10pos = bayesfactor_pearson(r, n, alternative='greater')
- >>> bf10neg = bayesfactor_pearson(r, n, alternative='less')
+ >>> bf10pos = bayesfactor_pearson(r, n, alternative="greater")
+ >>> bf10neg = bayesfactor_pearson(r, n, alternative="less")
>>> print("BF-pos: %.3f, BF-neg: %.3f" % (bf10pos, bf10neg))
BF-pos: 21.185, BF-neg: 0.082
"""
- from scipy.special import gamma, betaln, hyp2f1
+ from scipy.special import betaln, gamma, hyp2f1
assert method.lower() in ["ly", "wetzels"], "Method not recognized."
assert alternative in [
diff --git a/src/pingouin/circular.py b/src/pingouin/circular.py
index ac301610..ce10ac07 100644
--- a/src/pingouin/circular.py
+++ b/src/pingouin/circular.py
@@ -108,7 +108,7 @@ def convert_angles(angles, low=0, high=360, positive=False):
>>> import numpy as np
>>> rad = [0.1, 3.14, 5, 2, 6]
- >>> convert_angles(rad, low=0, high=2*np.pi)
+ >>> convert_angles(rad, low=0, high=2 * np.pi)
array([ 0.1 , 3.14 , -1.28318531, 2. , -0.28318531])
4. Convert degrees from a 2-D array
@@ -168,8 +168,8 @@ def circ_axial(angles, n):
>>> import numpy as np
>>> from pingouin import read_dataset
>>> from pingouin.circular import circ_axial
- >>> df = read_dataset('circular')
- >>> angles = df['Orientation'].to_numpy()
+ >>> df = read_dataset("circular")
+ >>> angles = df["Orientation"].to_numpy()
>>> angles = circ_axial(np.deg2rad(angles), 2)
"""
angles = np.asarray(angles)
@@ -258,7 +258,7 @@ def circ_mean(angles, w=None, axis=0):
>>> from scipy.stats import circmean
>>> import numpy as np
- >>> round(circmean(angles, low=0, high=2*np.pi), 4)
+ >>> round(circmean(angles, low=0, high=2 * np.pi), 4)
1.013
2. Using a 2-D array of angles in degrees
@@ -590,7 +590,7 @@ def circ_corrcl(x, y):
>>> print(round(r, 3), round(pval, 3))
0.109 0.971
"""
- from scipy.stats import pearsonr, chi2
+ from scipy.stats import chi2, pearsonr
x = np.asarray(x)
y = np.asarray(y)
@@ -662,7 +662,7 @@ def circ_rayleigh(angles, w=None, d=None):
2. Specifying w and d
- >>> z, pval = circ_rayleigh(x, w=[.1, .2, .3, .4, .5], d=0.2)
+ >>> z, pval = circ_rayleigh(x, w=[0.1, 0.2, 0.3, 0.4, 0.5], d=0.2)
>>> print(round(z, 3), round(pval, 6))
0.278 0.806997
"""
@@ -741,7 +741,7 @@ def circ_vtest(angles, dir=0.0, w=None, d=None):
2. Specifying w and d
- >>> v, pval = circ_vtest(x, dir=0.5, w=[.1, .2, .3, .4, .5], d=0.2)
+ >>> v, pval = circ_vtest(x, dir=0.5, w=[0.1, 0.2, 0.3, 0.4, 0.5], d=0.2)
>>> print(round(v, 3), round(pval, 5))
0.637 0.23086
"""
diff --git a/src/pingouin/contingency.py b/src/pingouin/contingency.py
index db636e1a..428fa7f9 100644
--- a/src/pingouin/contingency.py
+++ b/src/pingouin/contingency.py
@@ -1,13 +1,13 @@
# Date: May 2019
import warnings
+
import numpy as np
import pandas as pd
-
+from scipy.stats import binom, power_divergence
+from scipy.stats import chi2 as sp_chi2
from scipy.stats.contingency import expected_freq
-from scipy.stats import power_divergence, binom, chi2 as sp_chi2
-
-from pingouin import power_chi2, _postprocess_dataframe
+from pingouin import _postprocess_dataframe, power_chi2
__all__ = ["chi2_independence", "chi2_mcnemar", "dichotomous_crosstab"]
@@ -292,8 +292,8 @@ def chi2_mcnemar(data, x, y, correction=True):
Examples
--------
>>> import pingouin as pg
- >>> data = pg.read_dataset('chi2_mcnemar')
- >>> observed, stats = pg.chi2_mcnemar(data, 'treatment_X', 'treatment_Y')
+ >>> data = pg.read_dataset("chi2_mcnemar")
+ >>> observed, stats = pg.chi2_mcnemar(data, "treatment_X", "treatment_Y")
>>> observed
treatment_Y 0 1
treatment_X
diff --git a/src/pingouin/correlation.py b/src/pingouin/correlation.py
index c40b9135..481cd2d8 100644
--- a/src/pingouin/correlation.py
+++ b/src/pingouin/correlation.py
@@ -1,18 +1,18 @@
# Author: Raphael Vallat
import warnings
+
import numpy as np
import pandas as pd
import pandas_flavor as pf
from scipy.spatial.distance import pdist, squareform
-from scipy.stats import pearsonr, spearmanr, kendalltau
+from scipy.stats import kendalltau, pearsonr, spearmanr
+from pingouin.bayesian import bayesfactor_pearson
from pingouin.config import options
-from pingouin.power import power_corr
-from pingouin.multicomp import multicomp
from pingouin.effsize import compute_esci
-from pingouin.utils import remove_na, _perm_pval, _postprocess_dataframe
-from pingouin.bayesian import bayesfactor_pearson
-
+from pingouin.multicomp import multicomp
+from pingouin.power import power_corr
+from pingouin.utils import _perm_pval, _postprocess_dataframe, remove_na
__all__ = ["corr", "partial_corr", "pcorr", "rcorr", "rm_corr", "distance_corr"]
@@ -517,7 +517,7 @@ def corr(x, y, alternative="two-sided", method="pearson", **kwargs):
>>> import pingouin as pg
>>> # Generate random correlated samples
>>> np.random.seed(123)
- >>> mean, cov = [4, 6], [(1, .5), (.5, 1)]
+ >>> mean, cov = [4, 6], [(1, 0.5), (0.5, 1)]
>>> x, y = np.random.multivariate_normal(mean, cov, 30).T
>>> # Compute Pearson correlation
>>> pg.corr(x, y).round(3)
@@ -545,29 +545,29 @@ def corr(x, y, alternative="two-sided", method="pearson", **kwargs):
5. Percentage bend correlation (robust)
- >>> pg.corr(x, y, method='percbend').round(3)
+ >>> pg.corr(x, y, method="percbend").round(3)
n r CI95 p_val power
percbend 30 0.389 [0.03, 0.66] 0.034 0.581
6. Shepherd's pi correlation (robust)
- >>> pg.corr(x, y, method='shepherd').round(3)
+ >>> pg.corr(x, y, method="shepherd").round(3)
n outliers r CI95 p_val power
shepherd 30 2 0.437 [0.08, 0.7] 0.02 0.662
7. Skipped spearman correlation (robust)
- >>> pg.corr(x, y, method='skipped').round(3)
+ >>> pg.corr(x, y, method="skipped").round(3)
n outliers r CI95 p_val power
skipped 30 2 0.437 [0.08, 0.7] 0.02 0.662
8. One-tailed Pearson correlation
- >>> pg.corr(x, y, alternative="greater", method='pearson').round(3)
+ >>> pg.corr(x, y, alternative="greater", method="pearson").round(3)
n r CI95 p_val BF10 power
pearson 30 0.147 [-0.17, 1.0] 0.22 0.467 0.194
- >>> pg.corr(x, y, alternative="less", method='pearson').round(3)
+ >>> pg.corr(x, y, alternative="less", method="pearson").round(3)
n r CI95 p_val BF10 power
pearson 30 0.147 [-1.0, 0.43] 0.78 0.137 0.008
@@ -580,8 +580,8 @@ def corr(x, y, alternative="two-sided", method="pearson", **kwargs):
10. Using columns of a pandas dataframe
>>> import pandas as pd
- >>> data = pd.DataFrame({'x': x, 'y': y})
- >>> pg.corr(data['x'], data['y']).round(3)
+ >>> data = pd.DataFrame({"x": x, "y": y})
+ >>> pg.corr(data["x"], data["y"]).round(3)
n r CI95 p_val BF10 power
pearson 30 0.147 [-0.23, 0.48] 0.439 0.302 0.121
"""
@@ -776,34 +776,47 @@ def partial_corr(
1. Partial correlation with one covariate
>>> import pingouin as pg
- >>> df = pg.read_dataset('partial_corr')
- >>> pg.partial_corr(data=df, x='x', y='y', covar='cv1').round(3)
+ >>> df = pg.read_dataset("partial_corr")
+ >>> pg.partial_corr(data=df, x="x", y="y", covar="cv1").round(3)
n r CI95 p_val
pearson 30 0.568 [0.25, 0.77] 0.001
2. Spearman partial correlation with several covariates
>>> # Partial correlation of x and y controlling for cv1, cv2 and cv3
- >>> pg.partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'],
- ... method='spearman').round(3)
+ >>> pg.partial_corr(
+ ... data=df, x="x", y="y", covar=["cv1", "cv2", "cv3"], method="spearman"
+ ... ).round(3)
n r CI95 p_val
spearman 30 0.521 [0.18, 0.75] 0.005
3. Same but one-sided test
- >>> pg.partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'],
- ... alternative="greater", method='spearman').round(3)
+ >>> pg.partial_corr(
+ ... data=df,
+ ... x="x",
+ ... y="y",
+ ... covar=["cv1", "cv2", "cv3"],
+ ... alternative="greater",
+ ... method="spearman",
+ ... ).round(3)
n r CI95 p_val
spearman 30 0.521 [0.24, 1.0] 0.003
- >>> pg.partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'],
- ... alternative="less", method='spearman').round(3)
+ >>> pg.partial_corr(
+ ... data=df,
+ ... x="x",
+ ... y="y",
+ ... covar=["cv1", "cv2", "cv3"],
+ ... alternative="less",
+ ... method="spearman",
+ ... ).round(3)
n r CI95 p_val
spearman 30 0.521 [-1.0, 0.72] 0.997
4. As a pandas method
- >>> df.partial_corr(x='x', y='y', covar=['cv1'], method='spearman').round(3)
+ >>> df.partial_corr(x="x", y="y", covar=["cv1"], method="spearman").round(3)
n r CI95 p_val
spearman 30 0.578 [0.27, 0.78] 0.001
@@ -819,7 +832,7 @@ def partial_corr(
6. Semi-partial correlation on x
- >>> pg.partial_corr(data=df, x='x', y='y', x_covar=['cv1', 'cv2', 'cv3']).round(3)
+ >>> pg.partial_corr(data=df, x="x", y="y", x_covar=["cv1", "cv2", "cv3"]).round(3)
n r CI95 p_val
pearson 30 0.463 [0.1, 0.72] 0.015
"""
@@ -940,7 +953,7 @@ def pcorr(self):
Examples
--------
>>> import pingouin as pg
- >>> data = pg.read_dataset('mediation')
+ >>> data = pg.read_dataset("mediation")
>>> data.pcorr().round(3)
X M Y Mbin Ybin W1 W2
X 1.000 0.359 0.074 -0.019 -0.147 -0.148 -0.067
@@ -953,7 +966,7 @@ def pcorr(self):
On a subset of columns
- >>> data[['X', 'Y', 'M']].pcorr()
+ >>> data[["X", "Y", "M"]].pcorr()
X Y M
X 1.000000 0.036649 0.412804
Y 0.036649 1.000000 0.540140
@@ -1026,7 +1039,7 @@ def rcorr(
>>> import pandas as pd
>>> import pingouin as pg
>>> # Load an example dataset of personality dimensions
- >>> df = pg.read_dataset('pairwise_corr').iloc[:, 1:]
+ >>> df = pg.read_dataset("pairwise_corr").iloc[:, 1:]
>>> # Add some missing values
>>> df.iloc[[2, 5, 20], 2] = np.nan
>>> df.iloc[[1, 4, 10], 3] = np.nan
@@ -1047,7 +1060,7 @@ def rcorr(
Agreeableness -0.134 0.054 0.161 -
>>> # Spearman correlation and Holm adjustement for multiple comparisons
- >>> df.iloc[:, 0:4].rcorr(method='spearman', padjust='holm')
+ >>> df.iloc[:, 0:4].rcorr(method="spearman", padjust="holm")
Neuroticism Extraversion Openness Agreeableness
Neuroticism - *** **
Extraversion -0.325 - ***
@@ -1055,9 +1068,8 @@ def rcorr(
Agreeableness -0.15 0.06 0.173 -
>>> # Compare with the pg.pairwise_corr function
- >>> pairwise = df.iloc[:, 0:4].pairwise_corr(method='spearman',
- ... padjust='holm')
- >>> pairwise[['X', 'Y', 'r', 'p_corr']].round(3) # Do not show all columns
+ >>> pairwise = df.iloc[:, 0:4].pairwise_corr(method="spearman", padjust="holm")
+ >>> pairwise[["X", "Y", "r", "p_corr"]].round(3) # Do not show all columns
X Y r p_corr
0 Neuroticism Extraversion -0.325 0.000
1 Neuroticism Openness -0.027 0.543
@@ -1074,7 +1086,7 @@ def rcorr(
Agreeableness -0.134 0.0539 -
>>> # With the sample size on the upper triangle instead of the p-values
- >>> df.iloc[:, [0, 1, 2]].rcorr(upper='n')
+ >>> df.iloc[:, [0, 1, 2]].rcorr(upper="n")
Neuroticism Extraversion Openness
Neuroticism - 500 497
Extraversion -0.35 - 497
@@ -1182,8 +1194,8 @@ def rm_corr(data=None, x=None, y=None, subject=None):
Examples
--------
>>> import pingouin as pg
- >>> df = pg.read_dataset('rm_corr')
- >>> pg.rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
+ >>> df = pg.read_dataset("rm_corr")
+ >>> pg.rm_corr(data=df, x="pH", y="PacO2", subject="Subject")
r dof pval CI95 power
rm_corr -0.50677 38 0.000847 [-0.71, -0.23] 0.929579
@@ -1192,8 +1204,8 @@ def rm_corr(data=None, x=None, y=None, subject=None):
.. plot::
>>> import pingouin as pg
- >>> df = pg.read_dataset('rm_corr')
- >>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
+ >>> df = pg.read_dataset("rm_corr")
+ >>> g = pg.plot_rm_corr(data=df, x="pH", y="PacO2", subject="Subject")
"""
from pingouin import ancova, power_corr
diff --git a/src/pingouin/distribution.py b/src/pingouin/distribution.py
index 32209e33..55d1e44d 100644
--- a/src/pingouin/distribution.py
+++ b/src/pingouin/distribution.py
@@ -1,11 +1,12 @@
import warnings
-import scipy.stats
+from collections import namedtuple
+
import numpy as np
import pandas as pd
-from collections import namedtuple
-from pingouin.utils import _flatten_list as _fl
-from pingouin.utils import remove_na, _postprocess_dataframe
+import scipy.stats
+from pingouin.utils import _flatten_list as _fl
+from pingouin.utils import _postprocess_dataframe, remove_na
__all__ = ["gzscore", "normality", "homoscedasticity", "anderson", "epsilon", "sphericity"]
@@ -172,9 +173,9 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=0.05):
2. Omnibus test on a wide-format dataframe with missing values
- >>> data = pg.read_dataset('mediation')
- >>> data.loc[1, 'X'] = np.nan
- >>> pg.normality(data, method='normaltest').round(3)
+ >>> data = pg.read_dataset("mediation")
+ >>> data.loc[1, "X"] = np.nan
+ >>> pg.normality(data, method="normaltest").round(3)
W pval normal
X 1.792 0.408 True
M 0.492 0.782 True
@@ -186,14 +187,14 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=0.05):
3. Pandas Series
- >>> pg.normality(data['X'], method='normaltest')
+ >>> pg.normality(data["X"], method="normaltest")
W pval normal
X 1.791839 0.408232 True
4. Long-format dataframe
- >>> data = pg.read_dataset('rm_anova2')
- >>> pg.normality(data, dv='Performance', group='Time')
+ >>> data = pg.read_dataset("rm_anova2")
+ >>> pg.normality(data, dv="Performance", group="Time")
W pval normal
Time
Pre 0.967718 0.478773 True
@@ -201,7 +202,7 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=0.05):
5. Same but using the Jarque-Bera test
- >>> pg.normality(data, dv='Performance', group='Time', method="jarque_bera")
+ >>> pg.normality(data, dv="Performance", group="Time", method="jarque_bera")
W pval normal
Time
Pre 0.304021 0.858979 True
@@ -346,14 +347,14 @@ def homoscedasticity(data, dv=None, group=None, method="levene", alpha=0.05, **k
>>> import numpy as np
>>> import pingouin as pg
- >>> data = pg.read_dataset('mediation')
- >>> pg.homoscedasticity(data[['X', 'Y', 'M']])
+ >>> data = pg.read_dataset("mediation")
+ >>> pg.homoscedasticity(data[["X", "Y", "M"]])
W pval equal_var
levene 1.173518 0.310707 True
2. Same data but using a long-format dataframe
- >>> data_long = data[['X', 'Y', 'M']].melt()
+ >>> data_long = data[["X", "Y", "M"]].melt()
>>> pg.homoscedasticity(data_long, dv="value", group="variable")
W pval equal_var
levene 1.173518 0.310707 True
@@ -367,7 +368,7 @@ def homoscedasticity(data, dv=None, group=None, method="levene", alpha=0.05, **k
4. Bartlett test using a list of iterables
>>> data = [[4, 8, 9, 20, 14], np.array([5, 8, 15, 45, 12])]
- >>> pg.homoscedasticity(data, method="bartlett", alpha=.05)
+ >>> pg.homoscedasticity(data, method="bartlett", alpha=0.05)
T pval equal_var
bartlett 2.873569 0.090045 True
"""
@@ -629,18 +630,22 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"):
>>> import pandas as pd
>>> import pingouin as pg
- >>> data = pd.DataFrame({'A': [2.2, 3.1, 4.3, 4.1, 7.2],
- ... 'B': [1.1, 2.5, 4.1, 5.2, 6.4],
- ... 'C': [8.2, 4.5, 3.4, 6.2, 7.2]})
- >>> gg = pg.epsilon(data, correction='gg')
- >>> hf = pg.epsilon(data, correction='hf')
- >>> lb = pg.epsilon(data, correction='lb')
+ >>> data = pd.DataFrame(
+ ... {
+ ... "A": [2.2, 3.1, 4.3, 4.1, 7.2],
+ ... "B": [1.1, 2.5, 4.1, 5.2, 6.4],
+ ... "C": [8.2, 4.5, 3.4, 6.2, 7.2],
+ ... }
+ ... )
+ >>> gg = pg.epsilon(data, correction="gg")
+ >>> hf = pg.epsilon(data, correction="hf")
+ >>> lb = pg.epsilon(data, correction="lb")
>>> print("%.2f %.2f %.2f" % (lb, gg, hf))
0.50 0.56 0.62
Now using a long-format dataframe
- >>> data = pg.read_dataset('rm_anova2')
+ >>> data = pg.read_dataset("rm_anova2")
>>> data.head()
Subject Time Metric Performance
0 1 Pre Product 13
@@ -651,8 +656,7 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"):
Let's first calculate the epsilon of the *Time* within-subject factor
- >>> pg.epsilon(data, dv='Performance', subject='Subject',
- ... within='Time')
+ >>> pg.epsilon(data, dv="Performance", subject="Subject", within="Time")
1.0
Since *Time* has only two levels (Pre and Post), the sphericity assumption
@@ -660,8 +664,7 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"):
The *Metric* factor, however, has three levels:
- >>> round(pg.epsilon(data, dv='Performance', subject='Subject',
- ... within=['Metric']), 3)
+ >>> round(pg.epsilon(data, dv="Performance", subject="Subject", within=["Metric"]), 3)
0.969
The epsilon value is very close to 1, meaning that there is no major
@@ -670,15 +673,14 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"):
Now, let's calculate the epsilon for the interaction between the two
repeated measures factor:
- >>> round(pg.epsilon(data, dv='Performance', subject='Subject',
- ... within=['Time', 'Metric']), 3)
+ >>> round(pg.epsilon(data, dv="Performance", subject="Subject", within=["Time", "Metric"]), 3)
0.727
Alternatively, we could use a wide-format dataframe with two column
levels:
>>> # Pivot from long-format to wide-format
- >>> piv = data.pivot(index='Subject', columns=['Time', 'Metric'], values='Performance')
+ >>> piv = data.pivot(index="Subject", columns=["Time", "Metric"], values="Performance")
>>> piv.head()
Time Pre Post
Metric Product Client Action Product Client Action
@@ -872,21 +874,25 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha
>>> import pandas as pd
>>> import pingouin as pg
- >>> data = pd.DataFrame({'A': [2.2, 3.1, 4.3, 4.1, 7.2],
- ... 'B': [1.1, 2.5, 4.1, 5.2, 6.4],
- ... 'C': [8.2, 4.5, 3.4, 6.2, 7.2]})
+ >>> data = pd.DataFrame(
+ ... {
+ ... "A": [2.2, 3.1, 4.3, 4.1, 7.2],
+ ... "B": [1.1, 2.5, 4.1, 5.2, 6.4],
+ ... "C": [8.2, 4.5, 3.4, 6.2, 7.2],
+ ... }
+ ... )
>>> spher, W, chisq, dof, pval = pg.sphericity(data)
>>> print(spher, round(W, 3), round(chisq, 3), dof, round(pval, 3))
True 0.21 4.677 2 0.096
John, Nagao and Sugiura (JNS) test
- >>> round(pg.sphericity(data, method='jns')[-1], 3) # P-value only
+ >>> round(pg.sphericity(data, method="jns")[-1], 3) # P-value only
0.046
Now using a long-format dataframe
- >>> data = pg.read_dataset('rm_anova2')
+ >>> data = pg.read_dataset("rm_anova2")
>>> data.head()
Subject Time Metric Performance
0 1 Pre Product 13
@@ -897,8 +903,7 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha
Let's first test sphericity for the *Time* within-subject factor
- >>> pg.sphericity(data, dv='Performance', subject='Subject',
- ... within='Time')
+ >>> pg.sphericity(data, dv="Performance", subject="Subject", within="Time")
(True, nan, nan, 1, 1.0)
Since *Time* has only two levels (Pre and Post), the sphericity assumption
@@ -906,8 +911,7 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha
The *Metric* factor, however, has three levels:
- >>> round(pg.sphericity(data, dv='Performance', subject='Subject',
- ... within=['Metric'])[-1], 3)
+ >>> round(pg.sphericity(data, dv="Performance", subject="Subject", within=["Metric"])[-1], 3)
0.878
The p-value value is very large, and the test therefore indicates that
@@ -918,9 +922,9 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha
if at least one of the two within-subject factors has no more than two
levels.
- >>> spher, _, chisq, dof, pval = pg.sphericity(data, dv='Performance',
- ... subject='Subject',
- ... within=['Time', 'Metric'])
+ >>> spher, _, chisq, dof, pval = pg.sphericity(
+ ... data, dv="Performance", subject="Subject", within=["Time", "Metric"]
+ ... )
>>> print(spher, round(chisq, 3), dof, round(pval, 3))
True 3.763 2 0.152
@@ -931,7 +935,7 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha
levels:
>>> # Pivot from long-format to wide-format
- >>> piv = data.pivot(index='Subject', columns=['Time', 'Metric'], values='Performance')
+ >>> piv = data.pivot(index="Subject", columns=["Time", "Metric"], values="Performance")
>>> piv.head()
Time Pre Post
Metric Product Client Action Product Client Action
diff --git a/src/pingouin/effsize.py b/src/pingouin/effsize.py
index 1e26457e..317af5f2 100644
--- a/src/pingouin/effsize.py
+++ b/src/pingouin/effsize.py
@@ -1,8 +1,10 @@
# Author: Raphael Vallat
# Date: April 2018
import warnings
+
import numpy as np
from scipy.stats import pearsonr
+
from pingouin.utils import _check_eftype, remove_na
# from pingouin.distribution import homoscedasticity
@@ -124,15 +126,15 @@ def compute_esci(
>>> x = [3, 4, 6, 7, 5, 6, 7, 3, 5, 4, 2]
>>> y = [4, 6, 6, 7, 6, 5, 5, 2, 3, 4, 1]
>>> nx, ny = len(x), len(y)
- >>> stat = pg.compute_effsize(x, y, eftype='r')
- >>> ci = pg.compute_esci(stat=stat, nx=nx, ny=ny, eftype='r')
+ >>> stat = pg.compute_effsize(x, y, eftype="r")
+ >>> ci = pg.compute_esci(stat=stat, nx=nx, ny=ny, eftype="r")
>>> print(round(stat, 4), ci)
0.7468 [0.27 0.93]
2. Confidence interval of a Cohen d
- >>> stat = pg.compute_effsize(x, y, eftype='cohen')
- >>> ci = pg.compute_esci(stat, nx=nx, ny=ny, eftype='cohen', decimals=3)
+ >>> stat = pg.compute_effsize(x, y, eftype="cohen")
+ >>> ci = pg.compute_esci(stat, nx=nx, ny=ny, eftype="cohen", decimals=3)
>>> print(round(stat, 4), ci)
0.1538 [-0.737 1.045]
"""
@@ -288,7 +290,7 @@ def compute_bootci(
>>> x = rng.normal(loc=4, scale=2, size=100)
>>> y = rng.normal(loc=3, scale=1, size=100)
>>> stat = np.corrcoef(x, y)[0][1]
- >>> ci = pg.compute_bootci(x, y, func='pearson', paired=True, seed=42, decimals=4)
+ >>> ci = pg.compute_bootci(x, y, func="pearson", paired=True, seed=42, decimals=4)
>>> print(round(stat, 4), ci)
0.0945 [-0.098 0.2738]
@@ -296,15 +298,21 @@ def compute_bootci(
>>> from scipy.stats import bootstrap
>>> bt_scipy = bootstrap(
- ... data=(x, y), statistic=lambda x, y: np.corrcoef(x, y)[0][1],
- ... method="basic", vectorized=False, n_resamples=2000, paired=True, random_state=42)
+ ... data=(x, y),
+ ... statistic=lambda x, y: np.corrcoef(x, y)[0][1],
+ ... method="basic",
+ ... vectorized=False,
+ ... n_resamples=2000,
+ ... paired=True,
+ ... random_state=42,
+ ... )
>>> np.round(bt_scipy.confidence_interval, 4)
array([-0.0952, 0.2883])
2. Bootstrapped 95% confidence interval of a Cohen d
- >>> stat = pg.compute_effsize(x, y, eftype='cohen')
- >>> ci = pg.compute_bootci(x, y, func='cohen', seed=42, decimals=3)
+ >>> stat = pg.compute_effsize(x, y, eftype="cohen")
+ >>> ci = pg.compute_bootci(x, y, func="cohen", seed=42, decimals=3)
>>> print(round(stat, 4), ci)
0.7009 [0.403 1.009]
@@ -312,7 +320,7 @@ def compute_bootci(
>>> import numpy as np
>>> stat = np.std(x, ddof=1)
- >>> ci = pg.compute_bootci(x, func='std', seed=123)
+ >>> ci = pg.compute_bootci(x, func="std", seed=123)
>>> print(round(stat, 4), ci)
1.5534 [1.38 1.8 ]
@@ -321,16 +329,16 @@ def compute_bootci(
>>> def std(x, axis):
... return np.std(x, ddof=1, axis=axis)
- >>> bt_scipy = bootstrap(data=(x, ), statistic=std, n_resamples=2000, random_state=123)
+ >>> bt_scipy = bootstrap(data=(x,), statistic=std, n_resamples=2000, random_state=123)
>>> np.round(bt_scipy.confidence_interval, 2)
array([1.39, 1.81])
Changing the confidence intervals type in Pingouin
- >>> pg.compute_bootci(x, func='std', seed=123, method="norm")
+ >>> pg.compute_bootci(x, func="std", seed=123, method="norm")
array([1.37, 1.76])
- >>> pg.compute_bootci(x, func='std', seed=123, method="percentile")
+ >>> pg.compute_bootci(x, func="std", seed=123, method="percentile")
array([1.35, 1.75])
4. Bootstrapped confidence interval using a custom univariate function
@@ -352,11 +360,14 @@ def compute_bootci(
We can also get the bootstrapped distribution
>>> ci, bt = pg.compute_bootci(x, y2, func=mean_diff, n_boot=10000, return_dist=True, seed=9)
- >>> print(f"The bootstrap distribution has {bt.size} samples. The mean and standard "
- ... f"{bt.mean():.4f} ± {bt.std():.4f}")
+ >>> print(
+ ... f"The bootstrap distribution has {bt.size} samples. The mean and standard "
+ ... f"{bt.mean():.4f} ± {bt.std():.4f}"
+ ... )
The bootstrap distribution has 10000 samples. The mean and standard 0.8807 ± 0.1704
"""
from inspect import isfunction, isroutine
+
from scipy.stats import norm
# Check other arguments
@@ -569,27 +580,27 @@ def convert_effsize(ef, input_type, output_type, nx=None, ny=None):
1. Convert from Cohen d to eta-square
>>> import pingouin as pg
- >>> d = .45
- >>> eta = pg.convert_effsize(d, 'cohen', 'eta_square')
+ >>> d = 0.45
+ >>> eta = pg.convert_effsize(d, "cohen", "eta_square")
>>> print(eta)
0.048185603807257595
2. Convert from Cohen d to Hegdes g (requires the sample sizes of each
group)
- >>> pg.convert_effsize(.45, 'cohen', 'hedges', nx=10, ny=10)
+ >>> pg.convert_effsize(0.45, "cohen", "hedges", nx=10, ny=10)
0.4309859154929578
3. Convert a point-biserial correlation to Cohen d
>>> rpb = 0.40
- >>> d = pg.convert_effsize(rpb, 'pointbiserialr', 'cohen')
+ >>> d = pg.convert_effsize(rpb, "pointbiserialr", "cohen")
>>> print(d)
0.8728715609439696
4. Reverse operation: convert Cohen d to a point-biserial correlation
- >>> pg.convert_effsize(d, 'cohen', 'pointbiserialr')
+ >>> pg.convert_effsize(d, "cohen", "pointbiserialr")
0.4000000000000001
"""
it = input_type.lower()
@@ -738,32 +749,32 @@ def compute_effsize(x, y, paired=False, eftype="cohen"):
>>> import pingouin as pg
>>> x = [1, 2, 3, 4]
>>> y = [3, 4, 5, 6, 7]
- >>> pg.compute_effsize(x, y, paired=False, eftype='cohen')
+ >>> pg.compute_effsize(x, y, paired=False, eftype="cohen")
-1.707825127659933
The sign of the Cohen d will be opposite if we reverse the order of
``x`` and ``y``:
- >>> pg.compute_effsize(y, x, paired=False, eftype='cohen')
+ >>> pg.compute_effsize(y, x, paired=False, eftype="cohen")
1.707825127659933
2. Hedges g from two paired samples.
>>> x = [1, 2, 3, 4, 5, 6, 7]
>>> y = [1, 3, 5, 7, 9, 11, 13]
- >>> pg.compute_effsize(x, y, paired=True, eftype='hedges')
+ >>> pg.compute_effsize(x, y, paired=True, eftype="hedges")
-0.8222477210374874
3. Common Language Effect Size.
- >>> pg.compute_effsize(x, y, eftype='cles')
+ >>> pg.compute_effsize(x, y, eftype="cles")
0.2857142857142857
In other words, there are ~29% of pairs where ``x`` is higher than ``y``,
which means that there are ~71% of pairs where ``x`` is *lower* than ``y``.
This can be easily verified by changing the order of ``x`` and ``y``:
- >>> pg.compute_effsize(y, x, eftype='cles')
+ >>> pg.compute_effsize(y, x, eftype="cles")
0.7142857142857143
"""
# Check arguments
@@ -850,14 +861,14 @@ def compute_effsize_from_t(tval, nx=None, ny=None, N=None, eftype="cohen"):
>>> from pingouin import compute_effsize_from_t
>>> tval, nx, ny = 2.90, 35, 25
- >>> d = compute_effsize_from_t(tval, nx=nx, ny=ny, eftype='cohen')
+ >>> d = compute_effsize_from_t(tval, nx=nx, ny=ny, eftype="cohen")
>>> print(d)
0.7593982580212534
2. Compute effect size when only total sample size is known (nx+ny)
>>> tval, N = 2.90, 60
- >>> d = compute_effsize_from_t(tval, N=N, eftype='cohen')
+ >>> d = compute_effsize_from_t(tval, N=N, eftype="cohen")
>>> print(d)
0.7487767802667672
"""
diff --git a/src/pingouin/equivalence.py b/src/pingouin/equivalence.py
index 70f5ee58..b2460c6d 100644
--- a/src/pingouin/equivalence.py
+++ b/src/pingouin/equivalence.py
@@ -2,10 +2,10 @@
# Date: July 2019
import numpy as np
import pandas as pd
+
from pingouin.parametric import ttest
from pingouin.utils import _postprocess_dataframe
-
__all__ = ["tost"]
diff --git a/src/pingouin/multicomp.py b/src/pingouin/multicomp.py
index 04415287..4b63bb42 100644
--- a/src/pingouin/multicomp.py
+++ b/src/pingouin/multicomp.py
@@ -80,8 +80,8 @@ def fdr(pvals, alpha=0.05, method="fdr_bh"):
FDR correction of an array of p-values
>>> import pingouin as pg
- >>> pvals = [.50, .003, .32, .054, .0003]
- >>> reject, pvals_corr = pg.multicomp(pvals, method='fdr_bh', alpha=.05)
+ >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003]
+ >>> reject, pvals_corr = pg.multicomp(pvals, method="fdr_bh", alpha=0.05)
>>> print(reject, pvals_corr)
[False True False False True] [0.5 0.0075 0.4 0.09 0.0015]
"""
@@ -178,8 +178,8 @@ def bonf(pvals, alpha=0.05):
Examples
--------
>>> import pingouin as pg
- >>> pvals = [.50, .003, .32, .054, .0003]
- >>> reject, pvals_corr = pg.multicomp(pvals, method='bonf', alpha=.05)
+ >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003]
+ >>> reject, pvals_corr = pg.multicomp(pvals, method="bonf", alpha=0.05)
>>> print(reject, pvals_corr)
[False True False False True] [1. 0.015 1. 0.27 0.0015]
"""
@@ -251,8 +251,8 @@ def holm(pvals, alpha=0.05):
Examples
--------
>>> import pingouin as pg
- >>> pvals = [.50, .003, .32, .054, .0003]
- >>> reject, pvals_corr = pg.multicomp(pvals, method='holm', alpha=.05)
+ >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003]
+ >>> reject, pvals_corr = pg.multicomp(pvals, method="holm", alpha=0.05)
>>> print(reject, pvals_corr)
[False True False False True] [0.64 0.012 0.64 0.162 0.0015]
"""
@@ -327,8 +327,8 @@ def sidak(pvals, alpha=0.05):
--------
>>> import numpy as np
>>> import pingouin as pg
- >>> pvals = [.50, .003, .32, .054, .0003]
- >>> reject, pvals_corr = pg.multicomp(pvals, method='sidak', alpha=.05)
+ >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003]
+ >>> reject, pvals_corr = pg.multicomp(pvals, method="sidak", alpha=0.05)
>>> print(reject, np.round(pvals_corr, 4))
[False True False False True] [0.9688 0.0149 0.8546 0.2424 0.0015]
"""
@@ -459,8 +459,8 @@ def multicomp(pvals, alpha=0.05, method="holm"):
FDR correction of an array of p-values
>>> import pingouin as pg
- >>> pvals = [.50, .003, .32, .054, .0003]
- >>> reject, pvals_corr = pg.multicomp(pvals, method='fdr_bh')
+ >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003]
+ >>> reject, pvals_corr = pg.multicomp(pvals, method="fdr_bh")
>>> print(reject, pvals_corr)
[False True False False True] [0.5 0.0075 0.4 0.09 0.0015]
@@ -468,7 +468,7 @@ def multicomp(pvals, alpha=0.05, method="holm"):
>>> import numpy as np
>>> pvals[2] = np.nan
- >>> reject, pvals_corr = pg.multicomp(pvals, method='holm')
+ >>> reject, pvals_corr = pg.multicomp(pvals, method="holm")
>>> print(reject, pvals_corr)
[False True False False True] [0.5 0.009 nan 0.108 0.0012]
"""
diff --git a/src/pingouin/multivariate.py b/src/pingouin/multivariate.py
index f2ec6da0..f0ec4c99 100644
--- a/src/pingouin/multivariate.py
+++ b/src/pingouin/multivariate.py
@@ -1,7 +1,9 @@
+from collections import namedtuple
+
import numpy as np
import pandas as pd
-from collections import namedtuple
-from pingouin.utils import remove_na, _postprocess_dataframe
+
+from pingouin.utils import _postprocess_dataframe, remove_na
__all__ = ["multivariate_normality", "multivariate_ttest", "box_m"]
@@ -55,9 +57,9 @@ def multivariate_normality(X, alpha=0.05):
Examples
--------
>>> import pingouin as pg
- >>> data = pg.read_dataset('multivariate')
- >>> X = data[['Fever', 'Pressure', 'Aches']]
- >>> pg.multivariate_normality(X, alpha=.05)
+ >>> data = pg.read_dataset("multivariate")
+ >>> X = data[["Fever", "Pressure", "Aches"]]
+ >>> pg.multivariate_normality(X, alpha=0.05)
HZResults(hz=0.540086101851555, pval=0.7173686509622386, normal=True)
"""
from scipy.stats import lognorm
@@ -175,10 +177,10 @@ def multivariate_ttest(X, Y=None, paired=False):
Two-sample independent Hotelling T-squared test
>>> import pingouin as pg
- >>> data = pg.read_dataset('multivariate')
- >>> dvs = ['Fever', 'Pressure', 'Aches']
- >>> X = data[data['Condition'] == 'Drug'][dvs]
- >>> Y = data[data['Condition'] == 'Placebo'][dvs]
+ >>> data = pg.read_dataset("multivariate")
+ >>> dvs = ["Fever", "Pressure", "Aches"]
+ >>> X = data[data["Condition"] == "Drug"][dvs]
+ >>> Y = data[data["Condition"] == "Placebo"][dvs]
>>> pg.multivariate_ttest(X, Y)
T2 F df1 df2 pval
hotelling 4.228679 1.326644 3 32 0.282898
@@ -319,9 +321,8 @@ def box_m(data, dvs, group, alpha=0.001):
>>> import pandas as pd
>>> import pingouin as pg
>>> from scipy.stats import multivariate_normal as mvn
- >>> data = pd.DataFrame(mvn.rvs(size=(100, 3), random_state=42),
- ... columns=['A', 'B', 'C'])
- >>> data['group'] = [1] * 25 + [2] * 25 + [3] * 25 + [4] * 25
+ >>> data = pd.DataFrame(mvn.rvs(size=(100, 3), random_state=42), columns=["A", "B", "C"])
+ >>> data["group"] = [1] * 25 + [2] * 25 + [3] * 25 + [4] * 25
>>> data.head()
A B C group
0 0.496714 -0.138264 0.647689 1
@@ -330,16 +331,15 @@ def box_m(data, dvs, group, alpha=0.001):
3 0.542560 -0.463418 -0.465730 1
4 0.241962 -1.913280 -1.724918 1
- >>> pg.box_m(data, dvs=['A', 'B', 'C'], group='group')
+ >>> pg.box_m(data, dvs=["A", "B", "C"], group="group")
Chi2 df pval equal_cov
box 11.634185 18.0 0.865537 True
2. Box M test with 3 dependent variables of 2 groups (unequal sample size)
- >>> data = pd.DataFrame(mvn.rvs(size=(30, 2), random_state=42),
- ... columns=['A', 'B'])
- >>> data['group'] = [1] * 20 + [2] * 10
- >>> pg.box_m(data, dvs=['A', 'B'], group='group')
+ >>> data = pd.DataFrame(mvn.rvs(size=(30, 2), random_state=42), columns=["A", "B"])
+ >>> data["group"] = [1] * 20 + [2] * 10
+ >>> pg.box_m(data, dvs=["A", "B"], group="group")
Chi2 df pval equal_cov
box 0.706709 3.0 0.871625 True
"""
diff --git a/src/pingouin/nonparametric.py b/src/pingouin/nonparametric.py
index 348485ad..909b428c 100644
--- a/src/pingouin/nonparametric.py
+++ b/src/pingouin/nonparametric.py
@@ -1,9 +1,10 @@
# Author: Raphael Vallat
# Date: May 2018
-import scipy
import numpy as np
import pandas as pd
-from pingouin import remove_na, _check_dataframe, _postprocess_dataframe
+import scipy
+
+from pingouin import _check_dataframe, _postprocess_dataframe, remove_na
__all__ = [
"mad",
@@ -84,7 +85,7 @@ def mad(a, normalize=True, axis=0):
Compare with Scipy >= 1.3
>>> from scipy.stats import median_abs_deviation
- >>> median_abs_deviation(w, scale='normal', axis=None, nan_policy='omit')
+ >>> median_abs_deviation(w, scale="normal", axis=None, nan_policy="omit")
1.1607762457644006
"""
a = np.asarray(a)
@@ -142,7 +143,7 @@ def madmedianrule(a):
Examples
--------
>>> import pingouin as pg
- >>> a = [-1.09, 1., 0.28, -1.51, -0.58, 6.61, -2.43, -0.43]
+ >>> a = [-1.09, 1.0, 0.28, -1.51, -0.58, 6.61, -2.43, -0.43]
>>> pg.madmedianrule(a)
array([False, False, False, False, False, True, False, False])
"""
@@ -238,29 +239,29 @@ def mwu(x, y, alternative="two-sided", **kwargs):
>>> np.random.seed(123)
>>> x = np.random.uniform(low=0, high=1, size=20)
>>> y = np.random.uniform(low=0.2, high=1.2, size=20)
- >>> pg.mwu(x, y, alternative='two-sided')
+ >>> pg.mwu(x, y, alternative="two-sided")
U_val alternative p_val RBC CLES
MWU 97.0 two-sided 0.00556 -0.515 0.2425
Compare with SciPy
>>> import scipy
- >>> scipy.stats.mannwhitneyu(x, y, use_continuity=True, alternative='two-sided')
+ >>> scipy.stats.mannwhitneyu(x, y, use_continuity=True, alternative="two-sided")
MannwhitneyuResult(statistic=97.0, pvalue=0.0055604599321374135)
One-sided test
- >>> pg.mwu(x, y, alternative='greater')
+ >>> pg.mwu(x, y, alternative="greater")
U_val alternative p_val RBC CLES
MWU 97.0 greater 0.997442 -0.515 0.2425
- >>> pg.mwu(x, y, alternative='less')
+ >>> pg.mwu(x, y, alternative="less")
U_val alternative p_val RBC CLES
MWU 97.0 less 0.00278 -0.515 0.7575
Passing keyword arguments to :py:func:`scipy.stats.mannwhitneyu`:
- >>> pg.mwu(x, y, alternative='two-sided', method='exact')
+ >>> pg.mwu(x, y, alternative="two-sided", method="exact")
U_val alternative p_val RBC CLES
MWU 97.0 two-sided 0.004681 -0.515 0.2425
@@ -408,7 +409,7 @@ def wilcoxon(x, y=None, alternative="two-sided", **kwargs):
>>> import pingouin as pg
>>> x = np.array([20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13])
>>> y = np.array([38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16])
- >>> pg.wilcoxon(x, y, alternative='two-sided')
+ >>> pg.wilcoxon(x, y, alternative="two-sided")
W_val alternative p_val RBC CLES
Wilcoxon 20.5 two-sided 0.288086 -0.378788 0.395833
@@ -428,17 +429,17 @@ def wilcoxon(x, y=None, alternative="two-sided", **kwargs):
The p-value is not exactly similar to Pingouin. This is because Pingouin automatically applies
a continuity correction. Disabling it gives the same p-value as scipy:
- >>> pg.wilcoxon(x, y, alternative='two-sided', correction=False)
+ >>> pg.wilcoxon(x, y, alternative="two-sided", correction=False)
W_val alternative p_val RBC CLES
Wilcoxon 20.5 two-sided 0.288086 -0.378788 0.395833
One-sided test
- >>> pg.wilcoxon(x, y, alternative='greater')
+ >>> pg.wilcoxon(x, y, alternative="greater")
W_val alternative p_val RBC CLES
Wilcoxon 20.5 greater 0.865723 -0.378788 0.395833
- >>> pg.wilcoxon(x, y, alternative='less')
+ >>> pg.wilcoxon(x, y, alternative="less")
W_val alternative p_val RBC CLES
Wilcoxon 20.5 less 0.144043 0.378788 0.604167
"""
@@ -540,8 +541,8 @@ def kruskal(data=None, dv=None, between=None, detailed=False):
Compute the Kruskal-Wallis H-test for independent samples.
>>> from pingouin import kruskal, read_dataset
- >>> df = read_dataset('anova')
- >>> kruskal(data=df, dv='Pain threshold', between='Hair color')
+ >>> df = read_dataset("anova")
+ >>> kruskal(data=df, dv="Pain threshold", between="Hair color")
Source ddof1 H p_unc
Kruskal Hair color 3 10.58863 0.014172
"""
@@ -658,10 +659,26 @@ def friedman(data=None, dv=None, within=None, subject=None, method="chisq"):
>>> import pandas as pd
>>> import pingouin as pg
- >>> df = pd.DataFrame({
- ... 'white': {0: 10, 1: 8, 2: 7, 3: 9, 4: 7, 5: 4, 6: 5, 7: 6, 8: 5, 9: 10, 10: 4, 11: 7},
- ... 'red': {0: 7, 1: 5, 2: 8, 3: 6, 4: 5, 5: 7, 6: 9, 7: 6, 8: 4, 9: 6, 10: 7, 11: 3},
- ... 'rose': {0: 8, 1: 5, 2: 6, 3: 4, 4: 7, 5: 5, 6: 3, 7: 7, 8: 6, 9: 4, 10: 4, 11: 3}})
+ >>> df = pd.DataFrame(
+ ... {
+ ... "white": {
+ ... 0: 10,
+ ... 1: 8,
+ ... 2: 7,
+ ... 3: 9,
+ ... 4: 7,
+ ... 5: 4,
+ ... 6: 5,
+ ... 7: 6,
+ ... 8: 5,
+ ... 9: 10,
+ ... 10: 4,
+ ... 11: 7,
+ ... },
+ ... "red": {0: 7, 1: 5, 2: 8, 3: 6, 4: 5, 5: 7, 6: 9, 7: 6, 8: 4, 9: 6, 10: 7, 11: 3},
+ ... "rose": {0: 8, 1: 5, 2: 6, 3: 4, 4: 7, 5: 5, 6: 3, 7: 7, 8: 6, 9: 4, 10: 4, 11: 3},
+ ... }
+ ... )
>>> pg.friedman(df)
Source W ddof1 Q p_unc
Friedman Within 0.083333 2 2.0 0.367879
@@ -808,8 +825,8 @@ def cochran(data=None, dv=None, within=None, subject=None):
Compute the Cochran Q test for repeated measurements.
>>> from pingouin import cochran, read_dataset
- >>> df = read_dataset('cochran')
- >>> cochran(data=df, dv='Energetic', within='Time', subject='Subject')
+ >>> df = read_dataset("cochran")
+ >>> cochran(data=df, dv="Energetic", within="Time", subject="Subject")
Source dof Q p_unc
cochran Time 2 6.705882 0.034981
diff --git a/src/pingouin/pairwise.py b/src/pingouin/pairwise.py
index 6a32f83f..e8fc4c1d 100644
--- a/src/pingouin/pairwise.py
+++ b/src/pingouin/pairwise.py
@@ -1,16 +1,18 @@
# Author: Raphael Vallat
# Date: April 2018
+import warnings
+from itertools import combinations, product
+
import numpy as np
import pandas as pd
import pandas_flavor as pf
-from itertools import combinations, product
+from scipy.stats import studentized_range
+
from pingouin.config import options
-from pingouin.parametric import anova
-from pingouin.multicomp import multicomp
from pingouin.effsize import compute_effsize
+from pingouin.multicomp import multicomp
+from pingouin.parametric import anova
from pingouin.utils import _check_dataframe, _flatten_list, _postprocess_dataframe
-from scipy.stats import studentized_range
-import warnings
__all__ = [
"pairwise_ttests",
@@ -203,16 +205,16 @@ def pairwise_tests(
>>> import pandas as pd
>>> import pingouin as pg
- >>> pd.set_option('display.expand_frame_repr', False)
- >>> pd.set_option('display.max_columns', 20)
- >>> df = pg.read_dataset('mixed_anova.csv')
- >>> pg.pairwise_tests(dv='Scores', between='Group', data=df).round(3)
+ >>> pd.set_option("display.expand_frame_repr", False)
+ >>> pd.set_option("display.max_columns", 20)
+ >>> df = pg.read_dataset("mixed_anova.csv")
+ >>> pg.pairwise_tests(dv="Scores", between="Group", data=df).round(3)
Contrast A B Paired Parametric T dof alternative p_unc BF10 hedges
0 Group Control Meditation False True -2.29 178.0 two-sided 0.023 1.813 -0.34
2. One within-subject factor
- >>> post_hocs = pg.pairwise_tests(dv='Scores', within='Time', subject='Subject', data=df)
+ >>> post_hocs = pg.pairwise_tests(dv="Scores", within="Time", subject="Subject", data=df)
>>> post_hocs.round(3)
Contrast A B Paired Parametric T dof alternative p_unc BF10 hedges
0 Time August January True True -1.740 59.0 two-sided 0.087 0.582 -0.328
@@ -221,8 +223,9 @@ def pairwise_tests(
3. Non-parametric pairwise paired test (wilcoxon)
- >>> pg.pairwise_tests(dv='Scores', within='Time', subject='Subject',
- ... data=df, parametric=False).round(3)
+ >>> pg.pairwise_tests(
+ ... dv="Scores", within="Time", subject="Subject", data=df, parametric=False
+ ... ).round(3)
Contrast A B Paired Parametric W_val alternative p_unc hedges
0 Time August January True False 716.0 two-sided 0.144 -0.328
1 Time August June True False 564.0 two-sided 0.010 -0.483
@@ -230,8 +233,9 @@ def pairwise_tests(
4. Mixed design (within and between) with bonferroni-corrected p-values
- >>> posthocs = pg.pairwise_tests(dv='Scores', within='Time', subject='Subject',
- ... between='Group', padjust='bonf', data=df)
+ >>> posthocs = pg.pairwise_tests(
+ ... dv="Scores", within="Time", subject="Subject", between="Group", padjust="bonf", data=df
+ ... )
>>> posthocs.round(3)
Contrast Time A B Paired Parametric T dof alternative p_unc p_corr p_adjust BF10 hedges
0 Time - August January True True -1.740 59.0 two-sided 0.087 0.261 bonf 0.582 -0.328
@@ -244,7 +248,7 @@ def pairwise_tests(
5. Two between-subject factors. The order of the ``between`` factors matters!
- >>> pg.pairwise_tests(dv='Scores', between=['Group', 'Time'], data=df).round(3)
+ >>> pg.pairwise_tests(dv="Scores", between=["Group", "Time"], data=df).round(3)
Contrast Group A B Paired Parametric T dof alternative p_unc BF10 hedges
0 Group - Control Meditation False True -2.290 178.0 two-sided 0.023 1.813 -0.340
1 Time - August January False True -1.806 118.0 two-sided 0.074 0.839 -0.328
@@ -259,16 +263,17 @@ def pairwise_tests(
6. Same but without the interaction, and using a directional test
- >>> df.pairwise_tests(dv='Scores', between=['Group', 'Time'], alternative="less",
- ... interaction=False).round(3)
+ >>> df.pairwise_tests(
+ ... dv="Scores", between=["Group", "Time"], alternative="less", interaction=False
+ ... ).round(3)
Contrast A B Paired Parametric T dof alternative p_unc hedges
0 Group Control Meditation False True -2.290 178.0 less 0.012 -0.340
1 Time August January False True -1.806 118.0 less 0.037 -0.328
2 Time August June False True -2.660 118.0 less 0.004 -0.483
3 Time January June False True -0.934 118.0 less 0.176 -0.170
"""
+ from .nonparametric import mwu, wilcoxon
from .parametric import ttest
- from .nonparametric import wilcoxon, mwu
# Safety checks
data = _check_dataframe(
@@ -660,8 +665,8 @@ def ptests(
>>> import pandas as pd
>>> import pingouin as pg
>>> # Load an example dataset of personality dimensions
- >>> df = pg.read_dataset('pairwise_corr').iloc[:30, 1:]
- >>> df.columns = ["N", "E", "O", 'A', "C"]
+ >>> df = pg.read_dataset("pairwise_corr").iloc[:30, 1:]
+ >>> df.columns = ["N", "E", "O", "A", "C"]
>>> # Add some missing values
>>> df.iloc[[2, 5, 20], 2] = np.nan
>>> df.iloc[[1, 4, 10], 3] = np.nan
@@ -720,6 +725,7 @@ def ptests(
C -4.251 3.595 3.785 3.765 -
"""
from itertools import combinations
+
from numpy import format_float_positional as ffp
from scipy.stats import ttest_ind, ttest_rel
@@ -873,8 +879,8 @@ def pairwise_tukey(data=None, dv=None, between=None, effsize="hedges"):
Pairwise Tukey post-hocs on the Penguins dataset.
>>> import pingouin as pg
- >>> df = pg.read_dataset('penguins')
- >>> df.pairwise_tukey(dv='body_mass_g', between='species').round(3)
+ >>> df = pg.read_dataset("penguins")
+ >>> df.pairwise_tukey(dv="body_mass_g", between="species").round(3)
A B mean(A) mean(B) diff se T p_tukey hedges
0 Adelie Chinstrap 3700.662 3733.088 -32.426 67.512 -0.480 0.881 -0.074
1 Adelie Gentoo 3700.662 5076.016 -1375.354 56.148 -24.495 0.000 -2.860
@@ -1038,9 +1044,8 @@ def pairwise_gameshowell(data=None, dv=None, between=None, effsize="hedges"):
Pairwise Games-Howell post-hocs on the Penguins dataset.
>>> import pingouin as pg
- >>> df = pg.read_dataset('penguins')
- >>> pg.pairwise_gameshowell(data=df, dv='body_mass_g',
- ... between='species').round(3)
+ >>> df = pg.read_dataset("penguins")
+ >>> pg.pairwise_gameshowell(data=df, dv="body_mass_g", between="species").round(3)
A B mean(A) mean(B) diff se T df pval hedges
0 Adelie Chinstrap 3700.662 3733.088 -32.426 59.706 -0.543 152.455 0.85 -0.074
1 Adelie Gentoo 3700.662 5076.016 -1375.354 58.811 -23.386 249.643 0.00 -2.860
@@ -1242,10 +1247,10 @@ def pairwise_corr(
>>> import pandas as pd
>>> import pingouin as pg
- >>> pd.set_option('display.expand_frame_repr', False)
- >>> pd.set_option('display.max_columns', 20)
- >>> data = pg.read_dataset('pairwise_corr').iloc[:, 1:]
- >>> pg.pairwise_corr(data, method='spearman', alternative='greater', padjust='bonf').round(3)
+ >>> pd.set_option("display.expand_frame_repr", False)
+ >>> pd.set_option("display.max_columns", 20)
+ >>> data = pg.read_dataset("pairwise_corr").iloc[:, 1:]
+ >>> pg.pairwise_corr(data, method="spearman", alternative="greater", padjust="bonf").round(3)
X Y method alternative n r CI95 p_unc p_corr p_adjust power
0 Neuroticism Extraversion spearman greater 500 -0.325 [-0.39, 1.0] 1.000 1.000 bonf 0.000
1 Neuroticism Openness spearman greater 500 -0.028 [-0.1, 1.0] 0.735 1.000 bonf 0.012
@@ -1260,8 +1265,9 @@ def pairwise_corr(
2. Robust two-sided biweight midcorrelation with uncorrected p-values
- >>> pcor = pg.pairwise_corr(data, columns=['Openness', 'Extraversion',
- ... 'Neuroticism'], method='bicor')
+ >>> pcor = pg.pairwise_corr(
+ ... data, columns=["Openness", "Extraversion", "Neuroticism"], method="bicor"
+ ... )
>>> pcor.round(3)
X Y method alternative n r CI95 p_unc power
0 Openness Extraversion bicor two-sided 500 0.247 [0.16, 0.33] 0.000 1.000
@@ -1270,7 +1276,7 @@ def pairwise_corr(
3. One-versus-all pairwise correlations
- >>> pg.pairwise_corr(data, columns=['Neuroticism']).round(3)
+ >>> pg.pairwise_corr(data, columns=["Neuroticism"]).round(3)
X Y method alternative n r CI95 p_unc BF10 power
0 Neuroticism Extraversion pearson two-sided 500 -0.350 [-0.42, -0.27] 0.000 6.765e+12 1.000
1 Neuroticism Openness pearson two-sided 500 -0.010 [-0.1, 0.08] 0.817 0.058 0.056
@@ -1279,7 +1285,7 @@ def pairwise_corr(
4. Pairwise correlations between two lists of columns (cartesian product)
- >>> columns = [['Neuroticism', 'Extraversion'], ['Openness']]
+ >>> columns = [["Neuroticism", "Extraversion"], ["Openness"]]
>>> pg.pairwise_corr(data, columns).round(3)
X Y method alternative n r CI95 p_unc BF10 power
0 Neuroticism Openness pearson two-sided 500 -0.010 [-0.1, 0.08] 0.817 0.058 0.056
@@ -1287,11 +1293,11 @@ def pairwise_corr(
5. As a Pandas method
- >>> pcor = data.pairwise_corr(covar='Neuroticism', method='spearman')
+ >>> pcor = data.pairwise_corr(covar="Neuroticism", method="spearman")
6. Pairwise partial correlation
- >>> pg.pairwise_corr(data, covar=['Neuroticism', 'Openness'])
+ >>> pg.pairwise_corr(data, covar=["Neuroticism", "Openness"])
X Y method covar alternative n r CI95 p_unc
0 Extraversion Agreeableness pearson ['Neuroticism', 'Openness'] two-sided 500 -0.038737 [-0.13, 0.05] 0.388361
1 Extraversion Conscientiousness pearson ['Neuroticism', 'Openness'] two-sided 500 -0.071427 [-0.16, 0.02] 0.111389
@@ -1299,7 +1305,7 @@ def pairwise_corr(
7. Pairwise partial correlation matrix using :py:func:`pingouin.pcorr`
- >>> data[['Neuroticism', 'Openness', 'Extraversion']].pcorr().round(3)
+ >>> data[["Neuroticism", "Openness", "Extraversion"]].pcorr().round(3)
Neuroticism Openness Extraversion
Neuroticism 1.000 0.092 -0.360
Openness 0.092 1.000 0.281
@@ -1307,7 +1313,7 @@ def pairwise_corr(
8. Correlation matrix with p-values using :py:func:`pingouin.rcorr`
- >>> data[['Neuroticism', 'Openness', 'Extraversion']].rcorr()
+ >>> data[["Neuroticism", "Openness", "Extraversion"]].rcorr()
Neuroticism Openness Extraversion
Neuroticism - ***
Openness -0.01 - ***
diff --git a/src/pingouin/parametric.py b/src/pingouin/parametric.py
index 0329771c..4369b78c 100644
--- a/src/pingouin/parametric.py
+++ b/src/pingouin/parametric.py
@@ -1,18 +1,20 @@
# Author: Raphael Vallat
import warnings
from collections.abc import Iterable
+
import numpy as np
import pandas as pd
-from scipy.stats import f
import pandas_flavor as pf
+from scipy.stats import f
+
from pingouin import (
_check_dataframe,
- remove_na,
_flatten_list,
+ _postprocess_dataframe,
bayesfactor_ttest,
epsilon,
+ remove_na,
sphericity,
- _postprocess_dataframe,
)
__all__ = ["ttest", "rm_anova", "anova", "welch_anova", "mixed_anova", "ancova"]
@@ -150,14 +152,14 @@ def ttest(x, y, paired=False, alternative="two-sided", correction="auto", r=0.70
2. One sided paired T-test.
>>> pre = [5.5, 2.4, 6.8, 9.6, 4.2]
- >>> post = [6.4, 3.4, 6.4, 11., 4.8]
- >>> ttest(pre, post, paired=True, alternative='less').round(2)
+ >>> post = [6.4, 3.4, 6.4, 11.0, 4.8]
+ >>> ttest(pre, post, paired=True, alternative="less").round(2)
T dof alternative p_val CI95 cohen_d power
T_test -2.31 4 less 0.04 [-inf, -0.05] 0.25 0.12
Now testing the opposite alternative hypothesis
- >>> ttest(pre, post, paired=True, alternative='greater').round(2)
+ >>> ttest(pre, post, paired=True, alternative="greater").round(2)
T dof alternative p_val CI95 cohen_d power
T_test -2.31 4 greater 0.96 [-1.35, inf] 0.25 0.02
@@ -165,7 +167,7 @@ def ttest(x, y, paired=False, alternative="two-sided", correction="auto", r=0.70
>>> import numpy as np
>>> pre = [5.5, 2.4, np.nan, 9.6, 4.2]
- >>> post = [6.4, 3.4, 6.4, 11., 4.8]
+ >>> post = [6.4, 3.4, 6.4, 11.0, 4.8]
>>> ttest(pre, post, paired=True).round(3)
T dof alternative p_val CI95 cohen_d BF10 power
T_test -5.902 3 two-sided 0.01 [-1.5, -0.45] 0.306 7.169 0.073
@@ -205,14 +207,14 @@ def ttest(x, y, paired=False, alternative="two-sided", correction="auto", r=0.70
>>> np.round(ttest_ind(x, y, equal_var=True), 6) # T value and p-value
array([1.971859, 0.057056])
"""
- from scipy.stats import t, ttest_rel, ttest_ind, ttest_1samp
+ from scipy.stats import t, ttest_1samp, ttest_ind, ttest_rel
try: # pragma: no cover
- from scipy.stats._stats_py import _unequal_var_ttest_denom, _equal_var_ttest_denom
+ from scipy.stats._stats_py import _equal_var_ttest_denom, _unequal_var_ttest_denom
except ImportError: # pragma: no cover
# Fallback for scipy<1.8.0
- from scipy.stats.stats import _unequal_var_ttest_denom, _equal_var_ttest_denom
- from pingouin import power_ttest, power_ttest2n, compute_effsize
+ from scipy.stats.stats import _equal_var_ttest_denom, _unequal_var_ttest_denom
+ from pingouin import compute_effsize, power_ttest, power_ttest2n
# Check arguments
assert alternative in [
@@ -474,7 +476,7 @@ def rm_anova(
1. One-way repeated measures ANOVA using a wide-format dataset
>>> import pingouin as pg
- >>> data = pg.read_dataset('rm_anova_wide')
+ >>> data = pg.read_dataset("rm_anova_wide")
>>> pg.rm_anova(data)
Source ddof1 ddof2 F p_unc ng2 eps
0 Within 3 24 5.200652 0.006557 0.346392 0.694329
@@ -486,9 +488,15 @@ def rm_anova(
means that we want to get the partial eta-squared effect size instead
of the default (generalized) eta-squared.
- >>> df = pg.read_dataset('rm_anova')
- >>> aov = pg.rm_anova(dv='DesireToKill', within='Disgustingness',
- ... subject='Subject', data=df, detailed=True, effsize="np2")
+ >>> df = pg.read_dataset("rm_anova")
+ >>> aov = pg.rm_anova(
+ ... dv="DesireToKill",
+ ... within="Disgustingness",
+ ... subject="Subject",
+ ... data=df,
+ ... detailed=True,
+ ... effsize="np2",
+ ... )
>>> aov.round(3)
Source SS DF MS F p_unc np2 eps
0 Disgustingness 27.485 1 27.485 12.044 0.001 0.116 1.0
@@ -496,12 +504,16 @@ def rm_anova(
3. Two-way repeated-measures ANOVA
- >>> aov = pg.rm_anova(dv='DesireToKill', within=['Disgustingness', 'Frighteningness'],
- ... subject='Subject', data=df)
+ >>> aov = pg.rm_anova(
+ ... dv="DesireToKill",
+ ... within=["Disgustingness", "Frighteningness"],
+ ... subject="Subject",
+ ... data=df,
+ ... )
4. As a :py:class:`pandas.DataFrame` method
- >>> df.rm_anova(dv='DesireToKill', within='Disgustingness', subject='Subject', detailed=False)
+ >>> df.rm_anova(dv="DesireToKill", within="Disgustingness", subject="Subject", detailed=False)
Source ddof1 ddof2 F p_unc ng2 eps
0 Disgustingness 1 92 12.043878 0.000793 0.025784 1.0
"""
@@ -908,9 +920,8 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize="
One-way ANOVA
>>> import pingouin as pg
- >>> df = pg.read_dataset('anova')
- >>> aov = pg.anova(dv='Pain threshold', between='Hair color', data=df,
- ... detailed=True)
+ >>> df = pg.read_dataset("anova")
+ >>> aov = pg.anova(dv="Pain threshold", between="Hair color", data=df, detailed=True)
>>> aov.round(3)
Source SS DF MS F p_unc np2
0 Hair color 1360.726 3 453.575 6.791 0.004 0.576
@@ -921,14 +932,13 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize="
a method (= built-in function) of our pandas dataframe. In that case,
we don't have to specify ``data`` anymore.
- >>> df.anova(dv='Pain threshold', between='Hair color', detailed=False,
- ... effsize='n2')
+ >>> df.anova(dv="Pain threshold", between="Hair color", detailed=False, effsize="n2")
Source ddof1 ddof2 F p_unc n2
0 Hair color 3 15 6.791407 0.004114 0.575962
Two-way ANOVA with balanced design
- >>> data = pg.read_dataset('anova2')
+ >>> data = pg.read_dataset("anova2")
>>> data.anova(dv="Yield", between=["Blend", "Crop"]).round(3)
Source SS DF MS F p_unc np2
0 Blend 2.042 1 2.042 0.004 0.952 0.000
@@ -938,9 +948,8 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize="
Two-way ANOVA with unbalanced design (requires statsmodels)
- >>> data = pg.read_dataset('anova2_unbalanced')
- >>> data.anova(dv="Scores", between=["Diet", "Exercise"],
- ... effsize="n2").round(3)
+ >>> data = pg.read_dataset("anova2_unbalanced")
+ >>> data.anova(dv="Scores", between=["Diet", "Exercise"], effsize="n2").round(3)
Source SS DF MS F p_unc n2
0 Diet 390.625 1.0 390.625 7.423 0.034 0.433
1 Exercise 180.625 1.0 180.625 3.432 0.113 0.200
@@ -949,9 +958,8 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize="
Three-way ANOVA, type 3 sums of squares (requires statsmodels)
- >>> data = pg.read_dataset('anova3')
- >>> data.anova(dv='Cholesterol', between=['Sex', 'Risk', 'Drug'],
- ... ss_type=3).round(3)
+ >>> data = pg.read_dataset("anova3")
+ >>> data.anova(dv="Cholesterol", between=["Sex", "Risk", "Drug"], ss_type=3).round(3)
Source SS DF MS F p_unc np2
0 Sex 2.075 1.0 2.075 2.462 0.123 0.049
1 Risk 11.332 1.0 11.332 13.449 0.001 0.219
@@ -1322,8 +1330,8 @@ def welch_anova(data=None, dv=None, between=None):
1. One-way Welch ANOVA on the pain threshold dataset.
>>> from pingouin import welch_anova, read_dataset
- >>> df = read_dataset('anova')
- >>> aov = welch_anova(dv='Pain threshold', between='Hair color', data=df)
+ >>> df = read_dataset("anova")
+ >>> aov = welch_anova(dv="Pain threshold", between="Hair color", data=df)
>>> aov
Source ddof1 ddof2 F p_unc np2
0 Hair color 3 8.329841 5.890115 0.018813 0.575962
@@ -1446,9 +1454,8 @@ def mixed_anova(
Compute a two-way mixed model ANOVA.
>>> from pingouin import mixed_anova, read_dataset
- >>> df = read_dataset('mixed_anova')
- >>> aov = mixed_anova(dv='Scores', between='Group',
- ... within='Time', subject='Subject', data=df)
+ >>> df = read_dataset("mixed_anova")
+ >>> aov = mixed_anova(dv="Scores", between="Group", within="Time", subject="Subject", data=df)
>>> aov.round(3)
Source SS DF1 DF2 MS F p_unc np2 eps
0 Group 5.460 1 58 5.460 5.052 0.028 0.080 NaN
@@ -1459,8 +1466,9 @@ def mixed_anova(
can also apply this function directly as a method of the dataframe, in
which case we do not need to specify ``data=df`` anymore.
- >>> df.mixed_anova(dv='Scores', between='Group', within='Time',
- ... subject='Subject', effsize="ng2").round(3)
+ >>> df.mixed_anova(
+ ... dv="Scores", between="Group", within="Time", subject="Subject", effsize="ng2"
+ ... ).round(3)
Source SS DF1 DF2 MS F p_unc ng2 eps
0 Group 5.460 1 58 5.460 5.052 0.028 0.031 NaN
1 Time 7.628 2 116 3.814 4.027 0.020 0.042 0.999
@@ -1664,8 +1672,8 @@ def ancova(data=None, dv=None, between=None, covar=None, effsize="np2"):
and family income as a covariate.
>>> from pingouin import ancova, read_dataset
- >>> df = read_dataset('ancova')
- >>> ancova(data=df, dv='Scores', covar='Income', between='Method')
+ >>> df = read_dataset("ancova")
+ >>> ancova(data=df, dv="Scores", covar="Income", between="Method")
Source SS DF F p_unc np2
0 Method 571.029883 3 3.336482 0.031940 0.244077
1 Income 1678.352687 1 29.419438 0.000006 0.486920
@@ -1674,8 +1682,7 @@ def ancova(data=None, dv=None, between=None, covar=None, effsize="np2"):
2. Evaluate the reading scores of students with different teaching method
and family income + BMI as a covariate.
- >>> ancova(data=df, dv='Scores', covar=['Income', 'BMI'], between='Method',
- ... effsize="n2")
+ >>> ancova(data=df, dv="Scores", covar=["Income", "BMI"], between="Method", effsize="n2")
Source SS DF F p_unc n2
0 Method 552.284043 3 3.232550 0.036113 0.141802
1 Income 1573.952434 1 27.637304 0.000011 0.404121
diff --git a/src/pingouin/plotting.py b/src/pingouin/plotting.py
index 910d8763..f136d6d1 100644
--- a/src/pingouin/plotting.py
+++ b/src/pingouin/plotting.py
@@ -5,12 +5,12 @@
- Nicolas Legrand
"""
+import matplotlib.pyplot as plt
+import matplotlib.transforms as transforms
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
-import matplotlib.pyplot as plt
-import matplotlib.transforms as transforms
# Set default Seaborn preferences (disabled Pingouin >= 0.3.4)
# See https://github.com/raphaelvallat/pingouin/issues/85
@@ -109,7 +109,7 @@ def plot_blandaltman(
>>> import pingouin as pg
>>> df = pg.read_dataset("blandaltman")
- >>> ax = pg.plot_blandaltman(df['A'], df['B'])
+ >>> ax = pg.plot_blandaltman(df["A"], df["B"])
>>> plt.tight_layout()
"""
# Safety check
@@ -298,7 +298,7 @@ def qqplot(x, dist="norm", sparams=(), confidence=0.95, square=True, ax=None, **
>>> import pingouin as pg
>>> np.random.seed(123)
>>> x = np.random.normal(size=50)
- >>> ax = pg.qqplot(x, dist='norm')
+ >>> ax = pg.qqplot(x, dist="norm")
Two Q-Q plots using two separate axes:
@@ -311,8 +311,8 @@ def qqplot(x, dist="norm", sparams=(), confidence=0.95, square=True, ax=None, **
>>> x = np.random.normal(size=50)
>>> x_exp = np.random.exponential(size=50)
>>> fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))
- >>> ax1 = pg.qqplot(x, dist='norm', ax=ax1, confidence=False)
- >>> ax2 = pg.qqplot(x_exp, dist='expon', ax=ax2)
+ >>> ax1 = pg.qqplot(x, dist="norm", ax=ax1, confidence=False)
+ >>> ax2 = pg.qqplot(x_exp, dist="expon", ax=ax2)
Using custom location / scale parameters as well as another Seaborn style
@@ -325,8 +325,8 @@ def qqplot(x, dist="norm", sparams=(), confidence=0.95, square=True, ax=None, **
>>> np.random.seed(123)
>>> x = np.random.normal(size=50)
>>> mean, std = 0, 0.8
- >>> sns.set_style('darkgrid')
- >>> ax = pg.qqplot(x, dist='norm', sparams=(mean, std))
+ >>> sns.set_style("darkgrid")
+ >>> ax = pg.qqplot(x, dist="norm", sparams=(mean, std))
"""
# Update default kwargs with specified inputs
_scatter_kwargs = {"marker": "o", "color": "blue"}
@@ -482,9 +482,9 @@ def plot_paired(
.. plot::
>>> import pingouin as pg
- >>> df = pg.read_dataset('mixed_anova').query("Time != 'January'")
+ >>> df = pg.read_dataset("mixed_anova").query("Time != 'January'")
>>> df = df.query("Group == 'Meditation' and Subject > 40")
- >>> ax = pg.plot_paired(data=df, dv='Scores', within='Time', subject='Subject')
+ >>> ax = pg.plot_paired(data=df, dv="Scores", within="Time", subject="Subject")
Paired plot on an existing axis (no boxplot and uniform color):
@@ -492,12 +492,18 @@ def plot_paired(
>>> import pingouin as pg
>>> import matplotlib.pyplot as plt
- >>> df = pg.read_dataset('mixed_anova').query("Time != 'January'")
+ >>> df = pg.read_dataset("mixed_anova").query("Time != 'January'")
>>> df = df.query("Group == 'Meditation' and Subject > 40")
>>> fig, ax1 = plt.subplots(1, 1, figsize=(5, 4))
- >>> pg.plot_paired(data=df, dv='Scores', within='Time',
- ... subject='Subject', ax=ax1, boxplot=False,
- ... colors=['grey', 'grey', 'grey']) # doctest: +SKIP
+ >>> pg.plot_paired(
+ ... data=df,
+ ... dv="Scores",
+ ... within="Time",
+ ... subject="Subject",
+ ... ax=ax1,
+ ... boxplot=False,
+ ... colors=["grey", "grey", "grey"],
+ ... ) # doctest: +SKIP
Horizontal paired plot with three unique within-levels:
@@ -505,20 +511,22 @@ def plot_paired(
>>> import pingouin as pg
>>> import matplotlib.pyplot as plt
- >>> df = pg.read_dataset('mixed_anova').query("Group == 'Meditation'")
+ >>> df = pg.read_dataset("mixed_anova").query("Group == 'Meditation'")
>>> # df = df.query("Group == 'Meditation' and Subject > 40")
- >>> pg.plot_paired(data=df, dv='Scores', within='Time',
- ... subject='Subject', orient='h') # doctest: +SKIP
+ >>> pg.plot_paired(
+ ... data=df, dv="Scores", within="Time", subject="Subject", orient="h"
+ ... ) # doctest: +SKIP
With the boxplot on the foreground:
.. plot::
>>> import pingouin as pg
- >>> df = pg.read_dataset('mixed_anova').query("Time != 'January'")
+ >>> df = pg.read_dataset("mixed_anova").query("Time != 'January'")
>>> df = df.query("Group == 'Control'")
- >>> ax = pg.plot_paired(data=df, dv='Scores', within='Time',
- ... subject='Subject', boxplot_in_front=True)
+ >>> ax = pg.plot_paired(
+ ... data=df, dv="Scores", within="Time", subject="Subject", boxplot_in_front=True
+ ... )
"""
from pingouin.utils import _check_dataframe
@@ -741,17 +749,24 @@ def plot_shift(
>>> import pingouin as pg
>>> import matplotlib.pyplot as plt
>>> data = pg.read_dataset("pairwise_corr")
- >>> fig = pg.plot_shift(data["Neuroticism"], data["Conscientiousness"], paired=True,
- ... n_boot=2000, percentiles=[25, 50, 75], show_median=False, seed=456,
- ... violin=False)
+ >>> fig = pg.plot_shift(
+ ... data["Neuroticism"],
+ ... data["Conscientiousness"],
+ ... paired=True,
+ ... n_boot=2000,
+ ... percentiles=[25, 50, 75],
+ ... show_median=False,
+ ... seed=456,
+ ... violin=False,
+ ... )
>>> fig.axes[0].set_xlabel("Groups")
>>> fig.axes[0].set_ylabel("Values", size=15)
>>> fig.axes[0].set_title("Comparing Neuroticism and Conscientiousness", size=15)
>>> fig.axes[1].set_xlabel("Neuroticism quantiles", size=12)
>>> plt.tight_layout()
"""
- from pingouin.regression import _bias_corrected_ci
from pingouin.nonparametric import harrelldavis as hd
+ from pingouin.regression import _bias_corrected_ci
# Safety check
x = np.asarray(x)
@@ -831,7 +846,11 @@ def adjacent_values(vals, q1, q3):
)
if violin:
- vl = plt.violinplot([y, x], showextrema=False, orientation="horizontal", widths=1)
+ import matplotlib as _mpl
+
+ _mpl_ver = tuple(int(v) for v in _mpl.__version__.split(".")[:2])
+ _orient_kw = {"orientation": "horizontal"} if _mpl_ver >= (3, 10) else {"vert": False}
+ vl = plt.violinplot([y, x], showextrema=False, widths=1, **_orient_kw)
# Upper plot
paths = vl["bodies"][0].get_paths()[0]
@@ -963,8 +982,8 @@ def plot_rm_corr(
.. plot::
>>> import pingouin as pg
- >>> df = pg.read_dataset('rm_corr')
- >>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
+ >>> df = pg.read_dataset("rm_corr")
+ >>> g = pg.plot_rm_corr(data=df, x="pH", y="PacO2", subject="Subject")
With some tweakings
@@ -972,12 +991,16 @@ def plot_rm_corr(
>>> import pingouin as pg
>>> import seaborn as sns
- >>> df = pg.read_dataset('rm_corr')
- >>> sns.set_theme(style='darkgrid', font_scale=1.2)
- >>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2',
- ... subject='Subject', legend=True,
- ... kwargs_facetgrid=dict(height=4.5, aspect=1.5,
- ... palette='Spectral'))
+ >>> df = pg.read_dataset("rm_corr")
+ >>> sns.set_theme(style="darkgrid", font_scale=1.2)
+ >>> g = pg.plot_rm_corr(
+ ... data=df,
+ ... x="pH",
+ ... y="PacO2",
+ ... subject="Subject",
+ ... legend=True,
+ ... kwargs_facetgrid=dict(height=4.5, aspect=1.5, palette="Spectral"),
+ ... )
"""
- # Check that stasmodels is installed
+ # Check that statsmodels is installed
from pingouin.utils import _is_statsmodels_installed
@@ -1080,20 +1103,25 @@ def plot_circmean(
>>> import pingouin as pg
>>> import matplotlib.pyplot as plt
>>> _, ax = plt.subplots(1, 1, figsize=(3, 3))
- >>> ax = pg.plot_circmean([0.05, -0.8, 1.2, 0.8, 0.5, -0.3, 0.3, 0.7],
- ... kwargs_markers=dict(color='k', mfc='k'),
- ... kwargs_arrow=dict(ec='k', fc='k'), ax=ax)
+ >>> ax = pg.plot_circmean(
+ ... [0.05, -0.8, 1.2, 0.8, 0.5, -0.3, 0.3, 0.7],
+ ... kwargs_markers=dict(color="k", mfc="k"),
+ ... kwargs_arrow=dict(ec="k", fc="k"),
+ ... ax=ax,
+ ... )
.. plot::
>>> import pingouin as pg
>>> import seaborn as sns
- >>> sns.set_theme(font_scale=1.5, style='white')
- >>> ax = pg.plot_circmean([0.8, 1.5, 3.14, 5.2, 6.1, 2.8, 2.6, 3.2],
- ... kwargs_markers=dict(marker="None"))
+ >>> sns.set_theme(font_scale=1.5, style="white")
+ >>> ax = pg.plot_circmean(
+ ... [0.8, 1.5, 3.14, 5.2, 6.1, 2.8, 2.6, 3.2], kwargs_markers=dict(marker="None")
+ ... )
"""
from matplotlib.patches import Circle
- from .circular import circ_r, circ_mean
+
+ from .circular import circ_mean, circ_r
# Sanity checks
angles = np.asarray(angles)
diff --git a/src/pingouin/power.py b/src/pingouin/power.py
index 2a74a2be..bc4e4fc1 100644
--- a/src/pingouin/power.py
+++ b/src/pingouin/power.py
@@ -1,6 +1,7 @@
# Author: Raphael Vallat
# Date: April 2018
import warnings
+
import numpy as np
from scipy import stats
from scipy.optimize import brenth
@@ -95,31 +96,31 @@ def power_ttest(
1. Compute power of a one-sample T-test given ``d``, ``n`` and ``alpha``
>>> from pingouin import power_ttest
- >>> print('power: %.4f' % power_ttest(d=0.5, n=20, contrast='one-sample'))
+ >>> print("power: %.4f" % power_ttest(d=0.5, n=20, contrast="one-sample"))
power: 0.5645
2. Compute required sample size given ``d``, ``power`` and ``alpha``
- >>> print('n: %.4f' % power_ttest(d=0.5, power=0.80, alternative='greater'))
+ >>> print("n: %.4f" % power_ttest(d=0.5, power=0.80, alternative="greater"))
n: 50.1508
3. Compute achieved ``d`` given ``n``, ``power`` and ``alpha`` level
- >>> print('d: %.4f' % power_ttest(n=20, power=0.80, alpha=0.05, contrast='paired'))
+ >>> print("d: %.4f" % power_ttest(n=20, power=0.80, alpha=0.05, contrast="paired"))
d: 0.6604
4. Compute achieved alpha level given ``d``, ``n`` and ``power``
- >>> print('alpha: %.4f' % power_ttest(d=0.5, n=20, power=0.80, alpha=None))
+ >>> print("alpha: %.4f" % power_ttest(d=0.5, n=20, power=0.80, alpha=None))
alpha: 0.4430
5. One-sided tests
>>> from pingouin import power_ttest
- >>> print('power: %.4f' % power_ttest(d=0.5, n=20, alternative='greater'))
+ >>> print("power: %.4f" % power_ttest(d=0.5, n=20, alternative="greater"))
power: 0.4634
- >>> print('power: %.4f' % power_ttest(d=0.5, n=20, alternative='less'))
+ >>> print("power: %.4f" % power_ttest(d=0.5, n=20, alternative="less"))
power: 0.0007
"""
# Check the number of arguments that are None
@@ -278,17 +279,17 @@ def power_ttest2n(nx, ny, d=None, power=None, alpha=0.05, alternative="two-sided
1. Compute achieved power of a T-test given ``d``, ``n`` and ``alpha``
>>> from pingouin import power_ttest2n
- >>> print('power: %.4f' % power_ttest2n(nx=20, ny=15, d=0.5, alternative='greater'))
+ >>> print("power: %.4f" % power_ttest2n(nx=20, ny=15, d=0.5, alternative="greater"))
power: 0.4164
2. Compute achieved ``d`` given ``n``, ``power`` and ``alpha`` level
- >>> print('d: %.4f' % power_ttest2n(nx=20, ny=15, power=0.80, alpha=0.05))
+ >>> print("d: %.4f" % power_ttest2n(nx=20, ny=15, power=0.80, alpha=0.05))
d: 0.9859
3. Compute achieved alpha level given ``d``, ``n`` and ``power``
- >>> print('alpha: %.4f' % power_ttest2n(nx=20, ny=15, d=0.5, power=0.80, alpha=None))
+ >>> print("alpha: %.4f" % power_ttest2n(nx=20, ny=15, d=0.5, power=0.80, alpha=None))
alpha: 0.5000
"""
# Check the number of arguments that are None
@@ -444,27 +445,27 @@ def power_anova(eta_squared=None, k=None, n=None, power=None, alpha=0.05):
1. Compute achieved power
>>> from pingouin import power_anova
- >>> print('power: %.4f' % power_anova(eta_squared=0.1, k=3, n=20))
+ >>> print("power: %.4f" % power_anova(eta_squared=0.1, k=3, n=20))
power: 0.6082
2. Compute required number of groups
- >>> print('k: %.4f' % power_anova(eta_squared=0.1, n=20, power=0.80))
+ >>> print("k: %.4f" % power_anova(eta_squared=0.1, n=20, power=0.80))
k: 6.0944
3. Compute required sample size
- >>> print('n: %.4f' % power_anova(eta_squared=0.1, k=3, power=0.80))
+ >>> print("n: %.4f" % power_anova(eta_squared=0.1, k=3, power=0.80))
n: 29.9256
4. Compute achieved effect size
- >>> print('eta-squared: %.4f' % power_anova(n=20, k=4, power=0.80, alpha=0.05))
+ >>> print("eta-squared: %.4f" % power_anova(n=20, k=4, power=0.80, alpha=0.05))
eta-squared: 0.1255
5. Compute achieved alpha (significance)
- >>> print('alpha: %.4f' % power_anova(eta_squared=0.1, n=20, k=4, power=0.80, alpha=None))
+ >>> print("alpha: %.4f" % power_anova(eta_squared=0.1, n=20, k=4, power=0.80, alpha=None))
alpha: 0.1085
"""
# Check the number of arguments that are None
@@ -613,27 +614,27 @@ def power_rm_anova(eta_squared=None, m=None, n=None, power=None, alpha=0.05, cor
1. Compute achieved power
>>> from pingouin import power_rm_anova
- >>> print('power: %.4f' % power_rm_anova(eta_squared=0.1, m=3, n=20))
+ >>> print("power: %.4f" % power_rm_anova(eta_squared=0.1, m=3, n=20))
power: 0.8913
2. Compute required number of groups
- >>> print('m: %.4f' % power_rm_anova(eta_squared=0.1, n=20, power=0.90))
+ >>> print("m: %.4f" % power_rm_anova(eta_squared=0.1, n=20, power=0.90))
m: 3.1347
3. Compute required sample size
- >>> print('n: %.4f' % power_rm_anova(eta_squared=0.1, m=3, power=0.80))
+ >>> print("n: %.4f" % power_rm_anova(eta_squared=0.1, m=3, power=0.80))
n: 15.9979
4. Compute achieved effect size
- >>> print('eta-squared: %.4f' % power_rm_anova(n=20, m=4, power=0.80, alpha=0.05))
+ >>> print("eta-squared: %.4f" % power_rm_anova(n=20, m=4, power=0.80, alpha=0.05))
eta-squared: 0.0680
5. Compute achieved alpha (significance)
- >>> print('alpha: %.4f' % power_rm_anova(eta_squared=0.1, n=20, m=4, power=0.80, alpha=None))
+ >>> print("alpha: %.4f" % power_rm_anova(eta_squared=0.1, n=20, m=4, power=0.80, alpha=None))
alpha: 0.0081
Let's take a more concrete example. First, we'll load a repeated measures
@@ -641,7 +642,7 @@ def power_rm_anova(eta_squared=None, m=None, n=None, power=None, alpha=0.05, cor
each column a successive repeated measurements (e.g t=0, t=1, ...).
>>> import pingouin as pg
- >>> data = pg.read_dataset('rm_anova_wide')
+ >>> data = pg.read_dataset("rm_anova_wide")
>>> data.head()
Before 1 week 2 week 3 week
0 4.3 5.3 4.8 6.3
@@ -810,30 +811,30 @@ def power_corr(r=None, n=None, power=None, alpha=0.05, alternative="two-sided"):
1. Compute achieved power given ``r``, ``n`` and ``alpha``
>>> from pingouin import power_corr
- >>> print('power: %.4f' % power_corr(r=0.5, n=20))
+ >>> print("power: %.4f" % power_corr(r=0.5, n=20))
power: 0.6379
2. Same but one-sided test
- >>> print('power: %.4f' % power_corr(r=0.5, n=20, alternative="greater"))
+ >>> print("power: %.4f" % power_corr(r=0.5, n=20, alternative="greater"))
power: 0.7510
- >>> print('power: %.4f' % power_corr(r=0.5, n=20, alternative="less"))
+ >>> print("power: %.4f" % power_corr(r=0.5, n=20, alternative="less"))
power: 0.0000
3. Compute required sample size given ``r``, ``power`` and ``alpha``
- >>> print('n: %.4f' % power_corr(r=0.5, power=0.80))
+ >>> print("n: %.4f" % power_corr(r=0.5, power=0.80))
n: 28.2484
4. Compute achieved ``r`` given ``n``, ``power`` and ``alpha`` level
- >>> print('r: %.4f' % power_corr(n=20, power=0.80, alpha=0.05))
+ >>> print("r: %.4f" % power_corr(n=20, power=0.80, alpha=0.05))
r: 0.5822
5. Compute achieved alpha level given ``r``, ``n`` and ``power``
- >>> print('alpha: %.4f' % power_corr(r=0.5, n=20, power=0.80, alpha=None))
+ >>> print("alpha: %.4f" % power_corr(r=0.5, n=20, power=0.80, alpha=None))
alpha: 0.1377
"""
# Check the number of arguments that are None
@@ -1001,22 +1002,22 @@ def power_chi2(dof, w=None, n=None, power=None, alpha=0.05):
1. Compute achieved power
>>> from pingouin import power_chi2
- >>> print('power: %.4f' % power_chi2(dof=1, w=0.3, n=20))
+ >>> print("power: %.4f" % power_chi2(dof=1, w=0.3, n=20))
power: 0.2687
2. Compute required sample size
- >>> print('n: %.4f' % power_chi2(dof=3, w=0.3, power=0.80))
+ >>> print("n: %.4f" % power_chi2(dof=3, w=0.3, power=0.80))
n: 121.1396
3. Compute achieved effect size
- >>> print('w: %.4f' % power_chi2(dof=2, n=20, power=0.80, alpha=0.05))
+ >>> print("w: %.4f" % power_chi2(dof=2, n=20, power=0.80, alpha=0.05))
w: 0.6941
4. Compute achieved alpha (significance)
- >>> print('alpha: %.4f' % power_chi2(dof=1, w=0.5, n=20, power=0.80, alpha=None))
+ >>> print("alpha: %.4f" % power_chi2(dof=1, w=0.5, n=20, power=0.80, alpha=None))
alpha: 0.1630
"""
assert isinstance(dof, (int, float))
diff --git a/src/pingouin/regression.py b/src/pingouin/regression.py
index 3b536f6b..23deef66 100644
--- a/src/pingouin/regression.py
+++ b/src/pingouin/regression.py
@@ -1,15 +1,16 @@
import itertools
import warnings
+
import numpy as np
import pandas as pd
import pandas_flavor as pf
-from scipy.stats import t, norm
-from scipy.linalg import pinvh, lstsq
+from scipy.linalg import lstsq, pinvh
+from scipy.stats import norm, t
from pingouin.config import options
-from pingouin.utils import remove_na as rm_na
from pingouin.utils import _flatten_list as _fl
from pingouin.utils import _postprocess_dataframe
+from pingouin.utils import remove_na as rm_na
__all__ = ["linear_regression", "logistic_regression", "mediation_analysis"]
@@ -695,13 +696,12 @@ def logistic_regression(
>>> import numpy as np
>>> import pandas as pd
>>> import pingouin as pg
- >>> df = pg.read_dataset('penguins')
+ >>> df = pg.read_dataset("penguins")
>>> # Let's first convert the target variable from string to boolean:
- >>> df['male'] = (df['sex'] == 'male').astype(int) # male: 1, female: 0
+ >>> df["male"] = (df["sex"] == "male").astype(int) # male: 1, female: 0
>>> # Since there are missing values in our outcome variable, we need to
>>> # set `remove_na=True` otherwise regression will fail.
- >>> lom = pg.logistic_regression(df['body_mass_g'], df['male'],
- ... remove_na=True)
+ >>> lom = pg.logistic_regression(df["body_mass_g"], df["male"], remove_na=True)
>>> lom.round(2)
names coef se z pval CI2.5 CI97.5
0 Intercept -5.16 0.71 -7.24 0.0 -6.56 -3.77
@@ -712,9 +712,8 @@ def logistic_regression(
(e.g divide by 1000) in order to get more intuitive coefficients and
confidence intervals:
- >>> df['body_mass_kg'] = df['body_mass_g'] / 1000
- >>> lom = pg.logistic_regression(df['body_mass_kg'], df['male'],
- ... remove_na=True)
+ >>> df["body_mass_kg"] = df["body_mass_g"] / 1000
+ >>> lom = pg.logistic_regression(df["body_mass_kg"], df["male"], remove_na=True)
>>> lom.round(2)
names coef se z pval CI2.5 CI97.5
0 Intercept -5.16 0.71 -7.24 0.0 -6.56 -3.77
@@ -727,9 +726,9 @@ def logistic_regression(
first level of our categorical variable (species = Adelie) which will be
used as the reference level:
- >>> df = pd.get_dummies(df, columns=['species'], dtype=float, drop_first=True)
- >>> X = df[['body_mass_kg', 'species_Chinstrap', 'species_Gentoo']]
- >>> y = df['male']
+ >>> df = pd.get_dummies(df, columns=["species"], dtype=float, drop_first=True)
+ >>> X = df[["body_mass_kg", "species_Chinstrap", "species_Gentoo"]]
+ >>> y = df["male"]
>>> lom = pg.logistic_regression(X, y, remove_na=True)
>>> lom.round(2)
names coef se z pval CI2.5 CI97.5
@@ -740,15 +739,15 @@ def logistic_regression(
3. Using NumPy aray and returning only the coefficients
- >>> pg.logistic_regression(X.to_numpy(), y.to_numpy(), coef_only=True,
- ... remove_na=True)
+ >>> pg.logistic_regression(X.to_numpy(), y.to_numpy(), coef_only=True, remove_na=True)
array([-26.23906892, 7.09826571, -0.13180626, -9.71718529])
4. Passing custom parameters to sklearn
- >>> lom = pg.logistic_regression(X, y, solver='sag', max_iter=10000,
- ... random_state=42, remove_na=True)
- >>> print(lom['coef'].to_numpy())
+ >>> lom = pg.logistic_regression(
+ ... X, y, solver="sag", max_iter=10000, random_state=42, remove_na=True
+ ... )
+ >>> print(lom["coef"].to_numpy())
[-25.98248153 7.02881472 -0.13119779 -9.62247569]
**How to interpret the log-odds coefficients?**
@@ -763,12 +762,32 @@ def logistic_regression(
probability of the student passing the exam?*
>>> # First, let's create the dataframe
- >>> Hours = [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
- ... 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50]
+ >>> Hours = [
+ ... 0.50,
+ ... 0.75,
+ ... 1.00,
+ ... 1.25,
+ ... 1.50,
+ ... 1.75,
+ ... 1.75,
+ ... 2.00,
+ ... 2.25,
+ ... 2.50,
+ ... 2.75,
+ ... 3.00,
+ ... 3.25,
+ ... 3.50,
+ ... 4.00,
+ ... 4.25,
+ ... 4.50,
+ ... 4.75,
+ ... 5.00,
+ ... 5.50,
+ ... ]
>>> Pass = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]
- >>> df = pd.DataFrame({'HoursStudy': Hours, 'PassExam': Pass})
+ >>> df = pd.DataFrame({"HoursStudy": Hours, "PassExam": Pass})
>>> # And then run the logistic regression
- >>> lr = pg.logistic_regression(df['HoursStudy'], df['PassExam']).round(3)
+ >>> lr = pg.logistic_regression(df["HoursStudy"], df["PassExam"]).round(3)
>>> lr
names coef se z pval CI2.5 CI97.5
0 Intercept -4.078 1.761 -2.316 0.021 -7.529 -0.626
@@ -1147,9 +1166,8 @@ def mediation_analysis(
1. Simple mediation analysis
>>> from pingouin import mediation_analysis, read_dataset
- >>> df = read_dataset('mediation')
- >>> mediation_analysis(data=df, x='X', m='M', y='Y', alpha=0.05,
- ... seed=42)
+ >>> df = read_dataset("mediation")
+ >>> mediation_analysis(data=df, x="X", m="M", y="Y", alpha=0.05, seed=42)
path coef se pval CI2.5 CI97.5 sig
0 M ~ X 0.561015 0.094480 4.391362e-08 0.373522 0.748509 Yes
1 Y ~ M 0.654173 0.085831 1.612674e-11 0.483844 0.824501 Yes
@@ -1159,14 +1177,13 @@ def mediation_analysis(
2. Return the indirect bootstrapped beta coefficients
- >>> stats, dist = mediation_analysis(data=df, x='X', m='M', y='Y',
- ... return_dist=True)
+ >>> stats, dist = mediation_analysis(data=df, x="X", m="M", y="Y", return_dist=True)
>>> print(dist.shape)
(500,)
3. Mediation analysis with a binary mediator variable
- >>> mediation_analysis(data=df, x='X', m='Mbin', y='Y', seed=42).round(3)
+ >>> mediation_analysis(data=df, x="X", m="Mbin", y="Y", seed=42).round(3)
path coef se pval CI2.5 CI97.5 sig
0 Mbin ~ X -0.021 0.116 0.857 -0.248 0.206 No
1 Y ~ Mbin -0.135 0.412 0.743 -0.952 0.682 No
@@ -1176,8 +1193,7 @@ def mediation_analysis(
4. Mediation analysis with covariates
- >>> mediation_analysis(data=df, x='X', m='M', y='Y',
- ... covar=['Mbin', 'Ybin'], seed=42).round(3)
+ >>> mediation_analysis(data=df, x="X", m="M", y="Y", covar=["Mbin", "Ybin"], seed=42).round(3)
path coef se pval CI2.5 CI97.5 sig
0 M ~ X 0.559 0.097 0.000 0.367 0.752 Yes
1 Y ~ M 0.666 0.086 0.000 0.495 0.837 Yes
@@ -1187,8 +1203,7 @@ def mediation_analysis(
5. Mediation analysis with multiple parallel mediators
- >>> mediation_analysis(data=df, x='X', m=['M', 'Mbin'], y='Y',
- ... seed=42).round(3)
+ >>> mediation_analysis(data=df, x="X", m=["M", "Mbin"], y="Y", seed=42).round(3)
path coef se pval CI2.5 CI97.5 sig
0 M ~ X 0.561 0.094 0.000 0.374 0.749 Yes
1 Mbin ~ X -0.005 0.029 0.859 -0.063 0.052 No
diff --git a/src/pingouin/reliability.py b/src/pingouin/reliability.py
index 33614e88..fc93ffc3 100644
--- a/src/pingouin/reliability.py
+++ b/src/pingouin/reliability.py
@@ -1,10 +1,10 @@
import numpy as np
import pandas as pd
from scipy.stats import f
+
from pingouin.config import options
from pingouin.utils import _postprocess_dataframe
-
__all__ = ["cronbach_alpha", "intraclass_corr"]
@@ -98,7 +98,7 @@ def cronbach_alpha(
Binary wide-format dataframe (with missing values)
>>> import pingouin as pg
- >>> data = pg.read_dataset('cronbach_wide_missing')
+ >>> data = pg.read_dataset("cronbach_wide_missing")
>>> # In R: psych:alpha(data, use="pairwise")
>>> pg.cronbach_alpha(data=data)
(0.732660835214447, array([0.435, 0.909]))
@@ -106,7 +106,7 @@ def cronbach_alpha(
After listwise deletion of missing values (remove the entire rows)
>>> # In R: psych:alpha(data, use="complete.obs")
- >>> pg.cronbach_alpha(data=data, nan_policy='listwise')
+ >>> pg.cronbach_alpha(data=data, nan_policy="listwise")
(0.8016949152542373, array([0.581, 0.933]))
After imputing the missing values with the median of each column
@@ -116,9 +116,8 @@ def cronbach_alpha(
Likert-type long-format dataframe
- >>> data = pg.read_dataset('cronbach_alpha')
- >>> pg.cronbach_alpha(data=data, items='Items', scores='Scores',
- ... subject='Subj')
+ >>> data = pg.read_dataset("cronbach_alpha")
+ >>> pg.cronbach_alpha(data=data, items="Items", scores="Scores", subject="Subj")
(0.5917188485995826, array([0.195, 0.84 ]))
"""
# Safety check
@@ -243,9 +242,10 @@ def intraclass_corr(data=None, targets=None, raters=None, ratings=None, nan_poli
ICCs of wine quality assessed by 4 judges.
>>> import pingouin as pg
- >>> data = pg.read_dataset('icc')
- >>> icc = pg.intraclass_corr(data=data, targets='Wine', raters='Judge',
- ... ratings='Scores').round(3)
+ >>> data = pg.read_dataset("icc")
+ >>> icc = pg.intraclass_corr(data=data, targets="Wine", raters="Judge", ratings="Scores").round(
+ ... 3
+ ... )
>>> icc.set_index("Type")
Description ICC F df1 df2 pval CI95
Type
diff --git a/src/pingouin/utils.py b/src/pingouin/utils.py
index 95bd80d3..632f521c 100644
--- a/src/pingouin/utils.py
+++ b/src/pingouin/utils.py
@@ -1,11 +1,13 @@
"""Helper functions."""
+import collections.abc
+import itertools as it
import numbers
+
import numpy as np
import pandas as pd
-import itertools as it
-import collections.abc
from tabulate import tabulate
+
from .config import options
__all__ = [
@@ -281,15 +283,15 @@ def _flatten_list(x, include_tuple=False):
Examples
--------
>>> from pingouin.utils import _flatten_list
- >>> x = ['X1', ['M1', 'M2'], 'Y1', ['Y2']]
+ >>> x = ["X1", ["M1", "M2"], "Y1", ["Y2"]]
>>> _flatten_list(x)
['X1', 'M1', 'M2', 'Y1', 'Y2']
- >>> x = ['Xaa', 'Xbb', 'Xcc']
+ >>> x = ["Xaa", "Xbb", "Xcc"]
>>> _flatten_list(x)
['Xaa', 'Xbb', 'Xcc']
- >>> x = ['Xaa', ('Xbb', 'Xcc'), (1, 2), (1)]
+ >>> x = ["Xaa", ("Xbb", "Xcc"), (1, 2), (1)]
>>> _flatten_list(x)
['Xaa', ('Xbb', 'Xcc'), (1, 2), 1]
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..290cc21f
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,3 @@
+import matplotlib
+
+matplotlib.use("Agg")
diff --git a/tests/test_bayesian.py b/tests/test_bayesian.py
index 030b8ba4..5e6760eb 100644
--- a/tests/test_bayesian.py
+++ b/tests/test_bayesian.py
@@ -1,11 +1,12 @@
-import numpy as np
from unittest import TestCase
-from scipy.stats import pearsonr
-from pingouin.parametric import ttest
-from pingouin.bayesian import bayesfactor_ttest, bayesfactor_binom
-from pingouin.bayesian import bayesfactor_pearson as bfp
+import numpy as np
from pytest import approx
+from scipy.stats import pearsonr
+
+from pingouin.bayesian import bayesfactor_binom, bayesfactor_ttest
+from pingouin.bayesian import bayesfactor_pearson as bfp
+from pingouin.parametric import ttest
np.random.seed(1234)
x = np.random.normal(size=100)
diff --git a/tests/test_circular.py b/tests/test_circular.py
index daa437a2..8dc061a0 100644
--- a/tests/test_circular.py
+++ b/tests/test_circular.py
@@ -1,10 +1,12 @@
-import pytest
-import numpy as np
from unittest import TestCase
+
+import numpy as np
+import pytest
from scipy.stats import circmean
+
from pingouin import read_dataset
-from pingouin.circular import convert_angles, _checkangles
from pingouin.circular import (
+ _checkangles,
circ_axial,
circ_corrcc,
circ_corrcl,
@@ -12,6 +14,7 @@
circ_r,
circ_rayleigh,
circ_vtest,
+ convert_angles,
)
np.random.seed(123)
diff --git a/tests/test_config.py b/tests/test_config.py
index a585ce33..c833e254 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,5 +1,6 @@
-import pingouin
from unittest import TestCase
+
+import pingouin
from pingouin.config import set_default_options
expected_default_options = pingouin.options.copy()
diff --git a/tests/test_contingency.py b/tests/test_contingency.py
index 09fde40e..85c95c3f 100644
--- a/tests/test_contingency.py
+++ b/tests/test_contingency.py
@@ -1,10 +1,12 @@
-import pytest
+from unittest import TestCase
+
import numpy as np
import pandas as pd
-import pingouin as pg
-from unittest import TestCase
+import pytest
from scipy.stats import chi2_contingency
+import pingouin as pg
+
df_ind = pg.read_dataset("chi2_independence")
df_mcnemar = pg.read_dataset("chi2_mcnemar")
diff --git a/tests/test_correlation.py b/tests/test_correlation.py
index 838ff5af..59080009 100644
--- a/tests/test_correlation.py
+++ b/tests/test_correlation.py
@@ -1,8 +1,10 @@
-import pytest
-import numpy as np
from unittest import TestCase
-from pingouin.correlation import corr, rm_corr, partial_corr, skipped, distance_corr, bicor
+
+import numpy as np
+import pytest
+
from pingouin import read_dataset
+from pingouin.correlation import bicor, corr, distance_corr, partial_corr, rm_corr, skipped
class TestCorrelation(TestCase):
diff --git a/tests/test_distribution.py b/tests/test_distribution.py
index 6706f579..58287ed8 100644
--- a/tests/test_distribution.py
+++ b/tests/test_distribution.py
@@ -1,16 +1,18 @@
-import pytest
+from unittest import TestCase
+
import numpy as np
import pandas as pd
-from unittest import TestCase
+import pytest
+
+from pingouin import read_dataset
from pingouin.distribution import (
- gzscore,
- normality,
anderson,
epsilon,
+ gzscore,
homoscedasticity,
+ normality,
sphericity,
)
-from pingouin import read_dataset
# Generate random dataframe
df = read_dataset("mixed_anova.csv")
diff --git a/tests/test_effsize.py b/tests/test_effsize.py
index 38742b30..43bef5d0 100644
--- a/tests/test_effsize.py
+++ b/tests/test_effsize.py
@@ -1,10 +1,11 @@
-import pytest
+from unittest import TestCase
+
import numpy as np
import pandas as pd
-from unittest import TestCase
+import pytest
from scipy.stats import pearsonr, pointbiserialr
-from pingouin.effsize import compute_esci, compute_effsize, compute_effsize_from_t, compute_bootci
+from pingouin.effsize import compute_bootci, compute_effsize, compute_effsize_from_t, compute_esci
from pingouin.effsize import convert_effsize as cef
# Dataset
diff --git a/tests/test_equivalence.py b/tests/test_equivalence.py
index 6272ccb5..59ddcbbe 100644
--- a/tests/test_equivalence.py
+++ b/tests/test_equivalence.py
@@ -1,7 +1,9 @@
# Author: Antoine Weill--Duflos
# Date July 2019
-import numpy as np
from unittest import TestCase
+
+import numpy as np
+
from pingouin.equivalence import tost
diff --git a/tests/test_multicomp.py b/tests/test_multicomp.py
index 3efa1018..db563228 100644
--- a/tests/test_multicomp.py
+++ b/tests/test_multicomp.py
@@ -1,8 +1,10 @@
-import pytest
+from unittest import TestCase
+
import numpy as np
+import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal
-from unittest import TestCase
-from pingouin.multicomp import fdr, bonf, holm, sidak, multicomp
+
+from pingouin.multicomp import bonf, fdr, holm, multicomp, sidak
pvals = [0.52, 0.12, 0.0001, 0.03, 0.14]
pvals2 = [0.52, 0.12, 0.10, 0.30, 0.14]
diff --git a/tests/test_multivariate.py b/tests/test_multivariate.py
index aaa5ac73..f6ec0c42 100644
--- a/tests/test_multivariate.py
+++ b/tests/test_multivariate.py
@@ -1,9 +1,11 @@
+from unittest import TestCase
+
import numpy as np
import pandas as pd
from sklearn import datasets
-from unittest import TestCase
+
from pingouin import read_dataset
-from pingouin.multivariate import multivariate_normality, multivariate_ttest, box_m
+from pingouin.multivariate import box_m, multivariate_normality, multivariate_ttest
data = read_dataset("multivariate")
dvs = ["Fever", "Pressure", "Aches"]
diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py
index de4da3cc..6e1d2ba1 100644
--- a/tests/test_nonparametric.py
+++ b/tests/test_nonparametric.py
@@ -1,17 +1,19 @@
-import pytest
-import scipy
+from unittest import TestCase
+
import numpy as np
import pandas as pd
-from unittest import TestCase
+import pytest
+import scipy
+
from pingouin.nonparametric import (
+ cochran,
+ friedman,
+ harrelldavis,
+ kruskal,
mad,
madmedianrule,
mwu,
wilcoxon,
- kruskal,
- friedman,
- cochran,
- harrelldavis,
)
np.random.seed(1234)
diff --git a/tests/test_pairwise.py b/tests/test_pairwise.py
index c795f319..8c89196b 100644
--- a/tests/test_pairwise.py
+++ b/tests/test_pairwise.py
@@ -1,14 +1,16 @@
-import pytest
+from unittest import TestCase
+
import numpy as np
import pandas as pd
-from unittest import TestCase
+import pytest
+
from pingouin import read_dataset
from pingouin.pairwise import (
- pairwise_ttests,
- pairwise_tests,
pairwise_corr,
- pairwise_tukey,
pairwise_gameshowell,
+ pairwise_tests,
+ pairwise_ttests,
+ pairwise_tukey,
)
@@ -485,6 +487,7 @@ def test_pairwise_tests(self):
def test_ptests(self):
"""Test function ptests."""
from itertools import combinations
+
from scipy.stats import ttest_ind, ttest_rel
# Load BFI dataset
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
index fce8b5aa..e1141446 100644
--- a/tests/test_pandas.py
+++ b/tests/test_pandas.py
@@ -5,9 +5,11 @@
- Raphael Vallat
"""
+from unittest import TestCase
+
import numpy as np
+
import pingouin as pg
-from unittest import TestCase
df = pg.read_dataset("mixed_anova")
df_aov3 = pg.read_dataset("anova3_unbalanced")
diff --git a/tests/test_parametric.py b/tests/test_parametric.py
index 41baa884..96ddb43f 100644
--- a/tests/test_parametric.py
+++ b/tests/test_parametric.py
@@ -1,11 +1,11 @@
-import pytest
-import numpy as np
from unittest import TestCase
+
+import numpy as np
+import pytest
from numpy.testing import assert_array_equal as array_equal
from pingouin import read_dataset
-from pingouin.parametric import ttest, anova, rm_anova, mixed_anova, ancova, welch_anova
-
+from pingouin.parametric import ancova, anova, mixed_anova, rm_anova, ttest, welch_anova
# Generate random data for ANOVA
df = read_dataset("mixed_anova.csv")
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index 064f6830..7a3bc53c 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -1,19 +1,21 @@
-import pytest
+from unittest import TestCase
+
import matplotlib
+import matplotlib.pyplot as plt
import numpy as np
-from scipy import stats
+import pytest
import seaborn as sns
-import matplotlib.pyplot as plt
-from unittest import TestCase
+from scipy import stats
+
from pingouin import read_dataset
from pingouin.plotting import (
- plot_blandaltman,
_ppoints,
- qqplot,
+ plot_blandaltman,
+ plot_circmean,
plot_paired,
- plot_shift,
plot_rm_corr,
- plot_circmean,
+ plot_shift,
+ qqplot,
)
# Disable open figure warning
diff --git a/tests/test_power.py b/tests/test_power.py
index f16fbe31..d4bdea0b 100644
--- a/tests/test_power.py
+++ b/tests/test_power.py
@@ -1,13 +1,15 @@
-import pytest
-import numpy as np
from unittest import TestCase
+
+import numpy as np
+import pytest
+
from pingouin.power import (
- power_ttest,
- power_ttest2n,
power_anova,
- power_rm_anova,
- power_corr,
power_chi2,
+ power_corr,
+ power_rm_anova,
+ power_ttest,
+ power_ttest2n,
)
diff --git a/tests/test_regression.py b/tests/test_regression.py
index 56f853d8..2c1cbd1d 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -1,21 +1,20 @@
-import pytest
-import numpy as np
-import pandas as pd
from unittest import TestCase
-from scipy.stats import linregress, zscore
-from sklearn.linear_model import LinearRegression
+import numpy as np
+import pandas as pd
+import pytest
import statsmodels.api as sm
-
-from pandas.testing import assert_frame_equal
from numpy.testing import assert_almost_equal, assert_equal
+from pandas.testing import assert_frame_equal
+from scipy.stats import linregress, zscore
+from sklearn.linear_model import LinearRegression
from pingouin import read_dataset
from pingouin.regression import (
+ _pval_from_bootci,
linear_regression,
logistic_regression,
mediation_analysis,
- _pval_from_bootci,
)
# 1st dataset: mediation
@@ -263,7 +262,7 @@ def test_logistic_regression(self):
# summary(glm(Ybin ~ X, data=df, family=binomial))
assert_equal(np.round(lom["coef"], 3), [1.319, -0.199])
assert_equal(np.round(lom["se"], 3), [0.758, 0.121])
- assert_equal(np.round(lom["z"], 3), [1.74, -1.647])
+ assert_almost_equal(lom["z"], [1.74, -1.647], decimal=2)
assert_equal(np.round(lom["pval"], 3), [0.082, 0.099])
assert_equal(np.round(lom["CI2.5"], 3), [-0.167, -0.437])
assert_equal(np.round(lom["CI97.5"], 3), [2.805, 0.038])
@@ -276,7 +275,7 @@ def test_logistic_regression(self):
# summary(glm(Ybin ~ X+M, data=df, family=binomial))
assert_equal(lom["coef"].to_numpy(), [1.327, -0.196, -0.006])
assert_equal(lom["se"].to_numpy(), [0.778, 0.141, 0.125])
- assert_equal(lom["z"].to_numpy(), [1.705, -1.392, -0.048])
+ assert_almost_equal(lom["z"], [1.705, -1.392, -0.048], decimal=2)
assert_equal(lom["pval"].to_numpy(), [0.088, 0.164, 0.962])
assert_equal(lom["CI2.5"].to_numpy(), [-0.198, -0.472, -0.252])
assert_equal(lom["CI97.5"].to_numpy(), [2.853, 0.08, 0.24])
diff --git a/tests/test_reliability.py b/tests/test_reliability.py
index 291ef02b..51c0b68e 100644
--- a/tests/test_reliability.py
+++ b/tests/test_reliability.py
@@ -1,9 +1,11 @@
-import pytest
+from unittest import TestCase
+
import numpy as np
import pandas as pd
-from unittest import TestCase
-from pingouin.reliability import cronbach_alpha, intraclass_corr
+import pytest
+
from pingouin import read_dataset
+from pingouin.reliability import cronbach_alpha, intraclass_corr
class TestReliability(TestCase):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index afa44829..4072841c 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,23 +1,23 @@
-import pandas as pd
+from unittest import TestCase
+
import numpy as np
+import pandas as pd
import pytest
import pingouin
-
-from unittest import TestCase
from pingouin.utils import (
- print_table,
- _postprocess_dataframe,
- _get_round_setting_for,
- _perm_pval,
- _check_eftype,
_check_dataframe,
- remove_na,
+ _check_eftype,
_flatten_list,
+ _get_round_setting_for,
+ _is_mpmath_installed,
_is_sklearn_installed,
_is_sklearn_version_compatible,
_is_statsmodels_installed,
- _is_mpmath_installed,
+ _perm_pval,
+ _postprocess_dataframe,
+ print_table,
+ remove_na,
)
# Dataset