diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 00000000..9b67ba73 --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,38 @@ +name: Coverage +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + coverage: + runs-on: ubuntu-latest + env: + FORCE_COLOR: true + UV_SYSTEM_PYTHON: 1 + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version-file: "pyproject.toml" + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + + - name: Install dependencies + run: | + uv pip install --group=test . "scikit-learn<1.8.0" + + - name: Run tests with coverage + run: | + pytest --cov --cov-report=xml --verbose + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ./coverage.xml + fail_ci_if_error: true diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml new file mode 100644 index 00000000..3a1e33e1 --- /dev/null +++ b/.github/workflows/doc.yml @@ -0,0 +1,38 @@ +name: Documentation +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + docs: + runs-on: ubuntu-latest + env: + FORCE_COLOR: true + UV_SYSTEM_PYTHON: 1 + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version-file: "pyproject.toml" + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + + - name: Install dependencies + run: | + uv pip install --group=docs --editable . 
+ + - name: Build documentation + run: | + make -C docs clean + make -C docs html + + - name: Upload documentation artifacts + uses: actions/upload-artifact@v6 + with: + name: docs-artifact + path: docs/build/html diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 00000000..1e756904 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,119 @@ +name: PyTest + +on: + push: + branches: [main] + pull_request: + branches: [main] + +env: + FORCE_COLOR: 1 + UV_SYSTEM_PYTHON: 1 + +jobs: + test-core: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.10", "3.12", "3.14"] + runs-on: ${{ matrix.platform }} + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + + - name: Install dependencies + run: | + uv pip install --group=test . 
"scikit-learn<1.8.0" + + - name: Test with pytest + run: uv run pytest --verbose + + # Test against different dependency versions + test-dependency-combinations: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest, windows-latest] + deps: + # Minimal supported versions and increasing gradually + - python-version: "3.10" + scipy: "scipy==1.8.0" + numpy: "numpy==1.22.4" + pandas: "pandas==2.1.1" + statsmodels: "statsmodels==0.14.1" + scikit-learn: "scikit-learn==1.2.2" + label: "really-old-versions" + + - python-version: "3.11" + scipy: "scipy==1.9.3" + numpy: "numpy==1.25.2" + pandas: "pandas==2.2.0" + statsmodels: "statsmodels==0.14.3" + scikit-learn: "scikit-learn==1.4.2" + label: "old-versions" + + - python-version: "3.12" + scipy: "scipy==1.14.1" + numpy: "numpy==2.1.3" + pandas: "pandas==2.3.3" + statsmodels: "statsmodels==0.14.4" + scikit-learn: "scikit-learn==1.5.2" + label: "mid-versions" + + - python-version: "3.13" + scipy: "scipy==1.17.0" + numpy: "numpy==2.4.0" + pandas: "pandas==3.0.0" + statsmodels: "statsmodels==0.14.6" + scikit-learn: "scikit-learn==1.7.2" + label: "recent-versions" + + - python-version: "3.14" + scipy: "scipy" + numpy: "numpy" + pandas: "pandas" + statsmodels: "statsmodels" + scikit-learn: "scikit-learn==1.7.2" + label: "latest-versions" + + runs-on: ${{ matrix.platform }} + steps: + - uses: actions/checkout@v6 + + - name: Set up Python ${{ matrix.deps.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.deps.python-version }} + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + + - name: Install numpy (${{ matrix.deps.label }}) + run: uv pip install "${{ matrix.deps.numpy }}" + + - name: Install dependencies (${{ matrix.deps.label }}) + run: >- + uv pip install --group=test . 
+ "${{ matrix.deps.scipy }}" + "${{ matrix.deps.pandas }}" + "${{ matrix.deps.statsmodels }}" + "${{ matrix.deps.scikit-learn }}" + + - name: Display installed versions + run: | + python -c "import scipy; print(f'scipy: {scipy.__version__}')" + python -c "import numpy; print(f'numpy: {numpy.__version__}')" + python -c "import pandas; print(f'pandas: {pandas.__version__}')" + python -c "import statsmodels; print(f'statsmodels: {statsmodels.__version__}')" + python -c "import sklearn; print(f'scikit-learn: {sklearn.__version__}')" + + - name: Test with pytest + run: pytest --verbose diff --git a/.github/workflows/pytest_prerelease.yml b/.github/workflows/pytest_prerelease.yml new file mode 100644 index 00000000..b1093f6a --- /dev/null +++ b/.github/workflows/pytest_prerelease.yml @@ -0,0 +1,72 @@ +name: PyTest Pre-Release + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + - cron: '0 3 * * SUN' + workflow_dispatch: + +env: + FORCE_COLOR: 1 + UV_SYSTEM_PYTHON: 1 + +jobs: + test-prerelease: + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest, windows-latest] + python-version: ["3.10", "3.12", "3.14"] + runs-on: ${{ matrix.platform }} + continue-on-error: true + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + + - name: Install dependencies + run: | + uv pip install --group=test . 
+ + - name: Install scipy prerelease + run: | + uv pip uninstall scipy + uv pip install -U --pre scipy + + - name: Install numpy prerelease + run: | + uv pip uninstall numpy + uv pip install -U --pre numpy + + - name: Install pandas prerelease + run: | + uv pip uninstall pandas + uv pip install -U --pre pandas + + - name: Install statsmodels prerelease + run: | + uv pip uninstall statsmodels + uv pip install -U --pre statsmodels + + - name: Install scikit-learn prerelease + run: | + uv pip uninstall scikit-learn + uv pip install -U --pre scikit-learn + + - name: Install seaborn prerelease + run: | + uv pip uninstall seaborn + uv pip install -U --pre seaborn + + - name: Test with pytest + run: uv run pytest --verbose \ No newline at end of file diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml deleted file mode 100644 index eba3c3ec..00000000 --- a/.github/workflows/python_tests.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: Python tests - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - build: - strategy: - fail-fast: false - matrix: - platform: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] - - runs-on: ${{ matrix.platform }} - - env: - FORCE_COLOR: true - - steps: - - uses: actions/checkout@v6 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install .[test] "scikit-learn<1.8.0" - - - name: Test with pytest - run: | - pytest --cov --cov-report=xml --verbose - - - name: Build docs - if: ${{ matrix.platform == 'ubuntu-latest' && matrix.python-version == 3.10 }} - run: | - pip install .[docs] - make -C docs clean - make -C docs html - - - name: Upload doc build artifacts - if: ${{ matrix.platform == 'ubuntu-latest' && matrix.python-version == 3.10 }} - uses: 
actions/upload-artifact@v4 - with: - name: docs-artifact - path: docs/build/html - - - name: Upload coverage report - if: ${{ matrix.platform == 'ubuntu-latest' && matrix.python-version == 3.10 }} - uses: codecov/codecov-action@v4 - with: - token: c6ed6ca6-a040-4f23-9ebf-8c474c998097 - file: ./coverage.xml diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 7823a768..bb28efa5 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -4,10 +4,9 @@ jobs: ruff: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 + - uses: astral-sh/ruff-action@v3 - name: "Linting" - uses: astral-sh/ruff-action@v3 + run: ruff check - name: "Formatting" - uses: astral-sh/ruff-action@v3 - with: - args: "format --check" + run: ruff format --check diff --git a/README.rst b/README.rst index 5a246f4f..375bd9b8 100644 --- a/README.rst +++ b/README.rst @@ -76,22 +76,23 @@ Installation Dependencies ------------ -The main dependencies of Pingouin are : +The main dependencies of Pingouin are: -* `NumPy `_ -* `SciPy `_ -* `Pandas `_ +* `NumPy `_ >= 1.22.4 +* `SciPy `_ >= 1.8.0 +* `Pandas `_ >= 2.1.1 * `Pandas-flavor `_ -* `Statsmodels `_ +* `Statsmodels `_ >= 0.14.1 * `Matplotlib `_ * `Seaborn `_ +* `Scikit-learn `_ >= 1.2.2 +* `Tabulate `_ -In addition, some functions require : +Some functions additionally require: -* `Scikit-learn `_ * `Mpmath `_ -Pingouin is a Python 3 package and is currently tested for Python 3.8-3.11. +Pingouin is a Python 3 package and is currently tested for Python 3.10+. User installation ----------------- @@ -122,10 +123,18 @@ To build and install from source, clone this repository or download the source a .. code-block:: shell cd pingouin - python -m build # optional, build a wheel and sdist - pip install . # install the package - pip install --editable . 
# or editable install - pytest # test the package + + # optional, build a wheel and sdist + python -m build + + # install the package + pip install . + + # or editable install with dev dependencies + pip install --group test --group docs --editable . + + # test the package + pytest Quick start ============ diff --git a/docs/conf.py b/docs/conf.py index 7868ed58..2b9bfca9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,11 +7,20 @@ # -- Path setup -------------------------------------------------------------- +import inspect import os import sys import time +from pathlib import Path + import pingouin +# Configure for source links +GITHUB_USER = "raphaelvallat" +GITHUB_REPO = "pingouin" +GITHUB_BRANCH = "main" +REPO_ROOT = Path(__file__).resolve().parents[1] + sys.path.insert(0, os.path.abspath("sphinxext")) @@ -36,7 +45,7 @@ extensions = [ "sphinx.ext.mathjax", "sphinx.ext.doctest", - "sphinx.ext.viewcode", + "sphinx.ext.linkcode", "sphinx.ext.githubpages", "sphinx.ext.autosummary", "sphinx.ext.autodoc", @@ -108,7 +117,7 @@ "icon": "fa-brands fa-github", }, ], - "use_edit_page_button": True, + "use_edit_page_button": False, "pygments_light_style": "vs", "pygments_dark_style": "monokai", } @@ -122,12 +131,56 @@ "index": [], } -html_context = { - "github_user": "raphaelvallat", - "github_repo": "pingouin", - "github_version": "main", - "doc_path": "docs", -} +# -- Linkcode ------------------------------------------------ + + +def linkcode_resolve(domain, info): + """ + Resolve source code links to GitHub for Python objects. + + Returns a GitHub URL including line number references when available. 
+ Returns None when the source cannot be located inside the repository. + """ + if domain != "py": + return None + + module_name = info.get("module") + full_name = info.get("fullname") + + if not module_name or not full_name: + return None + + module = sys.modules.get(module_name) + if module is None: + return None + + # Resolve the object + obj = module + for part in full_name.split("."): + try: + obj = inspect.getattr_static(obj, part) + except AttributeError: + return None + + # Unwrap decorators (important for @wraps, dataclasses, etc.) + obj = inspect.unwrap(obj) + try: + source_file = inspect.getsourcefile(obj) or inspect.getfile(obj) + source_lines, start_line = inspect.getsourcelines(obj) + source_path = Path(source_file).resolve() + relative_path = source_path.relative_to(REPO_ROOT) + except (TypeError, OSError, ValueError): + # No retrievable source (data, properties, C extensions) or a file + # outside the repository: omit the link rather than fail the build. + return None + + end_line = start_line + len(source_lines) - 1 + + return ( + f"https://github.com/{GITHUB_USER}/{GITHUB_REPO}" + f"/blob/{GITHUB_BRANCH}/{relative_path.as_posix()}" + f"#L{start_line}-L{end_line}" + ) # -- Intersphinx ------------------------------------------------ diff --git a/docs/contributing.rst b/docs/contributing.rst index 83081e4c..1b15772e 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -12,11 +12,13 @@ Code guidelines *Before starting new code*, we highly recommend opening an issue on `GitHub `_ to discuss potential changes. -* Please use standard `pep8 `_ and `flake8 `_ Python style guidelines. Pingouin uses `ruff `_ for code formatting. Before submitting a PR, please make sure to run the following command in the root folder of Pingouin: +* Please follow `PEP 8 `_ Python style guidelines. Pingouin uses `Ruff `_ for linting and formatting. Before submitting a PR, please run the following commands from the root folder of Pingouin to sort imports and format code: .. code-block:: bash - $ ruff format --line-length=100 + $ ruff check --select I --fix + + $ ruff format * Use `NumPy style `_ for docstrings. Follow existing examples for simplest guidance. @@ -30,28 +32,56 @@ Code guidelines .. 
code-block:: bash - $ pytest --doctest-modules + $ pytest --verbose + +Setting up a development environment +------------------------------------- + +Pingouin uses `uv `_ for fast dependency management. To set up a local development environment, first clone the repository and then install the package in editable mode with the test dependencies: + +.. code-block:: bash + + $ git clone https://github.com/raphaelvallat/pingouin.git + $ cd pingouin + $ uv pip install --group=test --editable . + +To also install the development tools (Ruff), add the ``dev`` group: + +.. code-block:: bash + + $ uv pip install --group=dev --group=test --editable . + +Continuous Integration +----------------------- + +Pingouin uses `GitHub Actions `_ for continuous integration. The following workflows run automatically on every push and pull request to the ``main`` branch: + +* **PyTest** — runs the test suite on Ubuntu, macOS and Windows across Python 3.10, 3.12 and 3.14, as well as against a range of historical dependency versions (from minimum supported to latest). +* **Coverage** — measures test coverage and uploads the report to `Codecov `_. +* **Ruff** — checks code style and formatting. +* **Documentation** — builds the Sphinx documentation and uploads the result as a downloadable artifact. + +A separate **PyTest (pre-release)** workflow runs weekly against pre-release versions of all major dependencies to catch compatibility issues early. Checking and building documentation ------------------------------------ +------------------------------------ -Pingouin's documentation (including docstring in code) uses ReStructuredText format, +Pingouin's documentation (including docstrings in code) uses ReStructuredText format, see `Sphinx documentation `_ to learn more about editing them. The code follows the `NumPy docstring standard `_. - All changes to the codebase must be properly documented. 
To ensure that documentation is rendered correctly, the best bet is to follow the existing examples for function docstrings. Build locally ^^^^^^^^^^^^^ -If you want to test the documentation locally, you will need to install additional dependencies. They can be installed with the docs extra: +If you want to test the documentation locally, install the package with the ``docs`` dependency group: .. code-block:: bash - $ pip install --upgrade pingouin[docs] + $ uv pip install --group=docs --editable . -and then within the ``pingouin/docs`` directory do: +Then, within the ``pingouin/docs`` directory, run: .. code-block:: bash @@ -68,28 +98,13 @@ and then come back after executing the ``html`` recipe. Inspect on GitHub ^^^^^^^^^^^^^^^^^ -Thanks to the `GitHub Actions `_ continuous integration service, -the documentation is also built on GitHub servers after every commit you make as part of a Pull Request. -To inspect these build artifacts, follow these steps: +The documentation is also built automatically on GitHub after every commit you make as part of a Pull Request. +To inspect the rendered documentation, follow these steps: * Click on the "Show all checks" dropdown menu at the end of the Pull Request user interface - -.. figure:: /pictures/github_checks.png - :align: center - :alt: GitHub checks dropdown menu - - Screenshot of the GitHub checks dropdown menu - -* Click on the check that starts with ``Python tests / build (ubuntu-latest, 3.9)`` -* Now in the top right corner of the opening window, you will see a small dropdown menu called "Artifacts" - -.. 
figure:: /pictures/github_build_artifacts.png - :align: center - :alt: GitHub build artifacts dropdown menu - - Screenshot of the GitHub build artifacts dropdown menu - -* Click on that drowndown menu and download the ``docs-artifact`` zip file +* Click on the check named **Documentation / docs** +* In the top-right corner of the opening window, click the **Artifacts** dropdown menu +* Download the ``docs-artifact`` zip file You can then unpack that zip file on your computer, enter the directory, and open the ``index.html`` file that you will find there. That should open the Pingouin documentation based on the changes from your Pull Request. diff --git a/docs/faq.rst b/docs/faq.rst index 093480ee..389d0afe 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -15,7 +15,7 @@ To install Pingouin, open a command prompt (or Terminal or Anaconda Prompt) and .. code-block:: bash - pip install pingouin --upgrade + pip install --upgrade pingouin You should now be able to use Pingouin. To try it, you need to open an interactive Python console (either `IPython `_ or `Jupyter `_). For example, type the following command in a command prompt: diff --git a/docs/index.rst b/docs/index.rst index 63df0995..04057469 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -63,21 +63,22 @@ the :code:`ttest` function of Pingouin returns the T-value, the p-value, the deg Installation ============ -Pingouin is a Python 3 package and is currently tested for Python 3.8-3.11. +Pingouin is a Python 3 package and is currently tested for Python 3.10+. 
-The main dependencies of Pingouin are : +The main dependencies of Pingouin are: -* `NumPy `_ -* `SciPy `_ -* `Pandas `_ +* `NumPy `_ >= 1.22.4 +* `SciPy `_ >= 1.8.0 +* `Pandas `_ >= 2.1.1 * `Pandas-flavor `_ -* `Statsmodels `_ +* `Statsmodels `_ >= 0.14.1 * `Matplotlib `_ * `Seaborn `_ +* `Scikit-learn `_ >= 1.2.2 +* `Tabulate `_ -In addition, some functions require : +Some functions additionally require: -* `Scikit-learn `_ * `Mpmath `_ Pingouin can be easily installed using pip @@ -549,7 +550,7 @@ Several functions of Pingouin were inspired from R or Matlab toolboxes, includin Functions Guidelines - FAQ + FAQ Changelog Contribute Cite \ No newline at end of file diff --git a/docs/pictures/github_build_artifacts.png b/docs/pictures/github_build_artifacts.png deleted file mode 100644 index 123cdcb1..00000000 Binary files a/docs/pictures/github_build_artifacts.png and /dev/null differ diff --git a/docs/pictures/github_checks.png b/docs/pictures/github_checks.png deleted file mode 100644 index 154d0d0c..00000000 Binary files a/docs/pictures/github_checks.png and /dev/null differ diff --git a/pyproject.toml b/pyproject.toml index feef0a7c..1e472cba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=61.0", "wheel"] +requires = ["setuptools>=80.0", "wheel"] build-backend = "setuptools.build_meta" [project] @@ -29,13 +29,13 @@ dynamic = ["version"] requires-python = ">=3.10" dependencies = [ "matplotlib", - "numpy", - "pandas>=1.5", + "numpy>=1.22.4", + "pandas>=2.1.1", "pandas_flavor", - "scikit-learn>=1.2", - "scipy", + "scikit-learn>=1.2.2", + "scipy>=1.8.0", "seaborn", - "statsmodels", + "statsmodels>=0.14.1", "tabulate", ] @@ -43,6 +43,11 @@ dependencies = [ extras = [ "mpmath", ] + +[dependency-groups] +dev = [ + "ruff>=0.15.0", +] test = [ "pytest>=6", "pytest-cov", @@ -59,9 +64,12 @@ docs = [ "sphinx-notfound-page", ] + [project.urls] Homepage = "https://pingouin-stats.org/index.html" Downloads = 
"https://github.com/raphaelvallat/pingouin/" +Issues = "https://github.com/raphaelvallat/pingouin/issues" +Changelog = "https://pingouin-stats.org/build/html/changelog.html" [tool.setuptools] py-modules = ["pingouin"] @@ -115,11 +123,30 @@ exclude = [ "notebooks", # Skip jupyter notebook examples ] +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint.flake8-import-conventions.aliases] +"matplotlib.pyplot" = "plt" +numpy = "np" +"numpy.typing" = "npt" +pandas = "pd" +seaborn = "sns" +scipy = "sp" + [tool.ruff.lint] select = [ "E4", # Subset of pycodestyle rules "E7", # Subset of pycodestyle rules "E9", # Subset of pycodestyle rules "F", # All Pyflakes rules - "NPY201", + "NPY", # numpy + "W", + "I", + #"PD", # pandas imports + #"UP" # Upgrade Python versions ] + +ignore = [ + "NPY002" # exclude numpy random +] \ No newline at end of file diff --git a/src/pingouin/bayesian.py b/src/pingouin/bayesian.py index 9635012a..6b775134 100644 --- a/src/pingouin/bayesian.py +++ b/src/pingouin/bayesian.py @@ -1,9 +1,10 @@ """Bayesian functions.""" import warnings +from math import exp, lgamma, log, pi + import numpy as np from scipy.integrate import quad -from math import pi, exp, log, lgamma __all__ = ["bayesfactor_ttest", "bayesfactor_pearson", "bayesfactor_binom"] @@ -237,18 +238,18 @@ def bayesfactor_pearson(r, n, alternative="two-sided", method="ly", kappa=1.0): Compare to Wetzels method: - >>> bf = bayesfactor_pearson(r, n, method='wetzels') + >>> bf = bayesfactor_pearson(r, n, method="wetzels") >>> print("Bayes Factor: %.3f" % bf) Bayes Factor: 8.221 One-sided test - >>> bf10pos = bayesfactor_pearson(r, n, alternative='greater') - >>> bf10neg = bayesfactor_pearson(r, n, alternative='less') + >>> bf10pos = bayesfactor_pearson(r, n, alternative="greater") + >>> bf10neg = bayesfactor_pearson(r, n, alternative="less") >>> print("BF-pos: %.3f, BF-neg: %.3f" % (bf10pos, bf10neg)) BF-pos: 21.185, BF-neg: 0.082 """ - from scipy.special import gamma, betaln, 
hyp2f1 + from scipy.special import betaln, gamma, hyp2f1 assert method.lower() in ["ly", "wetzels"], "Method not recognized." assert alternative in [ diff --git a/src/pingouin/circular.py b/src/pingouin/circular.py index ac301610..ce10ac07 100644 --- a/src/pingouin/circular.py +++ b/src/pingouin/circular.py @@ -108,7 +108,7 @@ def convert_angles(angles, low=0, high=360, positive=False): >>> import numpy as np >>> rad = [0.1, 3.14, 5, 2, 6] - >>> convert_angles(rad, low=0, high=2*np.pi) + >>> convert_angles(rad, low=0, high=2 * np.pi) array([ 0.1 , 3.14 , -1.28318531, 2. , -0.28318531]) 4. Convert degrees from a 2-D array @@ -168,8 +168,8 @@ def circ_axial(angles, n): >>> import numpy as np >>> from pingouin import read_dataset >>> from pingouin.circular import circ_axial - >>> df = read_dataset('circular') - >>> angles = df['Orientation'].to_numpy() + >>> df = read_dataset("circular") + >>> angles = df["Orientation"].to_numpy() >>> angles = circ_axial(np.deg2rad(angles), 2) """ angles = np.asarray(angles) @@ -258,7 +258,7 @@ def circ_mean(angles, w=None, axis=0): >>> from scipy.stats import circmean >>> import numpy as np - >>> round(circmean(angles, low=0, high=2*np.pi), 4) + >>> round(circmean(angles, low=0, high=2 * np.pi), 4) 1.013 2. Using a 2-D array of angles in degrees @@ -590,7 +590,7 @@ def circ_corrcl(x, y): >>> print(round(r, 3), round(pval, 3)) 0.109 0.971 """ - from scipy.stats import pearsonr, chi2 + from scipy.stats import chi2, pearsonr x = np.asarray(x) y = np.asarray(y) @@ -662,7 +662,7 @@ def circ_rayleigh(angles, w=None, d=None): 2. Specifying w and d - >>> z, pval = circ_rayleigh(x, w=[.1, .2, .3, .4, .5], d=0.2) + >>> z, pval = circ_rayleigh(x, w=[0.1, 0.2, 0.3, 0.4, 0.5], d=0.2) >>> print(round(z, 3), round(pval, 6)) 0.278 0.806997 """ @@ -741,7 +741,7 @@ def circ_vtest(angles, dir=0.0, w=None, d=None): 2. 
Specifying w and d - >>> v, pval = circ_vtest(x, dir=0.5, w=[.1, .2, .3, .4, .5], d=0.2) + >>> v, pval = circ_vtest(x, dir=0.5, w=[0.1, 0.2, 0.3, 0.4, 0.5], d=0.2) >>> print(round(v, 3), round(pval, 5)) 0.637 0.23086 """ diff --git a/src/pingouin/contingency.py b/src/pingouin/contingency.py index db636e1a..428fa7f9 100644 --- a/src/pingouin/contingency.py +++ b/src/pingouin/contingency.py @@ -1,13 +1,13 @@ # Date: May 2019 import warnings + import numpy as np import pandas as pd - +from scipy.stats import binom, power_divergence +from scipy.stats import chi2 as sp_chi2 from scipy.stats.contingency import expected_freq -from scipy.stats import power_divergence, binom, chi2 as sp_chi2 - -from pingouin import power_chi2, _postprocess_dataframe +from pingouin import _postprocess_dataframe, power_chi2 __all__ = ["chi2_independence", "chi2_mcnemar", "dichotomous_crosstab"] @@ -292,8 +292,8 @@ def chi2_mcnemar(data, x, y, correction=True): Examples -------- >>> import pingouin as pg - >>> data = pg.read_dataset('chi2_mcnemar') - >>> observed, stats = pg.chi2_mcnemar(data, 'treatment_X', 'treatment_Y') + >>> data = pg.read_dataset("chi2_mcnemar") + >>> observed, stats = pg.chi2_mcnemar(data, "treatment_X", "treatment_Y") >>> observed treatment_Y 0 1 treatment_X diff --git a/src/pingouin/correlation.py b/src/pingouin/correlation.py index c40b9135..481cd2d8 100644 --- a/src/pingouin/correlation.py +++ b/src/pingouin/correlation.py @@ -1,18 +1,18 @@ # Author: Raphael Vallat import warnings + import numpy as np import pandas as pd import pandas_flavor as pf from scipy.spatial.distance import pdist, squareform -from scipy.stats import pearsonr, spearmanr, kendalltau +from scipy.stats import kendalltau, pearsonr, spearmanr +from pingouin.bayesian import bayesfactor_pearson from pingouin.config import options -from pingouin.power import power_corr -from pingouin.multicomp import multicomp from pingouin.effsize import compute_esci -from pingouin.utils import remove_na, _perm_pval, 
_postprocess_dataframe -from pingouin.bayesian import bayesfactor_pearson - +from pingouin.multicomp import multicomp +from pingouin.power import power_corr +from pingouin.utils import _perm_pval, _postprocess_dataframe, remove_na __all__ = ["corr", "partial_corr", "pcorr", "rcorr", "rm_corr", "distance_corr"] @@ -517,7 +517,7 @@ def corr(x, y, alternative="two-sided", method="pearson", **kwargs): >>> import pingouin as pg >>> # Generate random correlated samples >>> np.random.seed(123) - >>> mean, cov = [4, 6], [(1, .5), (.5, 1)] + >>> mean, cov = [4, 6], [(1, 0.5), (0.5, 1)] >>> x, y = np.random.multivariate_normal(mean, cov, 30).T >>> # Compute Pearson correlation >>> pg.corr(x, y).round(3) @@ -545,29 +545,29 @@ def corr(x, y, alternative="two-sided", method="pearson", **kwargs): 5. Percentage bend correlation (robust) - >>> pg.corr(x, y, method='percbend').round(3) + >>> pg.corr(x, y, method="percbend").round(3) n r CI95 p_val power percbend 30 0.389 [0.03, 0.66] 0.034 0.581 6. Shepherd's pi correlation (robust) - >>> pg.corr(x, y, method='shepherd').round(3) + >>> pg.corr(x, y, method="shepherd").round(3) n outliers r CI95 p_val power shepherd 30 2 0.437 [0.08, 0.7] 0.02 0.662 7. Skipped spearman correlation (robust) - >>> pg.corr(x, y, method='skipped').round(3) + >>> pg.corr(x, y, method="skipped").round(3) n outliers r CI95 p_val power skipped 30 2 0.437 [0.08, 0.7] 0.02 0.662 8. One-tailed Pearson correlation - >>> pg.corr(x, y, alternative="greater", method='pearson').round(3) + >>> pg.corr(x, y, alternative="greater", method="pearson").round(3) n r CI95 p_val BF10 power pearson 30 0.147 [-0.17, 1.0] 0.22 0.467 0.194 - >>> pg.corr(x, y, alternative="less", method='pearson').round(3) + >>> pg.corr(x, y, alternative="less", method="pearson").round(3) n r CI95 p_val BF10 power pearson 30 0.147 [-1.0, 0.43] 0.78 0.137 0.008 @@ -580,8 +580,8 @@ def corr(x, y, alternative="two-sided", method="pearson", **kwargs): 10. 
Using columns of a pandas dataframe >>> import pandas as pd - >>> data = pd.DataFrame({'x': x, 'y': y}) - >>> pg.corr(data['x'], data['y']).round(3) + >>> data = pd.DataFrame({"x": x, "y": y}) + >>> pg.corr(data["x"], data["y"]).round(3) n r CI95 p_val BF10 power pearson 30 0.147 [-0.23, 0.48] 0.439 0.302 0.121 """ @@ -776,34 +776,47 @@ def partial_corr( 1. Partial correlation with one covariate >>> import pingouin as pg - >>> df = pg.read_dataset('partial_corr') - >>> pg.partial_corr(data=df, x='x', y='y', covar='cv1').round(3) + >>> df = pg.read_dataset("partial_corr") + >>> pg.partial_corr(data=df, x="x", y="y", covar="cv1").round(3) n r CI95 p_val pearson 30 0.568 [0.25, 0.77] 0.001 2. Spearman partial correlation with several covariates >>> # Partial correlation of x and y controlling for cv1, cv2 and cv3 - >>> pg.partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'], - ... method='spearman').round(3) + >>> pg.partial_corr( + ... data=df, x="x", y="y", covar=["cv1", "cv2", "cv3"], method="spearman" + ... ).round(3) n r CI95 p_val spearman 30 0.521 [0.18, 0.75] 0.005 3. Same but one-sided test - >>> pg.partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'], - ... alternative="greater", method='spearman').round(3) + >>> pg.partial_corr( + ... data=df, + ... x="x", + ... y="y", + ... covar=["cv1", "cv2", "cv3"], + ... alternative="greater", + ... method="spearman", + ... ).round(3) n r CI95 p_val spearman 30 0.521 [0.24, 1.0] 0.003 - >>> pg.partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'], - ... alternative="less", method='spearman').round(3) + >>> pg.partial_corr( + ... data=df, + ... x="x", + ... y="y", + ... covar=["cv1", "cv2", "cv3"], + ... alternative="less", + ... method="spearman", + ... ).round(3) n r CI95 p_val spearman 30 0.521 [-1.0, 0.72] 0.997 4. 
As a pandas method - >>> df.partial_corr(x='x', y='y', covar=['cv1'], method='spearman').round(3) + >>> df.partial_corr(x="x", y="y", covar=["cv1"], method="spearman").round(3) n r CI95 p_val spearman 30 0.578 [0.27, 0.78] 0.001 @@ -819,7 +832,7 @@ def partial_corr( 6. Semi-partial correlation on x - >>> pg.partial_corr(data=df, x='x', y='y', x_covar=['cv1', 'cv2', 'cv3']).round(3) + >>> pg.partial_corr(data=df, x="x", y="y", x_covar=["cv1", "cv2", "cv3"]).round(3) n r CI95 p_val pearson 30 0.463 [0.1, 0.72] 0.015 """ @@ -940,7 +953,7 @@ def pcorr(self): Examples -------- >>> import pingouin as pg - >>> data = pg.read_dataset('mediation') + >>> data = pg.read_dataset("mediation") >>> data.pcorr().round(3) X M Y Mbin Ybin W1 W2 X 1.000 0.359 0.074 -0.019 -0.147 -0.148 -0.067 @@ -953,7 +966,7 @@ def pcorr(self): On a subset of columns - >>> data[['X', 'Y', 'M']].pcorr() + >>> data[["X", "Y", "M"]].pcorr() X Y M X 1.000000 0.036649 0.412804 Y 0.036649 1.000000 0.540140 @@ -1026,7 +1039,7 @@ def rcorr( >>> import pandas as pd >>> import pingouin as pg >>> # Load an example dataset of personality dimensions - >>> df = pg.read_dataset('pairwise_corr').iloc[:, 1:] + >>> df = pg.read_dataset("pairwise_corr").iloc[:, 1:] >>> # Add some missing values >>> df.iloc[[2, 5, 20], 2] = np.nan >>> df.iloc[[1, 4, 10], 3] = np.nan @@ -1047,7 +1060,7 @@ def rcorr( Agreeableness -0.134 0.054 0.161 - >>> # Spearman correlation and Holm adjustement for multiple comparisons - >>> df.iloc[:, 0:4].rcorr(method='spearman', padjust='holm') + >>> df.iloc[:, 0:4].rcorr(method="spearman", padjust="holm") Neuroticism Extraversion Openness Agreeableness Neuroticism - *** ** Extraversion -0.325 - *** @@ -1055,9 +1068,8 @@ def rcorr( Agreeableness -0.15 0.06 0.173 - >>> # Compare with the pg.pairwise_corr function - >>> pairwise = df.iloc[:, 0:4].pairwise_corr(method='spearman', - ... 
padjust='holm') - >>> pairwise[['X', 'Y', 'r', 'p_corr']].round(3) # Do not show all columns + >>> pairwise = df.iloc[:, 0:4].pairwise_corr(method="spearman", padjust="holm") + >>> pairwise[["X", "Y", "r", "p_corr"]].round(3) # Do not show all columns X Y r p_corr 0 Neuroticism Extraversion -0.325 0.000 1 Neuroticism Openness -0.027 0.543 @@ -1074,7 +1086,7 @@ def rcorr( Agreeableness -0.134 0.0539 - >>> # With the sample size on the upper triangle instead of the p-values - >>> df.iloc[:, [0, 1, 2]].rcorr(upper='n') + >>> df.iloc[:, [0, 1, 2]].rcorr(upper="n") Neuroticism Extraversion Openness Neuroticism - 500 497 Extraversion -0.35 - 497 @@ -1182,8 +1194,8 @@ def rm_corr(data=None, x=None, y=None, subject=None): Examples -------- >>> import pingouin as pg - >>> df = pg.read_dataset('rm_corr') - >>> pg.rm_corr(data=df, x='pH', y='PacO2', subject='Subject') + >>> df = pg.read_dataset("rm_corr") + >>> pg.rm_corr(data=df, x="pH", y="PacO2", subject="Subject") r dof pval CI95 power rm_corr -0.50677 38 0.000847 [-0.71, -0.23] 0.929579 @@ -1192,8 +1204,8 @@ def rm_corr(data=None, x=None, y=None, subject=None): .. 
plot:: >>> import pingouin as pg - >>> df = pg.read_dataset('rm_corr') - >>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject') + >>> df = pg.read_dataset("rm_corr") + >>> g = pg.plot_rm_corr(data=df, x="pH", y="PacO2", subject="Subject") """ from pingouin import ancova, power_corr diff --git a/src/pingouin/distribution.py b/src/pingouin/distribution.py index 32209e33..55d1e44d 100644 --- a/src/pingouin/distribution.py +++ b/src/pingouin/distribution.py @@ -1,11 +1,12 @@ import warnings -import scipy.stats +from collections import namedtuple + import numpy as np import pandas as pd -from collections import namedtuple -from pingouin.utils import _flatten_list as _fl -from pingouin.utils import remove_na, _postprocess_dataframe +import scipy.stats +from pingouin.utils import _flatten_list as _fl +from pingouin.utils import _postprocess_dataframe, remove_na __all__ = ["gzscore", "normality", "homoscedasticity", "anderson", "epsilon", "sphericity"] @@ -172,9 +173,9 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=0.05): 2. Omnibus test on a wide-format dataframe with missing values - >>> data = pg.read_dataset('mediation') - >>> data.loc[1, 'X'] = np.nan - >>> pg.normality(data, method='normaltest').round(3) + >>> data = pg.read_dataset("mediation") + >>> data.loc[1, "X"] = np.nan + >>> pg.normality(data, method="normaltest").round(3) W pval normal X 1.792 0.408 True M 0.492 0.782 True @@ -186,14 +187,14 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=0.05): 3. Pandas Series - >>> pg.normality(data['X'], method='normaltest') + >>> pg.normality(data["X"], method="normaltest") W pval normal X 1.791839 0.408232 True 4. 
Long-format dataframe - >>> data = pg.read_dataset('rm_anova2') - >>> pg.normality(data, dv='Performance', group='Time') + >>> data = pg.read_dataset("rm_anova2") + >>> pg.normality(data, dv="Performance", group="Time") W pval normal Time Pre 0.967718 0.478773 True @@ -201,7 +202,7 @@ def normality(data, dv=None, group=None, method="shapiro", alpha=0.05): 5. Same but using the Jarque-Bera test - >>> pg.normality(data, dv='Performance', group='Time', method="jarque_bera") + >>> pg.normality(data, dv="Performance", group="Time", method="jarque_bera") W pval normal Time Pre 0.304021 0.858979 True @@ -346,14 +347,14 @@ def homoscedasticity(data, dv=None, group=None, method="levene", alpha=0.05, **k >>> import numpy as np >>> import pingouin as pg - >>> data = pg.read_dataset('mediation') - >>> pg.homoscedasticity(data[['X', 'Y', 'M']]) + >>> data = pg.read_dataset("mediation") + >>> pg.homoscedasticity(data[["X", "Y", "M"]]) W pval equal_var levene 1.173518 0.310707 True 2. Same data but using a long-format dataframe - >>> data_long = data[['X', 'Y', 'M']].melt() + >>> data_long = data[["X", "Y", "M"]].melt() >>> pg.homoscedasticity(data_long, dv="value", group="variable") W pval equal_var levene 1.173518 0.310707 True @@ -367,7 +368,7 @@ def homoscedasticity(data, dv=None, group=None, method="levene", alpha=0.05, **k 4. Bartlett test using a list of iterables >>> data = [[4, 8, 9, 20, 14], np.array([5, 8, 15, 45, 12])] - >>> pg.homoscedasticity(data, method="bartlett", alpha=.05) + >>> pg.homoscedasticity(data, method="bartlett", alpha=0.05) T pval equal_var bartlett 2.873569 0.090045 True """ @@ -629,18 +630,22 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"): >>> import pandas as pd >>> import pingouin as pg - >>> data = pd.DataFrame({'A': [2.2, 3.1, 4.3, 4.1, 7.2], - ... 'B': [1.1, 2.5, 4.1, 5.2, 6.4], - ... 
'C': [8.2, 4.5, 3.4, 6.2, 7.2]}) - >>> gg = pg.epsilon(data, correction='gg') - >>> hf = pg.epsilon(data, correction='hf') - >>> lb = pg.epsilon(data, correction='lb') + >>> data = pd.DataFrame( + ... { + ... "A": [2.2, 3.1, 4.3, 4.1, 7.2], + ... "B": [1.1, 2.5, 4.1, 5.2, 6.4], + ... "C": [8.2, 4.5, 3.4, 6.2, 7.2], + ... } + ... ) + >>> gg = pg.epsilon(data, correction="gg") + >>> hf = pg.epsilon(data, correction="hf") + >>> lb = pg.epsilon(data, correction="lb") >>> print("%.2f %.2f %.2f" % (lb, gg, hf)) 0.50 0.56 0.62 Now using a long-format dataframe - >>> data = pg.read_dataset('rm_anova2') + >>> data = pg.read_dataset("rm_anova2") >>> data.head() Subject Time Metric Performance 0 1 Pre Product 13 @@ -651,8 +656,7 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"): Let's first calculate the epsilon of the *Time* within-subject factor - >>> pg.epsilon(data, dv='Performance', subject='Subject', - ... within='Time') + >>> pg.epsilon(data, dv="Performance", subject="Subject", within="Time") 1.0 Since *Time* has only two levels (Pre and Post), the sphericity assumption @@ -660,8 +664,7 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"): The *Metric* factor, however, has three levels: - >>> round(pg.epsilon(data, dv='Performance', subject='Subject', - ... within=['Metric']), 3) + >>> round(pg.epsilon(data, dv="Performance", subject="Subject", within=["Metric"]), 3) 0.969 The epsilon value is very close to 1, meaning that there is no major @@ -670,15 +673,14 @@ def epsilon(data, dv=None, within=None, subject=None, correction="gg"): Now, let's calculate the epsilon for the interaction between the two repeated measures factor: - >>> round(pg.epsilon(data, dv='Performance', subject='Subject', - ... 
within=['Time', 'Metric']), 3) + >>> round(pg.epsilon(data, dv="Performance", subject="Subject", within=["Time", "Metric"]), 3) 0.727 Alternatively, we could use a wide-format dataframe with two column levels: >>> # Pivot from long-format to wide-format - >>> piv = data.pivot(index='Subject', columns=['Time', 'Metric'], values='Performance') + >>> piv = data.pivot(index="Subject", columns=["Time", "Metric"], values="Performance") >>> piv.head() Time Pre Post Metric Product Client Action Product Client Action @@ -872,21 +874,25 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha >>> import pandas as pd >>> import pingouin as pg - >>> data = pd.DataFrame({'A': [2.2, 3.1, 4.3, 4.1, 7.2], - ... 'B': [1.1, 2.5, 4.1, 5.2, 6.4], - ... 'C': [8.2, 4.5, 3.4, 6.2, 7.2]}) + >>> data = pd.DataFrame( + ... { + ... "A": [2.2, 3.1, 4.3, 4.1, 7.2], + ... "B": [1.1, 2.5, 4.1, 5.2, 6.4], + ... "C": [8.2, 4.5, 3.4, 6.2, 7.2], + ... } + ... ) >>> spher, W, chisq, dof, pval = pg.sphericity(data) >>> print(spher, round(W, 3), round(chisq, 3), dof, round(pval, 3)) True 0.21 4.677 2 0.096 John, Nagao and Sugiura (JNS) test - >>> round(pg.sphericity(data, method='jns')[-1], 3) # P-value only + >>> round(pg.sphericity(data, method="jns")[-1], 3) # P-value only 0.046 Now using a long-format dataframe - >>> data = pg.read_dataset('rm_anova2') + >>> data = pg.read_dataset("rm_anova2") >>> data.head() Subject Time Metric Performance 0 1 Pre Product 13 @@ -897,8 +903,7 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha Let's first test sphericity for the *Time* within-subject factor - >>> pg.sphericity(data, dv='Performance', subject='Subject', - ... 
within='Time') + >>> pg.sphericity(data, dv="Performance", subject="Subject", within="Time") (True, nan, nan, 1, 1.0) Since *Time* has only two levels (Pre and Post), the sphericity assumption @@ -906,8 +911,7 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha The *Metric* factor, however, has three levels: - >>> round(pg.sphericity(data, dv='Performance', subject='Subject', - ... within=['Metric'])[-1], 3) + >>> round(pg.sphericity(data, dv="Performance", subject="Subject", within=["Metric"])[-1], 3) 0.878 The p-value value is very large, and the test therefore indicates that @@ -918,9 +922,9 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha if at least one of the two within-subject factors has no more than two levels. - >>> spher, _, chisq, dof, pval = pg.sphericity(data, dv='Performance', - ... subject='Subject', - ... within=['Time', 'Metric']) + >>> spher, _, chisq, dof, pval = pg.sphericity( + ... data, dv="Performance", subject="Subject", within=["Time", "Metric"] + ... 
) >>> print(spher, round(chisq, 3), dof, round(pval, 3)) True 3.763 2 0.152 @@ -931,7 +935,7 @@ def sphericity(data, dv=None, within=None, subject=None, method="mauchly", alpha levels: >>> # Pivot from long-format to wide-format - >>> piv = data.pivot(index='Subject', columns=['Time', 'Metric'], values='Performance') + >>> piv = data.pivot(index="Subject", columns=["Time", "Metric"], values="Performance") >>> piv.head() Time Pre Post Metric Product Client Action Product Client Action diff --git a/src/pingouin/effsize.py b/src/pingouin/effsize.py index 1e26457e..317af5f2 100644 --- a/src/pingouin/effsize.py +++ b/src/pingouin/effsize.py @@ -1,8 +1,10 @@ # Author: Raphael Vallat # Date: April 2018 import warnings + import numpy as np from scipy.stats import pearsonr + from pingouin.utils import _check_eftype, remove_na # from pingouin.distribution import homoscedasticity @@ -124,15 +126,15 @@ def compute_esci( >>> x = [3, 4, 6, 7, 5, 6, 7, 3, 5, 4, 2] >>> y = [4, 6, 6, 7, 6, 5, 5, 2, 3, 4, 1] >>> nx, ny = len(x), len(y) - >>> stat = pg.compute_effsize(x, y, eftype='r') - >>> ci = pg.compute_esci(stat=stat, nx=nx, ny=ny, eftype='r') + >>> stat = pg.compute_effsize(x, y, eftype="r") + >>> ci = pg.compute_esci(stat=stat, nx=nx, ny=ny, eftype="r") >>> print(round(stat, 4), ci) 0.7468 [0.27 0.93] 2. 
Confidence interval of a Cohen d - >>> stat = pg.compute_effsize(x, y, eftype='cohen') - >>> ci = pg.compute_esci(stat, nx=nx, ny=ny, eftype='cohen', decimals=3) + >>> stat = pg.compute_effsize(x, y, eftype="cohen") + >>> ci = pg.compute_esci(stat, nx=nx, ny=ny, eftype="cohen", decimals=3) >>> print(round(stat, 4), ci) 0.1538 [-0.737 1.045] """ @@ -288,7 +290,7 @@ def compute_bootci( >>> x = rng.normal(loc=4, scale=2, size=100) >>> y = rng.normal(loc=3, scale=1, size=100) >>> stat = np.corrcoef(x, y)[0][1] - >>> ci = pg.compute_bootci(x, y, func='pearson', paired=True, seed=42, decimals=4) + >>> ci = pg.compute_bootci(x, y, func="pearson", paired=True, seed=42, decimals=4) >>> print(round(stat, 4), ci) 0.0945 [-0.098 0.2738] @@ -296,15 +298,21 @@ def compute_bootci( >>> from scipy.stats import bootstrap >>> bt_scipy = bootstrap( - ... data=(x, y), statistic=lambda x, y: np.corrcoef(x, y)[0][1], - ... method="basic", vectorized=False, n_resamples=2000, paired=True, random_state=42) + ... data=(x, y), + ... statistic=lambda x, y: np.corrcoef(x, y)[0][1], + ... method="basic", + ... vectorized=False, + ... n_resamples=2000, + ... paired=True, + ... random_state=42, + ... ) >>> np.round(bt_scipy.confidence_interval, 4) array([-0.0952, 0.2883]) 2. Bootstrapped 95% confidence interval of a Cohen d - >>> stat = pg.compute_effsize(x, y, eftype='cohen') - >>> ci = pg.compute_bootci(x, y, func='cohen', seed=42, decimals=3) + >>> stat = pg.compute_effsize(x, y, eftype="cohen") + >>> ci = pg.compute_bootci(x, y, func="cohen", seed=42, decimals=3) >>> print(round(stat, 4), ci) 0.7009 [0.403 1.009] @@ -312,7 +320,7 @@ def compute_bootci( >>> import numpy as np >>> stat = np.std(x, ddof=1) - >>> ci = pg.compute_bootci(x, func='std', seed=123) + >>> ci = pg.compute_bootci(x, func="std", seed=123) >>> print(round(stat, 4), ci) 1.5534 [1.38 1.8 ] @@ -321,16 +329,16 @@ def compute_bootci( >>> def std(x, axis): ... 
return np.std(x, ddof=1, axis=axis) - >>> bt_scipy = bootstrap(data=(x, ), statistic=std, n_resamples=2000, random_state=123) + >>> bt_scipy = bootstrap(data=(x,), statistic=std, n_resamples=2000, random_state=123) >>> np.round(bt_scipy.confidence_interval, 2) array([1.39, 1.81]) Changing the confidence intervals type in Pingouin - >>> pg.compute_bootci(x, func='std', seed=123, method="norm") + >>> pg.compute_bootci(x, func="std", seed=123, method="norm") array([1.37, 1.76]) - >>> pg.compute_bootci(x, func='std', seed=123, method="percentile") + >>> pg.compute_bootci(x, func="std", seed=123, method="percentile") array([1.35, 1.75]) 4. Bootstrapped confidence interval using a custom univariate function @@ -352,11 +360,14 @@ def compute_bootci( We can also get the bootstrapped distribution >>> ci, bt = pg.compute_bootci(x, y2, func=mean_diff, n_boot=10000, return_dist=True, seed=9) - >>> print(f"The bootstrap distribution has {bt.size} samples. The mean and standard " - ... f"{bt.mean():.4f} ± {bt.std():.4f}") + >>> print( + ... f"The bootstrap distribution has {bt.size} samples. The mean and standard " + ... f"{bt.mean():.4f} ± {bt.std():.4f}" + ... ) The bootstrap distribution has 10000 samples. The mean and standard 0.8807 ± 0.1704 """ from inspect import isfunction, isroutine + from scipy.stats import norm # Check other arguments @@ -569,27 +580,27 @@ def convert_effsize(ef, input_type, output_type, nx=None, ny=None): 1. Convert from Cohen d to eta-square >>> import pingouin as pg - >>> d = .45 - >>> eta = pg.convert_effsize(d, 'cohen', 'eta_square') + >>> d = 0.45 + >>> eta = pg.convert_effsize(d, "cohen", "eta_square") >>> print(eta) 0.048185603807257595 2. Convert from Cohen d to Hegdes g (requires the sample sizes of each group) - >>> pg.convert_effsize(.45, 'cohen', 'hedges', nx=10, ny=10) + >>> pg.convert_effsize(0.45, "cohen", "hedges", nx=10, ny=10) 0.4309859154929578 3. 
Convert a point-biserial correlation to Cohen d >>> rpb = 0.40 - >>> d = pg.convert_effsize(rpb, 'pointbiserialr', 'cohen') + >>> d = pg.convert_effsize(rpb, "pointbiserialr", "cohen") >>> print(d) 0.8728715609439696 4. Reverse operation: convert Cohen d to a point-biserial correlation - >>> pg.convert_effsize(d, 'cohen', 'pointbiserialr') + >>> pg.convert_effsize(d, "cohen", "pointbiserialr") 0.4000000000000001 """ it = input_type.lower() @@ -738,32 +749,32 @@ def compute_effsize(x, y, paired=False, eftype="cohen"): >>> import pingouin as pg >>> x = [1, 2, 3, 4] >>> y = [3, 4, 5, 6, 7] - >>> pg.compute_effsize(x, y, paired=False, eftype='cohen') + >>> pg.compute_effsize(x, y, paired=False, eftype="cohen") -1.707825127659933 The sign of the Cohen d will be opposite if we reverse the order of ``x`` and ``y``: - >>> pg.compute_effsize(y, x, paired=False, eftype='cohen') + >>> pg.compute_effsize(y, x, paired=False, eftype="cohen") 1.707825127659933 2. Hedges g from two paired samples. >>> x = [1, 2, 3, 4, 5, 6, 7] >>> y = [1, 3, 5, 7, 9, 11, 13] - >>> pg.compute_effsize(x, y, paired=True, eftype='hedges') + >>> pg.compute_effsize(x, y, paired=True, eftype="hedges") -0.8222477210374874 3. Common Language Effect Size. - >>> pg.compute_effsize(x, y, eftype='cles') + >>> pg.compute_effsize(x, y, eftype="cles") 0.2857142857142857 In other words, there are ~29% of pairs where ``x`` is higher than ``y``, which means that there are ~71% of pairs where ``x`` is *lower* than ``y``. 
This can be easily verified by changing the order of ``x`` and ``y``: - >>> pg.compute_effsize(y, x, eftype='cles') + >>> pg.compute_effsize(y, x, eftype="cles") 0.7142857142857143 """ # Check arguments @@ -850,14 +861,14 @@ def compute_effsize_from_t(tval, nx=None, ny=None, N=None, eftype="cohen"): >>> from pingouin import compute_effsize_from_t >>> tval, nx, ny = 2.90, 35, 25 - >>> d = compute_effsize_from_t(tval, nx=nx, ny=ny, eftype='cohen') + >>> d = compute_effsize_from_t(tval, nx=nx, ny=ny, eftype="cohen") >>> print(d) 0.7593982580212534 2. Compute effect size when only total sample size is known (nx+ny) >>> tval, N = 2.90, 60 - >>> d = compute_effsize_from_t(tval, N=N, eftype='cohen') + >>> d = compute_effsize_from_t(tval, N=N, eftype="cohen") >>> print(d) 0.7487767802667672 """ diff --git a/src/pingouin/equivalence.py b/src/pingouin/equivalence.py index 70f5ee58..b2460c6d 100644 --- a/src/pingouin/equivalence.py +++ b/src/pingouin/equivalence.py @@ -2,10 +2,10 @@ # Date: July 2019 import numpy as np import pandas as pd + from pingouin.parametric import ttest from pingouin.utils import _postprocess_dataframe - __all__ = ["tost"] diff --git a/src/pingouin/multicomp.py b/src/pingouin/multicomp.py index 04415287..4b63bb42 100644 --- a/src/pingouin/multicomp.py +++ b/src/pingouin/multicomp.py @@ -80,8 +80,8 @@ def fdr(pvals, alpha=0.05, method="fdr_bh"): FDR correction of an array of p-values >>> import pingouin as pg - >>> pvals = [.50, .003, .32, .054, .0003] - >>> reject, pvals_corr = pg.multicomp(pvals, method='fdr_bh', alpha=.05) + >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003] + >>> reject, pvals_corr = pg.multicomp(pvals, method="fdr_bh", alpha=0.05) >>> print(reject, pvals_corr) [False True False False True] [0.5 0.0075 0.4 0.09 0.0015] """ @@ -178,8 +178,8 @@ def bonf(pvals, alpha=0.05): Examples -------- >>> import pingouin as pg - >>> pvals = [.50, .003, .32, .054, .0003] - >>> reject, pvals_corr = pg.multicomp(pvals, method='bonf', alpha=.05) + >>> 
pvals = [0.50, 0.003, 0.32, 0.054, 0.0003] + >>> reject, pvals_corr = pg.multicomp(pvals, method="bonf", alpha=0.05) >>> print(reject, pvals_corr) [False True False False True] [1. 0.015 1. 0.27 0.0015] """ @@ -251,8 +251,8 @@ def holm(pvals, alpha=0.05): Examples -------- >>> import pingouin as pg - >>> pvals = [.50, .003, .32, .054, .0003] - >>> reject, pvals_corr = pg.multicomp(pvals, method='holm', alpha=.05) + >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003] + >>> reject, pvals_corr = pg.multicomp(pvals, method="holm", alpha=0.05) >>> print(reject, pvals_corr) [False True False False True] [0.64 0.012 0.64 0.162 0.0015] """ @@ -327,8 +327,8 @@ def sidak(pvals, alpha=0.05): -------- >>> import numpy as np >>> import pingouin as pg - >>> pvals = [.50, .003, .32, .054, .0003] - >>> reject, pvals_corr = pg.multicomp(pvals, method='sidak', alpha=.05) + >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003] + >>> reject, pvals_corr = pg.multicomp(pvals, method="sidak", alpha=0.05) >>> print(reject, np.round(pvals_corr, 4)) [False True False False True] [0.9688 0.0149 0.8546 0.2424 0.0015] """ @@ -459,8 +459,8 @@ def multicomp(pvals, alpha=0.05, method="holm"): FDR correction of an array of p-values >>> import pingouin as pg - >>> pvals = [.50, .003, .32, .054, .0003] - >>> reject, pvals_corr = pg.multicomp(pvals, method='fdr_bh') + >>> pvals = [0.50, 0.003, 0.32, 0.054, 0.0003] + >>> reject, pvals_corr = pg.multicomp(pvals, method="fdr_bh") >>> print(reject, pvals_corr) [False True False False True] [0.5 0.0075 0.4 0.09 0.0015] @@ -468,7 +468,7 @@ def multicomp(pvals, alpha=0.05, method="holm"): >>> import numpy as np >>> pvals[2] = np.nan - >>> reject, pvals_corr = pg.multicomp(pvals, method='holm') + >>> reject, pvals_corr = pg.multicomp(pvals, method="holm") >>> print(reject, pvals_corr) [False True False False True] [0.5 0.009 nan 0.108 0.0012] """ diff --git a/src/pingouin/multivariate.py b/src/pingouin/multivariate.py index f2ec6da0..f0ec4c99 100644 --- 
a/src/pingouin/multivariate.py +++ b/src/pingouin/multivariate.py @@ -1,7 +1,9 @@ +from collections import namedtuple + import numpy as np import pandas as pd -from collections import namedtuple -from pingouin.utils import remove_na, _postprocess_dataframe + +from pingouin.utils import _postprocess_dataframe, remove_na __all__ = ["multivariate_normality", "multivariate_ttest", "box_m"] @@ -55,9 +57,9 @@ def multivariate_normality(X, alpha=0.05): Examples -------- >>> import pingouin as pg - >>> data = pg.read_dataset('multivariate') - >>> X = data[['Fever', 'Pressure', 'Aches']] - >>> pg.multivariate_normality(X, alpha=.05) + >>> data = pg.read_dataset("multivariate") + >>> X = data[["Fever", "Pressure", "Aches"]] + >>> pg.multivariate_normality(X, alpha=0.05) HZResults(hz=0.540086101851555, pval=0.7173686509622386, normal=True) """ from scipy.stats import lognorm @@ -175,10 +177,10 @@ def multivariate_ttest(X, Y=None, paired=False): Two-sample independent Hotelling T-squared test >>> import pingouin as pg - >>> data = pg.read_dataset('multivariate') - >>> dvs = ['Fever', 'Pressure', 'Aches'] - >>> X = data[data['Condition'] == 'Drug'][dvs] - >>> Y = data[data['Condition'] == 'Placebo'][dvs] + >>> data = pg.read_dataset("multivariate") + >>> dvs = ["Fever", "Pressure", "Aches"] + >>> X = data[data["Condition"] == "Drug"][dvs] + >>> Y = data[data["Condition"] == "Placebo"][dvs] >>> pg.multivariate_ttest(X, Y) T2 F df1 df2 pval hotelling 4.228679 1.326644 3 32 0.282898 @@ -319,9 +321,8 @@ def box_m(data, dvs, group, alpha=0.001): >>> import pandas as pd >>> import pingouin as pg >>> from scipy.stats import multivariate_normal as mvn - >>> data = pd.DataFrame(mvn.rvs(size=(100, 3), random_state=42), - ... 
columns=['A', 'B', 'C']) - >>> data['group'] = [1] * 25 + [2] * 25 + [3] * 25 + [4] * 25 + >>> data = pd.DataFrame(mvn.rvs(size=(100, 3), random_state=42), columns=["A", "B", "C"]) + >>> data["group"] = [1] * 25 + [2] * 25 + [3] * 25 + [4] * 25 >>> data.head() A B C group 0 0.496714 -0.138264 0.647689 1 @@ -330,16 +331,15 @@ def box_m(data, dvs, group, alpha=0.001): 3 0.542560 -0.463418 -0.465730 1 4 0.241962 -1.913280 -1.724918 1 - >>> pg.box_m(data, dvs=['A', 'B', 'C'], group='group') + >>> pg.box_m(data, dvs=["A", "B", "C"], group="group") Chi2 df pval equal_cov box 11.634185 18.0 0.865537 True 2. Box M test with 3 dependent variables of 2 groups (unequal sample size) - >>> data = pd.DataFrame(mvn.rvs(size=(30, 2), random_state=42), - ... columns=['A', 'B']) - >>> data['group'] = [1] * 20 + [2] * 10 - >>> pg.box_m(data, dvs=['A', 'B'], group='group') + >>> data = pd.DataFrame(mvn.rvs(size=(30, 2), random_state=42), columns=["A", "B"]) + >>> data["group"] = [1] * 20 + [2] * 10 + >>> pg.box_m(data, dvs=["A", "B"], group="group") Chi2 df pval equal_cov box 0.706709 3.0 0.871625 True """ diff --git a/src/pingouin/nonparametric.py b/src/pingouin/nonparametric.py index 348485ad..909b428c 100644 --- a/src/pingouin/nonparametric.py +++ b/src/pingouin/nonparametric.py @@ -1,9 +1,10 @@ # Author: Raphael Vallat # Date: May 2018 -import scipy import numpy as np import pandas as pd -from pingouin import remove_na, _check_dataframe, _postprocess_dataframe +import scipy + +from pingouin import _check_dataframe, _postprocess_dataframe, remove_na __all__ = [ "mad", @@ -84,7 +85,7 @@ def mad(a, normalize=True, axis=0): Compare with Scipy >= 1.3 >>> from scipy.stats import median_abs_deviation - >>> median_abs_deviation(w, scale='normal', axis=None, nan_policy='omit') + >>> median_abs_deviation(w, scale="normal", axis=None, nan_policy="omit") 1.1607762457644006 """ a = np.asarray(a) @@ -142,7 +143,7 @@ def madmedianrule(a): Examples -------- >>> import pingouin as pg - >>> a = 
[-1.09, 1., 0.28, -1.51, -0.58, 6.61, -2.43, -0.43] + >>> a = [-1.09, 1.0, 0.28, -1.51, -0.58, 6.61, -2.43, -0.43] >>> pg.madmedianrule(a) array([False, False, False, False, False, True, False, False]) """ @@ -238,29 +239,29 @@ def mwu(x, y, alternative="two-sided", **kwargs): >>> np.random.seed(123) >>> x = np.random.uniform(low=0, high=1, size=20) >>> y = np.random.uniform(low=0.2, high=1.2, size=20) - >>> pg.mwu(x, y, alternative='two-sided') + >>> pg.mwu(x, y, alternative="two-sided") U_val alternative p_val RBC CLES MWU 97.0 two-sided 0.00556 -0.515 0.2425 Compare with SciPy >>> import scipy - >>> scipy.stats.mannwhitneyu(x, y, use_continuity=True, alternative='two-sided') + >>> scipy.stats.mannwhitneyu(x, y, use_continuity=True, alternative="two-sided") MannwhitneyuResult(statistic=97.0, pvalue=0.0055604599321374135) One-sided test - >>> pg.mwu(x, y, alternative='greater') + >>> pg.mwu(x, y, alternative="greater") U_val alternative p_val RBC CLES MWU 97.0 greater 0.997442 -0.515 0.2425 - >>> pg.mwu(x, y, alternative='less') + >>> pg.mwu(x, y, alternative="less") U_val alternative p_val RBC CLES MWU 97.0 less 0.00278 -0.515 0.7575 Passing keyword arguments to :py:func:`scipy.stats.mannwhitneyu`: - >>> pg.mwu(x, y, alternative='two-sided', method='exact') + >>> pg.mwu(x, y, alternative="two-sided", method="exact") U_val alternative p_val RBC CLES MWU 97.0 two-sided 0.004681 -0.515 0.2425 @@ -408,7 +409,7 @@ def wilcoxon(x, y=None, alternative="two-sided", **kwargs): >>> import pingouin as pg >>> x = np.array([20, 22, 19, 20, 22, 18, 24, 20, 19, 24, 26, 13]) >>> y = np.array([38, 37, 33, 29, 14, 12, 20, 22, 17, 25, 26, 16]) - >>> pg.wilcoxon(x, y, alternative='two-sided') + >>> pg.wilcoxon(x, y, alternative="two-sided") W_val alternative p_val RBC CLES Wilcoxon 20.5 two-sided 0.288086 -0.378788 0.395833 @@ -428,17 +429,17 @@ def wilcoxon(x, y=None, alternative="two-sided", **kwargs): The p-value is not exactly similar to Pingouin. 
This is because Pingouin automatically applies a continuity correction. Disabling it gives the same p-value as scipy: - >>> pg.wilcoxon(x, y, alternative='two-sided', correction=False) + >>> pg.wilcoxon(x, y, alternative="two-sided", correction=False) W_val alternative p_val RBC CLES Wilcoxon 20.5 two-sided 0.288086 -0.378788 0.395833 One-sided test - >>> pg.wilcoxon(x, y, alternative='greater') + >>> pg.wilcoxon(x, y, alternative="greater") W_val alternative p_val RBC CLES Wilcoxon 20.5 greater 0.865723 -0.378788 0.395833 - >>> pg.wilcoxon(x, y, alternative='less') + >>> pg.wilcoxon(x, y, alternative="less") W_val alternative p_val RBC CLES Wilcoxon 20.5 less 0.144043 0.378788 0.604167 """ @@ -540,8 +541,8 @@ def kruskal(data=None, dv=None, between=None, detailed=False): Compute the Kruskal-Wallis H-test for independent samples. >>> from pingouin import kruskal, read_dataset - >>> df = read_dataset('anova') - >>> kruskal(data=df, dv='Pain threshold', between='Hair color') + >>> df = read_dataset("anova") + >>> kruskal(data=df, dv="Pain threshold", between="Hair color") Source ddof1 H p_unc Kruskal Hair color 3 10.58863 0.014172 """ @@ -658,10 +659,26 @@ def friedman(data=None, dv=None, within=None, subject=None, method="chisq"): >>> import pandas as pd >>> import pingouin as pg - >>> df = pd.DataFrame({ - ... 'white': {0: 10, 1: 8, 2: 7, 3: 9, 4: 7, 5: 4, 6: 5, 7: 6, 8: 5, 9: 10, 10: 4, 11: 7}, - ... 'red': {0: 7, 1: 5, 2: 8, 3: 6, 4: 5, 5: 7, 6: 9, 7: 6, 8: 4, 9: 6, 10: 7, 11: 3}, - ... 'rose': {0: 8, 1: 5, 2: 6, 3: 4, 4: 7, 5: 5, 6: 3, 7: 7, 8: 6, 9: 4, 10: 4, 11: 3}}) + >>> df = pd.DataFrame( + ... { + ... "white": { + ... 0: 10, + ... 1: 8, + ... 2: 7, + ... 3: 9, + ... 4: 7, + ... 5: 4, + ... 6: 5, + ... 7: 6, + ... 8: 5, + ... 9: 10, + ... 10: 4, + ... 11: 7, + ... }, + ... "red": {0: 7, 1: 5, 2: 8, 3: 6, 4: 5, 5: 7, 6: 9, 7: 6, 8: 4, 9: 6, 10: 7, 11: 3}, + ... "rose": {0: 8, 1: 5, 2: 6, 3: 4, 4: 7, 5: 5, 6: 3, 7: 7, 8: 6, 9: 4, 10: 4, 11: 3}, + ... } + ... 
) >>> pg.friedman(df) Source W ddof1 Q p_unc Friedman Within 0.083333 2 2.0 0.367879 @@ -808,8 +825,8 @@ def cochran(data=None, dv=None, within=None, subject=None): Compute the Cochran Q test for repeated measurements. >>> from pingouin import cochran, read_dataset - >>> df = read_dataset('cochran') - >>> cochran(data=df, dv='Energetic', within='Time', subject='Subject') + >>> df = read_dataset("cochran") + >>> cochran(data=df, dv="Energetic", within="Time", subject="Subject") Source dof Q p_unc cochran Time 2 6.705882 0.034981 diff --git a/src/pingouin/pairwise.py b/src/pingouin/pairwise.py index 6a32f83f..e8fc4c1d 100644 --- a/src/pingouin/pairwise.py +++ b/src/pingouin/pairwise.py @@ -1,16 +1,18 @@ # Author: Raphael Vallat # Date: April 2018 +import warnings +from itertools import combinations, product + import numpy as np import pandas as pd import pandas_flavor as pf -from itertools import combinations, product +from scipy.stats import studentized_range + from pingouin.config import options -from pingouin.parametric import anova -from pingouin.multicomp import multicomp from pingouin.effsize import compute_effsize +from pingouin.multicomp import multicomp +from pingouin.parametric import anova from pingouin.utils import _check_dataframe, _flatten_list, _postprocess_dataframe -from scipy.stats import studentized_range -import warnings __all__ = [ "pairwise_ttests", @@ -203,16 +205,16 @@ def pairwise_tests( >>> import pandas as pd >>> import pingouin as pg - >>> pd.set_option('display.expand_frame_repr', False) - >>> pd.set_option('display.max_columns', 20) - >>> df = pg.read_dataset('mixed_anova.csv') - >>> pg.pairwise_tests(dv='Scores', between='Group', data=df).round(3) + >>> pd.set_option("display.expand_frame_repr", False) + >>> pd.set_option("display.max_columns", 20) + >>> df = pg.read_dataset("mixed_anova.csv") + >>> pg.pairwise_tests(dv="Scores", between="Group", data=df).round(3) Contrast A B Paired Parametric T dof alternative p_unc BF10 hedges 0 
Group Control Meditation False True -2.29 178.0 two-sided 0.023 1.813 -0.34 2. One within-subject factor - >>> post_hocs = pg.pairwise_tests(dv='Scores', within='Time', subject='Subject', data=df) + >>> post_hocs = pg.pairwise_tests(dv="Scores", within="Time", subject="Subject", data=df) >>> post_hocs.round(3) Contrast A B Paired Parametric T dof alternative p_unc BF10 hedges 0 Time August January True True -1.740 59.0 two-sided 0.087 0.582 -0.328 @@ -221,8 +223,9 @@ def pairwise_tests( 3. Non-parametric pairwise paired test (wilcoxon) - >>> pg.pairwise_tests(dv='Scores', within='Time', subject='Subject', - ... data=df, parametric=False).round(3) + >>> pg.pairwise_tests( + ... dv="Scores", within="Time", subject="Subject", data=df, parametric=False + ... ).round(3) Contrast A B Paired Parametric W_val alternative p_unc hedges 0 Time August January True False 716.0 two-sided 0.144 -0.328 1 Time August June True False 564.0 two-sided 0.010 -0.483 @@ -230,8 +233,9 @@ def pairwise_tests( 4. Mixed design (within and between) with bonferroni-corrected p-values - >>> posthocs = pg.pairwise_tests(dv='Scores', within='Time', subject='Subject', - ... between='Group', padjust='bonf', data=df) + >>> posthocs = pg.pairwise_tests( + ... dv="Scores", within="Time", subject="Subject", between="Group", padjust="bonf", data=df + ... ) >>> posthocs.round(3) Contrast Time A B Paired Parametric T dof alternative p_unc p_corr p_adjust BF10 hedges 0 Time - August January True True -1.740 59.0 two-sided 0.087 0.261 bonf 0.582 -0.328 @@ -244,7 +248,7 @@ def pairwise_tests( 5. Two between-subject factors. The order of the ``between`` factors matters! 
- >>> pg.pairwise_tests(dv='Scores', between=['Group', 'Time'], data=df).round(3) + >>> pg.pairwise_tests(dv="Scores", between=["Group", "Time"], data=df).round(3) Contrast Group A B Paired Parametric T dof alternative p_unc BF10 hedges 0 Group - Control Meditation False True -2.290 178.0 two-sided 0.023 1.813 -0.340 1 Time - August January False True -1.806 118.0 two-sided 0.074 0.839 -0.328 @@ -259,16 +263,17 @@ def pairwise_tests( 6. Same but without the interaction, and using a directional test - >>> df.pairwise_tests(dv='Scores', between=['Group', 'Time'], alternative="less", - ... interaction=False).round(3) + >>> df.pairwise_tests( + ... dv="Scores", between=["Group", "Time"], alternative="less", interaction=False + ... ).round(3) Contrast A B Paired Parametric T dof alternative p_unc hedges 0 Group Control Meditation False True -2.290 178.0 less 0.012 -0.340 1 Time August January False True -1.806 118.0 less 0.037 -0.328 2 Time August June False True -2.660 118.0 less 0.004 -0.483 3 Time January June False True -0.934 118.0 less 0.176 -0.170 """ + from .nonparametric import mwu, wilcoxon from .parametric import ttest - from .nonparametric import wilcoxon, mwu # Safety checks data = _check_dataframe( @@ -660,8 +665,8 @@ def ptests( >>> import pandas as pd >>> import pingouin as pg >>> # Load an example dataset of personality dimensions - >>> df = pg.read_dataset('pairwise_corr').iloc[:30, 1:] - >>> df.columns = ["N", "E", "O", 'A', "C"] + >>> df = pg.read_dataset("pairwise_corr").iloc[:30, 1:] + >>> df.columns = ["N", "E", "O", "A", "C"] >>> # Add some missing values >>> df.iloc[[2, 5, 20], 2] = np.nan >>> df.iloc[[1, 4, 10], 3] = np.nan @@ -720,6 +725,7 @@ def ptests( C -4.251 3.595 3.785 3.765 - """ from itertools import combinations + from numpy import format_float_positional as ffp from scipy.stats import ttest_ind, ttest_rel @@ -873,8 +879,8 @@ def pairwise_tukey(data=None, dv=None, between=None, effsize="hedges"): Pairwise Tukey post-hocs on the 
Penguins dataset. >>> import pingouin as pg - >>> df = pg.read_dataset('penguins') - >>> df.pairwise_tukey(dv='body_mass_g', between='species').round(3) + >>> df = pg.read_dataset("penguins") + >>> df.pairwise_tukey(dv="body_mass_g", between="species").round(3) A B mean(A) mean(B) diff se T p_tukey hedges 0 Adelie Chinstrap 3700.662 3733.088 -32.426 67.512 -0.480 0.881 -0.074 1 Adelie Gentoo 3700.662 5076.016 -1375.354 56.148 -24.495 0.000 -2.860 @@ -1038,9 +1044,8 @@ def pairwise_gameshowell(data=None, dv=None, between=None, effsize="hedges"): Pairwise Games-Howell post-hocs on the Penguins dataset. >>> import pingouin as pg - >>> df = pg.read_dataset('penguins') - >>> pg.pairwise_gameshowell(data=df, dv='body_mass_g', - ... between='species').round(3) + >>> df = pg.read_dataset("penguins") + >>> pg.pairwise_gameshowell(data=df, dv="body_mass_g", between="species").round(3) A B mean(A) mean(B) diff se T df pval hedges 0 Adelie Chinstrap 3700.662 3733.088 -32.426 59.706 -0.543 152.455 0.85 -0.074 1 Adelie Gentoo 3700.662 5076.016 -1375.354 58.811 -23.386 249.643 0.00 -2.860 @@ -1242,10 +1247,10 @@ def pairwise_corr( >>> import pandas as pd >>> import pingouin as pg - >>> pd.set_option('display.expand_frame_repr', False) - >>> pd.set_option('display.max_columns', 20) - >>> data = pg.read_dataset('pairwise_corr').iloc[:, 1:] - >>> pg.pairwise_corr(data, method='spearman', alternative='greater', padjust='bonf').round(3) + >>> pd.set_option("display.expand_frame_repr", False) + >>> pd.set_option("display.max_columns", 20) + >>> data = pg.read_dataset("pairwise_corr").iloc[:, 1:] + >>> pg.pairwise_corr(data, method="spearman", alternative="greater", padjust="bonf").round(3) X Y method alternative n r CI95 p_unc p_corr p_adjust power 0 Neuroticism Extraversion spearman greater 500 -0.325 [-0.39, 1.0] 1.000 1.000 bonf 0.000 1 Neuroticism Openness spearman greater 500 -0.028 [-0.1, 1.0] 0.735 1.000 bonf 0.012 @@ -1260,8 +1265,9 @@ def pairwise_corr( 2. 
Robust two-sided biweight midcorrelation with uncorrected p-values - >>> pcor = pg.pairwise_corr(data, columns=['Openness', 'Extraversion', - ... 'Neuroticism'], method='bicor') + >>> pcor = pg.pairwise_corr( + ... data, columns=["Openness", "Extraversion", "Neuroticism"], method="bicor" + ... ) >>> pcor.round(3) X Y method alternative n r CI95 p_unc power 0 Openness Extraversion bicor two-sided 500 0.247 [0.16, 0.33] 0.000 1.000 @@ -1270,7 +1276,7 @@ def pairwise_corr( 3. One-versus-all pairwise correlations - >>> pg.pairwise_corr(data, columns=['Neuroticism']).round(3) + >>> pg.pairwise_corr(data, columns=["Neuroticism"]).round(3) X Y method alternative n r CI95 p_unc BF10 power 0 Neuroticism Extraversion pearson two-sided 500 -0.350 [-0.42, -0.27] 0.000 6.765e+12 1.000 1 Neuroticism Openness pearson two-sided 500 -0.010 [-0.1, 0.08] 0.817 0.058 0.056 @@ -1279,7 +1285,7 @@ def pairwise_corr( 4. Pairwise correlations between two lists of columns (cartesian product) - >>> columns = [['Neuroticism', 'Extraversion'], ['Openness']] + >>> columns = [["Neuroticism", "Extraversion"], ["Openness"]] >>> pg.pairwise_corr(data, columns).round(3) X Y method alternative n r CI95 p_unc BF10 power 0 Neuroticism Openness pearson two-sided 500 -0.010 [-0.1, 0.08] 0.817 0.058 0.056 @@ -1287,11 +1293,11 @@ def pairwise_corr( 5. As a Pandas method - >>> pcor = data.pairwise_corr(covar='Neuroticism', method='spearman') + >>> pcor = data.pairwise_corr(covar="Neuroticism", method="spearman") 6. Pairwise partial correlation - >>> pg.pairwise_corr(data, covar=['Neuroticism', 'Openness']) + >>> pg.pairwise_corr(data, covar=["Neuroticism", "Openness"]) X Y method covar alternative n r CI95 p_unc 0 Extraversion Agreeableness pearson ['Neuroticism', 'Openness'] two-sided 500 -0.038737 [-0.13, 0.05] 0.388361 1 Extraversion Conscientiousness pearson ['Neuroticism', 'Openness'] two-sided 500 -0.071427 [-0.16, 0.02] 0.111389 @@ -1299,7 +1305,7 @@ def pairwise_corr( 7. 
Pairwise partial correlation matrix using :py:func:`pingouin.pcorr` - >>> data[['Neuroticism', 'Openness', 'Extraversion']].pcorr().round(3) + >>> data[["Neuroticism", "Openness", "Extraversion"]].pcorr().round(3) Neuroticism Openness Extraversion Neuroticism 1.000 0.092 -0.360 Openness 0.092 1.000 0.281 @@ -1307,7 +1313,7 @@ def pairwise_corr( 8. Correlation matrix with p-values using :py:func:`pingouin.rcorr` - >>> data[['Neuroticism', 'Openness', 'Extraversion']].rcorr() + >>> data[["Neuroticism", "Openness", "Extraversion"]].rcorr() Neuroticism Openness Extraversion Neuroticism - *** Openness -0.01 - *** diff --git a/src/pingouin/parametric.py b/src/pingouin/parametric.py index 0329771c..4369b78c 100644 --- a/src/pingouin/parametric.py +++ b/src/pingouin/parametric.py @@ -1,18 +1,20 @@ # Author: Raphael Vallat import warnings from collections.abc import Iterable + import numpy as np import pandas as pd -from scipy.stats import f import pandas_flavor as pf +from scipy.stats import f + from pingouin import ( _check_dataframe, - remove_na, _flatten_list, + _postprocess_dataframe, bayesfactor_ttest, epsilon, + remove_na, sphericity, - _postprocess_dataframe, ) __all__ = ["ttest", "rm_anova", "anova", "welch_anova", "mixed_anova", "ancova"] @@ -150,14 +152,14 @@ def ttest(x, y, paired=False, alternative="two-sided", correction="auto", r=0.70 2. One sided paired T-test. 
>>> pre = [5.5, 2.4, 6.8, 9.6, 4.2] - >>> post = [6.4, 3.4, 6.4, 11., 4.8] - >>> ttest(pre, post, paired=True, alternative='less').round(2) + >>> post = [6.4, 3.4, 6.4, 11.0, 4.8] + >>> ttest(pre, post, paired=True, alternative="less").round(2) T dof alternative p_val CI95 cohen_d power T_test -2.31 4 less 0.04 [-inf, -0.05] 0.25 0.12 Now testing the opposite alternative hypothesis - >>> ttest(pre, post, paired=True, alternative='greater').round(2) + >>> ttest(pre, post, paired=True, alternative="greater").round(2) T dof alternative p_val CI95 cohen_d power T_test -2.31 4 greater 0.96 [-1.35, inf] 0.25 0.02 @@ -165,7 +167,7 @@ def ttest(x, y, paired=False, alternative="two-sided", correction="auto", r=0.70 >>> import numpy as np >>> pre = [5.5, 2.4, np.nan, 9.6, 4.2] - >>> post = [6.4, 3.4, 6.4, 11., 4.8] + >>> post = [6.4, 3.4, 6.4, 11.0, 4.8] >>> ttest(pre, post, paired=True).round(3) T dof alternative p_val CI95 cohen_d BF10 power T_test -5.902 3 two-sided 0.01 [-1.5, -0.45] 0.306 7.169 0.073 @@ -205,14 +207,14 @@ def ttest(x, y, paired=False, alternative="two-sided", correction="auto", r=0.70 >>> np.round(ttest_ind(x, y, equal_var=True), 6) # T value and p-value array([1.971859, 0.057056]) """ - from scipy.stats import t, ttest_rel, ttest_ind, ttest_1samp + from scipy.stats import t, ttest_1samp, ttest_ind, ttest_rel try: # pragma: no cover - from scipy.stats._stats_py import _unequal_var_ttest_denom, _equal_var_ttest_denom + from scipy.stats._stats_py import _equal_var_ttest_denom, _unequal_var_ttest_denom except ImportError: # pragma: no cover # Fallback for scipy<1.8.0 - from scipy.stats.stats import _unequal_var_ttest_denom, _equal_var_ttest_denom - from pingouin import power_ttest, power_ttest2n, compute_effsize + from scipy.stats.stats import _equal_var_ttest_denom, _unequal_var_ttest_denom + from pingouin import compute_effsize, power_ttest, power_ttest2n # Check arguments assert alternative in [ @@ -474,7 +476,7 @@ def rm_anova( 1. 
One-way repeated measures ANOVA using a wide-format dataset >>> import pingouin as pg - >>> data = pg.read_dataset('rm_anova_wide') + >>> data = pg.read_dataset("rm_anova_wide") >>> pg.rm_anova(data) Source ddof1 ddof2 F p_unc ng2 eps 0 Within 3 24 5.200652 0.006557 0.346392 0.694329 @@ -486,9 +488,15 @@ def rm_anova( means that we want to get the partial eta-squared effect size instead of the default (generalized) eta-squared. - >>> df = pg.read_dataset('rm_anova') - >>> aov = pg.rm_anova(dv='DesireToKill', within='Disgustingness', - ... subject='Subject', data=df, detailed=True, effsize="np2") + >>> df = pg.read_dataset("rm_anova") + >>> aov = pg.rm_anova( + ... dv="DesireToKill", + ... within="Disgustingness", + ... subject="Subject", + ... data=df, + ... detailed=True, + ... effsize="np2", + ... ) >>> aov.round(3) Source SS DF MS F p_unc np2 eps 0 Disgustingness 27.485 1 27.485 12.044 0.001 0.116 1.0 @@ -496,12 +504,16 @@ def rm_anova( 3. Two-way repeated-measures ANOVA - >>> aov = pg.rm_anova(dv='DesireToKill', within=['Disgustingness', 'Frighteningness'], - ... subject='Subject', data=df) + >>> aov = pg.rm_anova( + ... dv="DesireToKill", + ... within=["Disgustingness", "Frighteningness"], + ... subject="Subject", + ... data=df, + ... ) 4. As a :py:class:`pandas.DataFrame` method - >>> df.rm_anova(dv='DesireToKill', within='Disgustingness', subject='Subject', detailed=False) + >>> df.rm_anova(dv="DesireToKill", within="Disgustingness", subject="Subject", detailed=False) Source ddof1 ddof2 F p_unc ng2 eps 0 Disgustingness 1 92 12.043878 0.000793 0.025784 1.0 """ @@ -908,9 +920,8 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize=" One-way ANOVA >>> import pingouin as pg - >>> df = pg.read_dataset('anova') - >>> aov = pg.anova(dv='Pain threshold', between='Hair color', data=df, - ... 
detailed=True) + >>> df = pg.read_dataset("anova") + >>> aov = pg.anova(dv="Pain threshold", between="Hair color", data=df, detailed=True) >>> aov.round(3) Source SS DF MS F p_unc np2 0 Hair color 1360.726 3 453.575 6.791 0.004 0.576 @@ -921,14 +932,13 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize=" a method (= built-in function) of our pandas dataframe. In that case, we don't have to specify ``data`` anymore. - >>> df.anova(dv='Pain threshold', between='Hair color', detailed=False, - ... effsize='n2') + >>> df.anova(dv="Pain threshold", between="Hair color", detailed=False, effsize="n2") Source ddof1 ddof2 F p_unc n2 0 Hair color 3 15 6.791407 0.004114 0.575962 Two-way ANOVA with balanced design - >>> data = pg.read_dataset('anova2') + >>> data = pg.read_dataset("anova2") >>> data.anova(dv="Yield", between=["Blend", "Crop"]).round(3) Source SS DF MS F p_unc np2 0 Blend 2.042 1 2.042 0.004 0.952 0.000 @@ -938,9 +948,8 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize=" Two-way ANOVA with unbalanced design (requires statsmodels) - >>> data = pg.read_dataset('anova2_unbalanced') - >>> data.anova(dv="Scores", between=["Diet", "Exercise"], - ... effsize="n2").round(3) + >>> data = pg.read_dataset("anova2_unbalanced") + >>> data.anova(dv="Scores", between=["Diet", "Exercise"], effsize="n2").round(3) Source SS DF MS F p_unc n2 0 Diet 390.625 1.0 390.625 7.423 0.034 0.433 1 Exercise 180.625 1.0 180.625 3.432 0.113 0.200 @@ -949,9 +958,8 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize=" Three-way ANOVA, type 3 sums of squares (requires statsmodels) - >>> data = pg.read_dataset('anova3') - >>> data.anova(dv='Cholesterol', between=['Sex', 'Risk', 'Drug'], - ... 
ss_type=3).round(3) + >>> data = pg.read_dataset("anova3") + >>> data.anova(dv="Cholesterol", between=["Sex", "Risk", "Drug"], ss_type=3).round(3) Source SS DF MS F p_unc np2 0 Sex 2.075 1.0 2.075 2.462 0.123 0.049 1 Risk 11.332 1.0 11.332 13.449 0.001 0.219 @@ -1322,8 +1330,8 @@ def welch_anova(data=None, dv=None, between=None): 1. One-way Welch ANOVA on the pain threshold dataset. >>> from pingouin import welch_anova, read_dataset - >>> df = read_dataset('anova') - >>> aov = welch_anova(dv='Pain threshold', between='Hair color', data=df) + >>> df = read_dataset("anova") + >>> aov = welch_anova(dv="Pain threshold", between="Hair color", data=df) >>> aov Source ddof1 ddof2 F p_unc np2 0 Hair color 3 8.329841 5.890115 0.018813 0.575962 @@ -1446,9 +1454,8 @@ def mixed_anova( Compute a two-way mixed model ANOVA. >>> from pingouin import mixed_anova, read_dataset - >>> df = read_dataset('mixed_anova') - >>> aov = mixed_anova(dv='Scores', between='Group', - ... within='Time', subject='Subject', data=df) + >>> df = read_dataset("mixed_anova") + >>> aov = mixed_anova(dv="Scores", between="Group", within="Time", subject="Subject", data=df) >>> aov.round(3) Source SS DF1 DF2 MS F p_unc np2 eps 0 Group 5.460 1 58 5.460 5.052 0.028 0.080 NaN @@ -1459,8 +1466,9 @@ def mixed_anova( can also apply this function directly as a method of the dataframe, in which case we do not need to specify ``data=df`` anymore. - >>> df.mixed_anova(dv='Scores', between='Group', within='Time', - ... subject='Subject', effsize="ng2").round(3) + >>> df.mixed_anova( + ... dv="Scores", between="Group", within="Time", subject="Subject", effsize="ng2" + ... ).round(3) Source SS DF1 DF2 MS F p_unc ng2 eps 0 Group 5.460 1 58 5.460 5.052 0.028 0.031 NaN 1 Time 7.628 2 116 3.814 4.027 0.020 0.042 0.999 @@ -1664,8 +1672,8 @@ def ancova(data=None, dv=None, between=None, covar=None, effsize="np2"): and family income as a covariate. 
>>> from pingouin import ancova, read_dataset - >>> df = read_dataset('ancova') - >>> ancova(data=df, dv='Scores', covar='Income', between='Method') + >>> df = read_dataset("ancova") + >>> ancova(data=df, dv="Scores", covar="Income", between="Method") Source SS DF F p_unc np2 0 Method 571.029883 3 3.336482 0.031940 0.244077 1 Income 1678.352687 1 29.419438 0.000006 0.486920 @@ -1674,8 +1682,7 @@ def ancova(data=None, dv=None, between=None, covar=None, effsize="np2"): 2. Evaluate the reading scores of students with different teaching method and family income + BMI as a covariate. - >>> ancova(data=df, dv='Scores', covar=['Income', 'BMI'], between='Method', - ... effsize="n2") + >>> ancova(data=df, dv="Scores", covar=["Income", "BMI"], between="Method", effsize="n2") Source SS DF F p_unc n2 0 Method 552.284043 3 3.232550 0.036113 0.141802 1 Income 1573.952434 1 27.637304 0.000011 0.404121 diff --git a/src/pingouin/plotting.py b/src/pingouin/plotting.py index 910d8763..f136d6d1 100644 --- a/src/pingouin/plotting.py +++ b/src/pingouin/plotting.py @@ -5,12 +5,12 @@ - Nicolas Legrand """ +import matplotlib.pyplot as plt +import matplotlib.transforms as transforms import numpy as np import pandas as pd import seaborn as sns from scipy import stats -import matplotlib.pyplot as plt -import matplotlib.transforms as transforms # Set default Seaborn preferences (disabled Pingouin >= 0.3.4) # See https://github.com/raphaelvallat/pingouin/issues/85 @@ -109,7 +109,7 @@ def plot_blandaltman( >>> import pingouin as pg >>> df = pg.read_dataset("blandaltman") - >>> ax = pg.plot_blandaltman(df['A'], df['B']) + >>> ax = pg.plot_blandaltman(df["A"], df["B"]) >>> plt.tight_layout() """ # Safety check @@ -298,7 +298,7 @@ def qqplot(x, dist="norm", sparams=(), confidence=0.95, square=True, ax=None, ** >>> import pingouin as pg >>> np.random.seed(123) >>> x = np.random.normal(size=50) - >>> ax = pg.qqplot(x, dist='norm') + >>> ax = pg.qqplot(x, dist="norm") Two Q-Q plots using two separate 
axes: @@ -311,8 +311,8 @@ def qqplot(x, dist="norm", sparams=(), confidence=0.95, square=True, ax=None, ** >>> x = np.random.normal(size=50) >>> x_exp = np.random.exponential(size=50) >>> fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4)) - >>> ax1 = pg.qqplot(x, dist='norm', ax=ax1, confidence=False) - >>> ax2 = pg.qqplot(x_exp, dist='expon', ax=ax2) + >>> ax1 = pg.qqplot(x, dist="norm", ax=ax1, confidence=False) + >>> ax2 = pg.qqplot(x_exp, dist="expon", ax=ax2) Using custom location / scale parameters as well as another Seaborn style @@ -325,8 +325,8 @@ def qqplot(x, dist="norm", sparams=(), confidence=0.95, square=True, ax=None, ** >>> np.random.seed(123) >>> x = np.random.normal(size=50) >>> mean, std = 0, 0.8 - >>> sns.set_style('darkgrid') - >>> ax = pg.qqplot(x, dist='norm', sparams=(mean, std)) + >>> sns.set_style("darkgrid") + >>> ax = pg.qqplot(x, dist="norm", sparams=(mean, std)) """ # Update default kwargs with specified inputs _scatter_kwargs = {"marker": "o", "color": "blue"} @@ -482,9 +482,9 @@ def plot_paired( .. plot:: >>> import pingouin as pg - >>> df = pg.read_dataset('mixed_anova').query("Time != 'January'") + >>> df = pg.read_dataset("mixed_anova").query("Time != 'January'") >>> df = df.query("Group == 'Meditation' and Subject > 40") - >>> ax = pg.plot_paired(data=df, dv='Scores', within='Time', subject='Subject') + >>> ax = pg.plot_paired(data=df, dv="Scores", within="Time", subject="Subject") Paired plot on an existing axis (no boxplot and uniform color): @@ -492,12 +492,18 @@ def plot_paired( >>> import pingouin as pg >>> import matplotlib.pyplot as plt - >>> df = pg.read_dataset('mixed_anova').query("Time != 'January'") + >>> df = pg.read_dataset("mixed_anova").query("Time != 'January'") >>> df = df.query("Group == 'Meditation' and Subject > 40") >>> fig, ax1 = plt.subplots(1, 1, figsize=(5, 4)) - >>> pg.plot_paired(data=df, dv='Scores', within='Time', - ... subject='Subject', ax=ax1, boxplot=False, - ... 
colors=['grey', 'grey', 'grey']) # doctest: +SKIP + >>> pg.plot_paired( + ... data=df, + ... dv="Scores", + ... within="Time", + ... subject="Subject", + ... ax=ax1, + ... boxplot=False, + ... colors=["grey", "grey", "grey"], + ... ) # doctest: +SKIP Horizontal paired plot with three unique within-levels: @@ -505,20 +511,22 @@ def plot_paired( >>> import pingouin as pg >>> import matplotlib.pyplot as plt - >>> df = pg.read_dataset('mixed_anova').query("Group == 'Meditation'") + >>> df = pg.read_dataset("mixed_anova").query("Group == 'Meditation'") >>> # df = df.query("Group == 'Meditation' and Subject > 40") - >>> pg.plot_paired(data=df, dv='Scores', within='Time', - ... subject='Subject', orient='h') # doctest: +SKIP + >>> pg.plot_paired( + ... data=df, dv="Scores", within="Time", subject="Subject", orient="h" + ... ) # doctest: +SKIP With the boxplot on the foreground: .. plot:: >>> import pingouin as pg - >>> df = pg.read_dataset('mixed_anova').query("Time != 'January'") + >>> df = pg.read_dataset("mixed_anova").query("Time != 'January'") >>> df = df.query("Group == 'Control'") - >>> ax = pg.plot_paired(data=df, dv='Scores', within='Time', - ... subject='Subject', boxplot_in_front=True) + >>> ax = pg.plot_paired( + ... data=df, dv="Scores", within="Time", subject="Subject", boxplot_in_front=True + ... ) """ from pingouin.utils import _check_dataframe @@ -741,17 +749,24 @@ def plot_shift( >>> import pingouin as pg >>> import matplotlib.pyplot as plt >>> data = pg.read_dataset("pairwise_corr") - >>> fig = pg.plot_shift(data["Neuroticism"], data["Conscientiousness"], paired=True, - ... n_boot=2000, percentiles=[25, 50, 75], show_median=False, seed=456, - ... violin=False) + >>> fig = pg.plot_shift( + ... data["Neuroticism"], + ... data["Conscientiousness"], + ... paired=True, + ... n_boot=2000, + ... percentiles=[25, 50, 75], + ... show_median=False, + ... seed=456, + ... violin=False, + ... 
) >>> fig.axes[0].set_xlabel("Groups") >>> fig.axes[0].set_ylabel("Values", size=15) >>> fig.axes[0].set_title("Comparing Neuroticism and Conscientiousness", size=15) >>> fig.axes[1].set_xlabel("Neuroticism quantiles", size=12) >>> plt.tight_layout() """ - from pingouin.regression import _bias_corrected_ci from pingouin.nonparametric import harrelldavis as hd + from pingouin.regression import _bias_corrected_ci # Safety check x = np.asarray(x) @@ -831,7 +846,11 @@ def adjacent_values(vals, q1, q3): ) if violin: - vl = plt.violinplot([y, x], showextrema=False, orientation="horizontal", widths=1) + import matplotlib as _mpl + + _mpl_ver = tuple(int(v) for v in _mpl.__version__.split(".")[:2]) + _orient_kw = {"orientation": "horizontal"} if _mpl_ver >= (3, 10) else {"vert": False} + vl = plt.violinplot([y, x], showextrema=False, widths=1, **_orient_kw) # Upper plot paths = vl["bodies"][0].get_paths()[0] @@ -963,8 +982,8 @@ def plot_rm_corr( .. plot:: >>> import pingouin as pg - >>> df = pg.read_dataset('rm_corr') - >>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject') + >>> df = pg.read_dataset("rm_corr") + >>> g = pg.plot_rm_corr(data=df, x="pH", y="PacO2", subject="Subject") With some tweakings @@ -972,12 +991,16 @@ def plot_rm_corr( >>> import pingouin as pg >>> import seaborn as sns - >>> df = pg.read_dataset('rm_corr') - >>> sns.set_theme(style='darkgrid', font_scale=1.2) - >>> g = pg.plot_rm_corr(data=df, x='pH', y='PacO2', - ... subject='Subject', legend=True, - ... kwargs_facetgrid=dict(height=4.5, aspect=1.5, - ... palette='Spectral')) + >>> df = pg.read_dataset("rm_corr") + >>> sns.set_theme(style="darkgrid", font_scale=1.2) + >>> g = pg.plot_rm_corr( + ... data=df, + ... x="pH", + ... y="PacO2", + ... subject="Subject", + ... legend=True, + ... kwargs_facetgrid=dict(height=4.5, aspect=1.5, palette="Spectral"), + ... 
) """ # Check that stasmodels is installed from pingouin.utils import _is_statsmodels_installed @@ -1080,20 +1103,25 @@ def plot_circmean( >>> import pingouin as pg >>> import matplotlib.pyplot as plt >>> _, ax = plt.subplots(1, 1, figsize=(3, 3)) - >>> ax = pg.plot_circmean([0.05, -0.8, 1.2, 0.8, 0.5, -0.3, 0.3, 0.7], - ... kwargs_markers=dict(color='k', mfc='k'), - ... kwargs_arrow=dict(ec='k', fc='k'), ax=ax) + >>> ax = pg.plot_circmean( + ... [0.05, -0.8, 1.2, 0.8, 0.5, -0.3, 0.3, 0.7], + ... kwargs_markers=dict(color="k", mfc="k"), + ... kwargs_arrow=dict(ec="k", fc="k"), + ... ax=ax, + ... ) .. plot:: >>> import pingouin as pg >>> import seaborn as sns - >>> sns.set_theme(font_scale=1.5, style='white') - >>> ax = pg.plot_circmean([0.8, 1.5, 3.14, 5.2, 6.1, 2.8, 2.6, 3.2], - ... kwargs_markers=dict(marker="None")) + >>> sns.set_theme(font_scale=1.5, style="white") + >>> ax = pg.plot_circmean( + ... [0.8, 1.5, 3.14, 5.2, 6.1, 2.8, 2.6, 3.2], kwargs_markers=dict(marker="None") + ... ) """ from matplotlib.patches import Circle - from .circular import circ_r, circ_mean + + from .circular import circ_mean, circ_r # Sanity checks angles = np.asarray(angles) diff --git a/src/pingouin/power.py b/src/pingouin/power.py index 2a74a2be..bc4e4fc1 100644 --- a/src/pingouin/power.py +++ b/src/pingouin/power.py @@ -1,6 +1,7 @@ # Author: Raphael Vallat # Date: April 2018 import warnings + import numpy as np from scipy import stats from scipy.optimize import brenth @@ -95,31 +96,31 @@ def power_ttest( 1. Compute power of a one-sample T-test given ``d``, ``n`` and ``alpha`` >>> from pingouin import power_ttest - >>> print('power: %.4f' % power_ttest(d=0.5, n=20, contrast='one-sample')) + >>> print("power: %.4f" % power_ttest(d=0.5, n=20, contrast="one-sample")) power: 0.5645 2. 
Compute required sample size given ``d``, ``power`` and ``alpha`` - >>> print('n: %.4f' % power_ttest(d=0.5, power=0.80, alternative='greater')) + >>> print("n: %.4f" % power_ttest(d=0.5, power=0.80, alternative="greater")) n: 50.1508 3. Compute achieved ``d`` given ``n``, ``power`` and ``alpha`` level - >>> print('d: %.4f' % power_ttest(n=20, power=0.80, alpha=0.05, contrast='paired')) + >>> print("d: %.4f" % power_ttest(n=20, power=0.80, alpha=0.05, contrast="paired")) d: 0.6604 4. Compute achieved alpha level given ``d``, ``n`` and ``power`` - >>> print('alpha: %.4f' % power_ttest(d=0.5, n=20, power=0.80, alpha=None)) + >>> print("alpha: %.4f" % power_ttest(d=0.5, n=20, power=0.80, alpha=None)) alpha: 0.4430 5. One-sided tests >>> from pingouin import power_ttest - >>> print('power: %.4f' % power_ttest(d=0.5, n=20, alternative='greater')) + >>> print("power: %.4f" % power_ttest(d=0.5, n=20, alternative="greater")) power: 0.4634 - >>> print('power: %.4f' % power_ttest(d=0.5, n=20, alternative='less')) + >>> print("power: %.4f" % power_ttest(d=0.5, n=20, alternative="less")) power: 0.0007 """ # Check the number of arguments that are None @@ -278,17 +279,17 @@ def power_ttest2n(nx, ny, d=None, power=None, alpha=0.05, alternative="two-sided 1. Compute achieved power of a T-test given ``d``, ``n`` and ``alpha`` >>> from pingouin import power_ttest2n - >>> print('power: %.4f' % power_ttest2n(nx=20, ny=15, d=0.5, alternative='greater')) + >>> print("power: %.4f" % power_ttest2n(nx=20, ny=15, d=0.5, alternative="greater")) power: 0.4164 2. Compute achieved ``d`` given ``n``, ``power`` and ``alpha`` level - >>> print('d: %.4f' % power_ttest2n(nx=20, ny=15, power=0.80, alpha=0.05)) + >>> print("d: %.4f" % power_ttest2n(nx=20, ny=15, power=0.80, alpha=0.05)) d: 0.9859 3. 
Compute achieved alpha level given ``d``, ``n`` and ``power`` - >>> print('alpha: %.4f' % power_ttest2n(nx=20, ny=15, d=0.5, power=0.80, alpha=None)) + >>> print("alpha: %.4f" % power_ttest2n(nx=20, ny=15, d=0.5, power=0.80, alpha=None)) alpha: 0.5000 """ # Check the number of arguments that are None @@ -444,27 +445,27 @@ def power_anova(eta_squared=None, k=None, n=None, power=None, alpha=0.05): 1. Compute achieved power >>> from pingouin import power_anova - >>> print('power: %.4f' % power_anova(eta_squared=0.1, k=3, n=20)) + >>> print("power: %.4f" % power_anova(eta_squared=0.1, k=3, n=20)) power: 0.6082 2. Compute required number of groups - >>> print('k: %.4f' % power_anova(eta_squared=0.1, n=20, power=0.80)) + >>> print("k: %.4f" % power_anova(eta_squared=0.1, n=20, power=0.80)) k: 6.0944 3. Compute required sample size - >>> print('n: %.4f' % power_anova(eta_squared=0.1, k=3, power=0.80)) + >>> print("n: %.4f" % power_anova(eta_squared=0.1, k=3, power=0.80)) n: 29.9256 4. Compute achieved effect size - >>> print('eta-squared: %.4f' % power_anova(n=20, k=4, power=0.80, alpha=0.05)) + >>> print("eta-squared: %.4f" % power_anova(n=20, k=4, power=0.80, alpha=0.05)) eta-squared: 0.1255 5. Compute achieved alpha (significance) - >>> print('alpha: %.4f' % power_anova(eta_squared=0.1, n=20, k=4, power=0.80, alpha=None)) + >>> print("alpha: %.4f" % power_anova(eta_squared=0.1, n=20, k=4, power=0.80, alpha=None)) alpha: 0.1085 """ # Check the number of arguments that are None @@ -613,27 +614,27 @@ def power_rm_anova(eta_squared=None, m=None, n=None, power=None, alpha=0.05, cor 1. Compute achieved power >>> from pingouin import power_rm_anova - >>> print('power: %.4f' % power_rm_anova(eta_squared=0.1, m=3, n=20)) + >>> print("power: %.4f" % power_rm_anova(eta_squared=0.1, m=3, n=20)) power: 0.8913 2. 
Compute required number of groups - >>> print('m: %.4f' % power_rm_anova(eta_squared=0.1, n=20, power=0.90)) + >>> print("m: %.4f" % power_rm_anova(eta_squared=0.1, n=20, power=0.90)) m: 3.1347 3. Compute required sample size - >>> print('n: %.4f' % power_rm_anova(eta_squared=0.1, m=3, power=0.80)) + >>> print("n: %.4f" % power_rm_anova(eta_squared=0.1, m=3, power=0.80)) n: 15.9979 4. Compute achieved effect size - >>> print('eta-squared: %.4f' % power_rm_anova(n=20, m=4, power=0.80, alpha=0.05)) + >>> print("eta-squared: %.4f" % power_rm_anova(n=20, m=4, power=0.80, alpha=0.05)) eta-squared: 0.0680 5. Compute achieved alpha (significance) - >>> print('alpha: %.4f' % power_rm_anova(eta_squared=0.1, n=20, m=4, power=0.80, alpha=None)) + >>> print("alpha: %.4f" % power_rm_anova(eta_squared=0.1, n=20, m=4, power=0.80, alpha=None)) alpha: 0.0081 Let's take a more concrete example. First, we'll load a repeated measures @@ -641,7 +642,7 @@ def power_rm_anova(eta_squared=None, m=None, n=None, power=None, alpha=0.05, cor each column a successive repeated measurements (e.g t=0, t=1, ...). >>> import pingouin as pg - >>> data = pg.read_dataset('rm_anova_wide') + >>> data = pg.read_dataset("rm_anova_wide") >>> data.head() Before 1 week 2 week 3 week 0 4.3 5.3 4.8 6.3 @@ -810,30 +811,30 @@ def power_corr(r=None, n=None, power=None, alpha=0.05, alternative="two-sided"): 1. Compute achieved power given ``r``, ``n`` and ``alpha`` >>> from pingouin import power_corr - >>> print('power: %.4f' % power_corr(r=0.5, n=20)) + >>> print("power: %.4f" % power_corr(r=0.5, n=20)) power: 0.6379 2. Same but one-sided test - >>> print('power: %.4f' % power_corr(r=0.5, n=20, alternative="greater")) + >>> print("power: %.4f" % power_corr(r=0.5, n=20, alternative="greater")) power: 0.7510 - >>> print('power: %.4f' % power_corr(r=0.5, n=20, alternative="less")) + >>> print("power: %.4f" % power_corr(r=0.5, n=20, alternative="less")) power: 0.0000 3. 
Compute required sample size given ``r``, ``power`` and ``alpha`` - >>> print('n: %.4f' % power_corr(r=0.5, power=0.80)) + >>> print("n: %.4f" % power_corr(r=0.5, power=0.80)) n: 28.2484 4. Compute achieved ``r`` given ``n``, ``power`` and ``alpha`` level - >>> print('r: %.4f' % power_corr(n=20, power=0.80, alpha=0.05)) + >>> print("r: %.4f" % power_corr(n=20, power=0.80, alpha=0.05)) r: 0.5822 5. Compute achieved alpha level given ``r``, ``n`` and ``power`` - >>> print('alpha: %.4f' % power_corr(r=0.5, n=20, power=0.80, alpha=None)) + >>> print("alpha: %.4f" % power_corr(r=0.5, n=20, power=0.80, alpha=None)) alpha: 0.1377 """ # Check the number of arguments that are None @@ -1001,22 +1002,22 @@ def power_chi2(dof, w=None, n=None, power=None, alpha=0.05): 1. Compute achieved power >>> from pingouin import power_chi2 - >>> print('power: %.4f' % power_chi2(dof=1, w=0.3, n=20)) + >>> print("power: %.4f" % power_chi2(dof=1, w=0.3, n=20)) power: 0.2687 2. Compute required sample size - >>> print('n: %.4f' % power_chi2(dof=3, w=0.3, power=0.80)) + >>> print("n: %.4f" % power_chi2(dof=3, w=0.3, power=0.80)) n: 121.1396 3. Compute achieved effect size - >>> print('w: %.4f' % power_chi2(dof=2, n=20, power=0.80, alpha=0.05)) + >>> print("w: %.4f" % power_chi2(dof=2, n=20, power=0.80, alpha=0.05)) w: 0.6941 4. 
Compute achieved alpha (significance) - >>> print('alpha: %.4f' % power_chi2(dof=1, w=0.5, n=20, power=0.80, alpha=None)) + >>> print("alpha: %.4f" % power_chi2(dof=1, w=0.5, n=20, power=0.80, alpha=None)) alpha: 0.1630 """ assert isinstance(dof, (int, float)) diff --git a/src/pingouin/regression.py b/src/pingouin/regression.py index 3b536f6b..23deef66 100644 --- a/src/pingouin/regression.py +++ b/src/pingouin/regression.py @@ -1,15 +1,16 @@ import itertools import warnings + import numpy as np import pandas as pd import pandas_flavor as pf -from scipy.stats import t, norm -from scipy.linalg import pinvh, lstsq +from scipy.linalg import lstsq, pinvh +from scipy.stats import norm, t from pingouin.config import options -from pingouin.utils import remove_na as rm_na from pingouin.utils import _flatten_list as _fl from pingouin.utils import _postprocess_dataframe +from pingouin.utils import remove_na as rm_na __all__ = ["linear_regression", "logistic_regression", "mediation_analysis"] @@ -695,13 +696,12 @@ def logistic_regression( >>> import numpy as np >>> import pandas as pd >>> import pingouin as pg - >>> df = pg.read_dataset('penguins') + >>> df = pg.read_dataset("penguins") >>> # Let's first convert the target variable from string to boolean: - >>> df['male'] = (df['sex'] == 'male').astype(int) # male: 1, female: 0 + >>> df["male"] = (df["sex"] == "male").astype(int) # male: 1, female: 0 >>> # Since there are missing values in our outcome variable, we need to >>> # set `remove_na=True` otherwise regression will fail. - >>> lom = pg.logistic_regression(df['body_mass_g'], df['male'], - ... 
remove_na=True) + >>> lom = pg.logistic_regression(df["body_mass_g"], df["male"], remove_na=True) >>> lom.round(2) names coef se z pval CI2.5 CI97.5 0 Intercept -5.16 0.71 -7.24 0.0 -6.56 -3.77 @@ -712,9 +712,8 @@ def logistic_regression( (e.g divide by 1000) in order to get more intuitive coefficients and confidence intervals: - >>> df['body_mass_kg'] = df['body_mass_g'] / 1000 - >>> lom = pg.logistic_regression(df['body_mass_kg'], df['male'], - ... remove_na=True) + >>> df["body_mass_kg"] = df["body_mass_g"] / 1000 + >>> lom = pg.logistic_regression(df["body_mass_kg"], df["male"], remove_na=True) >>> lom.round(2) names coef se z pval CI2.5 CI97.5 0 Intercept -5.16 0.71 -7.24 0.0 -6.56 -3.77 @@ -727,9 +726,9 @@ def logistic_regression( first level of our categorical variable (species = Adelie) which will be used as the reference level: - >>> df = pd.get_dummies(df, columns=['species'], dtype=float, drop_first=True) - >>> X = df[['body_mass_kg', 'species_Chinstrap', 'species_Gentoo']] - >>> y = df['male'] + >>> df = pd.get_dummies(df, columns=["species"], dtype=float, drop_first=True) + >>> X = df[["body_mass_kg", "species_Chinstrap", "species_Gentoo"]] + >>> y = df["male"] >>> lom = pg.logistic_regression(X, y, remove_na=True) >>> lom.round(2) names coef se z pval CI2.5 CI97.5 @@ -740,15 +739,15 @@ def logistic_regression( 3. Using NumPy aray and returning only the coefficients - >>> pg.logistic_regression(X.to_numpy(), y.to_numpy(), coef_only=True, - ... remove_na=True) + >>> pg.logistic_regression(X.to_numpy(), y.to_numpy(), coef_only=True, remove_na=True) array([-26.23906892, 7.09826571, -0.13180626, -9.71718529]) 4. Passing custom parameters to sklearn - >>> lom = pg.logistic_regression(X, y, solver='sag', max_iter=10000, - ... random_state=42, remove_na=True) - >>> print(lom['coef'].to_numpy()) + >>> lom = pg.logistic_regression( + ... X, y, solver="sag", max_iter=10000, random_state=42, remove_na=True + ... 
) + >>> print(lom["coef"].to_numpy()) [-25.98248153 7.02881472 -0.13119779 -9.62247569] **How to interpret the log-odds coefficients?** @@ -763,12 +762,32 @@ def logistic_regression( probability of the student passing the exam?* >>> # First, let's create the dataframe - >>> Hours = [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, - ... 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50] + >>> Hours = [ + ... 0.50, + ... 0.75, + ... 1.00, + ... 1.25, + ... 1.50, + ... 1.75, + ... 1.75, + ... 2.00, + ... 2.25, + ... 2.50, + ... 2.75, + ... 3.00, + ... 3.25, + ... 3.50, + ... 4.00, + ... 4.25, + ... 4.50, + ... 4.75, + ... 5.00, + ... 5.50, + ... ] >>> Pass = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1] - >>> df = pd.DataFrame({'HoursStudy': Hours, 'PassExam': Pass}) + >>> df = pd.DataFrame({"HoursStudy": Hours, "PassExam": Pass}) >>> # And then run the logistic regression - >>> lr = pg.logistic_regression(df['HoursStudy'], df['PassExam']).round(3) + >>> lr = pg.logistic_regression(df["HoursStudy"], df["PassExam"]).round(3) >>> lr names coef se z pval CI2.5 CI97.5 0 Intercept -4.078 1.761 -2.316 0.021 -7.529 -0.626 @@ -1147,9 +1166,8 @@ def mediation_analysis( 1. Simple mediation analysis >>> from pingouin import mediation_analysis, read_dataset - >>> df = read_dataset('mediation') - >>> mediation_analysis(data=df, x='X', m='M', y='Y', alpha=0.05, - ... seed=42) + >>> df = read_dataset("mediation") + >>> mediation_analysis(data=df, x="X", m="M", y="Y", alpha=0.05, seed=42) path coef se pval CI2.5 CI97.5 sig 0 M ~ X 0.561015 0.094480 4.391362e-08 0.373522 0.748509 Yes 1 Y ~ M 0.654173 0.085831 1.612674e-11 0.483844 0.824501 Yes @@ -1159,14 +1177,13 @@ def mediation_analysis( 2. Return the indirect bootstrapped beta coefficients - >>> stats, dist = mediation_analysis(data=df, x='X', m='M', y='Y', - ... 
return_dist=True) + >>> stats, dist = mediation_analysis(data=df, x="X", m="M", y="Y", return_dist=True) >>> print(dist.shape) (500,) 3. Mediation analysis with a binary mediator variable - >>> mediation_analysis(data=df, x='X', m='Mbin', y='Y', seed=42).round(3) + >>> mediation_analysis(data=df, x="X", m="Mbin", y="Y", seed=42).round(3) path coef se pval CI2.5 CI97.5 sig 0 Mbin ~ X -0.021 0.116 0.857 -0.248 0.206 No 1 Y ~ Mbin -0.135 0.412 0.743 -0.952 0.682 No @@ -1176,8 +1193,7 @@ def mediation_analysis( 4. Mediation analysis with covariates - >>> mediation_analysis(data=df, x='X', m='M', y='Y', - ... covar=['Mbin', 'Ybin'], seed=42).round(3) + >>> mediation_analysis(data=df, x="X", m="M", y="Y", covar=["Mbin", "Ybin"], seed=42).round(3) path coef se pval CI2.5 CI97.5 sig 0 M ~ X 0.559 0.097 0.000 0.367 0.752 Yes 1 Y ~ M 0.666 0.086 0.000 0.495 0.837 Yes @@ -1187,8 +1203,7 @@ def mediation_analysis( 5. Mediation analysis with multiple parallel mediators - >>> mediation_analysis(data=df, x='X', m=['M', 'Mbin'], y='Y', - ... 
seed=42).round(3) + >>> mediation_analysis(data=df, x="X", m=["M", "Mbin"], y="Y", seed=42).round(3) path coef se pval CI2.5 CI97.5 sig 0 M ~ X 0.561 0.094 0.000 0.374 0.749 Yes 1 Mbin ~ X -0.005 0.029 0.859 -0.063 0.052 No diff --git a/src/pingouin/reliability.py b/src/pingouin/reliability.py index 33614e88..fc93ffc3 100644 --- a/src/pingouin/reliability.py +++ b/src/pingouin/reliability.py @@ -1,10 +1,10 @@ import numpy as np import pandas as pd from scipy.stats import f + from pingouin.config import options from pingouin.utils import _postprocess_dataframe - __all__ = ["cronbach_alpha", "intraclass_corr"] @@ -98,7 +98,7 @@ def cronbach_alpha( Binary wide-format dataframe (with missing values) >>> import pingouin as pg - >>> data = pg.read_dataset('cronbach_wide_missing') + >>> data = pg.read_dataset("cronbach_wide_missing") >>> # In R: psych:alpha(data, use="pairwise") >>> pg.cronbach_alpha(data=data) (0.732660835214447, array([0.435, 0.909])) @@ -106,7 +106,7 @@ def cronbach_alpha( After listwise deletion of missing values (remove the entire rows) >>> # In R: psych:alpha(data, use="complete.obs") - >>> pg.cronbach_alpha(data=data, nan_policy='listwise') + >>> pg.cronbach_alpha(data=data, nan_policy="listwise") (0.8016949152542373, array([0.581, 0.933])) After imputing the missing values with the median of each column @@ -116,9 +116,8 @@ def cronbach_alpha( Likert-type long-format dataframe - >>> data = pg.read_dataset('cronbach_alpha') - >>> pg.cronbach_alpha(data=data, items='Items', scores='Scores', - ... subject='Subj') + >>> data = pg.read_dataset("cronbach_alpha") + >>> pg.cronbach_alpha(data=data, items="Items", scores="Scores", subject="Subj") (0.5917188485995826, array([0.195, 0.84 ])) """ # Safety check @@ -243,9 +242,10 @@ def intraclass_corr(data=None, targets=None, raters=None, ratings=None, nan_poli ICCs of wine quality assessed by 4 judges. 
>>> import pingouin as pg - >>> data = pg.read_dataset('icc') - >>> icc = pg.intraclass_corr(data=data, targets='Wine', raters='Judge', - ... ratings='Scores').round(3) + >>> data = pg.read_dataset("icc") + >>> icc = pg.intraclass_corr(data=data, targets="Wine", raters="Judge", ratings="Scores").round( + ... 3 + ... ) >>> icc.set_index("Type") Description ICC F df1 df2 pval CI95 Type diff --git a/src/pingouin/utils.py b/src/pingouin/utils.py index 95bd80d3..632f521c 100644 --- a/src/pingouin/utils.py +++ b/src/pingouin/utils.py @@ -1,11 +1,13 @@ """Helper functions.""" +import collections.abc +import itertools as it import numbers + import numpy as np import pandas as pd -import itertools as it -import collections.abc from tabulate import tabulate + from .config import options __all__ = [ @@ -281,15 +283,15 @@ def _flatten_list(x, include_tuple=False): Examples -------- >>> from pingouin.utils import _flatten_list - >>> x = ['X1', ['M1', 'M2'], 'Y1', ['Y2']] + >>> x = ["X1", ["M1", "M2"], "Y1", ["Y2"]] >>> _flatten_list(x) ['X1', 'M1', 'M2', 'Y1', 'Y2'] - >>> x = ['Xaa', 'Xbb', 'Xcc'] + >>> x = ["Xaa", "Xbb", "Xcc"] >>> _flatten_list(x) ['Xaa', 'Xbb', 'Xcc'] - >>> x = ['Xaa', ('Xbb', 'Xcc'), (1, 2), (1)] + >>> x = ["Xaa", ("Xbb", "Xcc"), (1, 2), (1)] >>> _flatten_list(x) ['Xaa', ('Xbb', 'Xcc'), (1, 2), 1] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..290cc21f --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +import matplotlib + +matplotlib.use("Agg") diff --git a/tests/test_bayesian.py b/tests/test_bayesian.py index 030b8ba4..5e6760eb 100644 --- a/tests/test_bayesian.py +++ b/tests/test_bayesian.py @@ -1,11 +1,12 @@ -import numpy as np from unittest import TestCase -from scipy.stats import pearsonr -from pingouin.parametric import ttest -from pingouin.bayesian import bayesfactor_ttest, bayesfactor_binom -from pingouin.bayesian import bayesfactor_pearson as bfp +import numpy as np from pytest import approx +from 
scipy.stats import pearsonr + +from pingouin.bayesian import bayesfactor_binom, bayesfactor_ttest +from pingouin.bayesian import bayesfactor_pearson as bfp +from pingouin.parametric import ttest np.random.seed(1234) x = np.random.normal(size=100) diff --git a/tests/test_circular.py b/tests/test_circular.py index daa437a2..8dc061a0 100644 --- a/tests/test_circular.py +++ b/tests/test_circular.py @@ -1,10 +1,12 @@ -import pytest -import numpy as np from unittest import TestCase + +import numpy as np +import pytest from scipy.stats import circmean + from pingouin import read_dataset -from pingouin.circular import convert_angles, _checkangles from pingouin.circular import ( + _checkangles, circ_axial, circ_corrcc, circ_corrcl, @@ -12,6 +14,7 @@ circ_r, circ_rayleigh, circ_vtest, + convert_angles, ) np.random.seed(123) diff --git a/tests/test_config.py b/tests/test_config.py index a585ce33..c833e254 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,5 +1,6 @@ -import pingouin from unittest import TestCase + +import pingouin from pingouin.config import set_default_options expected_default_options = pingouin.options.copy() diff --git a/tests/test_contingency.py b/tests/test_contingency.py index 09fde40e..85c95c3f 100644 --- a/tests/test_contingency.py +++ b/tests/test_contingency.py @@ -1,10 +1,12 @@ -import pytest +from unittest import TestCase + import numpy as np import pandas as pd -import pingouin as pg -from unittest import TestCase +import pytest from scipy.stats import chi2_contingency +import pingouin as pg + df_ind = pg.read_dataset("chi2_independence") df_mcnemar = pg.read_dataset("chi2_mcnemar") diff --git a/tests/test_correlation.py b/tests/test_correlation.py index 838ff5af..59080009 100644 --- a/tests/test_correlation.py +++ b/tests/test_correlation.py @@ -1,8 +1,10 @@ -import pytest -import numpy as np from unittest import TestCase -from pingouin.correlation import corr, rm_corr, partial_corr, skipped, distance_corr, bicor + +import numpy 
as np +import pytest + from pingouin import read_dataset +from pingouin.correlation import bicor, corr, distance_corr, partial_corr, rm_corr, skipped class TestCorrelation(TestCase): diff --git a/tests/test_distribution.py b/tests/test_distribution.py index 6706f579..58287ed8 100644 --- a/tests/test_distribution.py +++ b/tests/test_distribution.py @@ -1,16 +1,18 @@ -import pytest +from unittest import TestCase + import numpy as np import pandas as pd -from unittest import TestCase +import pytest + +from pingouin import read_dataset from pingouin.distribution import ( - gzscore, - normality, anderson, epsilon, + gzscore, homoscedasticity, + normality, sphericity, ) -from pingouin import read_dataset # Generate random dataframe df = read_dataset("mixed_anova.csv") diff --git a/tests/test_effsize.py b/tests/test_effsize.py index 38742b30..43bef5d0 100644 --- a/tests/test_effsize.py +++ b/tests/test_effsize.py @@ -1,10 +1,11 @@ -import pytest +from unittest import TestCase + import numpy as np import pandas as pd -from unittest import TestCase +import pytest from scipy.stats import pearsonr, pointbiserialr -from pingouin.effsize import compute_esci, compute_effsize, compute_effsize_from_t, compute_bootci +from pingouin.effsize import compute_bootci, compute_effsize, compute_effsize_from_t, compute_esci from pingouin.effsize import convert_effsize as cef # Dataset diff --git a/tests/test_equivalence.py b/tests/test_equivalence.py index 6272ccb5..59ddcbbe 100644 --- a/tests/test_equivalence.py +++ b/tests/test_equivalence.py @@ -1,7 +1,9 @@ # Author: Antoine Weill--Duflos # Date July 2019 -import numpy as np from unittest import TestCase + +import numpy as np + from pingouin.equivalence import tost diff --git a/tests/test_multicomp.py b/tests/test_multicomp.py index 3efa1018..db563228 100644 --- a/tests/test_multicomp.py +++ b/tests/test_multicomp.py @@ -1,8 +1,10 @@ -import pytest +from unittest import TestCase + import numpy as np +import pytest from numpy.testing 
import assert_array_almost_equal, assert_array_equal -from unittest import TestCase -from pingouin.multicomp import fdr, bonf, holm, sidak, multicomp + +from pingouin.multicomp import bonf, fdr, holm, multicomp, sidak pvals = [0.52, 0.12, 0.0001, 0.03, 0.14] pvals2 = [0.52, 0.12, 0.10, 0.30, 0.14] diff --git a/tests/test_multivariate.py b/tests/test_multivariate.py index aaa5ac73..f6ec0c42 100644 --- a/tests/test_multivariate.py +++ b/tests/test_multivariate.py @@ -1,9 +1,11 @@ +from unittest import TestCase + import numpy as np import pandas as pd from sklearn import datasets -from unittest import TestCase + from pingouin import read_dataset -from pingouin.multivariate import multivariate_normality, multivariate_ttest, box_m +from pingouin.multivariate import box_m, multivariate_normality, multivariate_ttest data = read_dataset("multivariate") dvs = ["Fever", "Pressure", "Aches"] diff --git a/tests/test_nonparametric.py b/tests/test_nonparametric.py index de4da3cc..6e1d2ba1 100644 --- a/tests/test_nonparametric.py +++ b/tests/test_nonparametric.py @@ -1,17 +1,19 @@ -import pytest -import scipy +from unittest import TestCase + import numpy as np import pandas as pd -from unittest import TestCase +import pytest +import scipy + from pingouin.nonparametric import ( + cochran, + friedman, + harrelldavis, + kruskal, mad, madmedianrule, mwu, wilcoxon, - kruskal, - friedman, - cochran, - harrelldavis, ) np.random.seed(1234) diff --git a/tests/test_pairwise.py b/tests/test_pairwise.py index c795f319..8c89196b 100644 --- a/tests/test_pairwise.py +++ b/tests/test_pairwise.py @@ -1,14 +1,16 @@ -import pytest +from unittest import TestCase + import numpy as np import pandas as pd -from unittest import TestCase +import pytest + from pingouin import read_dataset from pingouin.pairwise import ( - pairwise_ttests, - pairwise_tests, pairwise_corr, - pairwise_tukey, pairwise_gameshowell, + pairwise_tests, + pairwise_ttests, + pairwise_tukey, ) @@ -485,6 +487,7 @@ def 
test_pairwise_tests(self): def test_ptests(self): """Test function ptests.""" from itertools import combinations + from scipy.stats import ttest_ind, ttest_rel # Load BFI dataset diff --git a/tests/test_pandas.py b/tests/test_pandas.py index fce8b5aa..e1141446 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -5,9 +5,11 @@ - Raphael Vallat """ +from unittest import TestCase + import numpy as np + import pingouin as pg -from unittest import TestCase df = pg.read_dataset("mixed_anova") df_aov3 = pg.read_dataset("anova3_unbalanced") diff --git a/tests/test_parametric.py b/tests/test_parametric.py index 41baa884..96ddb43f 100644 --- a/tests/test_parametric.py +++ b/tests/test_parametric.py @@ -1,11 +1,11 @@ -import pytest -import numpy as np from unittest import TestCase + +import numpy as np +import pytest from numpy.testing import assert_array_equal as array_equal from pingouin import read_dataset -from pingouin.parametric import ttest, anova, rm_anova, mixed_anova, ancova, welch_anova - +from pingouin.parametric import ancova, anova, mixed_anova, rm_anova, ttest, welch_anova # Generate random data for ANOVA df = read_dataset("mixed_anova.csv") diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 064f6830..7a3bc53c 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -1,19 +1,21 @@ -import pytest +from unittest import TestCase + import matplotlib +import matplotlib.pyplot as plt import numpy as np -from scipy import stats +import pytest import seaborn as sns -import matplotlib.pyplot as plt -from unittest import TestCase +from scipy import stats + from pingouin import read_dataset from pingouin.plotting import ( - plot_blandaltman, _ppoints, - qqplot, + plot_blandaltman, + plot_circmean, plot_paired, - plot_shift, plot_rm_corr, - plot_circmean, + plot_shift, + qqplot, ) # Disable open figure warning diff --git a/tests/test_power.py b/tests/test_power.py index f16fbe31..d4bdea0b 100644 --- a/tests/test_power.py +++ 
b/tests/test_power.py @@ -1,13 +1,15 @@ -import pytest -import numpy as np from unittest import TestCase + +import numpy as np +import pytest + from pingouin.power import ( - power_ttest, - power_ttest2n, power_anova, - power_rm_anova, - power_corr, power_chi2, + power_corr, + power_rm_anova, + power_ttest, + power_ttest2n, ) diff --git a/tests/test_regression.py b/tests/test_regression.py index 56f853d8..2c1cbd1d 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -1,21 +1,20 @@ -import pytest -import numpy as np -import pandas as pd from unittest import TestCase -from scipy.stats import linregress, zscore -from sklearn.linear_model import LinearRegression +import numpy as np +import pandas as pd +import pytest import statsmodels.api as sm - -from pandas.testing import assert_frame_equal from numpy.testing import assert_almost_equal, assert_equal +from pandas.testing import assert_frame_equal +from scipy.stats import linregress, zscore +from sklearn.linear_model import LinearRegression from pingouin import read_dataset from pingouin.regression import ( + _pval_from_bootci, linear_regression, logistic_regression, mediation_analysis, - _pval_from_bootci, ) # 1st dataset: mediation @@ -263,7 +262,7 @@ def test_logistic_regression(self): # summary(glm(Ybin ~ X, data=df, family=binomial)) assert_equal(np.round(lom["coef"], 3), [1.319, -0.199]) assert_equal(np.round(lom["se"], 3), [0.758, 0.121]) - assert_equal(np.round(lom["z"], 3), [1.74, -1.647]) + assert_almost_equal(lom["z"], [1.74, -1.647], decimal=2) assert_equal(np.round(lom["pval"], 3), [0.082, 0.099]) assert_equal(np.round(lom["CI2.5"], 3), [-0.167, -0.437]) assert_equal(np.round(lom["CI97.5"], 3), [2.805, 0.038]) @@ -276,7 +275,7 @@ def test_logistic_regression(self): # summary(glm(Ybin ~ X+M, data=df, family=binomial)) assert_equal(lom["coef"].to_numpy(), [1.327, -0.196, -0.006]) assert_equal(lom["se"].to_numpy(), [0.778, 0.141, 0.125]) - assert_equal(lom["z"].to_numpy(), [1.705, -1.392, 
-0.048]) + assert_almost_equal(lom["z"], [1.705, -1.392, -0.048], decimal=2) assert_equal(lom["pval"].to_numpy(), [0.088, 0.164, 0.962]) assert_equal(lom["CI2.5"].to_numpy(), [-0.198, -0.472, -0.252]) assert_equal(lom["CI97.5"].to_numpy(), [2.853, 0.08, 0.24]) diff --git a/tests/test_reliability.py b/tests/test_reliability.py index 291ef02b..51c0b68e 100644 --- a/tests/test_reliability.py +++ b/tests/test_reliability.py @@ -1,9 +1,11 @@ -import pytest +from unittest import TestCase + import numpy as np import pandas as pd -from unittest import TestCase -from pingouin.reliability import cronbach_alpha, intraclass_corr +import pytest + from pingouin import read_dataset +from pingouin.reliability import cronbach_alpha, intraclass_corr class TestReliability(TestCase): diff --git a/tests/test_utils.py b/tests/test_utils.py index afa44829..4072841c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,23 +1,23 @@ -import pandas as pd +from unittest import TestCase + import numpy as np +import pandas as pd import pytest import pingouin - -from unittest import TestCase from pingouin.utils import ( - print_table, - _postprocess_dataframe, - _get_round_setting_for, - _perm_pval, - _check_eftype, _check_dataframe, - remove_na, + _check_eftype, _flatten_list, + _get_round_setting_for, + _is_mpmath_installed, _is_sklearn_installed, _is_sklearn_version_compatible, _is_statsmodels_installed, - _is_mpmath_installed, + _perm_pval, + _postprocess_dataframe, + print_table, + remove_na, ) # Dataset