diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 77d9965f2..f74581767 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,6 +10,10 @@ build: - poetry config virtualenvs.create false - . "$READTHEDOCS_VIRTUALENV_PATH/bin/activate" && poetry install --with docs - pip freeze + post_install: + # RTD auto-upgrades sphinx (breaks myst-parser <9) and setuptools + # (82+ removes pkg_resources, breaking pybtex). Re-pin to safe versions. + - pip install "sphinx<9" "setuptools<82" sphinx: configuration: docs/conf.py diff --git a/Makefile b/Makefile index 151a96831..903f89c6c 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,10 @@ clean.doc: @$(RM) -rf docs/auto_examples/ @$(RM) -rf docs/sg_execution_times.rst +.PHONY: doc-quick +doc-quick: install.doc + @$(SPHINX) -b html docs/ docs/_build/html + .PHONY: clean.py clean.py: @find . -name __pycache__ -exec $(RM) -r {} + diff --git a/README.md b/README.md index 43875d2cf..0e926c355 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ pre-commit install ### Examples & Tutorials The `examples` folder contains a starting point if you want to launch your first scripts and notebook with the Leaspy package. + ## Description Leaspy is a software package for the statistical analysis of **longitudinal data**, particularly **medical** data that comes in a form of **repeated observations** of patients at different time-points. 
diff --git a/browser/static/favicon.png b/browser/static/favicon.png deleted file mode 100644 index 3f24afb0b..000000000 Binary files a/browser/static/favicon.png and /dev/null differ diff --git a/docs/_static/custom.css b/docs/_static/custom.css index e5645e0a5..0c52cdcb2 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,6 +1,246 @@ -.wy-nav-content { - max-width: 96% !important; +/* ============================================================================= + Leaspy custom styles for PyData Sphinx Theme + CSS variables: https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/styling.html + ============================================================================= */ + +/* --- Viewcode: pastel blue highlights (hll lines + target blocks) --- */ +html[data-theme="light"] { + --pst-color-target: rgba(80, 150, 220, 0.18); } +html[data-theme="dark"] { + --pst-color-target: rgba(80, 150, 220, 0.15); +} +html[data-theme="light"] .highlight .hll { + background-color: rgba(80, 150, 220, 0.18) !important; +} +html[data-theme="dark"] .highlight .hll { + background-color: rgba(80, 150, 220, 0.15) !important; +} + +/* ============================================================================= + Sphinx-Gallery + ============================================================================= */ + +/* Hide download note, timing, and download footer */ +.sphx-glr-download-link-note, +.sphx-glr-timing, +.sphx-glr-footer { + display: none !important; +} + +/* Gallery card thumbnails: text only, no images */ .sphx-glr-thumbcontainer .xref.std.std-ref { display: none; +} +.sphx-glr-thumbcontainer img { + display: none !important; +} +.sphx-glr-thumbcontainer { + min-height: auto !important; + height: auto !important; +} + +/* Input code blocks: green left border + grey/white outline */ +div.highlight-Python, +div.highlight-python3, +div.highlight-python { + border: 1px solid; + border-left: 4px solid var(--pst-color-success); + border-radius: 6px; + 
overflow: hidden; +} +html[data-theme="light"] div.highlight-Python, +html[data-theme="light"] div.highlight-python3, +html[data-theme="light"] div.highlight-python { + border-color: #c0c0c0; + border-left-color: var(--pst-color-success); +} +html[data-theme="dark"] div.highlight-Python, +html[data-theme="dark"] div.highlight-python3, +html[data-theme="dark"] div.highlight-python { + border-color: #555; + border-left-color: var(--pst-color-success); +} + +/* Output blocks: grey/white outline, no green bar, tinted background */ +.sphx-glr-script-out div.highlight-none { + border: 1px solid; + border-radius: 6px; + overflow: hidden; +} +html[data-theme="light"] .sphx-glr-script-out div.highlight-none { + border-color: #989898; +} +html[data-theme="dark"] .sphx-glr-script-out div.highlight-none { + border-color: #555; +} +html[data-theme="light"] .sphx-glr-script-out .highlight, +html[data-theme="light"] .sphx-glr-script-out pre { + background: #e1e1e1 !important; +} +html[data-theme="dark"] .sphx-glr-script-out .highlight, +html[data-theme="dark"] .sphx-glr-script-out pre { + background: #272626 !important; +} +.sphx-glr-script-out pre { + color: var(--pst-color-text-base) !important; + font-size: 0.875em; + line-height: 1.5; +} + +/* ============================================================================= + Misc + ============================================================================= */ + +/* Logo: white background patch for transparency on dark bg */ +img[src*="leaspy_logo.png"] { + background-color: white; + padding: 5px; + border-radius: 4px; +} + +/* Pandas DataFrame styling */ +table.dataframe { + border: none !important; + border-collapse: collapse; + border-spacing: 0; + margin-bottom: 1em; + width: auto; + overflow-x: auto; + display: block; +} +table.dataframe thead { + border-bottom: 1px solid var(--pst-color-border); + vertical-align: bottom; +} +table.dataframe tr, +table.dataframe th, +table.dataframe td { + text-align: right; + 
vertical-align: middle; + padding: 0.5em; + line-height: normal; + white-space: normal; + max-width: none; + border: none; + color: var(--pst-color-text-base); +} +table.dataframe th { + font-weight: bold; + background: var(--pst-color-surface); +} +table.dataframe tbody tr:nth-child(odd) { + background: var(--pst-color-surface); +} +table.dataframe tbody tr:hover { + background: var(--pst-color-surface); + opacity: 0.85; +} + +/* ============================================================================= + API Reference Styling + ============================================================================= */ + +/* 1. Class block separation */ +dl.py.class { + margin-bottom: 4rem !important; + padding-bottom: 2rem; + border-bottom: 2px solid var(--pst-color-border); +} + +/* 2. Method / Function separation */ +dl.py.method, +dl.py.function { + margin-bottom: 2rem !important; + padding-bottom: 1rem; + border-bottom: 1px solid var(--pst-color-border); +} +dl.py.class > dd > dl.py.method:last-child { + border-bottom: none; + margin-bottom: 0 !important; + padding-bottom: 0; +} + +/* 3. Signature headers */ +dl.py.class > dt { + background-color: var(--pst-color-surface); + padding: 0.8rem; + border-radius: 4px; + margin-top: 1.5rem; + font-weight: bold; + border-left: 5px solid var(--pst-color-primary); + display: block; + text-indent: 0 !important; + margin-left: 0 !important; + width: 100%; + box-sizing: border-box; +} +dl.py.function > dt, +dl.py.method > dt { + background-color: var(--pst-color-surface); + padding: 0.6rem; + border-radius: 4px; + margin-top: 1.0rem; + font-weight: bold; + border-left: 3px solid var(--pst-color-text-base); + display: block; + text-indent: 0 !important; + margin-left: 0 !important; + width: 100%; + box-sizing: border-box; +} + +/* 4. 
Body indentation with vertical lines */ +dl.py.class > dd { + margin-left: 0 !important; + padding-left: 1.5rem; + border-left: 4px solid var(--pst-color-primary); +} +dl.py.method > dd, +dl.py.function > dd { + margin-left: 0 !important; + padding-left: 1.5rem; + border-left: 2px solid var(--pst-color-text-base); +} + +/* 5. Parameters / Returns / Attributes field lists */ +dl.field-list > dt { + font-weight: bold; + color: var(--pst-color-text-base); + margin-top: 1.0rem; + margin-bottom: 0.5rem; + padding: 0; + border: none; + background: none; +} +dl.field-list > dd { + margin-left: 0 !important; + padding-left: 1.0rem; +} +dl.field-list > dd > dl > dt { + background-color: transparent !important; + padding: 0 !important; + margin-top: 0 !important; + margin-bottom: 0 !important; + font-weight: bold; + border: none !important; + color: var(--pst-color-text-base); + font-family: var(--pst-font-family-monospace); + line-height: 1.2; +} +dl.field-list > dd > dl > dt::before { + content: "•"; + color: var(--pst-color-text-muted); + margin-right: 0.5rem; + font-weight: normal; +} +dl.field-list > dd > dl > dd { + margin-top: 0 !important; + margin-bottom: 0.1rem !important; + margin-left: 1.0rem !important; +} +dl.field-list > dd > dl > dd > p { + margin-top: 0 !important; + margin-bottom: 0 !important; + line-height: 1.3; } \ No newline at end of file diff --git a/docs/_static/custom.js b/docs/_static/custom.js index 0febaaf58..103057d7e 100644 --- a/docs/_static/custom.js +++ b/docs/_static/custom.js @@ -19,5 +19,8 @@ document.addEventListener("DOMContentLoaded", function() { // Change cursor to pointer el.style.cursor = 'pointer'; } + + // Remove the tooltip attribute to prevent the description from popping up + el.removeAttribute('tooltip'); }); }); \ No newline at end of file diff --git a/docs/_static/favicon.png b/docs/_static/favicon.png new file mode 100644 index 000000000..a1e6444b8 Binary files /dev/null and b/docs/_static/favicon.png differ diff --git 
a/docs/_templates/navbar-nav.html b/docs/_templates/navbar-nav.html new file mode 100644 index 000000000..ee1de80f7 --- /dev/null +++ b/docs/_templates/navbar-nav.html @@ -0,0 +1,22 @@ +{# Navbar navigation links. + On toctree-rooted pages, generate_header_nav_html produces the links. + On orphan pages (e.g. viewcode _modules/), it returns empty — fall back to + a hardcoded list so navigation is always visible. +#} + diff --git a/docs/conf.py b/docs/conf.py index 523e8c764..7e6b980a8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,8 +49,14 @@ def find_var(varname: str, *py_file_paths): "sphinx_gallery.gen_gallery", "myst_nb", "sphinxcontrib.bibtex", + "sphinx.ext.viewcode", + "sphinx_copybutton", ] +# sphinx-copybutton: strip shell prompts from copied text +copybutton_prompt_text = r">>> |\$ " +copybutton_prompt_is_regexp = True + bibtex_bibfiles = ["references.bib"] # -- autoapi configuration --------------------------------------------------- @@ -133,6 +139,7 @@ def find_var(varname: str, *py_file_paths): "auto_examples/*.ipynb", "auto_examples/*.py.md5", "auto_examples/*.codeobj.json", + "data_summary.ipynb", ] show_warning_types = True @@ -143,20 +150,16 @@ def find_var(varname: str, *py_file_paths): ] # The name of the Pygments (syntax highlighting) style to use. +# PyData supports separate styles for light and dark mode. highlight_language = "python3" -pygments_style = "sphinx" +pygments_style = "friendly" # light mode +pygments_dark_style = "monokai" # dark mode # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# -# ---sphinx-themes----- -# html_theme = 'neo_rtd_theme' -# html_theme_path = [sphinx_theme.get_html_theme_path()] - -# html_theme = 'alabaster' -# html_theme = 'sphinx-theme' html_theme = "pydata_sphinx_theme" add_function_parentheses = True @@ -170,49 +173,41 @@ def find_var(varname: str, *py_file_paths): ] html_js_files = ["custom.js"] -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. +# Favicon +html_favicon = "_static/favicon.png" + +# Theme options html_theme_options = { - "collapse_navigation": True, - "navigation_depth": 4, - # Logo and description - # 'description': 'LEArning Spatiotemporal Patterns in Python', - # 'logo_name': 'false', - # 'logo_text_align': 'center', - # GitHub stuff - # 'github_banner': 'true', - # 'github_repo': 'pyts', - # 'github_type': 'star', - # 'github_user': 'johannfaouzi', - # Page and sidebar widths - # 'page_width': '1300px', - "body_max_width": "1000px", - # 'sidebar_width': '250px', - # Related links - # 'show_related': 'true', - # 'show_relbar_bottom': 'true', - # Font sizes - # 'font_size': '15px', - # 'code_font_size': '13px' + # Navbar + "navbar_align": "left", + "navbar_center": ["navbar-nav"], + "navbar_end": ["theme-switcher", "navbar-icon-links"], + "header_links_before_dropdown": 6, + # Secondary (right) sidebar + "secondary_sidebar_items": ["page-toc"], + "show_toc_level": 2, + # Logo + "logo": { + "image_light": "_static/images/leaspy_logo.png", + "image_dark": "_static/images/leaspy_logo.png", + }, + # GitHub icon in navbar + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/aramis-lab/leaspy", + "icon": "fa-brands fa-github", + } + ], } html_context = { - "display_github": True, - "github_url": "https://github.com", "github_user": "aramis-lab", "github_repo": "leaspy", - "github_version": "v2/", - "conf_py_path": "/docs/", + "github_version": "v2", + "doc_path": "docs", } -# 
Custom CSS files -# html_css_files = [ -# 'custom.css', -# ]# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -html_logo = "_static/images/leaspy_logo.png" - html_title = "Leaspy" # A shorter title for the navigation bar. Default is the same as html_title. html_short_title = "Leaspy documentation" diff --git a/docs/data_summary.ipynb b/docs/data_summary.ipynb new file mode 100644 index 000000000..b24dea8db --- /dev/null +++ b/docs/data_summary.ipynb @@ -0,0 +1,251 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6b089476", + "metadata": {}, + "source": [ + "# Understanding Leaspy's Data Containers: `Data` and `Dataset`\n", + "\n", + "In `leaspy`, transforming raw data (like a CSV) into a model-ready format involves two key classes: `Data` and `Dataset`. Understanding their distinct roles is crucial for having full control of your analysis.\n", + "\n", + "---\n", + "\n", + "## 1. The `Data` Class: The User Interface\n", + "\n", + "The `Data` class is your **primary tool** for loading, organizing, and inspecting data. It acts as a flexible, patient-centric container that bridges the gap between raw spreadsheets and the model.\n", + "\n", + "## Key Features & Methods\n", + "\n", + "### **Loading Data**\n", + "Use the factory method to load from a pandas DataFrame. Notice that there is a slight difference when you work with joint models." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c6de245e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ID TIME EVENT_TIME EVENT_BOOL Y0 Y1 Y2 Y3\n", + "0 116 78.461 85.5 1 0.44444 0.04 0.0 0.0\n", + "1 116 78.936 85.5 1 0.60000 0.00 0.0 0.2\n", + "2 116 79.482 85.5 1 0.39267 0.04 0.0 0.2\n" + ] + } + ], + "source": [ + "import os\n", + "import pandas as pd\n", + "import leaspy\n", + "from leaspy.io.data import Data\n", + "\n", + "leaspy_root = os.path.dirname(leaspy.__file__)\n", + "data_path = os.path.join(leaspy_root, \"datasets/data/simulated_data_for_joint.csv\")\n", + "df = pd.read_csv(data_path, dtype={\"ID\": str}, sep=\";\")\n", + "\n", + "data = Data.from_dataframe(df) \t\t\t\t\t\t\t# <-\n", + "# For joint models (longitudinal + time-to-event):\n", + "data_joint = Data.from_dataframe(df, data_type='joint')\t# <-\n", + "print(df.head(3))" + ] + }, + { + "cell_type": "markdown", + "id": "73706687", + "metadata": {}, + "source": [ + "### **Inspection**: \n", + "Access data naturally by patient ID or index. This is made possible by the iterator handling inside `Data`; it also allows you to iterate using for loops. 
So you can:\n", + "* select data using the brackets (`data['116']`)\n", + "* check some attributes (`data.n_individuals`) \n", + "* convert the whole dataset or some individuals back into a dataframe object (`data[['116']].to_dataframe()`)\n", + "* iterate over each individual (`for individual in data:`)\n", + "* generate an iterator (`for i, individual in enumerate(data):`)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6e92c403", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of patients: 17\n", + "patient data (observations shape): (9, 6)\n", + "patient data (dataframe):\n", + " ID TIME EVENT_TIME EVENT_BOOL Y0 Y1 Y2 Y3\n", + "0 116 78.461 85.5 1.0 0.44444 0.04 0.0 0.0\n", + "1 116 78.936 85.5 1.0 0.60000 0.00 0.0 0.2\n", + "2 116 79.482 85.5 1.0 0.39267 0.04 0.0 0.2\n", + "3 116 79.939 85.5 1.0 0.58511 0.00 0.0 0.0\n", + "4 116 80.491 85.5 1.0 0.57044 0.00 0.0 0.0\n", + "5 116 81.455 85.5 1.0 0.55556 0.20 0.1 0.2\n", + "6 116 82.491 85.5 1.0 0.71844 0.20 0.1 0.6\n", + "7 116 83.463 85.5 1.0 0.71111 0.32 0.2 0.6\n", + "8 116 84.439 85.5 1.0 0.91111 0.52 0.6 1.0\n", + "\n", + "Iterating over first 3 patients:\n", + " - Patient 116: 9 visits\n", + " - Patient 142: 11 visits\n", + " - Patient 169: 7 visits\n", + "Patient ID: 116\n", + "Patient ID: 142\n", + "Patient ID: 169\n" + ] + } + ], + "source": [ + "patient_data = data['116'] # Get a specific individual\n", + "n_patients = data.n_individuals # Get total count\n", + "print(f\"Number of patients: {n_patients}\")\n", + "print(f\"patient data (observations shape): {patient_data.observations.shape}\")\n", + "print(f\"patient data (dataframe):\\n{data[['116']].to_dataframe()}\")\n", + "print(\"\\nIterating over first 3 patients:\")\n", + "for individual in data:\n", + " if len(individual.timepoints) == 10: break\n", + " print(f\" - Patient {individual.idx}: {len(individual.timepoints)} visits\")\n", + "for i, individual in 
enumerate(data):\n", + " if i >= 3: break # Stop after 3 iterations using the index 'i'\n", + " print(f\"Patient ID: {individual.idx}\")" + ] + }, + { + "cell_type": "markdown", + "id": "12eb4a50", + "metadata": {}, + "source": [ + "### **Managing Cofactors**\n", + "Easily attach patient characteristics (e.g., genetics, demographics). It is used to group populations when using `plotter.plot_distribution`, so plotter can color different cofactors. Lets generate a dataset and its parameters to show how it works." + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "8867dfe4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA4wAAAH5CAYAAADKurD5AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAALH1JREFUeJzt3Qu41VWdN/AfcADxAsQdRiC0FLznZZQ0x5TE63ih0jQDJU1HmBQ1pZBEK9J8ssm8jD0m+iRlNqSJpSleSkVFZsjUInEoaORSOoCogMJ+n7Xe95yXowsNOLDP5nw+z/N387/svdc5y33+53vWrVWlUqkEAAAAvEPrdx4AAACARGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgqC5q0Jo1a+Lll1+O7bbbLlq1alXt4gAAANSUSqUSr732WvTp0ydat269ZQXGFBb79u1b7WIAAADUtPnz58f222+/ZQXG1LJY/8V17Nix2sUBAACoKcuWLcuNcPXZaosKjPXdUFNYFBgBAAA2zPsN8TPpDQAAAEUCIwAAAEUCIwAAAFvOGEYAAKD5Wr16dbz11lvVLkaL1rZt22jTps1Gv47ACAAANNnafgsXLowlS5ZUuyhEROfOnaNXr14btXa9wAgAADSJ+rDYo0eP2HrrrTcqqLDhUnB/4403YvHixXm/d+/eG/xaAiMAANAk3VDrw2LXrl2rXZwWr0OHDvkxhcZUJxvaPdWkNwAAwEarH7OYWhZpHurrYmPGkwqMAABAk9ENdcuqC4ERAACAIoERAACAjZ/05oYbbsjbn/70p7y/6667xvjx4+PII4/M+ytWrIgLLrggfvzjH8fKlStj6NChcf3110fPnj0bXmPevHlxzjnnxMMPPxzbbrttDB8+PCZOnBh1debfAQCALdHISTM26/vdPGK/qHUf/OAH47zzzstbzbQwbr/99vHNb34zZs6cGc8880wceuihcdxxx8Xzzz+fz59//vlxzz33xJ133hmPPvpovPzyy3HiiSc2mjnp6KOPjlWrVsUTTzwRt956a0yaNCmHTgAAgGoYMWJEHu/3zm3OnDnR0q1Xs96xxx7baP/rX/96bnF88sknc5i8+eabY/LkyTlIJrfccksMGjQonz/ggAPiV7/6Vbzwwgvx4IMP5lbHvfbaK6644oq4+OKL47LLLot27do17VcHAADwdzjiiCNyfllb9+7do6Xb4DGMqbUwdT19/fXXY/DgwbnVMU3XOmTIkIZrBg4cGP369Yvp06fn/fS4++67N+qimrqtLlu2rKGVsiR1b03XrL0BAAA0lfbt20evXr0ab
W3atIm777479t5779hqq61ihx12iAkTJsTbb7/d8LzUEvnv//7vccwxx+RlLFKDWco9qXXykEMOiW222SY++tGPxksvvdTwnPTv1FMz5aI0TG+//fbLjWrvJa1x+fnPfz6H2I4dO+ZGut/+9reb9HuyQYHxd7/7Xf6i0jf07LPPjp/97Gexyy67xMKFC3MLYefOnRtdn74J6VySHtcOi/Xn68+tSxrj2KlTp4atb9++61tsAACA9fKb3/wmPve5z8UXv/jF3FMyBcM0pC71tFxb6jWZrps1a1ZuNDvllFPiC1/4QowdOzYP5atUKjFq1KiG65cvXx5HHXVUTJs2Lf7rv/4rt26m3pxpvpd1+dSnPhWLFy+OX/7yl7mxLoXYww47LF599dXmFRh33nnn/I146qmn8uQ1adKa9M3blNI3eunSpQ3b/PnzN+n7AQAALcvUqVNzw1j9lgLahAkT4pJLLsmZJ7UufuITn8jhMAXHtZ1++unx6U9/Onbaaac83C5NEnrqqafm3pSpxTEFzkceeaTh+j333DMHyt122y0+/OEP59fccccd4+c//3mxbI899lg8/fTTea6YfffdNz/n6quvzo11P/3pTzfp92W9pyZNrYgf+tCH8r/32WefmDFjRvzbv/1bnHTSSXkym9RUunYr46JFi3JzbpIe0xe6tnS+/ty6pNbMtAEAAGwKH//4x/P8LPVSV9I99tgjHn/88UYtimloXlod4o033shdUJN03Tt7UKaheGsfS89JQ+tSd9LUwpjmcLn33ntjwYIFuYvrm2++uc4WxtT1ND2na9eujY6n56zd1XVT2Oi1LNasWZPHGKbw2LZt29ysOmzYsHxu9uzZ+YtOYxyT9Ji+2akptUePHvnYAw88kL9pqVsrAABANaSAWN8wVm/58uW5lXHtlR/qpTGN9VIOWntM47qOpeyUXHjhhTkHpVbC9J4dOnSIT37yk7kBriSVo3fv3o1aKeu9c0hgVQNj6hqa1lxME9m89tpreUbUVOj7778/jy0cOXJkjBkzJrp06ZJD4OjRo3NITDOkJocffngOhqeddlpcddVVedziuHHj4txzz9WCCMAWZXOvObal2BLWTgO2HHvvvXduBHtnkNxYqdUyLeVxwgknNATC+rXu11WOlJ3S2vVpfcbNab0CY2oZTIM5U7NpCoip6TWFxdSXN7nmmmuidevWuYUxtTqmPrvXX399w/PTLEOpb3Aa+5iCZErxqT/w5Zdf3vRfGQAAwEYYP358nv00NZilFsCUdVL30Oeeey6+9rWvbfDrpjGIU6ZMyRPdpNbHSy+9tKH1sSStRJHy0/HHH58b3tJYybTmferSmkJnGtfYLAJjWmfxvaRm2euuuy5v69K/f//4xS9+sT5vCwAA1LBa7T0wdOjQ3OCVGriuvPLK3M00zYKalrfYGN/+9rfjjDPOyMttdOvWLU+U815LB6ZQmTLUV77ylTzBzl//+tc8B8zBBx/8rlUomlqrSprjtcakb2Zq4UwzpqaurwDQ3OiS2rJ+qQQiT+oyd+7cGDBgQKPxfTTPOvl7M9V6L6sBAABAyyAwAgAAUCQwAgAAUCQwAgAAUCQwAgAAUCQwAgAAUCQwAgAAUCQwAgAAUCQwAgAANJE//elP0apVq5g1a1ZsCeqqXQAAAGALN/mkzft+p9yxXpePGDEibr311vjCF74QN954Y6Nz5557blx//fUxfPjwmDRpUrQ0WhgBAIAWr2/fvvHjH/843nzzzYZjK1asiMmTJ0e/fv2ipRIYAQCAFm/vvffOoXHKlCkNx6ZMmZLD4kc+8pGGY/fdd18cdNBB0blz5+jatWscc8wx8dJLL73naz/33HNx5JFHxrbbbhs9e/aM0047Lf72t79FLRAYAQAAIuKMM86IW265pWH/Bz/4QZx++umNrnn99ddjzJgx8cwzz8S0adOidevWccIJJ8SaNWuKr7lkyZI49NBDc+hMz0mBc9GiRfHpT386a
oExjAAAABHx2c9+NsaOHRt//vOf8/7jjz+eu6k+8sgjDdcMGzas0XNSqOzevXu88MILsdtuu73rNb/3ve/lsPiNb3yj0XNSa+Yf//jH2GmnnaI5ExgBAAAicvA7+uij8+Q2lUol/7tbt26NrnnxxRdj/Pjx8dRTT+VupfUti/PmzSsGxt/+9rfx8MMP5+6o75S6sgqMAAAANdQtddSoUfnf11133bvOH3vssdG/f//4/ve/H3369MmBMQXFVatWFV9v+fLl+TlXXnnlu8717t07mjuBEQAA4P854ogjcvhLaykOHTq00blXXnklZs+encPixz72sXzssccee9/JdP7jP/4jPvjBD0ZdXe3FL5PeAAAA/D9t2rSJ3//+93lMYps2bRqd+8AHPpBnRr3ppptizpw58dBDD+UJcN5LWsfx1Vdfjc985jMxY8aM3A31/vvvz5PprF69Opo7gREAAGAtHTt2zNs7pRlR0yQ4M2fOzN1Qzz///PjWt74V7yV1W02T56RwePjhh8fuu+8e5513Xl6WI71ec9eqkkZz1phly5ZFp06dYunSpcWKBIBqGzlpRrWLUJNuHrFftYsAbKC0yP3cuXNjwIABsdVWW1W7OMR718nfm6maf6QFAACgKgRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAACgyaxZs6baRaAJ66Juo18BAABo8dq1a5fXFXz55Zeje/fueb9Vq1bVLlaLVKlUYtWqVfHXv/4110mqiw0lMAIAABstBZO03t+CBQtyaKT6tt566+jXr1+umw0lMAIAAE0itWSlgPL222/H6tWrq12cFq1NmzZRV1e30a28AiMAANBkUkBp27Zt3qh9Jr0BAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgqK58GABaoMknNdlLjV60JLYU1/b8WrWLAECVaGEEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAABg4wPjxIkTY7/99ovtttsuevToEccff3zMnj270TWHHHJItGrVqtF29tlnN7pm3rx5cfTRR8fWW2+dX+eiiy6Kt99+e32KAgAAwCZWtz4XP/roo3Huuefm0JgC3pe//OU4/PDD44UXXohtttmm4bozzzwzLr/88ob9FAzrrV69OofFXr16xRNPPBELFiyIz33uc9G2bdv4xje+0VRfFwAAAJszMN53332N9idNmpRbCGfOnBkHH3xwo4CYAmHJr371qxwwH3zwwejZs2fstddeccUVV8TFF18cl112WbRr125DvxYAAACayxjGpUuX5scuXbo0On777bdHt27dYrfddouxY8fGG2+80XBu+vTpsfvuu+ewWG/o0KGxbNmyeP7554vvs3Llynx+7Q0AAIBm1MK4tjVr1sR5550XBx54YA6G9U455ZTo379/9OnTJ5599tnccpjGOU6ZMiWfX7hwYaOwmNTvp3PrGjs5YcKEDS0qAAAAmzMwprGMzz33XDz22GONjp911lkN/04tib17947DDjssXnrppdhxxx036L1SK+WYMWMa9lMLY9++fTe06AAAAGyqLqmjRo2KqVOnxsMPPxzbb7/9e167//7758c5c+bkxzS2cdGiRY2uqd9f17jH9u3bR8eOHRttAAAANKPAWKlUclj82c9+Fg899FAMGDDgfZ8za9as/JhaGpPBgwfH7373u1i8eHHDNQ888EAOgbvsssv6fwUAAABUv0tq6oY6efLkuPvuu/NajPVjDjt16hQdOnTI3U7T+aOOOiq6du2axzCef/75eQbVPfbYI1+bluFIwfC0006Lq666Kr/GuHHj8munlkQAAABqs
IXxhhtuyDOjHnLIIbnFsH6744478vm0JEZaLiOFwoEDB8YFF1wQw4YNi3vuuafhNdq0aZO7s6bH1Nr42c9+Nq/DuPa6jQAAANRYC2Pqkvpe0kQ0jz766Pu+TppF9Re/+MX6vDUAAAC1tA4jAAAAWy6BEQAAgCKBEQAAgCKBEQAAgCKBEQAAgCKBEQAAgCKBEQAAgCKBEQAAgCKBEQAAgKK68mEA+P9GTpoRLcHoRUuqXQQAaFa0MAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFBUF+th4sSJMWXKlPjDH/4QHTp0iI9+9KNx5ZVXxs4779xwzYoVK+KCCy6IH//4x7Fy5coYOnRoXH/99dGzZ8+Ga+bNmxfnnHNOPPzww7HtttvG8OHD82vX1a1XcQCAzWD0onGb780md46acsod1S4BQPNpYXz00Ufj3HPPjSeffDIeeOCBeOutt+Lwww+P119/veGa888/P+65556488478/Uvv/xynHjiiQ3nV69eHUcffXSsWrUqnnjiibj11ltj0qRJMX78+Kb9ygAAANgorSqVSmVDn/zXv/41evTokYPhwQcfHEuXLo3u3bvH5MmT45Of/GS+JrVGDho0KKZPnx4HHHBA/PKXv4xjjjkmB8n6Vscbb7wxLr744vx67dq1e9/3XbZsWXTq1Cm/X8eOHTe0+AD8nUZOmhEtwWZtSaNor75aGAE2h783U23UGMb04kmXLl3y48yZM3Or45AhQxquGThwYPTr1y8HxiQ97r777o26qKZuq6nAzz//fPF9UtfWdH7tDQAAgE1rgwPjmjVr4rzzzosDDzwwdtttt3xs4cKFuYWwc+fGfx1M4TCdq79m7bBYf77+XEka35jSb/3Wt2/fDS02AAAAmzowprGMzz33XJ7cZlMbO3Zsbs2s3+bPn7/J3xMAAKCl26BpSUeNGhVTp06NX//617H99ts3HO/Vq1eezGbJkiWNWhkXLVqUz9Vf8/TTTzd6vXS+/lxJ+/bt8wYAAEAzbWFM8+OksPizn/0sHnrooRgwYECj8/vss0+0bds2pk2b1nBs9uzZeRmNwYMH5/30+Lvf/S4WL17ccE2acTUNtNxll102/isCAABg87cwpm6oaQbUu+++O7bbbruGMYdpXGFalzE9jhw5MsaMGZMnwkkhcPTo0TkkphlSk7QMRwqGp512Wlx11VX5NcaNG5dfWysiALRss+YviVpybTOYQfjmEftVuwjAFmy9AuMNN9yQHw855JBGx2+55ZYYMWJE/vc111wTrVu3jmHDhuXZTdMMqNdff33DtW3atMndWc8555wcJLfZZpsYPnx4XH755U3zFQEAALD5A+Pfs2TjVlttFdddd13e1qV///7xi1/8Yn3eGgAAgFqY9AaAGjb5pPV+yuhFtdVNEACo8rIaAAAAbNkERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAA
IoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAJomMP7617+OY489Nvr06ROtWrWKu+66q9H5ESNG5ONrb0cccUSja1599dU49dRTo2PHjtG5c+cYOXJkLF++fH2LAgAAQHMKjK+//nrsueeecd11163zmhQQFyxY0LD96Ec/anQ+hcXnn38+HnjggZg6dWoOoWedddaGfQUAAABsEnXr+4Qjjzwyb++lffv20atXr+K53//+93HffffFjBkzYt99983Hrr322jjqqKPi6quvzi2XAAAAbKFjGB955JHo0aNH7LzzznHOOefEK6+80nBu+vTpuRtqfVhMhgwZEq1bt46nnnqq+HorV66MZcuWNdoAAACoscCYuqPedtttMW3atLjyyivj0UcfzS2Sq1evzucXLlyYw+Ta6urqokuXLvlcycSJE6NTp04NW9++fZu62AAAAGxsl9T3c/LJJzf8e/fdd4899tgjdtxxx9zqeNhhh23Qa44dOzbGjBnTsJ9aGIVGAACAGl9WY4cddohu3brFnDlz8n4a27h48eJG17z99tt55tR1jXtMYyLTjKprbwAAANR4YPzLX/6SxzD27t077w8ePDiWLFkSM2fObLjmoYceijVr1sT++++/qYsDAADApuqSmtZLrG8tTObOnRuzZs3KYxDTNmHChBg2bFhuLXzppZfiS1/6UnzoQx+KoUOH5usHDRqUxzmeeeaZceONN8Zbb70Vo0aNyl1ZzZAKAABQwy2MzzzzTHzkIx/JW5LGFqZ/jx8/Ptq0aRPPPvts/PM//3PstNNOMXLkyNhnn33iN7/5Te5WWu/222+PgQMH5jGNaTmNgw46KG666aam/coAAADYvC2MhxxySFQqlXWev//++9/3NVJL5OTJk9f3rQEAANiSxjACAABQmwRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAigRGAAAAiurKhwEAeD+jF42rdhEiJneOZueUO6pdAqCJaGEEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgqK58GGDLNHLSjGjpRi9aUu0iAAA1QgsjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAATRMYf/3rX8exxx4bffr0iVatWsVdd93V6HylUonx48dH7969o0OHDjFkyJB48cUXG13z6quvxqmnnhodO3aMzp07x8iRI2P58uXrWxQAAACaU2B8/fXXY88994zrrruueP6qq66K7373u3HjjTfGU089Fdtss00MHTo0VqxY0XBNCovPP/98PPDAAzF16tQcQs8666yN+0oAAABoUnXr+4QjjzwybyWpdfE73/lOjBs3Lo477rh87LbbbouePXvmlsiTTz45fv/738d99
90XM2bMiH333Tdfc+2118ZRRx0VV199dW65BAAAYAsbwzh37txYuHBh7oZar1OnTrH//vvH9OnT8356TN1Q68Nikq5v3bp1bpEsWblyZSxbtqzRBgAAQA0FxhQWk9SiuLa0X38uPfbo0aPR+bq6uujSpUvDNe80ceLEHDzrt759+zZlsQEAAKjVWVLHjh0bS5cubdjmz59f7SIBAABs8Zo0MPbq1Ss/Llq0qNHxtF9/Lj0uXry40fm33347z5xaf807tW/fPs+ouvYGAABADQXGAQMG5NA3bdq0hmNpvGEamzh48OC8nx6XLFkSM2fObLjmoYceijVr1uSxjgAAANToLKlpvcQ5c+Y0muhm1qxZeQxiv3794rzzzouvfe1r8eEPfzgHyEsvvTTPfHr88cfn6wcNGhRHHHFEnHnmmXnpjbfeeitGjRqVZ1A1QyoAAEANB8ZnnnkmPv7xjzfsjxkzJj8OHz48Jk2aFF/60pfyWo1pXcXUknjQQQflZTS22mqrhufcfvvtOSQedthheXbUYcOG5bUbAQAAaD5aVdLiiTUmdXNNs6WmCXCMZwTWx8hJM6KlG71oXLWLADShvfp2jmbnlDuqXQKgiTJVTcySCgAAwOYnMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFAkMAIAAFBUVz4MsAWYfNK7Do1etKQqRQEAqEVaGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACgSGAEAACiyDiMAQA2bNb/5rS977aQZ0dzdPGK/ahcBaoIWRgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIoERgAAAIrqyocBAGDDjF40Lpq9yZ03/3uecsfmf0/YSFoYAQAAKBIYAQAAKBIYAQAAKBIYAQAAKBIYAQAAKBIYAQAAKBIYAQAAKBIYAQAA2DyB8bLLLotWrVo12gYOHNhwfsWKFXHuuedG165dY9ttt41hw4bFokWLmroYAAAANMcWxl133TUWLFjQsD322GMN584///y455574s4774xHH300Xn755TjxxBM3RTEAAADYCHWb5EXr6qJXr17vOr506dK4+eabY/LkyXHooYfmY7fccksMGjQonnzyyTjggAM2RXEAAABoLi2ML774YvTp0yd22GGHOPXUU2PevHn5+MyZM+Ott96KIUOGNFybuqv269cvpk+fvs7XW7lyZSxbtqzRBgAAQI0Fxv333z8mTZoU9913X9xwww0xd+7c+NjHPhavvfZaLFy4MNq1axedO3du9JyePXvmc+syceLE6NSpU8PWt2/fpi42AAAAm7pL6pFHHtnw7z322CMHyP79+8dPfvKT6NChwwa95tixY2PMmDEN+6mFUWgEAACo8WU1UmviTjvtFHPmzMnjGletWhVLlixpdE2aJbU05rFe+/bto2PHjo02AAAAajwwLl++PF566aXo3bt37LPPPtG2bduYNm1aw/nZs2fnMY6DBw/e1EUBAACgml1SL7zwwjj22GNzN9S0ZMZXv/rVaNOmTXzmM5/J4w9HjhyZu5d26dIltxSOHj06h0UzpAIAAGzhgfEvf/lLDoevvPJKdO/ePQ466KC8ZEb6d3LNNddE69atY9iwYXn206FDh8b111/f1MUAAABgI7WqVCqVqDFp0pvUWpnWdTSeEVinySe969Cs+Y3HUAPQMu3Vt/Gs/ZvFKXds/veEjcxUm3wMIwAAALWpybukApvPy
Ekzql2EZm30Iq2JAAAbQwsjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARXXlw0AtGb1oXLWLAADAFkgLIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEUCIwAAAEV15cMAAECTmnxStUvQfJ1yR7VLwDpoYQQAAKBIYAQAAKBIYAQAAKDIGEYAAFqcWfOXVLsINWevvp2rXQSqQAsjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARQIjAAAARXXlw9AMTT6p2iVodkYvWlLtIgAAsAXTwggAAECRFkYAAOB9zZq/6Xo2XTtpRmypbh6xX9QyLYwAAAAUCYwAAAAUCYwAAAAUGcPYHJkNFAAAaAa0MAIAAFCkhZEtftYtAACgBgPjddddF9/61rdi4cKFseeee8a1114b//iP/xi1buRGTgtsMXYAAKBFd0m94447YsyYMfHVr341/vM//zMHxqFDh8bixYurVSQAAACaQwvjt7/97TjzzDPj9NNPz/s33nhj3HvvvfGDH/wgLrnkkkbXrly5Mm/1li5dmh+XLVsWzdGqN5dv1POXr3i7ycoCAADN3cb+/tycLWummaW+XJVK5T2va1V5vys2gVWrVsXWW28dP/3pT+P4449vOD58+PBYsmRJ3H333Y2uv+yyy2LChAmbu5gAAABbtPnz58f222/fvFoY//a3v8Xq1aujZ8+ejY6n/T/84Q/vun7s2LG5+2q9NWvWxKuvvhpdu3aNVq1aNVnC7tu3b/6GdezYsUlek01HfdUOdVVb1FftUFe1Q13VFvVVO9TVxknthq+99lr06dOn9mdJbd++fd7W1rlz503yXul/Nv/D1Q71VTvUVW1RX7VDXdUOdVVb1FftUFcbrlOnTs1z0ptu3bpFmzZtYtGiRY2Op/1evXpVo0gAAAA0h8DYrl272GeffWLatGmNupmm/cGDB1ejSAAAADSXLqlpTGKa5GbffffNay9+5zvfiddff71h1tTNLXV5TUt8vLPrK82T+qod6qq2qK/aoa5qh7qqLeqrdqirzaMqs6TW+973vhff+ta3YuHChbHXXnvFd7/73dh///2rVRwAAACaS2AEAACg+arKGEYAAACaP4ERAACAIoERAACAIoERAACAohYXGC+77LJo1apVo23gwIEN51esWBHnnntudO3aNbbddtsYNmxYLFq0qKplbqner64OOeSQd50/++yzq1rmlux//ud/4rOf/Wz+7HTo0CF23333eOaZZxrOp/m1xo8fH717987nhwwZEi+++GJVy9ySvV99jRgx4l2fryOOOKKqZW6JPvjBD76rHtKW7lOJe1Zt1Zf7VvOxevXquPTSS2PAgAH5Z+COO+4YV1xxRb5X1XPfqp26cs/aQtdhrKZdd901HnzwwYb9urr//204//zz4957740777wzOnXqFKNGjYoTTzwxHn/88SqVtmV7r7pKzjzzzLj88ssb9rfeeuvNWj7+r//93/+NAw88MD7+8Y/HL3/5y+jevXu+qX7gAx9ouOaqq67KS+fceuut+Yd++uE/dOjQeOGFF2Krrbaqavlbmr+nvpJ0s73lllsa9q1ztfnNmDEj/7JU77nnnotPfOIT8alPfSrvu2fVVn0l7lvNw5VXXhk33HBDviel3zXSH8zSWuDpc/Sv//qv+Rr3rdqpq8Q9a9NpkYExhY5evXq96/jSpUvj5ptvjsmTJ8ehhx6aj6X/8QYNGhRPPvlkHHDAAVUobcu2r
rpa+0b7XufZfD/M+/bt2+gHdbq51kt/BfzOd74T48aNi+OOOy4fu+2226Jnz55x1113xcknn1yVcrdU71dfa99sfb6qK4X5tX3zm9/Mf13/p3/6J/esGquveu5bzcMTTzyR70dHH310Q+vwj370o3j66afzvvtW7dRVPfesTafFdUlN0l/S+/TpEzvssEOceuqpMW/evHx85syZ8dZbb+UuB/VSF8h+/frF9OnTq1jilmtddVXv9ttvj27dusVuu+0WY8eOjTfeeKNqZW3Jfv7zn8e+++6b/4reo0eP+MhHPhLf//73G87PnTs3Fi5c2Oizlf4yuP/++/tsNcP6qvfII4/k8zvvvHOcc8458corr1SlvPxfq1atih/+8Idxxhln5O5W7lm1VV/13Leah49+9KMxbdq0+OMf/5j3f/vb38Zjjz0WRx55ZN5336qduqrnnrXptLgWxvRBnzRpUv6facGCBTFhwoT42Mc+lruNpB8M7dq1i86dOzd6TvprUjpH86mr7bbbLk455ZTo379/DpTPPvtsXHzxxTF79uyYMmVKtYve4vz3f/937i4yZsyY+PKXv5y7ZaVuIunzNHz48IbPT/osrc1nq3nWV33XntS1MbU8vvTSS/m6dHNOvyi1adOm2l9Ci5RaNZYsWZLH6iTuWbVVX4n7VvNxySWXxLJly/IfWdLPtNSV+Otf/3r+43TivlU7dZW4Z21aLS4wrv3XiD322COHkvTD+yc/+UkeSEtt1NXIkSPjrLPOajifJuxIg9IPO+yw/IMidQFi81mzZk1usfrGN76R91OLVQr2N954Y0MAobbqa+3uVunzlT6D6XOV/oKbPmdsfqn7afq5mMIGtVlf7lvNR/pdIrX2pi7daVzcrFmz4rzzzsv15b5Ve3XlnrVptcguqWtLf5ndaaedYs6cObnfc+pCkv4iuLY045w+0c2rrkpSoEzWdZ5NJ/3Ss8suuzQ6lsZR1Xchrv/8vHP2Rp+t5llfJalbeOpG5/NVHX/+85/zBGCf//znG465Z9VWfZW4b1XPRRddlFuuUtBIAeO0007Lk0hNnDgxn3ffqp26KnHPalotPjAuX748/2Uv/QK1zz77RNu2bXM/6Xqpq0j6JWrw4MFVLSeN66ok/cUpWdd5Np0042b6rKwtjTVILcJJ6iKSbrBrf7ZS95KnnnrKZ6sZ1lfJX/7ylzwexOerOtJkNmlsTv2kD4l7Vm3VV4n7VvWksaOtWzf+NTh1XUw9MBL3rdqpqxL3rCZWaWEuuOCCyiOPPFKZO3du5fHHH68MGTKk0q1bt8rixYvz+bPPPrvSr1+/ykMPPVR55plnKoMHD84bzauu5syZU7n88stzHaXzd999d2WHHXaoHHzwwdUudov09NNPV+rq6ipf//rXKy+++GLl9ttvr2y99daVH/7whw3XfPOb36x07tw519Wzzz5bOe644yoDBgyovPnmm1Ute0v0fvX12muvVS688MLK9OnT8+frwQcfrOy9996VD3/4w5UVK1ZUu/gtzurVq/N96eKLL37XOfes2qkv963mZfjw4ZV/+Id/qEydOjXXx5QpU/LvGF/60pcarnHfqo26cs/a9FpcYDzppJMqvXv3rrRr1y7/z5f20w/xeumHwL/8y79UPvCBD+RfoE444YTKggULqlrmluq96mrevHn5JtulS5dK+/btKx/60IcqF110UWXp0qXVLnaLdc8991R22223XB8DBw6s3HTTTY3Or1mzpnLppZdWevbsma857LDDKrNnz65aeVu696qvN954o3L44YdXunfvXmnbtm2lf//+lTPPPLOycOHCqpa5pbr//vvT6tTFz4t7Vu3Ul/tW87Js2bLKF7/4xRzut9pqqxzev/KVr1RWrlzZcI37Vm3UlXvWptcq/aepWy0BAACofS1+DCMAAABlAiMAAABFAiMAAABFAiMAAABFAiMAAABFAiMAAABFAiMAAABFAiMAAABFAiMAAABFA
iMAAABFAiMAAABR8n8AH/e2vUe9KYgAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import torch\n", + "from leaspy.io.logs.visualization import Plotter\n", + "from leaspy.io.outputs import Result\n", + "\n", + "n_individuals = 2000\n", + "patient_ids = [str(i) for i in range(n_individuals)]\n", + "\n", + "df_longitudinal = pd.DataFrame({'ID': np.repeat(patient_ids, 3), 'TIME': np.tile([60, 70, 80], n_individuals), 'Y0': np.random.rand(n_individuals * 3)})\n", + "data = Data.from_dataframe(df_longitudinal)\n", + "\n", + "df_cofactors = pd.DataFrame({'gender': np.random.choice(['Male', 'Female'], size=n_individuals)}, index=patient_ids)\n", + "df_cofactors.index.name = 'ID'\n", + "data.load_cofactors(df_cofactors, cofactors=['gender'])\n", + "\n", + "individual_parameters = {'tau': torch.tensor(np.random.normal(70, 5, (n_individuals, 1))), 'xi': torch.tensor(np.random.normal(0, 0.5, (n_individuals, 1)))}\n", + "result_obj = Result(data, individual_parameters)\n", + "\n", + "Plotter().plot_distribution(result_obj, parameter='tau', cofactor='gender')" + ] + }, + { + "cell_type": "markdown", + "id": "d35263b9", + "metadata": {}, + "source": [ + "It is important to note that attaching external data to the class through `data.load_cofactors` is different from loading cofactors inside the model using `factory_kws`:\n", + "\n", + "| Feature | **Covariates** (via `factory_kws`) | **Cofactors** (via `load_cofactors`) |\n", + "| :--- | :--- | :--- |\n", + "| **Purpose** | Used **inside the model** to modulate parameters (e.g., in `CovariateLogisticModel`). | Used for **analysis/metadata** (e.g., plotting, stratification) but ignored by the model's math. |\n", + "| **Loading** | Loaded **during** `Data` creation. | Loaded **after** `Data` creation. |\n", + "| **Storage** | Stored as a `numpy.ndarray` in `individual.covariates`. | Stored as a `dict` in `individual.cofactors`. 
|\n", + "| **Constraints** | Must be **integers**, constant per individual, and have no missing values. | Can be any type (strings, floats, etc.). |\n" + ] + }, + { + "cell_type": "markdown", + "id": "689f4849", + "metadata": {}, + "source": [ + "### **Best Practice:**\n", + "Always create a `Data` object first. It validates your input and handles irregularities (missing visits, different timelines) gracefully.\n", + "\n", + "---\n", + "\n", + "## 2. The `Dataset` Class: The Internal Engine\n", + "\n", + "The `Dataset` class is the **high-performance numerical engine**. It converts the flexible `Data` object into rigid PyTorch Tensors optimized for mathematical computation.\n", + "\n", + "### What it does\n", + "* **Tensorization**: Converts all values to PyTorch tensors.\n", + "* **Padding**: Standardizes patient timelines by padding them to the maximum number of visits (creating a rectangular data block).\n", + "* **Masking**: Creates a binary mask to distinguish real data from padding.\n", + "\n", + "### When to use it explicitly?\n", + "You rarely need to instantiate `Dataset` yourself. However, it is useful for **optimization**:\n", + "1. **Memory Efficiency**: For massive datasets, convert `Data` $\\to$ `Dataset` and delete the original `Data`/`DataFrame` to free up RAM.\n", + "2. **Performance**: If you are running multiple models on the same data, creating a `Dataset` once prevents `leaspy` from repeating the conversion process for every `fit()` call.\n", + "\n", + "---\n", + "\n", + "## 3. 
Workflow & Best Practices\n", + "\n", + "### The Standard Workflow\n", + "The most common and recommended workflow is straightforward:\n", + "\n", + "```\n", + "[CSV / DataFrame] -> Data.from_dataframe() -> [Data Object] -> model.fit(data)\n", + "```\n", + "\n", + "Inside `model.fit(data)`, Leaspy automatically converts your `Data` object into a `Dataset` for computation.\n", + "\n", + "### Guidelines for `model.fit()`\n", + "\n", + "| Input Type | Verdict | Reason |\n", + "| :--- | :--- | :--- |\n", + "| **`Data` Object** | ✅ **Recommended** | **Safe & Standard.** Handles all model types (including Joint models) correctly. Easy to inspect before fitting. |\n", + "| **`Dataset` Object** | ⚡ **Optimization** | **Fast.** Use for heavy datasets or repeated experiments to skip internal conversion steps. |\n", + "| **`pd.DataFrame`** | ❌ **Avoid** | **Risky.** Fails for complex models (e.g., `JointModel`) that require specific loading parameters. Leads to inconsistent code. |" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "leaspy", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/index.md b/docs/index.md index e3d229003..a3007c7c9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -74,10 +74,18 @@ auto_examples/index :maxdepth: 3 user_guide +reference/api/index +changelog +references +``` + +```{toctree} + :hidden: + :caption: More + :maxdepth: 2 + glossary notations -references -changelog to_go_further license ``` diff --git a/docs/mathematics.md b/docs/mathematics.md index da8073f1a..0ee966748 100644 --- a/docs/mathematics.md +++ b/docs/mathematics.md @@ -2,14 +2,14 @@ ## Nonlinear mixed effect models -Mixed effects models have become 
a powerful and widely used statistical tool for analyzing longitudinal data, or more specifically repeated measurements, especially in the context of biomedical and epidemiological research. These models are particularly suited for studying neurodegenerative and brain diseases where patients are followed over time, and multiple clinical characteristics are measured repeatedly. Moreover these diseases have been known to be heterogeneous with subtypes that are often difficult to characterize. +Mixed effects models have become a powerful and widely used statistical tool for analyzing longitudinal, or more specifically repeated measurements, data, especially in the context of biomedical and epidemiological research. These models are particularly suited for studying neurodegenerative and brain diseases where patients are followed over time, and multiple clinical characteristics are measured repeatedly. Moreover these diseases have been known to be heterogeneous with subtypes that are often difficult to characterize. One of the key strengths of mixed-effects models lies in their ability to capture two types of variability: - **Within-subject variability (fixed effects)** - This reflects systematic influences that affect all patients in similar ways, such as the effect of treatment, demographic factors, or known disease subtypes. Fixed effects represent consistent trends or shifts in the data that apply across the **population** or across specific groups. + This reflects systematic influences that affect all patients in similar ways, such as the effect of treatment, demographic factors, or known disease subtypes. Fixed effects represent consistent trends or shifts in the data that apply across the **population** or across specific groups. - **Between-subject variability (random effects)** - This accounts for differences among individual patients. For example, even if patients share the same diagnosis, their disease progression rates and onset time can vary considerably. 
By incorporating random effects, the model can capture this heterogeneity, allowing for personalized trajectory estimates at an **individual** level. + This accounts for differences among individual patients. For example, even if patients share the same diagnosis, their disease progression rates and onset time can vary considerably. By incorporating random effects, the model can capture this heterogeneity, allowing for personalized trajectory estimates at an **individual** level. With a mixed effects model, we can estimate the long-term progression of a disease by reconstructing individual patient trajectories over time, even when measurements are irregularly spaced or partially missing. This is crucial for understanding the natural history of a disease, identifying typical progression patterns, and recognizing outliers. Importantly, understanding how a disease evolves over time and quantifying the expected variability between individuals have significant clinical and research implications. Clinically, it supports personalized medicine approaches, tailoring treatment plans based on predicted progression. From a research standpoint, it helps identify factors driving heterogeneity in disease progression, which can inform new hypotheses about disease mechanisms or guide the development of targeted therapies. @@ -17,11 +17,11 @@ Brain and neurodegenerative diseases typically do not progress at a constant rat ### Parameter estimation -We suppose that the parameters capturing the population and the individual variability, namely fixed and random effects respectively, are latent variables ($z$) that follow some prior distributions. The parameters that define these distributions are called model parameters ($\theta$). We can also include hyperparmeters ($\Pi$) that have known values. +We suppose that the parameters capturing the population and the individual variability, namely fixed and random effects respectively, are latent variables ($z$) that follow some prior distributions. 
The parameters that define these distributions are called model parameters ($\theta$). We can also include hyperparameters ($\Pi$) that have known values. -Estimating the parameters of the models can be done through maximization of the likelihood. +Estimating the parameters of the models can be done through maximisation of the likelihood. -Assuming $y$ correspond to the observed data, $z$ the latent parameters, $\theta$ the model parameters and $\Pi$ the hyperparameters, the likelihood estimated by the model is the following: +Assuming $y$ corresponds to the observed data, $z$ the latent parameters, $\theta$ the model parameters and $\Pi$ the hyperparameters, the likelihood estimated by the model is the following: $$ p(y \mid \theta, \Pi) = \int_{z} p(y, z \mid \theta, \Pi) dz @@ -33,7 +33,7 @@ $$ \log p(y, z \mid \theta ,\Pi) = \log {p(y \mid z, \theta, \Pi)} + \log p(z \mid \theta , \Pi ) $$ -The prior attachment term can be separated into two terms: two terms for the prior attachment of latent parameters (fixed and random). We end up with the following expression : +The prior attachment term can be separated into two terms: two terms for the prior attachment of latent parameters (fixed and random). We end up with the following expression : $$ \log p(y, z \mid \theta, \Pi) = \log {p(y \mid z, \theta, \Pi)} + \log p(z_{re} \mid z_{fe}, \theta, \Pi) + \log p(z_{fe} \mid \theta, \Pi) diff --git a/examples/plot_01_quickstart.py b/examples/plot_01_quickstart.py index b3df2a4c4..a24683e23 100644 --- a/examples/plot_01_quickstart.py +++ b/examples/plot_01_quickstart.py @@ -24,25 +24,31 @@ # %% -# ```{warning} -# You **MUST** include both `ID` and `TIME`, either as indices or as columns. -# The remaining columns should correspond to the observed variables -# (also called features or endpoints). -# Each feature should have its own column, and each visit should occupy one row. -# ``` +# .. 
warning:: +# +# You **MUST** include both ``ID`` and ``TIME``, either as indices or as columns. +# The remaining columns should correspond to the observed variables +# (also called features or endpoints). +# Each feature should have its own column, and each visit should occupy one row. # %% -# ```{warning} -# - Leaspy supports *linear* and *logistic* models. -# - The features **MUST** be increasing over time. -# - For logistic models, data must be rescaled between 0 and 1. -# ``` +# .. warning:: +# +# - Leaspy supports *linear* and *logistic* models. +# - The features **MUST** be increasing over time. +# - For logistic models, data must be rescaled between 0 and 1. -from leaspy.io.data import Data, Dataset +from leaspy.io.data import Data data = Data.from_dataframe(alzheimer_df) -dataset = Dataset(data) + +# %% +# .. seealso:: +# +# For a deeper understanding of the ``Data`` and ``Dataset`` classes, including +# iteration, cofactors, and best practices, refer to the Data Containers Guide +# in the documentation. # %% # The core functionality of Leaspy is to estimate the group-average trajectory @@ -53,25 +59,40 @@ model = LogisticModel(name="test-model", source_dimension=2) model.fit( - dataset, + data, "mcmc_saem", seed=42, n_iter=100, progress_bar=False, ) +model.summary() # %% +# The ``fit`` method estimates the parameters of the model, which are then accessible +# through the ``summary`` method. The parameters are also stored in the ``parameters`` attribute of the model. + +model.info() + +# %% +# The method ``info`` provides the model configuration and the settings used for the fit, +# as well as the dataset information and the training information. +# # Leaspy can also estimate the *individual trajectories* of each participant. 
# This is done using a personalization algorithm, here `scipy_minimize`: individual_parameters = model.personalize( - dataset, "scipy_minimize", seed=0, progress_bar=False + data, "scipy_minimize", seed=0, progress_bar=False, use_jacobian=False ) print(individual_parameters.to_dataframe()) +# %% +# We have seen how to fit a model and personalize it to individuals. +# Leaspy also provides various plotting functions to visualize the results. +# Let's go to the next :doc:`section ` to see how to plot +# the group-average trajectory and the individual trajectories using the Parkinson's disease dataset. # %% -# To go further; +# To go further: # -# 1. See the [User Guide](../user_guide.md) and full API documentation. -# 2. Explore additional [examples](./index.rst). +# 1. See the :doc:`User Guide <../user_guide>` and full API documentation. +# 2. Explore additional :doc:`examples <./index>`. diff --git a/examples/plot_02_parkinson_example.py b/examples/plot_02_parkinson_example.py index f32a3b1ab..7e4282bd4 100644 --- a/examples/plot_02_parkinson_example.py +++ b/examples/plot_02_parkinson_example.py @@ -44,7 +44,6 @@ # %% # Visualization utilities from Leaspy and Matplotlib are imported. import matplotlib.pyplot as plt - from leaspy.io.logs.visualization.plotting import Plotting leaspy_plotting = Plotting(model) @@ -79,7 +78,7 @@ # %% # Individual parameters are obtained for the test data using the personalization step. -ip = model.personalize(data_test, "scipy_minimize", seed=0, progress_bar=False) +ip = model.personalize(data_test, "scipy_minimize", seed=0, progress_bar=False, use_jacobian=False) # %% # The test data with individually re-parametrized ages is plotted below. 
@@ -116,14 +115,13 @@ ip, patients_idx=["GS-187"], labels=["MDS1", "MDS2", "MDS3 (off)"], - alpha=1, - linestyle="-", - linewidth=2, - markersize=8, - obs_alpha=0.5, figsize=(16, 6), - factor_past=0.5, factor_future=5, ) ax.set_xlim(45, 120) plt.show() + +# %% +# This concludes the Parkinson's disease progression modeling example using Leaspy. +# Leaspy is also capable of handling various other types of models, such as the Joint Models, +# which will be explored in the [next section](./plot_03_joint). \ No newline at end of file diff --git a/examples/plot_03_joint.py b/examples/plot_03_joint.py index 2a436fb7d..dedbd37f8 100644 --- a/examples/plot_03_joint.py +++ b/examples/plot_03_joint.py @@ -7,14 +7,11 @@ # %% # The following imports are required libraries for numerical computation and data manipulation. import os - import pandas as pd - import leaspy from leaspy.io.data import Data leaspy_root = os.path.dirname(leaspy.__file__) - data_path = os.path.join(leaspy_root, "datasets/data/simulated_data_for_joint.csv") df = pd.read_csv(data_path, dtype={"ID": str}, sep=";") @@ -39,7 +36,7 @@ from leaspy.models import JointModel data = Data.from_dataframe(df, "joint") -model = JointModel(name="test_model", nb_events=1) +model = JointModel(name="test_model", nb_events=1, source_dimension=2) # %% # The parameter `nb_events` should match the number of distinct event types @@ -51,8 +48,17 @@ # Once the model is initialized, we can fit it to the data. model.fit(data, "mcmc_saem", seed=1312, n_iter=100, progress_bar=False) +model.summary() + +# %% +# We can also access the model information and parameters after fitting it to the data. +model.info() # %% # The Joint Model includes specific parameters such as `log_rho_mean` and `zeta_mean`. print(model.parameters) + +# %% +# We have seen how to fit a Joint Model using Leaspy. It also provides other models, such as the +# [Mixture Model](./plot_04_mixture) that can be explored in the next examples. 
\ No newline at end of file diff --git a/examples/plot_04_mixture.py b/examples/plot_04_mixture.py index ddaa730aa..8ef5ea3e5 100644 --- a/examples/plot_04_mixture.py +++ b/examples/plot_04_mixture.py @@ -8,21 +8,18 @@ # %% # The following imports are required libraries for numerical computation, data manipulation, and visualization. import os - import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch - import leaspy -from leaspy.io.data import Data, Dataset +from leaspy.io.data import Data # %% # This toy example is part of a simulation study, carried out by Sofia Kaisaridi that will be included in # an article to be submitted in a biostatistics journal. The dataset contains 1000 individuals each with 6 visits and 6 scores. leaspy_root = os.path.dirname(leaspy.__file__) - data_path = os.path.join(leaspy_root, "datasets/data/simulated_data_for_mixture.csv") all_data = pd.read_csv(data_path, sep=";", decimal=",") @@ -34,7 +31,6 @@ from leaspy.models import LogisticMultivariateMixtureModel leaspy_data = Data.from_dataframe(all_data) -leaspy_dataset = Dataset(leaspy_data) # %% # Then we fit a model with 3 clusters and 2 sources. Note that we have an extra argument `n_clusters` than the @@ -48,7 +44,8 @@ obs_models="gaussian-diagonal", ) -model.fit(leaspy_dataset, "mcmc_saem", seed=1312, n_iter=100, progress_bar=False) +model.fit(leaspy_data, "mcmc_saem", seed=1312, n_iter=100, progress_bar=False) +model.summary() # %% # First we take a look in the population parameters. @@ -63,7 +60,6 @@ from torch.distributions import Normal - def get_ip(df_leaspy, model): """ leaspy_data : the dataframe with the correct indexing @@ -295,3 +291,10 @@ def get_ip(df_leaspy, model): plt.suptitle("Population progression", fontsize=18) plt.tight_layout() plt.show() + +# %% +# This concludes the Mixture Model example using Leaspy. We can also use these fit models to +# simulate new data according to the estimated parameters. 
This can be useful for +# validating the model, for generating synthetic datasets for further analysis or for +# generate a trajectory for a new individual given specific parameters. Let's check this +# in the [next example](./plot_05_simulate.py). diff --git a/examples/plot_05_simulate.py b/examples/plot_05_simulate.py index a9ae3ef59..6ba32858a 100644 --- a/examples/plot_05_simulate.py +++ b/examples/plot_05_simulate.py @@ -91,4 +91,7 @@ # %% # The simulated longitudinal dataset is displayed below. -print(df_sim) +df_sim.head(10) + +# %% +# This concludes the simulation example using Leaspy. Stay tuned for more examples on model fitting and analysis! \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 04570b094..039825755 100644 --- a/poetry.lock +++ b/poetry.lock @@ -501,7 +501,7 @@ description = "Python library for calculating contours of 2D quadrilateral grids optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934"}, {file = "contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989"}, @@ -793,7 +793,7 @@ description = "A platform independent file lock." 
optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"}, {file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"}, @@ -888,7 +888,7 @@ description = "Tools to manipulate font files" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "fonttools-4.61.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c7db70d57e5e1089a274cbb2b1fd635c9a24de809a231b154965d415d6c6d24"}, {file = "fonttools-4.61.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5fe9fd43882620017add5eabb781ebfbc6998ee49b35bd7f8f79af1f9f99a958"}, @@ -1272,7 +1272,7 @@ description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.11" groups = ["docs"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "ipython-9.2.0-py3-none-any.whl", hash = "sha256:fef5e33c4a1ae0759e0bba5917c9db4eb8c53fee917b6a526bd973e1ca5159f6"}, {file = "ipython-9.2.0.tar.gz", hash = "sha256:62a9373dbc12f28f9feaf4700d052195bf89806279fc8ca11f3f54017d04751b"}, @@ -1306,7 +1306,7 @@ description = "Defines a variety of Pygments lexers for highlighting IPython cod optional = false python-versions = ">=3.8" groups = ["docs"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = 
"sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c"}, {file = "ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81"}, @@ -1607,7 +1607,7 @@ description = "A fast implementation of the Cassowary constraint solver" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "kiwisolver-1.4.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88c6f252f6816a73b1f8c904f7bbe02fd67c09a69f7cb8a0eecdbf5ce78e63db"}, {file = "kiwisolver-1.4.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c72941acb7b67138f35b879bbe85be0f6c6a70cab78fe3ef6db9c024d9223e5b"}, @@ -1894,7 +1894,7 @@ description = "Python plotting package" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "matplotlib-3.10.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ff2ae14910be903f4a24afdbb6d7d3a6c44da210fc7d42790b87aeac92238a16"}, {file = "matplotlib-3.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0721a3fd3d5756ed593220a8b86808a36c5031fce489adb5b31ee6dbb47dd5b2"}, @@ -2075,7 +2075,7 @@ description = "An extended [CommonMark](https://spec.commonmark.org/) compliant optional = false python-versions = ">=3.10" groups = ["docs"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"}, {file = "myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4"}, @@ -2180,7 +2180,7 @@ description = 
"Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"}, {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"}, @@ -2766,7 +2766,7 @@ description = "Python Imaging Library (fork)" optional = false python-versions = ">=3.10" groups = ["main", "docs"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "pillow-12.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:fb125d860738a09d363a88daa0f59c4533529a90e564785e20fe875b200b6dbd"}, {file = "pillow-12.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cad302dc10fac357d3467a74a9561c90609768a6f73a1923b0fd851b6486f8b0"}, @@ -3626,7 +3626,7 @@ description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" +markers = "python_version >= \"3.11\"" files = [ {file = "scipy-1.15.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9"}, {file = "scipy-1.15.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5"}, @@ -3695,7 +3695,7 @@ files = [ {file = "setuptools-80.3.1-py3-none-any.whl", hash = "sha256:ea8e00d7992054c4c592aeb892f6ad51fe1b4d90cc6947cc45c45717c40ec537"}, {file = "setuptools-80.3.1.tar.gz", hash = "sha256:31e2c58dbb67c99c289f51c16d899afedae292b978f8051efaf6262d8212f927"}, ] -markers 
= {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version == \"3.12\" or python_full_version == \"3.13.0\"", docs = "python_version == \"3.12\" or python_full_version == \"3.13.0\""} +markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version >= \"3.12\"", docs = "python_version >= \"3.12\""} [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] @@ -3821,6 +3821,25 @@ docs = ["furo (>=2024.1.29)"] numpy = ["nptyping (>=2.5)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.4.4)", "defusedxml (>=0.7.1)", "diff-cover (>=9)", "pytest (>=8.1.1)", "pytest-cov (>=5)", "sphobjinv (>=2.3.1)", "typing-extensions (>=4.11)"] +[[package]] +name = "sphinx-copybutton" +version = "0.5.2" +description = "Add a copy button to each of your code cells." +optional = false +python-versions = ">=3.7" +groups = ["docs"] +files = [ + {file = "sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd"}, + {file = "sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e"}, +] + +[package.dependencies] +sphinx = ">=1.8" + +[package.extras] +code-style = ["pre-commit (==2.12.1)"] +rtd = ["ipython", "myst-nb", "sphinx", "sphinx-book-theme", "sphinx-examples"] + [[package]] name = "sphinx-gallery" version = "0.19.0" @@ -4583,5 +4602,5 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" -python-versions = ">=3.9,<=3.13" -content-hash = "ed8e866b46609095efa90cb8c48eb33df5cbb69947eb0c9c941c0278d7346633" +python-versions = ">=3.9,<3.14" +content-hash = "b56744a4283022f6f0f7c21fa58284b73750d4233834ff907ffbc63fcbfb522c" diff --git a/pyproject.toml b/pyproject.toml index 479236a5b..c1e3fa8aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ maintainers = [ ] readme = "README.md" 
packages = [{include = "leaspy", from = "src"}] -requires-python = ">=3.9,<=3.13" +requires-python = ">=3.9,<3.14" keywords = [ "leaspy", "longitudinal", @@ -72,6 +72,7 @@ sphinx-autoapi = "*" myst-nb = "*" sphinxcontrib-bibtex = "*" pydata-sphinx-theme = "^0.16.1" +sphinx-copybutton = "*" # Security fixes - prevent downgrades urllib3 = ">=2.6.3" requests = ">=2.32.5" diff --git a/src/leaspy/algo/algo_with_device.py b/src/leaspy/algo/algo_with_device.py index b0566f2de..e0397d8b1 100644 --- a/src/leaspy/algo/algo_with_device.py +++ b/src/leaspy/algo/algo_with_device.py @@ -1,4 +1,5 @@ import contextlib +import warnings import torch @@ -57,10 +58,14 @@ def _device_manager(self, model: McmcSaemCompatibleModel, dataset: Dataset): algorithm_tensor_type = "torch.cuda.FloatTensor" try: - yield torch.set_default_tensor_type(algorithm_tensor_type) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=".*torch.set_default_tensor_type.*") + yield torch.set_default_tensor_type(algorithm_tensor_type) finally: if self.algorithm_device != self._default_algorithm_device.type: model.move_to_device(self._default_algorithm_device) dataset.move_to_device(self._default_algorithm_device) - torch.set_default_tensor_type(self._default_algorithm_tensor_type) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=".*torch.set_default_tensor_type.*") + torch.set_default_tensor_type(self._default_algorithm_tensor_type) diff --git a/src/leaspy/algo/base.py b/src/leaspy/algo/base.py index d91fabcc6..4a1751454 100644 --- a/src/leaspy/algo/base.py +++ b/src/leaspy/algo/base.py @@ -115,7 +115,7 @@ def _initialize_seed(seed: Optional[int]): np.random.seed(seed) torch.manual_seed(seed) # TODO: use logger instead (level=INFO) - print(f" ==> Setting seed to {seed}") + # print(f" ==> Setting seed to {seed}") # Silenced to reduce verbosity def run( self, model: ModelType, dataset: Optional[Dataset] = None, **kwargs @@ -262,7 +262,7 @@ def 
load_parameters(self, parameters: dict): self.algo_parameters[k] = v @staticmethod - def _duration_to_str(seconds: float, *, seconds_fmt=".0f") -> str: + def _duration_to_str(seconds: float, *, seconds_fmt=".2f") -> str: """ Convert a float representing computation time in seconds to a string giving time in hour, minutes and seconds ``%h %min %s``. diff --git a/src/leaspy/models/base.py b/src/leaspy/models/base.py index 3818f757b..d1ced52b0 100644 --- a/src/leaspy/models/base.py +++ b/src/leaspy/models/base.py @@ -1,3 +1,4 @@ +import time import warnings from abc import ABC, abstractmethod from enum import Enum @@ -15,6 +16,8 @@ from leaspy.io.outputs.result import Result from leaspy.utils.typing import DictParamsTorch, FeatureType, IDType, KwargsType +from .summary import DatasetInfo, Info, Summary, TrainingInfo + __all__ = [ "InitializationMethod", "BaseModel", @@ -387,6 +390,35 @@ def simulate( """ raise NotImplementedError + def _compute_dataset_statistics(self, dataset: Dataset) -> DatasetInfo: + """Compute descriptive statistics of the dataset used for training.""" + stats = { + "n_subjects": dataset.n_individuals, + "n_scores": dataset.dimension, + "n_visits": dataset.n_visits, + "n_observations": int(dataset.mask.sum().item()), + } + + # Per-subject observations + visits_per_ind = np.array(dataset.n_visits_per_individual) + stats["visits_per_subject"] = { + "median": float(np.median(visits_per_ind)), + "min": int(np.min(visits_per_ind)), + "max": int(np.max(visits_per_ind)), + "iqr": float(np.percentile(visits_per_ind, 75) - np.percentile(visits_per_ind, 25)) + } + + # Missing data (Total possible points - observed points) + total_possible = stats["n_visits"] * stats["n_scores"] + stats["n_missing"] = total_possible - stats["n_observations"] + stats["pct_missing"] = (stats["n_missing"] / total_possible) * 100 if total_possible > 0 else 0.0 + + # Joint model specific + if getattr(dataset, "event_bool", None) is not None: + stats["n_events"] = 
int(dataset.event_bool.sum().item()) + + return stats + class BaseModel(ModelInterface): """Base model class from which all ``Leaspy`` models should inherit. @@ -402,6 +434,8 @@ def __init__(self, name: str, **kwargs): ) self._features: Optional[list[FeatureType]] = user_provided_features self._dimension: Optional[int] = user_provided_dimension + self.dataset_info: DatasetInfo = {} + self.training_info: TrainingInfo = {} self.initialization_method: InitializationMethod = InitializationMethod.DEFAULT if "initialization_method" in kwargs: self.initialization_method = InitializationMethod( @@ -664,6 +698,8 @@ def to_dict(self, **kwargs) -> KwargsType: "parameters": { k: tensor_to_list(v) for k, v in (self.parameters or {}).items() }, + "dataset_info": self.dataset_info, + "training_info": self.training_info, } @classmethod @@ -692,6 +728,11 @@ def load(cls, path_to_model_settings: Union[str, Path]): reader = ModelSettings(path_to_model_settings) instance = model_factory(reader.name, **reader.hyperparameters) instance.load_parameters(reader.parameters) + + # Load extra info if available + instance.dataset_info = reader.dataset_info + instance.training_info = reader.training_info + instance._is_initialized = True return instance @@ -750,7 +791,153 @@ def fit( ) ) is None: return + + # Compute and store dataset statistics + self.dataset_info = self._compute_dataset_statistics(dataset) + + # Store training metadata (converged captured after run) + self.training_info = { + "algorithm": algorithm.name, + "seed": algorithm.seed, + "n_iter": algorithm.algo_parameters.get("n_iter"), + } + + t0 = time.perf_counter() algorithm.run(self, dataset) + elapsed = time.perf_counter() - t0 + + self.training_info["converged"] = getattr(algorithm, "converged", None) + self.training_info["duration"] = f"{elapsed:.3f}s" + + def info(self) -> Info: + """Return model configuration and training context. + + When called directly (e.g. ``model.info()``), prints the information. 
+ When stored in a variable, provides programmatic access. + + Returns + ------- + :class:`~leaspy.models.summary.Info` + Model configuration and training context. + + Examples + -------- + >>> model.info() # prints info + >>> i = model.info() # store for programmatic access + >>> i.n_subjects # 150 + >>> i.pct_missing # 2.5 + >>> i.help() # list available attributes + """ + # Observation model names + obs_model_names = None + if hasattr(self, "obs_models"): + obs_model_names = [om.to_string() for om in self.obs_models] + + # Parameter count + n_total_params = None + if self.parameters: + n_total_params = sum(p.numel() for p in self.parameters.values()) + + # Leaspy version + try: + from leaspy import __version__ as version + except ImportError: + version = None + + return Info( + name=self.name, + model_type=self.__class__.__name__, + dimension=self.dimension, + features=self.features, + source_dimension=getattr(self, "source_dimension", None), + n_clusters=getattr(self, "n_clusters", None), + obs_models=obs_model_names, + n_total_params=n_total_params, + training_info=dict(self.training_info), + dataset_info=dict(self.dataset_info), + leaspy_version=version, + ) + + def summary(self) -> Summary: + """Generate a structured summary of the model. + + When called directly (e.g., `model.summary()`), prints a formatted summary. + When stored in a variable, provides programmatic access to model attributes. + + Returns + ------- + :class:`Summary` + A structured summary object. + + Raises + ------ + :exc:`.LeaspyModelInputError` + If the model is not initialized or has no parameters. + + Examples + -------- + >>> model.summary() # Prints the summary + >>> s = model.summary() # Store to access attributes + >>> s.nll # Get specific value + >>> s.help() # Show available attributes + """ + if not self.is_initialized: + raise LeaspyModelInputError("Model is not initialized. 
Call fit() first.") + + if self.parameters is None or len(self.parameters) == 0: + raise LeaspyModelInputError("Model has no parameters. Call fit() first.") + + # Get NLL if available + nll = None + if (fm := getattr(self, "fit_metrics", None)) and (nll_val := fm.get("nll_tot")): + nll = float(nll_val) + + # Get observation model names if available + obs_model_names = None + if hasattr(self, "obs_models"): + obs_model_names = [om.to_string() for om in self.obs_models] + + # Get leaspy version + try: + from leaspy import __version__ as version + except ImportError: + version = None + + # Build parameters dictionary grouped by category + params_by_category = {} + if hasattr(self, "_param_categories"): + cats = self._param_categories + cat_names = { + "population": "Population Parameters", + "individual_priors": "Individual Parameters", + "noise": "Noise Model", + } + for cat_key, display_name in cat_names.items(): + param_names = cats.get(cat_key, []) + if param_names: + params_by_category[display_name] = { + name: self.parameters[name] for name in param_names if name in self.parameters + } + else: + # Fallback: all params under "Parameters" + params_by_category["Parameters"] = dict(self.parameters) + + return Summary( + name=self.name, + model_type=self.__class__.__name__, + dimension=self.dimension, + features=self.features, + source_dimension=getattr(self, "source_dimension", None), + n_clusters=getattr(self, "n_clusters", None), + obs_models=obs_model_names, + nll=nll, + training_info=dict(self.training_info), + dataset_info=dict(self.dataset_info), + parameters=params_by_category, + leaspy_version=version, + _param_axes=getattr(self, "_param_axes", {}), + _feature_names=self.features, + ) @staticmethod def _get_dataset( diff --git a/src/leaspy/models/joint.py b/src/leaspy/models/joint.py index 00bbed136..72bc7008a 100644 --- a/src/leaspy/models/joint.py +++ b/src/leaspy/models/joint.py @@ -50,6 +50,14 @@ class JointModel(LogisticModel): init_tolerance: float = 
0.3 + # Extend axis definitions for JointModel-specific parameters + _param_axes = { + **LogisticModel._param_axes, + "n_log_nu_mean": ("event",), + "log_rho_mean": ("event",), + "zeta_mean": ("source", "event"), + } + def __init__(self, name: str, **kwargs): super().__init__(name, **kwargs) self._configure_observation_models() diff --git a/src/leaspy/models/mcmc_saem_compatible.py b/src/leaspy/models/mcmc_saem_compatible.py index 1a01c739b..c4c8a40c7 100644 --- a/src/leaspy/models/mcmc_saem_compatible.py +++ b/src/leaspy/models/mcmc_saem_compatible.py @@ -20,6 +20,7 @@ from .obs_models import ObservationModel from .stateful import StatefulModel +from .summary import get_axis_labels __all__ = ["McmcSaemCompatibleModel"] @@ -60,6 +61,62 @@ class McmcSaemCompatibleModel(StatefulModel): Private instance holding all values for model variables and their derived variables. """ + # Base parameter categories for summary display (override in subclasses) + + _individual_prior_params: tuple[str, ...] = ( + "tau_mean", + "tau_std", + "xi_mean", + "xi_std", + "sources_mean", + "sources_std", + "zeta_mean" + ) + + _noise_params: tuple[str, ...] 
= ("noise_std",) + + # Explicit axis labels for multi-dimensional parameters + # Maps param_name -> tuple of axis names, e.g., ("feature",) or ("feature", "source") + # Subclasses can extend this with: _param_axes = {**ParentClass._param_axes, "new_param": ("axis",)} + _param_axes: dict[str, tuple[str, ...]] = { + "log_g_mean": ("feature",), + "log_g_std": ("feature",), + "log_v0_mean": ("feature",), + "betas_mean": ("basis", "source"), # basis vectors, not features (dim-1) + "mixing_matrix": ("source", "feature"), + "noise_std": ("feature",), + } + + @property + def _param_categories(self) -> dict[str, list[str]]: + """Categorize parameters for summary display.""" + ind_priors = set(self._individual_prior_params) + noise = set(self._noise_params) + all_params = set(self.parameters.keys()) if self.parameters else set() + pop = all_params - ind_priors - noise + + def sort_key(name: str) -> tuple[int, str, str]: + # Sort by number of columns (ascending), then primary axis, then name + val = self.parameters[name] + axes = self._param_axes.get(name, ()) + primary_axis = axes[0] if axes else "" + + n_cols = 1 + if val.ndim == 1 and axes: + # Check if this axis produces labeled columns + if get_axis_labels(primary_axis, len(val), self.features) is not None: + n_cols = len(val) + elif val.ndim == 2: + n_cols = val.shape[1] + + return (n_cols, primary_axis, name) + + return { + "population": sorted((k for k in pop if k in all_params), key=sort_key), + "individual_priors": sorted((k for k in ind_priors if k in all_params), key=sort_key), + "noise": sorted((k for k in noise if k in all_params), key=sort_key), + } + def __init__( self, name: str, diff --git a/src/leaspy/models/mixture.py b/src/leaspy/models/mixture.py index c95c9285b..8a794d025 100644 --- a/src/leaspy/models/mixture.py +++ b/src/leaspy/models/mixture.py @@ -77,6 +77,23 @@ class TimeReparametrizedMixtureModel(McmcSaemCompatibleModel): _tau_std = 5.0 _noise_std = 0.1 _sources_mean = 0 + + # Override: in mixture 
models, tau/xi/sources means define cluster centers (population-level) + _individual_prior_params = ("tau_std", "xi_std", "sources_std", "tau_mean", "xi_mean") + _noise_params = ("noise_std",) + + # Extend base _param_axes with mixture-specific cluster-indexed parameters + _param_axes = { + **McmcSaemCompatibleModel._param_axes, + "tau_mean": ("cluster",), + "tau_std": ("cluster",), + "xi_mean": ("cluster",), + "xi_std": ("cluster",), + "sources_mean": ("source", "cluster"), + "sources_std": ("source", "cluster"), + "probs": ("cluster",), + } + _sources_std = 1.0 @property diff --git a/src/leaspy/models/settings.py b/src/leaspy/models/settings.py index 51183e7cb..2b74b2c8f 100644 --- a/src/leaspy/models/settings.py +++ b/src/leaspy/models/settings.py @@ -5,6 +5,8 @@ from leaspy.exceptions import LeaspyModelInputError from leaspy.utils.typing import KwargsType +from .summary import DatasetInfo, TrainingInfo + __all__ = ["ModelSettings"] @@ -50,10 +52,13 @@ def __init__(self, path_to_model_settings_or_dict: Union[str, dict]): ModelSettings._check_settings(settings) self.name: str = settings["name"].lower() self.parameters: KwargsType = settings["parameters"] + self.dataset_info: DatasetInfo = settings.get("dataset_info", {}) + self.training_info: TrainingInfo = settings.get("training_info", {}) + self.hyperparameters: KwargsType = { k.lower(): v for k, v in settings.items() - if k not in ("name", "parameters", "hyperparameters", "leaspy_version") + if k not in ("name", "parameters", "hyperparameters", "leaspy_version", "dataset_info", "training_info") } @staticmethod diff --git a/src/leaspy/models/summary.py b/src/leaspy/models/summary.py new file mode 100644 index 000000000..dd01ff8f5 --- /dev/null +++ b/src/leaspy/models/summary.py @@ -0,0 +1,724 @@ +"""Model inspection objects for programmatic access to model metadata. 
+ +This module provides :class:`Summary` and :class:`Info`, returned by +``model.summary()`` and ``model.info()`` respectively, along with the +:class:`~typing.TypedDict` schemas for training and dataset metadata. + +Both classes auto-print when their return value is discarded (e.g. +``model.summary()``) and stay silent when stored in a variable +(e.g. ``s = model.summary()``). See :class:`AutoPrintMixin` for details. +""" + +import textwrap +from dataclasses import dataclass, field +from typing import Any, Optional, TypedDict + +import torch + +__all__ = [ + "AutoPrintMixin", + "DatasetInfo", + "Info", + "Summary", + "TrainingInfo", + "VisitsPerSubject", + "get_axis_labels", +] + + +# --------------------------------------------------------------------------- +# TypedDict schemas for metadata +# --------------------------------------------------------------------------- + + +class VisitsPerSubject(TypedDict, total=False): + """Per-subject visit distribution statistics.""" + + median: float + min: int + max: int + iqr: float + + +class DatasetInfo(TypedDict, total=False): + """Statistics of the training dataset, computed during ``fit()``.""" + + n_subjects: int + n_scores: int + n_visits: int + n_observations: int + visits_per_subject: VisitsPerSubject + n_missing: int + pct_missing: float + n_events: int + + +class TrainingInfo(TypedDict, total=False): + """Metadata about the training process, captured during ``fit()``.""" + + algorithm: str + seed: int + n_iter: int + converged: bool + duration: str + + +# --------------------------------------------------------------------------- +# Shared utilities +# --------------------------------------------------------------------------- + +# ANSI formatting constants +_WIDTH = 80 +_BOLD = "\033[1m" +_RESET = "\033[0m" + + +def get_axis_labels( + axis_name: Optional[str], + size: int, + feature_names: Optional[list[str]] = None, +) -> Optional[list[str]]: + """Resolve human-readable labels for a parameter axis. 
+ + Parameters + ---------- + axis_name : str or None + Semantic axis name (``"feature"``, ``"source"``, ``"cluster"``, + ``"basis"``). + size : int + Number of elements along the axis. + feature_names : list[str], optional + Feature names used when *axis_name* is ``"feature"``. + + Returns + ------- + list[str] or None + Labels for the axis, or ``None`` if no meaningful labels are available. + """ + if axis_name is None: + return None + + if axis_name == "feature": + if feature_names is not None: + feats = feature_names[:size] + return [f[:8] if len(f) <= 8 else f[:7] + "." for f in feats] + return [f"f{i}" for i in range(size)] + elif axis_name == "source": + return [f"s{i}" for i in range(size)] + elif axis_name == "cluster": + return [f"c{i}" for i in range(size)] + elif axis_name == "event": + return None + elif axis_name == "basis": + return [f"b{i}" for i in range(size)] + else: + return None + + +def _wrap_text(label: str, text: str, indent: int = 0) -> list[str]: + """Wrap *text* with a bold *label* prefix to fit within ``_WIDTH``.""" + prefix = f"{label}: " if label else "" + initial_indent = " " * indent + prefix + subsequent_indent = " " * (indent + 4) + wrapper = textwrap.TextWrapper( + width=_WIDTH, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + break_long_words=False, + break_on_hyphens=False, + ) + return wrapper.wrap(text) + + +# --------------------------------------------------------------------------- +# Auto-print mixin +# --------------------------------------------------------------------------- + + +class AutoPrintMixin: + """Mixin that auto-prints when the object is discarded. + + Relies on CPython reference counting: when the return value of e.g. + ``model.summary()`` is not assigned, the object is immediately + garbage-collected, triggering ``__del__`` which prints it. + + When stored (``s = model.summary()``), any public attribute access + sets ``_printed = True``, suppressing the ``__del__`` output. 
+ + Subclasses must define a ``_printed: bool`` field (via dataclass) + and a ``__str__`` method. + """ + + def __del__(self): + if not object.__getattribute__(self, "_printed"): + print(str(self)) + + def __repr__(self) -> str: + object.__setattr__(self, "_printed", True) + return str(self) + + def _repr_html_(self) -> str: + """Rich HTML display for Jupyter notebooks (enables bold rendering).""" + object.__setattr__(self, "_printed", True) + text = str(self) + text = text.replace("\033[1m", "").replace("\033[0m", "") + return f"
<pre>{text}</pre>
" + + def __getattribute__(self, name: str): + value = object.__getattribute__(self, name) + # Suppress auto-print once any public attribute is accessed + if not name.startswith("_") and name != "help": + object.__setattr__(self, "_printed", True) + return value + + +# --------------------------------------------------------------------------- +# Info +# --------------------------------------------------------------------------- + + +@dataclass(repr=False) +class Info(AutoPrintMixin): + """Model configuration and training context (no parameter values). + + Returned by ``model.info()``. Auto-prints when discarded; provides + programmatic access when stored in a variable. + + Examples + -------- + >>> model.info() # prints info + >>> i = model.info() # store for programmatic access + >>> i.n_subjects # 150 + >>> i.pct_missing # 2.5 + >>> i.help() # list available attributes + """ + + name: str + model_type: str + dimension: Optional[int] = None + features: Optional[list[str]] = None + source_dimension: Optional[int] = None + n_clusters: Optional[int] = None + obs_models: Optional[list[str]] = None + n_total_params: Optional[int] = None + training_info: TrainingInfo = field(default_factory=dict) + dataset_info: DatasetInfo = field(default_factory=dict) + leaspy_version: Optional[str] = None + _printed: bool = field(default=False, repr=False) + + # -- Convenience properties: training ------------------------------------ + + @property + def algorithm(self) -> Optional[str]: + """Algorithm name used for training.""" + return self.training_info.get("algorithm") + + @property + def seed(self) -> Optional[int]: + """Random seed used for training.""" + return self.training_info.get("seed") + + @property + def n_iter(self) -> Optional[int]: + """Number of iterations.""" + return self.training_info.get("n_iter") + + @property + def converged(self) -> Optional[bool]: + """Whether training converged.""" + return self.training_info.get("converged") + + @property + def 
duration(self) -> Optional[str]: + """Training duration.""" + return self.training_info.get("duration") + + # -- Convenience properties: dataset ------------------------------------- + + @property + def n_subjects(self) -> Optional[int]: + """Number of subjects in the training dataset.""" + return self.dataset_info.get("n_subjects") + + @property + def n_visits(self) -> Optional[int]: + """Total number of visits.""" + return self.dataset_info.get("n_visits") + + @property + def n_scores(self) -> Optional[int]: + """Number of scored features.""" + return self.dataset_info.get("n_scores") + + @property + def n_observations(self) -> Optional[int]: + """Total number of observed data points.""" + return self.dataset_info.get("n_observations") + + @property + def pct_missing(self) -> Optional[float]: + """Percentage of missing observations.""" + return self.dataset_info.get("pct_missing") + + @property + def n_missing(self) -> Optional[int]: + """Number of missing observations.""" + return self.dataset_info.get("n_missing") + + @property + def visits_per_subject(self) -> Optional[VisitsPerSubject]: + """Per-subject visit distribution statistics.""" + return self.dataset_info.get("visits_per_subject") + + @property + def n_events(self) -> Optional[int]: + """Number of observed events (joint models only).""" + return self.dataset_info.get("n_events") + + # -- Display ------------------------------------------------------------- + + def __str__(self) -> str: + lines = [] + sep = "=" * _WIDTH + + lines.append(sep) + lines.append(f"{_BOLD}{'Model Information':^{_WIDTH}}{_RESET}") + lines.append(sep) + + # Statistical Model + lines.append(f"{_BOLD}Statistical Model{_RESET}") + lines.append("-" * _WIDTH) + lines.append(f"Type: {self.model_type}") + lines.append(f"Name: {self.name}") + lines.append(f"Dimension: {self.dimension}") + if self.source_dimension is not None: + lines.append(f"Source Dimension: {self.source_dimension}") + if self.obs_models: + lines.append(f"Observation 
Models: {', '.join(self.obs_models)}") + if self.n_total_params is not None: + lines.append(f"Total Parameters: {self.n_total_params}") + if self.n_clusters is not None: + lines.append(f"Clusters: {self.n_clusters}") + + # Training Dataset + if self.dataset_info: + lines.append("") + lines.append(f"{_BOLD}Training Dataset{_RESET}") + lines.append("-" * _WIDTH) + di = self.dataset_info + lines.append(f"Subjects: {di.get('n_subjects', 'N/A')}") + lines.append(f"Visits: {di.get('n_visits', 'N/A')}") + lines.append(f"Scores (Features): {di.get('n_scores', 'N/A')}") + lines.append(f"Total Observations: {di.get('n_observations', 'N/A')}") + if "visits_per_subject" in di: + vps = di["visits_per_subject"] + lines.append( + f"Visits per Subject: Median {vps['median']:.1f} " + f"[Min {vps['min']}, Max {vps['max']}, IQR {vps['iqr']:.1f}]" + ) + if "n_missing" in di: + lines.append( + f"Missing Data: {di['n_missing']} " + f"({di.get('pct_missing', 0):.2f}%)" + ) + if "n_events" in di: + lines.append(f"Events Observed: {di['n_events']}") + + # Training Details + if self.training_info: + lines.append("") + lines.append(f"{_BOLD}Training Details{_RESET}") + lines.append("-" * _WIDTH) + ti = self.training_info + lines.append(f"Algorithm: {ti.get('algorithm', 'N/A')}") + if "seed" in ti: + lines.append(f"Seed: {ti['seed']}") + lines.append(f"Iterations: {ti.get('n_iter', 'N/A')}") + if ti.get("converged") is not None: + lines.append(f"Converged: {ti['converged']}") + if "duration" in ti: + lines.append(f"Duration: {ti['duration']}") + + # Leaspy Version + if self.leaspy_version: + lines.append("") + lines.append(f"Leaspy Version: {self.leaspy_version}") + + lines.append(sep) + return "\n".join(lines) + + def help(self) -> None: + """Print available attributes and their meanings.""" + help_text = f""" +{_BOLD}Info Help{_RESET} +{'=' * 60} + +The Info object provides access to model configuration and training context. 
+ +{_BOLD}Usage:{_RESET} + model.info() # Print model information + i = model.info() # Store to access individual attributes + +{_BOLD}Available Attributes:{_RESET} + + {_BOLD}Model:{_RESET} + name Model name (str) + model_type Model class name (str) + dimension Number of features (int) + features Feature names (list[str]) + source_dimension Number of sources (int or None) + n_clusters Number of clusters (int or None) + obs_models Observation model names (list[str] or None) + n_total_params Total number of scalar parameters (int) + + {_BOLD}Training:{_RESET} + algorithm Algorithm name (str) + seed Random seed (int) + n_iter Number of iterations (int) + converged Whether training converged (bool or None) + duration Training duration (str) + + {_BOLD}Dataset:{_RESET} + n_subjects Number of subjects (int) + n_visits Total visits (int) + n_scores Number of scored features (int) + n_observations Total observations (int) + pct_missing Percent missing data (float) + n_missing Count of missing observations (int) + visits_per_subject Visit distribution stats (dict) + n_events Observed events, joint models only (int or None) + + {_BOLD}Other:{_RESET} + training_info Full training metadata (TrainingInfo) + dataset_info Full dataset statistics (DatasetInfo) + leaspy_version Leaspy version (str) + +{_BOLD}Examples:{_RESET} + >>> i = model.info() + >>> i.algorithm # 'mcmc_saem' + >>> i.n_subjects # 150 + >>> i.pct_missing # 2.5 +""" + print(help_text) + object.__setattr__(self, "_printed", True) + + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- + + +@dataclass(repr=False) +class Summary(AutoPrintMixin): + """Structured summary of a Leaspy model including parameter values. + + Returned by ``model.summary()``. Auto-prints when discarded; provides + programmatic access when stored in a variable. 
+ + Examples + -------- + >>> model.summary() # prints the formatted summary + >>> s = model.summary() # store for programmatic access + >>> s.algorithm # 'mcmc_saem' + >>> s.get_param('tau_std') # tensor([10.5]) + >>> s.help() # list available attributes + """ + + name: str + model_type: str + dimension: Optional[int] = None + features: Optional[list[str]] = None + source_dimension: Optional[int] = None + n_clusters: Optional[int] = None + obs_models: Optional[list[str]] = None + nll: Optional[float] = None + training_info: TrainingInfo = field(default_factory=dict) + dataset_info: DatasetInfo = field(default_factory=dict) + parameters: dict[str, dict[str, Any]] = field(default_factory=dict) + leaspy_version: Optional[str] = None + _param_axes: dict = field(default_factory=dict, repr=False) + _feature_names: Optional[list[str]] = field(default=None, repr=False) + _printed: bool = field(default=False, repr=False) + + # -- Convenience properties ---------------------------------------------- + + @property + def sources(self) -> Optional[list[str]]: + """Source names (e.g. ``['s0', 's1']``) or ``None``.""" + if self.source_dimension is None: + return None + return [f"s{i}" for i in range(self.source_dimension)] + + @property + def clusters(self) -> Optional[list[str]]: + """Cluster names (e.g. 
``['c0', 'c1']``) or ``None``.""" + if self.n_clusters is None: + return None + return [f"c{i}" for i in range(self.n_clusters)] + + @property + def algorithm(self) -> Optional[str]: + """Algorithm name used for training.""" + return self.training_info.get("algorithm") + + @property + def seed(self) -> Optional[int]: + """Random seed used for training.""" + return self.training_info.get("seed") + + @property + def n_iter(self) -> Optional[int]: + """Number of iterations.""" + return self.training_info.get("n_iter") + + @property + def converged(self) -> Optional[bool]: + """Whether training converged.""" + return self.training_info.get("converged") + + @property + def n_subjects(self) -> Optional[int]: + """Number of subjects in the training dataset.""" + return self.dataset_info.get("n_subjects") + + @property + def n_visits(self) -> Optional[int]: + """Total number of visits.""" + return self.dataset_info.get("n_visits") + + @property + def n_observations(self) -> Optional[int]: + """Total number of observations.""" + return self.dataset_info.get("n_observations") + + def get_param(self, name: str) -> Optional[Any]: + """Get a parameter value by name, searching across all categories. + + Parameters + ---------- + name : str + Parameter name (e.g. ``'betas_mean'``, ``'tau_std'``). + + Returns + ------- + value + The parameter value (typically a ``torch.Tensor``), or ``None``. 
+ """ + for category_params in self.parameters.values(): + if name in category_params: + return category_params[name] + return None + + # -- Display ------------------------------------------------------------- + + def __str__(self) -> str: + lines = [] + sep = "=" * _WIDTH + + # Header + lines.append(sep) + lines.append(f"{_BOLD}{'Model Summary':^{_WIDTH}}{_RESET}") + lines.append(sep) + lines.append(f"{_BOLD}Model Name:{_RESET} {self.name}") + lines.append(f"{_BOLD}Model Type:{_RESET} {self.model_type}") + + if self.features is not None: + feat_str = ", ".join(self.features) + lines.extend( + _wrap_text( + f"{_BOLD}Features ({self.dimension}){_RESET}", feat_str + ) + ) + + if self.source_dimension is not None: + sources = [f"Source {i} (s{i})" for i in range(self.source_dimension)] + lines.extend( + _wrap_text( + f"{_BOLD}Sources ({self.source_dimension}){_RESET}", + ", ".join(sources), + ) + ) + + if self.n_clusters is not None: + clusters = [f"Cluster {i} (c{i})" for i in range(self.n_clusters)] + lines.extend( + _wrap_text( + f"{_BOLD}Clusters ({self.n_clusters}){_RESET}", + ", ".join(clusters), + ) + ) + + if self.obs_models: + lines.extend( + _wrap_text( + f"{_BOLD}Observation Models{_RESET}", + ", ".join(self.obs_models), + ) + ) + + if self.nll is not None: + lines.append(f"{_BOLD}Neg. 
Log-Likelihood:{_RESET} {self.nll:.4f}") + + # Training Metadata + if self.training_info: + lines.append("") + lines.append(f"{_BOLD}Training Metadata{_RESET}") + lines.append("-" * _WIDTH) + ti = self.training_info + lines.append(f"Algorithm: {ti.get('algorithm', 'N/A')}") + if "seed" in ti: + lines.append(f"Seed: {ti['seed']}") + lines.append(f"Iterations: {ti.get('n_iter', 'N/A')}") + if ti.get("converged") is not None: + lines.append(f"Converged: {ti['converged']}") + + # Data Context + if self.dataset_info: + lines.append("") + lines.append(f"{_BOLD}Data Context{_RESET}") + lines.append("-" * _WIDTH) + di = self.dataset_info + lines.append(f"Subjects: {di.get('n_subjects', 'N/A')}") + lines.append(f"Visits: {di.get('n_visits', 'N/A')}") + lines.append(f"Total Observations: {di.get('n_observations', 'N/A')}") + + # Leaspy Version + if self.leaspy_version: + lines.append("") + lines.append(f"Leaspy Version: {self.leaspy_version}") + + lines.append(sep) + + # Parameters by category + for category, params in self.parameters.items(): + if params: + lines.append("") + lines.append(f"{_BOLD}{category}{_RESET}") + lines.append("-" * _WIDTH) + lines.extend(self._format_parameter_group(params)) + + lines.append(sep) + return "\n".join(lines) + + def help(self) -> None: + """Print available attributes and their meanings.""" + help_text = f""" +{_BOLD}Summary Help{_RESET} +{'=' * 60} + +The Summary object provides access to model metadata and parameters. 
+ +{_BOLD}Usage:{_RESET} + model.summary() # Print the formatted summary + s = model.summary() # Store to access individual attributes + +{_BOLD}Available Attributes:{_RESET} + + {_BOLD}Model Information:{_RESET} + name Model name (str) + model_type Model class name, e.g., 'LogisticModel' (str) + dimension Number of features (int) + features List of feature names (list[str]) + sources Source names, e.g., ['s0', 's1'] (list[str] or None) + clusters Cluster names, e.g., ['c0', 'c1'] (list[str] or None) + source_dimension Number of sources (int or None) + n_clusters Number of clusters (int or None) + obs_models Observation model names (list[str] or None) + + {_BOLD}Training:{_RESET} + algorithm Algorithm name, e.g., 'mcmc_saem' (str) + seed Random seed used (int) + n_iter Number of iterations (int) + converged Whether training converged (bool or None) + nll Negative log-likelihood (float or None) + + {_BOLD}Dataset:{_RESET} + n_subjects Number of subjects in training data (int) + n_visits Total number of visits (int) + n_observations Total number of observations (int) + + {_BOLD}Parameters:{_RESET} + parameters All parameters grouped by category (dict) + get_param(name) Get a specific parameter by name + + {_BOLD}Other:{_RESET} + training_info Full training metadata (TrainingInfo) + dataset_info Full dataset statistics (DatasetInfo) + leaspy_version Leaspy version used (str) + +{_BOLD}Examples:{_RESET} + >>> s = model.summary() + >>> s.algorithm # 'mcmc_saem' + >>> s.seed # 42 + >>> s.n_subjects # 150 + >>> s.get_param('tau_std') # tensor([10.5]) +""" + print(help_text) + object.__setattr__(self, "_printed", True) + + # -- Private formatting helpers ------------------------------------------ + + def _format_parameter_group(self, params: dict[str, Any]) -> list[str]: + """Format a group of parameters for display.""" + lines = [] + for name, value in params.items(): + if isinstance(value, torch.Tensor): + lines.append(self._format_tensor(name, value)) + else: + 
lines.append(f" {name:<18} {value}") + return lines + + def _format_tensor(self, name: str, value: torch.Tensor) -> str: + """Format a tensor parameter with axis labels.""" + param_axes = object.__getattribute__(self, "_param_axes") + feature_names = object.__getattribute__(self, "_feature_names") + axes = param_axes.get(name, ()) + + if value.ndim == 0: + return f" {name:<18} {value.item():.4f}" + + elif value.ndim == 1: + n = len(value) + if n > 10: + return f" {name:<18} Tensor of shape ({n},)" + + axis_name = axes[0] if len(axes) >= 1 else None + col_labels = get_axis_labels(axis_name, n, feature_names) + + if col_labels: + header = " " * 20 + " ".join(f"{lbl:>8}" for lbl in col_labels) + values = f" {name:<18}" + " ".join( + f"{v.item():>8.4f}" for v in value + ) + return header + "\n" + values + else: + val_str = "[" + ", ".join(f"{v.item():.4f}" for v in value) + "]" + return f" {name:<18} {val_str}" + + elif value.ndim == 2: + rows, cols = value.shape + if rows > 8 or cols > 8: + return f" {name:<18} Tensor of shape {tuple(value.shape)}" + + row_axis = axes[0] if len(axes) >= 1 else None + col_axis = axes[1] if len(axes) >= 2 else None + row_labels = get_axis_labels(row_axis, rows, feature_names) + col_labels = get_axis_labels(col_axis, cols, feature_names) + + result = [f" {name}:"] + if col_labels: + header = " " * 20 + " ".join(f"{lbl:>8}" for lbl in col_labels) + result.append(header) + + for i, row in enumerate(value): + row_lbl = row_labels[i] if row_labels else f"[{i}]" + row_str = ( + f" {row_lbl:<8}" + + " ".join(f"{v.item():>8.4f}" for v in row) + ) + result.append(row_str) + + return "\n".join(result) + + else: + return f" {name:<18} Tensor of shape {tuple(value.shape)}" diff --git a/tests/functional_tests/api/test_api_fit.py b/tests/functional_tests/api/test_api_fit.py index 054c8842c..ba0df9211 100644 --- a/tests/functional_tests/api/test_api_fit.py +++ b/tests/functional_tests/api/test_api_fit.py @@ -124,6 +124,14 @@ def 
check_model_consistency( None, ) + # Don't compare training metadata - these are informational and vary by run + # The core model parameters are still fully verified + # TODO (Sebastian): we need to create a separate file to test model.summary and + # model.info, as this file is centered on the model results. + for key in ["dataset_info", "training_info"]: + model_parameters_new.pop(key, None) + expected_model_parameters.pop(key, None) + self.assertDictAlmostEqual( model_parameters_new, expected_model_parameters, **allclose_kwds )