diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 000000000..40426ba5b --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,49 @@ +name: Docs + +on: + pull_request: + paths: + - "docs/**" + - "kale/**" + - "pyproject.toml" + - "uv.lock" + - ".readthedocs.yaml" + - ".github/workflows/docs.yaml" + push: + branches: + - main + paths: + - "docs/**" + - "kale/**" + - "pyproject.toml" + - "uv.lock" + - ".readthedocs.yaml" + - ".github/workflows/docs.yaml" + +jobs: + build: + name: Build documentation + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: "3.11" + + - name: Install uv + run: pip install uv + + - name: Install docs dependencies + run: uv sync --extra docs + + - name: Build Sphinx site + run: uv run sphinx-build -b html -W --keep-going docs/source docs/_build/html + + - name: Upload built site + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: kale-docs-html + path: docs/_build/html diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..ff73fbe05 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,20 @@ +# Read the Docs configuration for the Kale documentation site. 
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html +version: 2 + +build: + os: ubuntu-24.04 + tools: + python: "3.11" + jobs: + post_create_environment: + - pip install uv + post_install: + - UV_PROJECT_ENVIRONMENT=$READTHEDOCS_VIRTUALENV_PATH uv sync --extra docs + +sphinx: + configuration: docs/source/conf.py + fail_on_warning: false + +formats: + - htmlzip diff --git a/Makefile b/Makefile index ca7474d56..1c2eff570 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ clean clean-venv lock lock-upgrade check-uv \ jupyter jupyter-kfp watch-labextension \ docker-build docker-run \ + docs docs-serve docs-clean \ release verify check-versions UV := uv @@ -186,6 +187,21 @@ jupyter-kfp: ## Start JupyterLab with KFP dev environment (run kfp-serve first!) watch-labextension: ## Watch labextension for changes (run in separate terminal) cd labextension && $(JLPM) watch +##@ Documentation + +docs: ## Build the documentation site (HTML) + @printf "$(BLUE)Building documentation...\n$(NC)" + $(UV) sync --extra docs + $(UV) run sphinx-build -b html docs/source docs/_build/html + @printf "$(GREEN)Docs built: docs/_build/html/index.html\n$(NC)" + +docs-serve: docs ## Build and serve the docs locally on port 8000 + @printf "$(BLUE)Serving docs on http://localhost:8000\n$(NC)" + cd docs/_build/html && python3 -m http.server 8000 + +docs-clean: ## Remove built documentation + rm -rf docs/_build + ##@ Release check-versions: ## Verify backend and labextension versions match diff --git a/README.md b/README.md index 590270856..9c2c4fa2c 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,8 @@ Kale bridges this gap by providing a simple UI to define Kubeflow Pipelines workflows directly from your JupyterLab interface, without the need to change a single line of code. +📖 **Documentation:** + See the `Kale v2.0 Demo` video at the bottom of the `README` for more details. 
Read more about Kale and how it works in this Medium post: diff --git a/docs/imgs/kale_logo.svg b/docs/imgs/kale_logo.svg new file mode 100644 index 000000000..ee73ae7b5 --- /dev/null +++ b/docs/imgs/kale_logo.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/docs/imgs/quickstart-01-jupyterlab.png b/docs/imgs/quickstart-01-jupyterlab.png new file mode 100644 index 000000000..1f074806c Binary files /dev/null and b/docs/imgs/quickstart-01-jupyterlab.png differ diff --git a/docs/imgs/quickstart-02-kale-panel.png b/docs/imgs/quickstart-02-kale-panel.png new file mode 100644 index 000000000..278deedcd Binary files /dev/null and b/docs/imgs/quickstart-02-kale-panel.png differ diff --git a/docs/imgs/quickstart-03-cell-tags.png b/docs/imgs/quickstart-03-cell-tags.png new file mode 100644 index 000000000..5a8feee78 Binary files /dev/null and b/docs/imgs/quickstart-03-cell-tags.png differ diff --git a/docs/imgs/quickstart-04-panel-config.png b/docs/imgs/quickstart-04-panel-config.png new file mode 100644 index 000000000..784f00482 Binary files /dev/null and b/docs/imgs/quickstart-04-panel-config.png differ diff --git a/docs/imgs/quickstart-05-compile-run.png b/docs/imgs/quickstart-05-compile-run.png new file mode 100644 index 000000000..724733140 Binary files /dev/null and b/docs/imgs/quickstart-05-compile-run.png differ diff --git a/docs/imgs/quickstart-06-kfp-run.png b/docs/imgs/quickstart-06-kfp-run.png new file mode 100644 index 000000000..0daa61622 Binary files /dev/null and b/docs/imgs/quickstart-06-kfp-run.png differ diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css new file mode 100644 index 000000000..3ffebbbf9 --- /dev/null +++ b/docs/source/_static/custom.css @@ -0,0 +1,719 @@ +/* + * Kale documentation custom styles. + * + * Mirrors the Kubeflow SDK docs site (sdk.kubeflow.org) so that Kale's + * documentation stays visually consistent with the rest of the Kubeflow + * project. Layered on top of the Furo Sphinx theme. 
+ */ + +/* ============================================ + Color Palette + ============================================ */ + +:root { + /* Primary colors */ + --kf-blue: #4299e1; + --kf-blue-light: #63b3ed; + --kf-blue-dark: #3182ce; + + /* Text colors */ + --kf-heading: #2d3748; + --kf-text: #4a5568; + --kf-text-light: #718096; + + /* Backgrounds and borders */ + --kf-bg-subtle: #f7fafc; + --kf-border: #e2e8f0; + + /* Accent */ + --kf-accent: #81e6d9; + + /* API docs colors */ + --kf-api-name: #3182ce; + --kf-api-param: #4a5568; + --kf-api-text: #000; +} + +/* Dark mode overrides */ +@media (prefers-color-scheme: dark) { + :root:not([data-theme="light"]) { + --kf-blue: #63b3ed; + --kf-blue-light: #90cdf4; + --kf-blue-dark: #4299e1; + --kf-heading: #e2e8f0; + --kf-text: #cbd5e0; + --kf-text-light: #a0aec0; + --kf-bg-subtle: #2d3748; + --kf-border: #4a5568; + --kf-api-name: #63b3ed; + --kf-api-param: #a0aec0; + --kf-api-text: #e2e8f0; + } +} + +[data-theme="dark"] { + --kf-blue: #63b3ed; + --kf-blue-light: #90cdf4; + --kf-blue-dark: #4299e1; + --kf-heading: #e2e8f0; + --kf-text: #cbd5e0; + --kf-text-light: #a0aec0; + --kf-bg-subtle: #2d3748; + --kf-border: #4a5568; + --kf-api-name: #63b3ed; + --kf-api-param: #a0aec0; + --kf-api-text: #e2e8f0; +} + +/* ============================================ + TOP NAVIGATION BAR + ============================================ */ + +/* Override Furo's announcement styling */ +.announcement { + background: var(--kf-bg-subtle) !important; + border-bottom: 1px solid var(--kf-border) !important; + padding: 0 !important; + position: fixed !important; + top: 0 !important; + left: 0 !important; + right: 0 !important; + z-index: 100 !important; +} + +/* Add padding to page content to account for fixed navbar */ +.page { + padding-top: 52px !important; +} + +/* Offset sidebar sticky elements for fixed navbar */ +.sidebar-sticky { + top: 52px !important; +} + +.toc-sticky { + top: 52px !important; +} + +/* Mobile header offset */ 
+.mobile-header { + top: 52px !important; +} + +.announcement-content { + max-width: none !important; + padding: 0 !important; +} + +/* Top navigation container */ +.top-nav { + display: flex; + align-items: center; + justify-content: space-between; + max-width: 1400px; + margin: 0 auto; + padding: 0.6rem 1.5rem; +} + +/* Brand section (logo + name) */ +.top-nav-brand { + display: flex; + align-items: center; + gap: 0.5rem; + text-decoration: none !important; + color: var(--kf-heading) !important; + font-weight: 600; + font-size: 1.1rem; +} + +.top-nav-brand:hover { + color: var(--kf-blue) !important; +} + +.top-nav-logo { + height: 28px; + width: auto; +} + +/* Navigation links */ +.top-nav-links { + display: flex; + gap: 0.5rem; +} + +.top-nav-links a { + color: var(--kf-text) !important; + text-decoration: none !important; + padding: 0.4rem 0.75rem; + border-radius: 4px; + font-size: 0.9rem; + transition: all 0.15s ease; +} + +.top-nav-links a:hover { + color: var(--kf-blue-dark) !important; + background: rgba(66, 153, 225, 0.12); +} + +/* Hide on mobile - let sidebar handle navigation */ +@media (max-width: 768px) { + .top-nav-links { + display: none; + } + .top-nav { + justify-content: center; + } +} + +/* ============================================ + LOGO + ============================================ */ + +/* Hide logo/name from sidebar - it's in the top navbar now */ +.sidebar-brand { + display: none !important; +} + +.sidebar-logo { + max-height: 5rem; + width: auto; +} + +/* ============================================ + HEADINGS - Grey with blue accents + ============================================ */ + +h1, h2, h3, h4, h5, h6 { + font-weight: 600; + color: var(--kf-heading); +} + +h1 { + font-size: 2rem; + color: var(--kf-heading); + border-bottom: 2px solid var(--kf-blue-light); + padding-bottom: 0.5rem; +} + +h2 { + font-size: 1.4rem; + margin-top: 1rem; + color: var(--kf-heading); + border-bottom: 1px solid var(--kf-border); + padding-bottom: 
0.3rem; +} + +h3 { + font-size: 1.15rem; + color: var(--kf-text); +} + +/* ============================================ + LINKS + ============================================ */ + +a { + color: var(--kf-blue); +} + +a:visited { + color: var(--kf-blue); +} + +a:hover { + color: var(--kf-blue-dark); +} + +/* ============================================ + SIDEBAR - Calm, muted colors + ============================================ */ + +.sidebar-tree a, +.sidebar-tree .reference, +.sidebar-drawer a, +.toctree-l1 > a, +.toctree-l2 > a, +.toctree-l3 > a, +.caption-text { + color: var(--kf-text) !important; + transition: color 0.15s; +} + +.sidebar-tree a:hover, +.sidebar-tree .reference:hover { + color: var(--kf-blue) !important; +} + +.sidebar-tree .current > .reference, +.sidebar-tree .current-page > .reference { + color: var(--kf-blue-dark) !important; + font-weight: 600; +} + +.sidebar-tree .caption { + color: var(--kf-text-light) !important; + font-weight: 600; + text-transform: uppercase; + font-size: 0.75rem; + letter-spacing: 0.5px; +} + +/* ============================================ + API DOCS - Minimal two-color scheme + Color 1 (blue): API/method/class names + Color 2 (grey): Parameter names + Everything else: Uses --kf-api-text (black in light, light in dark) + ============================================ */ + +/* Main class/function container */ +dl.py.class, +dl.py.function { + margin: 2rem 0 !important; +} + +dl.py.method { + margin: 1.5rem 0 !important; +} + +/* Remove all hover effects */ +dl.py:hover, +dl.py.class:hover, +dl.py.function:hover, +dl.py.method:hover, +dl.py dt:hover, +dl.py dd:hover { + background: transparent !important; + box-shadow: none !important; +} + +/* Class/function signature header */ +dl.py.class > dt.sig, +dl.py.function > dt.sig { + font-family: ui-monospace, SFMono-Regular, Consolas, monospace; + font-size: 1rem; + color: var(--kf-api-text) !important; + padding: 0.5rem 0; + border-bottom: 1px solid var(--kf-border); + 
margin: 0; +} + +/* Class/method/function names - BLUE (the only colored element for names) */ +dl.py dt.sig .sig-name { + font-weight: 700; + color: var(--kf-api-name) !important; +} + +/* Method signature */ +dl.py.method > dt.sig, +dl.py.method dt.sig { + font-family: ui-monospace, SFMono-Regular, Consolas, monospace; + font-size: 0.9rem !important; + color: var(--kf-api-text) !important; + padding: 0.25rem 0 !important; + border-bottom: 1px solid var(--kf-border); + margin-top: 1rem; +} + +/* All signature text except names */ +dl.py dt.sig, +dl.py dt.sig .sig-paren, +dl.py dt.sig .sig-param, +dl.py dt.sig .n, +dl.py dt.sig .o, +dl.py dt.sig .default_value, +dl.py dt.sig .p, +.sig .sig-return, +.sig .sig-return-typehint { + color: var(--kf-api-text) !important; + font-style: normal !important; +} + +/* Description body */ +dl.py.class > dd, +dl.py.function > dd { + margin: 0 !important; + padding: 0.75rem 0; +} + +dl.py.method > dd { + margin: 0 !important; + padding: 0.5rem 0 !important; +} + +/* All docstring text */ +dl.py dd > p, +dl.py dd p { + color: var(--kf-api-text) !important; + font-size: 1rem !important; + line-height: 1.6; + margin: 0.5rem 0; +} + +/* Nested methods within a class */ +dl.py.class dd dl.py.method { + margin: 1rem 0 !important; +} + +/* Parameters/Returns/Raises section styling */ +dl.field-list { + margin: 0.75rem 0; +} + +/* Section headers (Parameters, Returns, Raises) - bold */ +dl.field-list > dt, +dl.field-list dt.field-odd, +dl.field-list dt.field-even { + font-weight: 700 !important; + color: var(--kf-api-text) !important; + font-size: 1rem !important; + text-transform: none !important; + margin-bottom: 0.5rem; + margin-top: 0.75rem; +} + +dl.field-list > dd, +dl.field-list dd.field-odd, +dl.field-list dd.field-even { + margin-left: 0 !important; + padding-left: 0 !important; +} + +/* Parameter list - use bullet points */ +dl.field-list dd ul { + list-style: disc !important; + padding-left: 1.5rem !important; + margin: 
0.25rem 0 !important; +} + +dl.field-list dd ul li { + padding: 0.2rem 0; + color: var(--kf-api-text) !important; +} + +dl.field-list dd ul li::marker { + color: var(--kf-api-text) !important; +} + +/* All text in parameter sections */ +dl.field-list dd, +dl.field-list dd p, +dl.field-list dd ul li, +dl.field-list dd ul li p { + color: var(--kf-api-text) !important; + font-size: 1rem !important; + margin: 0.1rem 0 !important; +} + +/* Parameter names - GREY */ +dl.field-list dd strong, +dl.field-list dd p strong { + color: var(--kf-api-param) !important; + font-weight: 600 !important; +} + +/* Type annotations */ +dl.field-list dd .sphinx_autodoc_typehints-type, +dl.field-list dd .sphinx_autodoc_typehints-type code, +dl.field-list dd code, +dl.field-list dd .pre { + color: var(--kf-api-text) !important; + background: transparent !important; + border: none !important; + padding: 0 !important; +} + +/* Autosummary - display as simple list, no boxes */ +.table-wrapper.autosummary { + border: none !important; + overflow: visible; + background: transparent !important; +} + +table.autosummary { + width: 100%; + border-collapse: collapse; + border: none !important; + margin: 0.25rem 0; + background: transparent !important; +} + +table.autosummary tbody { + background: transparent !important; +} + +table.autosummary tr { + border: none !important; + background: transparent !important; +} + +table.autosummary td { + padding: 0.2rem 0; + padding-left: 1rem; + border: none !important; + vertical-align: top; + background: transparent !important; +} + +/* Bullet point before each item */ +table.autosummary td:first-child::before { + content: "•"; + color: var(--kf-blue); + margin-right: 0.5rem; +} + +table.autosummary td:first-child { + font-family: ui-monospace, SFMono-Regular, Consolas, monospace; + font-weight: 600; + padding-right: 1rem; + width: auto; +} + +/* Remove ALL backgrounds and borders from code elements */ +table.autosummary code, +table.autosummary .pre, 
+table.autosummary td code, +table.autosummary td .pre, +table.autosummary code.xref, +table.autosummary .docutils code, +.autosummary code.xref, +.autosummary .docutils code, +.autosummary code.docutils, +.autosummary .literal, +.autosummary .notranslate { + background: none !important; + background-color: transparent !important; + border: none !important; + padding: 0 !important; + box-shadow: none !important; + color: var(--kf-blue-dark) !important; + font-size: 0.9rem; +} + +table.autosummary td:last-child { + color: var(--kf-text); + font-size: 0.9rem; +} + +/* Property/attribute styling */ +dl.py.property > dt.sig, +dl.py.attribute > dt.sig { + font-family: ui-monospace, SFMono-Regular, Consolas, monospace; + font-size: 0.88rem; + padding: 0.5rem 0; +} + +/* "Bases:" inheritance line styling */ +.class dd > p:first-child { + font-size: 0.85rem; + color: var(--kf-text-light); + margin-bottom: 1rem; +} + +/* View source link */ +.viewcode-link { + float: right; + font-size: 0.8rem; + color: var(--kf-text-light) !important; + font-weight: normal; +} + +.viewcode-link:hover { + color: var(--kf-blue) !important; +} + +/* ============================================ + CODE BLOCKS - Dark theme + ============================================ */ + +pre { + border-radius: 8px; + border: none; + background: #1e1e1e !important; + padding: 1rem !important; +} + +pre code { + color: #d4d4d4 !important; + background: transparent !important; +} + +/* Syntax highlighting for dark theme */ +.highlight { + background: #1e1e1e !important; + border-radius: 8px; +} + +/* Default text color for code */ +.highlight pre { + color: #d4d4d4 !important; +} + +.highlight span { + color: #d4d4d4; +} + +/* Bash/shell specific */ +.highlight-bash .highlight span, +.highlight-shell .highlight span, +.highlight-console .highlight span { + color: #d4d4d4 !important; +} + +.highlight .k, .highlight .kn, .highlight .kd { color: #569cd6 !important; } /* keywords */ +.highlight .n, .highlight .nn { 
color: #d4d4d4 !important; } /* names */ +.highlight .s, .highlight .s1, .highlight .s2 { color: #ce9178 !important; } /* strings */ +.highlight .c, .highlight .c1, .highlight .cm { color: #6a9955 !important; } /* comments */ +.highlight .o { color: #d4d4d4 !important; } /* operators */ +.highlight .p { color: #d4d4d4 !important; } /* punctuation */ +.highlight .mi, .highlight .mf { color: #b5cea8 !important; } /* numbers */ +.highlight .nb, .highlight .bp { color: #4ec9b0 !important; } /* builtins */ +.highlight .nf, .highlight .fm { color: #dcdcaa !important; } /* functions */ +.highlight .nc { color: #4ec9b0 !important; } /* classes */ +.highlight .ow { color: #569cd6 !important; } /* operator word */ +.highlight .gp { color: #6a9955 !important; } /* prompt */ +.highlight .w { color: #d4d4d4 !important; } /* whitespace */ + +/* Inline code - keep light */ +code { + color: var(--kf-heading); + background: var(--kf-bg-subtle); + padding: 0.15em 0.4em; + border-radius: 3px; + font-size: 0.9em; +} + +/* ============================================ + CARDS + ============================================ */ + +.sd-card { + border-radius: 8px; + border: 1px solid var(--kf-border); + box-shadow: 0 1px 4px rgba(0, 0, 0, 0.04); + transition: all 0.2s ease; +} + +.sd-card:hover { + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08); + border-color: var(--kf-blue-light); +} + +.sd-card-title { + color: var(--kf-heading); + font-weight: 600; +} + +/* ============================================ + TABLES + ============================================ */ + +table { + border-radius: 6px; + overflow: hidden; + border: 1px solid var(--kf-border); +} + +th { + background: var(--kf-bg-subtle); + color: var(--kf-text); + font-weight: 600; +} + + +/* ============================================ + ADMONITIONS + ============================================ */ + +.admonition { + border-radius: 6px; + border: none; + border-left: 3px solid var(--kf-blue); +} + +.admonition.tip { + background: 
rgba(129, 230, 217, 0.15); + border-left-color: var(--kf-accent); +} + +.admonition.note { + background: rgba(66, 153, 225, 0.1); +} + +.admonition.warning { + background: rgba(251, 211, 141, 0.2); + border-left-color: #f6ad55; +} + +/* ============================================ + MISC + ============================================ */ + +hr { + border: none; + height: 1px; + background: var(--kf-border); + margin: 2rem 0; +} + +p { + color: var(--kf-text); + line-height: 1.7; +} + +li::marker { + color: var(--kf-blue); +} + +/* ============================================ + LANDING PAGE ENHANCEMENTS + ============================================ */ + +/* Hero tagline styling */ +article > section > p:first-of-type { + font-size: 1.2rem; + color: var(--kf-text); +} + +/* Section descriptions (italic text after h2) */ +h2 + p > em:only-child, +section > p > em:only-child { + display: block; + font-size: 1rem; + color: var(--kf-text-light); + font-style: normal; + margin-bottom: 1.5rem; +} + +/* Section dividers */ +hr { + margin: 1.5rem 0 !important; + opacity: 0.4; +} + +/* Borderless cards for feature sections */ +.sd-card.sd-border-0 { + border: none !important; + box-shadow: none !important; + background: transparent !important; +} + +.sd-card.sd-border-0:hover { + box-shadow: none !important; + border: none !important; +} + +/* Feature card titles (bold text in cards) */ +.sd-card-body strong { + color: var(--kf-heading); + font-size: 1.1rem; +} + +/* Code blocks in cards - smaller */ +.sd-card .highlight { + font-size: 0.8rem; +} + +/* Quick start section code blocks */ +.highlight-bash + .highlight-python { + margin-top: -0.5rem; +} diff --git a/docs/source/_static/kale-icon.svg b/docs/source/_static/kale-icon.svg new file mode 100644 index 000000000..ee73ae7b5 --- /dev/null +++ b/docs/source/_static/kale-icon.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/docs/source/_static/kale-symbol.svg b/docs/source/_static/kale-symbol.svg new file mode 
100644 index 000000000..9675c2603 --- /dev/null +++ b/docs/source/_static/kale-symbol.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/docs/source/api/cli.md b/docs/source/api/cli.md new file mode 100644 index 000000000..01dca0ca3 --- /dev/null +++ b/docs/source/api/cli.md @@ -0,0 +1,82 @@ +# CLI Reference + +Kale ships a single `kale` command-line entry point, declared as +`[project.scripts]` in `pyproject.toml`. It compiles (and optionally runs) a +notebook against Kubeflow Pipelines. + +## `kale` + +The primary CLI. Parses a notebook, builds the pipeline DAG, compiles it to +KFP v2 DSL, and optionally uploads and runs it. + +```bash +kale --nb path/to/notebook.ipynb [options] +``` + +### General options + +| Flag | Type | Description | +| --------------------- | ------ | -------------------------------------------------------------------- | +| `--nb` | str | Path to the source notebook. **Required.** | +| `--upload_pipeline` | flag | Upload the compiled pipeline to KFP. | +| `--run_pipeline` | flag | Upload and then create a KFP run. | +| `--debug` | flag | Enable verbose logging. | +| `--dev` | flag | Bake a local devpi index URL into generated components. | +| `--pip-index-urls` | str | Comma-separated PEP 503 simple indexes baked into components. | +| `--devpi-simple-url` | str | Devpi simple URL (used when `--dev` is set). | + +### Notebook metadata overrides + +All the flags in this group override the corresponding fields in the +notebook's Kale metadata. If both are set, the CLI value wins. + +| Flag | Description | +| ------------------------- | ------------------------------------------------------------ | +| `--experiment_name` | KFP experiment name. Default: `Kale-Pipeline-Experiment`. | +| `--pipeline_name` | Name of the deployed pipeline. Default: `kale-pipeline`. | +| `--pipeline_description` | Description shown in the KFP UI. | +| `--docker_image` | Default base image for every step. | +| `--kfp_host` | KFP API endpoint, as `<host>:<port>` or a full URL. 
| +| `--storage-class-name` | Storage class for pipeline-created volumes. | +| `--volume-access-mode` | Access mode for pipeline-created volumes. | + +### Examples + +Compile only, leave the generated script in `.kale/`: + +```bash +kale --nb examples/base/candies_sharing.ipynb +``` + +Compile, upload, and run on a local KFP port-forward: + +```bash +kale --nb examples/base/candies_sharing.ipynb \ + --kfp_host http://127.0.0.1:8080 \ + --run_pipeline +``` + +Override pipeline naming: + +```bash +kale --nb notebooks/my_pipeline.ipynb \ + --pipeline_name "weekly-churn" \ + --experiment_name "production" \ + --run_pipeline --kfp_host http://127.0.0.1:8080 +``` + +## Environment variables + +A few environment variables affect `kale`: + +| Variable | Effect | +| ------------------------- | -------------------------------------------------------------------------------------- | +| `KF_PIPELINES_ENDPOINT` | Default KFP API endpoint when `--kfp_host` is not provided. | +| `KF_PIPELINES_UI_ENDPOINT` | KFP UI URL used when rendering run links. | +| `KALE_PIP_INDEX_URLS` | Comma-separated list of pip indexes baked into generated components. | +| `KALE_PIP_TRUSTED_HOSTS` | Trusted hosts for HTTP pip indexes (required when using HTTP URLs). | +| `KALE_DEV_MODE` | Equivalent to passing `--dev`. | +| `KALE_DEVPI_SIMPLE_URL` | Equivalent to `--devpi-simple-url` when `--dev` is set. | + +See [Running Pipelines](../user-guide/running-pipelines.md) for concrete scenarios where +these are useful. diff --git a/docs/source/api/compiler.rst b/docs/source/api/compiler.rst new file mode 100644 index 000000000..2720f27b7 --- /dev/null +++ b/docs/source/api/compiler.rst @@ -0,0 +1,7 @@ +Compiler +======== + +.. 
automodule:: kale.compiler + :members: + :exclude-members: Environment, FileSystemLoader, PackageLoader + :show-inheritance: diff --git a/docs/source/api/marshal.rst b/docs/source/api/marshal.rst new file mode 100644 index 000000000..12f60687e --- /dev/null +++ b/docs/source/api/marshal.rst @@ -0,0 +1,27 @@ +Marshal +======= + +Kale's marshalling system is a small, extensible dispatcher that serializes +data flowing between pipeline steps. See :doc:`../concepts/data-passing` +for the conceptual overview; this page is the API reference. + +Dispatcher and base class +------------------------- + +.. automodule:: kale.marshal.backend + :members: + :show-inheritance: + +Built-in backends +----------------- + +.. automodule:: kale.marshal.backends + :members: + :show-inheritance: + +Decorator +--------- + +.. automodule:: kale.marshal.decorator + :members: + :show-inheritance: diff --git a/docs/source/api/pipeline.rst b/docs/source/api/pipeline.rst new file mode 100644 index 000000000..8b811b364 --- /dev/null +++ b/docs/source/api/pipeline.rst @@ -0,0 +1,6 @@ +Pipeline +======== + +.. automodule:: kale.pipeline + :members: + :show-inheritance: diff --git a/docs/source/api/step.rst b/docs/source/api/step.rst new file mode 100644 index 000000000..7c8a8750b --- /dev/null +++ b/docs/source/api/step.rst @@ -0,0 +1,6 @@ +Step +==== + +.. automodule:: kale.step + :members: + :show-inheritance: diff --git a/docs/source/architecture/index.md b/docs/source/architecture/index.md new file mode 100644 index 000000000..59dc4bd63 --- /dev/null +++ b/docs/source/architecture/index.md @@ -0,0 +1,263 @@ +# Architecture + +This page is a map of the Kale codebase — what the major components are, +how they fit together, and what each directory contains. It is aimed at +contributors who want to navigate the repo with confidence. + +## The components + +Kale has no server, no operator, and nothing Kale-specific to deploy on +Kubernetes. Kale has three main components: + +1. 
**The `kale` Python library** — the `kale/` package in this repo. All + the "work" Kale does (parsing notebooks, analyzing dependencies, + generating KFP v2 DSL, calling the KFP SDK) happens in regular Python + function calls inside this library. +2. **The `kale` CLI** — a thin wrapper in `kale/cli.py` around the library. + `kale --nb notebook.ipynb` is basically `import kale; kale.compile(...)` + with argument parsing on top. +3. **The Kale JupyterLab extension** — the `labextension/` package, a + JupyterLab 4 extension written in TypeScript and React. It runs in the + user's browser and provides the Kale side panel and the per-cell + metadata editors. + +The interesting question is how the browser-side extension ends up invoking +functions from a Python library, since one is JavaScript and the other is +Python. The short answer: the extension starts its own Python kernel inside +the Jupyter server and calls Kale functions in it over JSON-RPC. The rest +of this section unpacks what that actually means. + +## How the extension talks to the library + +### Where the Jupyter server actually runs + +When you use JupyterLab there are always _two_ things running, and it's +worth being explicit about which is where: + +- A **web frontend** — HTML/CSS/JavaScript served to your browser. Every + JupyterLab extension (including Kale) executes here, in the browser. +- A **Jupyter server** — a Python process that serves notebooks from disk, + spawns and manages kernels, and exposes all of that over HTTP/WebSocket. + +Where those two live depends on how you launched JupyterLab. If you run +`jupyter lab` on your laptop, both are local and your browser talks to +`localhost`. But if you're using JupyterLab from a Kubeflow Notebook, your +browser is still on your laptop while the Jupyter server is running in a +**Pod inside your Kubernetes cluster** — you only reach it through your +browser the same way you'd reach any web application. 
The Jupyter server +always lives next to the files and the kernels it manages, **not** next to +your browser. + +This matters for Kale because everything the `kale` library does — reading +notebooks off disk, running static analysis, submitting pipelines to KFP — +has to happen where the Jupyter server is, not where your browser is. + +### A Kale-managed kernel + +When the Kale side panel is activated in JupyterLab, the extension asks +the Jupyter server to start a new Python kernel. This kernel is **not** +the one attached to any notebook you've opened; it's owned by the extension +itself and invisible in the JupyterLab kernels sidebar. Think of it as a +hidden Python REPL that Kale uses as its workhorse, with the `kale` library +already imported. + +Because it's a normal Jupyter kernel, it runs exactly where all kernels +run: inside the Jupyter server. When Jupyter is local, the kernel is local. +When Jupyter is in a Kubeflow Notebook Pod, the kernel is in that same +Pod, alongside the user's notebook kernels. + +### JSON-RPC over the kernel channel + +With that kernel running, the extension needs a way to call specific +functions in it. It uses JSON-RPC: the extension sends a message naming a +function (e.g. `nb.compile_notebook`) and its arguments, the kernel runs +the corresponding Python function, and the result comes back the same way. +All of the message passing happens over standard Jupyter kernel comms — no +extra ports, no extra services, no HTTP server that Kale has to host. + +Inside the kernel, the call routes through `kale/rpc/` (the dispatcher and +endpoint modules) into the rest of the Kale library: notebook parsing, +dependency analysis, DSL compilation, and finally the KFP SDK calls that +upload and run the pipeline. + +A nice consequence of this design is that when the Jupyter server lives +inside a Kubeflow Notebook Pod, the Kale kernel is _already inside the cluster_. 
+It can reach the in-cluster Kubeflow Pipelines API directly, +using the Pod's service account — no port-forwards, no VPN, no extra +credentials. Kale "just works" in a Kubeflow installation because the +Python that calls the KFP SDK is already a cluster citizen. + +### Diagram + +``` + Browser (your laptop) + ┌──────────────────────────────┐ + │ JupyterLab UI │ + │ Kale labextension (TS/React)│ + └──────────────┬───────────────┘ + │ WebSocket / HTTP + │ + ─ ─ ─ ─ ─ ─ ─ ─│─ ─ ─ ─ ─ ─ ─ ─ ─ (local ⇄ network boundary) + │ + Jupyter server (laptop OR Pod in cluster) + ┌──────────────▼───────────────┐ + │ Jupyter server │ + │ │ + │ ┌────────────────────────┐ │ + │ │ Kale-managed kernel │ │ + │ │ └─ import kale │ │ + │ │ └─ kale.rpc/* │ │ + │ │ └─ library │ │──┐ + │ └────────────────────────┘ │ │ KFP SDK + │ │ │ + │ (user notebook kernels) │ │ + └──────────────────────────────┘ │ + ▼ + Kubeflow Pipelines API + (in the same cluster when + Jupyter runs in a KF Notebook) +``` + +## Python library layout + +The Python package lives at `kale/`. The interesting modules are: + +### Core pipeline model + +- `pipeline.py` — defines {py:class}`kale.pipeline.Pipeline` (a + `networkx.DiGraph` of steps) along with the configuration classes + {py:class}`~kale.pipeline.PipelineConfig`, `VolumeConfig`, and + `KatibConfig`. +- `step.py` — defines {py:class}`kale.step.Step` and + {py:class}`~kale.step.StepConfig`, plus the `PipelineParam` and + `Artifact` named tuples used across the backend. + +### Notebook processing + +- `processors/nbprocessor.py` — {py:class}`kale.processors.NotebookProcessor` + reads an `.ipynb`, parses tags, resolves data dependencies, and returns + a ready-to-compile `Pipeline`. + +### Compilation + +- `compiler.py` — {py:class}`kale.compiler.Compiler` renders the + templates in `kale/templates/` to produce KFP v2 DSL. +- `templates/nb_function_template.jinja2` — per-step component template. +- `templates/pipeline_template.jinja2` — pipeline wrapper template. 
+ +### Marshalling + +- `marshal/backend.py` — {py:class}`~kale.marshal.backend.Dispatcher` and + {py:class}`~kale.marshal.backend.MarshalBackend` base class. +- `marshal/backends.py` — nine concrete backends for numpy, pandas, + sklearn, XGBoost, PyTorch, Keras, TensorFlow, functions, and a `dill` + fallback. +- `marshal/decorator.py` — the `@marshal` decorator used by the marshal + entrypoint. + +### Static analysis + +- `common/astutils.py` — AST helpers for detecting marshal candidates, + parsing metrics print statements, and resolving imports. +- `common/flakeutils.py` — PyFlakes integration. + +### KFP and Kubernetes integration + +- `common/kfputils.py` — compile DSL, upload pipelines, create runs via + the KFP SDK. +- `common/k8sutils.py`, `common/podutils.py` — K8s API helpers used by + volume management and the in-pod runtime. +- `common/katibutils.py` — Katib hyperparameter tuning helpers (legacy, + under re-evaluation for v2). + +### Configuration framework + +- `config/config.py` — a small Pydantic-inspired validation framework used + by all the `*Config` classes in `pipeline.py` and `step.py`. +- `config/validators.py` — validators for Kubernetes names, image refs, + and other common field types. + +### CLI + +- `cli.py` — implements the `kale` entry point declared in `pyproject.toml`, + which compiles and optionally runs notebooks against KFP. + +### RPC layer + +These modules are what the Kale-managed kernel executes when the +labextension calls into it over JSON-RPC (see [](#how-the-extension-talks-to-the-library)). + +- `rpc/nb.py` — notebook compilation RPC endpoints. +- `rpc/kfp.py` — KFP operations (upload, run, list experiments). +- `rpc/katib.py` — Katib operations. +- `rpc/run.py` — the JSON-RPC dispatcher. +- `rpc/errors.py` — error types (`RPCNotFoundError`, etc.). + +## JupyterLab extension layout + +The JupyterLab extension lives at `labextension/`. 
It's a standard +JupyterLab 4 extension built with TypeScript and React, and it runs in +the user's browser. + +Key source files: + +- `src/index.ts` — extension activation. +- `src/widget.tsx` — the main Kale left sidebar widget. +- `src/widgets/LeftPanel.tsx` — top-level panel with the master toggle, + pipeline settings form, and Deploy button. +- `src/widgets/cell-metadata/CellMetadataEditor.tsx` — per-cell Kale row + (cell type dropdown, step name, dependency picker). +- `src/widgets/deploys-progress/` — progress notifications for running + deploys. +- `src/lib/RPCUtils.tsx` — JSON-RPC client that talks to the + Kale-managed Python kernel. +- `src/lib/CellUtils.ts`, `TagsUtils.ts`, `NotebookUtils.tsx` — helpers for + manipulating notebook metadata. +- `schema/kale-settings.json` — JupyterLab settings schema for any + user-facing preferences. + +The extension manages the lifecycle of its dedicated Python kernel (the +"Kale-managed kernel" from the previous section) and dispatches every UI +action through `RPCUtils` as a JSON-RPC call into that kernel. There is no +other server or network hop in between — the Jupyter kernel *is* the +execution environment for the Kale library. 
+ +## Data flow end-to-end + +Putting all the pieces together, here's what happens when you click +**Compile and Run** in the Kale side panel on a notebook called +`my_notebook.ipynb`: + +``` +JupyterLab UI (React, in the browser) + │ RPCUtils.post("nb.compile_notebook", {path}) + ▼ +Kale-managed kernel (Python, in the Jupyter server) + │ kale/rpc/nb.compile_notebook() + ▼ +NotebookProcessor ── parse tags, build Pipeline DAG + │ + ▼ +Compiler ── render Jinja templates, autopep8 + │ + ▼ +.kale/my_notebook.kale.py ── plain KFP v2 DSL + │ + ▼ +kfp.compiler.Compiler ── compile DSL → YAML IR + │ + ▼ +KFP REST API ── upload pipeline, create run + │ + ▼ +Kubeflow Pipelines ── schedule step pods + │ + ▼ +Step pod ── load inputs, run user code, save outputs +``` + +The generated DSL is a normal KFP v2 pipeline — no runtime dependency on +Kale beyond the marshalling helper that lives inside each component. This +means a Kale-produced pipeline keeps running even if Kale is uninstalled, +and it can be inspected, edited, or re-uploaded by anyone with the KFP +SDK. diff --git a/docs/source/concepts/cell-types.md b/docs/source/concepts/cell-types.md new file mode 100644 index 000000000..6d6671577 --- /dev/null +++ b/docs/source/concepts/cell-types.md @@ -0,0 +1,170 @@ +# Cell Types & Annotations + +Kale reads Jupyter cell **tags** — strings stored under `metadata.tags` in the +`.ipynb` — to decide what role each cell plays in the generated pipeline. +This page documents every tag Kale understands, with examples. + +You can set these tags visually through the Kale JupyterLab side panel, or by +editing the notebook JSON directly. + +## The full tag vocabulary + +| Tag | Example | Effect | +| --------------------------- | ------------------------------- | ---------------------------------------------------------------------- | +| `imports` | - | Cell is prepended to every pipeline step. 
**All `import` statements must live in an `imports` cell.** |
+| `functions`                 | -                               | Cell is prepended to every step after `imports`. Put function and class definitions here. |
+| `pipeline-parameters`       | -                               | Variables defined here become KFP pipeline parameters. |
+| `pipeline-metrics`          | -                               | `print()` statements in the cell are converted to KFP pipeline metrics. |
+| `step:<name>` | `step:train_model` | Declares (or appends to) a pipeline step named `<name>`. |
+| `prev:<step_name>` | `prev:load_data` | Adds a dependency from the current step to `<step_name>`. |
+| `skip` | - | Cell is excluded from the pipeline entirely. |
+| `annotation:<key>:<value>` | `annotation:team:ml` | Adds a Kubernetes annotation to the step's pod. |
+| `label:<key>:<value>` | `label:env:prod` | Adds a Kubernetes label to the step's pod. |
+| `limit:<resource>:<amount>` | `limit:nvidia.com/gpu:1` | Adds a Kubernetes resource limit to the step's pod. |
+| `image:<image>` | `image:pytorch/pytorch:2.0` | Overrides the base image for this step only. |
+| `cache:enabled` / `cache:disabled` | `cache:disabled` | Opts the step into or out of KFP's built-in caching. |
+
+
+## Per-cell-type details
+
+### `imports`
+
+The `imports` cell is where **every module import in your notebook must
+live**. Kale prepends this cell's source to every pipeline step's generated
+component, so any step can assume those imports are available.
+
+```python
+# tag: imports
+import numpy as np
+import pandas as pd
+from sklearn.ensemble import RandomForestClassifier
+```
+
+```{warning}
+If you import a library in a `step` or `functions` cell, Kale will **not**
+add it to the step's `packages_to_install` list, and the step will fail at
+runtime with `ModuleNotFoundError` unless the base image happens to include
+the package.
+```
+
+### `functions`
+
+Put function and class definitions here. Like `imports`, this cell is
+prepended to every step.
+
+```python
+# tag: functions
+def clean(df: pd.DataFrame) -> pd.DataFrame:
+    return df.dropna()
+
+class FeaturePipeline:
+    def __init__(self, model):
+        self.model = model
+```
+
+Keep these definitions **pure**: no top-level executable statements, no
+prints, no imports, no global state mutation.
+
+### `pipeline-parameters`
+
+Variables defined in a `pipeline-parameters` cell become top-level KFP
+pipeline parameters. They become inputs to the `@kfp_dsl.pipeline` function
+and can be overridden at submission time.
+
+```python
+# tag: pipeline-parameters
+learning_rate = 0.01
+batch_size = 128
+num_epochs = 10
+```
+
+Supported parameter types are `int`, `float`, `str`, and `bool`. If you
+declare the same parameter in multiple `pipeline-parameters` cells, the last
+value wins.
+
+### `pipeline-metrics`
+
+Any `print(...)` statements in a `pipeline-metrics` cell are parsed out by
+Kale's AST helper
+({py:func}`kale.common.astutils.parse_metrics_print_statements`) and emitted
+as KFP pipeline metrics, making them visible in the KFP UI's run metrics
+tab.
+
+```python
+# tag: pipeline-metrics
+print("accuracy:", accuracy)
+print("f1:", f1_score)
+```
+
+### `step:<name>`
+
+The workhorse tag. Any cell tagged `step:data_processing` contributes code
+to a pipeline step named `data_processing`. Multiple cells can share the
+same step name — they will be concatenated in notebook order.
+
+```python
+# tag: step:load_data
+df = pd.read_csv("data.csv")
+df = clean(df)
+```
+
+Dependencies are declared with `prev:<step_name>`:
+
+```python
+# tags: step:train, prev:load_data
+model = RandomForestClassifier()
+model.fit(df.drop("y", axis=1), df["y"])
+```
+
+You can add as many `prev:` tags as you want — one per dependency.
+
+### Per-step configuration
+
+A step cell can carry additional tags to customize its pod spec:
+
+```python
+# tags: step:train_gpu, prev:prepare_data,
+#       image:pytorch/pytorch:2.0-cuda12,
+#       limit:nvidia.com/gpu:1,
+#       annotation:team:ml,
+#       label:env:prod,
+#       cache:disabled
+```
+
+- **`image:<image>`** — use a custom base image for just this step.
+- **`limit:<resource>:<amount>`** — request GPU, memory, or any other
+  resource (e.g. `limit:memory:8Gi`).
+- **`annotation:<key>:<value>`** / **`label:<key>:<value>`** — add Kubernetes metadata
+  to the step's pod. Useful for cost allocation, scheduling hints, or
+  integration with observability tooling.
+- **`cache:disabled`** — opt the step out of KFP's caching. Use `cache:enabled`
+  to force caching when it's been disabled globally.
+
+### `skip`
+
+Cells tagged `skip` are dropped from the pipeline. Use them for exploratory
+code or debugging that you want to keep in the notebook but not run on the
+cluster.
+
+```python
+# tag: skip
+df.describe()
+df.plot.hist()
+```
+
+## Best practices
+
+- **Keep `imports` at the top** of the notebook. Don't spread imports across
+  cells — Kale won't pick them up.
+- **Never mutate global state from inside a step**. If you need to configure
+  a library (e.g. `warnings.simplefilter`), do it once in an `imports` or
+  `functions` cell.
+- **Use `pipeline-parameters` for values you want to tweak between runs**.
+  Resist hard-coding hyperparameters inside step cells.
+- **Use `skip` liberally** during development for cells that don't belong in
+  the pipeline, like `df.head()` or plotting code.
+- **Name your steps explicitly** — `step:load_data`, `step:train`,
+  `step:evaluate` — rather than leaving them auto-named.
+
+See [Troubleshooting](../user-guide/troubleshooting.md) for the common failure modes these
+practices prevent.
diff --git a/docs/source/concepts/compilation.md b/docs/source/concepts/compilation.md
new file mode 100644
index 000000000..1f28293b3
--- /dev/null
+++ b/docs/source/concepts/compilation.md
@@ -0,0 +1,124 @@
+# Pipeline Compilation
+
+This page describes what actually happens when you run `kale --nb
+my_notebook.ipynb`, step by step, so you can read the generated code,
+debug problems, and extend Kale.
+
+## Stage-by-stage
+
+### 1. NotebookProcessor
+
+{py:class}`kale.processors.NotebookProcessor` is the entry point. Given a
+notebook path and (optionally) a dictionary of metadata overrides, it:
+
+1. Reads the `.ipynb` via `nbformat`.
+2. Extracts the Kale-specific notebook metadata (pipeline name, image,
+   experiment, volumes, ...).
+3. Walks every cell and classifies it by its Kale tag.
+4. Builds a {py:class}`kale.pipeline.Pipeline` — internally a
+   `networkx.DiGraph` — where each node is a {py:class}`kale.step.Step`
+   carrying its source code, dependencies, inputs and outputs.
+5. Runs
+   {py:func}`kale.common.astutils.get_marshal_candidates`
+   on every step to resolve data dependencies between steps.
+
+The processor returns the `Pipeline` object along with the combined string
+of `imports` + `functions` code, ready to be pasted into every generated
+component.
+
+### 2. Compiler.generate_lightweight_component
+
+For each `Step` in the pipeline, the
+{py:class}`kale.compiler.Compiler` renders
+`kale/templates/nb_function_template.jinja2` to produce a
+`@kfp_dsl.component` function. The template:
+
+- Starts with the shared `imports_and_functions` block so the step can use
+  the same modules and helpers as the notebook.
+- Emits `marshal.load("<name>")` calls for each input the step consumes.
+- Pastes the original cell source code verbatim — Kale does not rewrite
+  your code, it just decorates it.
+- Emits `marshal.save("<name>", <obj>)` calls for each output the step
+  produces.
+- Adds a `packages_to_install=[...]` list built from the `imports` cell via
+  {py:func}`kale.compiler.Compiler._get_package_list_from_imports`, which
+  walks the imports AST and resolves module names to pip package names.
+
+### 3. Compiler.generate_pipeline
+
+Next, the compiler renders
+`kale/templates/pipeline_template.jinja2` to generate the top-level
+`@kfp_dsl.pipeline` function. This template:
+
+- Declares the pipeline parameters from the `pipeline-parameters` cell.
+- Instantiates each component in dependency order.
+- Threads parameters into the tasks that need them.
+- Wires task dependencies using KFP's `.after(...)` and input/output
+  references so KFP builds the same DAG Kale has in memory.
+
+### 4. Compiler.generate_dsl
+
+The compiler concatenates:
+
+- A header with imports for the KFP SDK and Kale marshal helpers.
+- All component functions from stage 2.
+- The pipeline function from stage 3.
+- A `__main__` block that invokes `kfp.compiler.Compiler().compile()` so the
+  generated file can be executed directly.
+
+The final text is formatted with `autopep8` and written to
+`.kale/<notebook_name>.kale.py`. **This file is plain KFP v2 DSL** — no
+Kale-specific runtime, just standard Kubeflow Pipelines code.
+
+### 5. Submission (optional)
+
+If you pass `--run_pipeline` (or the equivalent extension setting),
+{py:func}`kale.common.kfputils.compile_pipeline` invokes the KFP SDK
+compiler to turn the DSL into YAML IR, then uploads the pipeline and
+starts a run via the KFP REST API.
+
+## The `.kale/` directory
+
+After a compile, you'll find the generated files in a `.kale/` directory
+created in the current working directory (not necessarily the same directory
+as the notebook):
+
+```
+.kale/
+├── my_notebook.kale.py    # generated KFP v2 DSL
+└── my_notebook.yaml       # KFP YAML IR (if submission was triggered)
+```
+
+Inspecting this file is the fastest way to debug a misbehaving pipeline.
+You can: + +- Read the `packages_to_install` list for each component to catch missing + imports. +- Read the `marshal.load(...)` / `marshal.save(...)` calls to confirm Kale + detected the right data dependencies. +- Run the file directly (`python .kale/my_notebook.kale.py`) to reproduce + KFP compilation errors locally. + +## Package detection from imports + +Kale pulls its `packages_to_install` lists out of your `imports` cell by +walking the AST. For a line like `import pandas as pd`, it records `pandas` +as a dependency of every step. For `from sklearn.ensemble import +RandomForestClassifier`, it records `scikit-learn` (Kale knows about some +module-name → pip-name mappings). + +This is why **all imports must live in `imports` cells**: any module used +from a `step` cell but imported elsewhere will be missed, and the +`@kfp_dsl.component` will not declare it in `packages_to_install`, causing +a `ModuleNotFoundError` at runtime. + +## Templates to read + +If you want to go deeper, these two files are the source of truth for the +generated code: + +- `kale/templates/nb_function_template.jinja2` — per-step component +- `kale/templates/pipeline_template.jinja2` — pipeline wrapper + +They're ~170 lines of Jinja combined, and they are the clearest way to +learn exactly what Kale emits. diff --git a/docs/source/concepts/data-passing.md b/docs/source/concepts/data-passing.md new file mode 100644 index 000000000..13c53da12 --- /dev/null +++ b/docs/source/concepts/data-passing.md @@ -0,0 +1,187 @@ +# Data Passing & Marshalling + +One of Kale's main jobs is making multi-step pipelines work as a single +notebook. You write code across many cells, Kale figures out which variables +need to move between the resulting pipeline steps, and emits the right +serialization calls for each object type. + +This page explains how that works. 
+ +## Detecting data dependencies + +When you write this in a notebook: + +```python +# tag: step:load +df = pd.read_csv("data.csv") + +# tags: step:train, prev:load +model = train(df) + +# tags: step:evaluate, prev:train +score = model.score(df) +``` + +Kale needs to know that: + +- `df` must be saved at the end of `load` and loaded at the start of `train` + and `evaluate`. +- `model` must be saved at the end of `train` and loaded at the start of + `evaluate`. + +It discovers this by static AST analysis via +{py:func}`kale.common.astutils.get_marshal_candidates`. For each cell, Kale +walks the AST looking for: + +- Names assigned at the top level of the cell (these are candidates for + **outputs**). +- Names read but not assigned (these are candidates for **inputs**). +- Names that come from `imports` / `functions` cells (those are shared + across all steps, so they're not treated as data). + +The result is a set of variables each step consumes and produces. Kale then +intersects these sets across the dependency graph: if step `B` depends on +`A` and reads `df`, and `A` assigns `df`, then `df` becomes an artifact +flowing from `A` to `B`. + +## The marshalling system + +Once Kale knows _what_ to pass, it still needs to decide _how_ to serialize +it. Pickling everything with `dill` works for many Python objects, but +breaks down for: + +- Objects that hold native resources (TensorFlow session graphs, open files, + database cursors). +- Large objects that have a more efficient native format (numpy arrays, + parquet-able DataFrames). +- ML framework models, which have their own `save` / `load` contracts. + +Kale solves this with a small, extensible dispatcher system in +{py:mod}`kale.marshal`. + +### `MarshalBackend` and the `Dispatcher` + +Every supported type has a dedicated backend class that implements: + +- `save(obj, path)` — how to write this object to disk. +- `load(path)` — how to read it back. 
+- `object_type_pattern` — a regex matched against
+  `type(obj).__module__ + "." + type(obj).__qualname__` so the dispatcher
+  knows when to use this backend.
+
+At marshal time, the {py:class}`kale.marshal.backend.Dispatcher` inspects
+the object's type, picks the first backend whose pattern matches, and falls
+back to a generic `dill` backend if none does.
+
+### Built-in backends
+
+| Backend                  | Library       | File extension | Matches                                       |
+| ------------------------ | ------------- | -------------- | --------------------------------------------- |
+| `FunctionBackend`        | (builtin)     | `.pyfn`        | Python `function` objects                     |
+| `SKLearnBackend`         | scikit-learn  | `.joblib`      | `sklearn.*` classes                           |
+| `NumpyBackend`           | numpy         | `.npy`         | `numpy.*`                                     |
+| `PandasBackend`          | pandas        | `.pdpkl`       | `pandas.*(DataFrame\|Series)`                 |
+| `XGBoostModelBackend`    | xgboost       | `.json`        | `xgboost.core.Booster`                        |
+| `XGBoostDMatrixBackend`  | xgboost       | `.dmatrix`     | `xgboost.core.DMatrix`                        |
+| `PyTorchBackend`         | pytorch       | `.pt`          | `torch.nn.modules.module.Module` (and subclasses) |
+| `KerasBackend`           | keras         | `.keras`       | `keras.*`                                     |
+| `TensorflowKerasBackend` | tensorflow    | `.tfkeras`     | `tensorflow.python.keras.*`                   |
+| `DillBackend` (fallback) | dill          | `.dillpkl`     | Any object no other backend matches           |
+
+### Extending the dispatcher
+
+You can add a new backend by subclassing
+{py:class}`kale.marshal.backend.MarshalBackend` and registering it with the
+`@register_backend` decorator. Your backend declares an
+`object_type_pattern` and implements `save` / `load`. Kale will pick it up
+automatically at compile time and inject the right calls into the generated
+DSL.
+
+This is the recommended way to support new libraries: open an issue or a PR
+with a new backend rather than forcing your objects through `dill`.
+
+## Common pitfalls
+
+The marshalling model is powerful, but it is still **static** — Kale can
+only see what the AST shows. These are the patterns that trip people up.
+ +### Aliasing + +```python +# step:A +model1 = model2 = SomeModel() + +# step:B (prev: A) +model2.add_layer(SomeLayer()) + +# step:C (prev: B) +print(model1) +``` + +Kale saves `model1` and `model2` separately at the end of `A`, so mutating +`model2` in `B` has no effect on what `C` sees. **Solution**: don't alias +across steps — give each variable a single name and pass it explicitly. + +### Mutating global state + +```python +# imports +import warnings + +# step:A +warnings.simplefilter("ignore") +warnings.warn("A", DeprecationWarning) + +# step:B (prev: A) +warnings.warn("B", DeprecationWarning) +``` + +Step `B` runs in a fresh container, so `warnings.simplefilter("ignore")` +never happens there. **Solution**: configure global state in the `imports` +or `functions` cell, once, so every step gets the same configuration. + +### Non-serializable objects + +```python +# step:A +f = open("log.txt", "a") + +# step:B (prev: A) +f.write("hello") +``` + +Kale tries to pickle `f` at the end of `A` and fails — open file handles +can't be serialized. **Solution**: rebuild the resource at the start of each +step that needs it, typically via a helper in a `functions` cell: + +```python +# functions +def get_log_file(): + return open("log.txt", "a") + +# step:A +with get_log_file() as f: + f.write("A") + +# step:B (prev: A) +with get_log_file() as f: + f.write("B") +``` + +### Star imports + +```python +# imports +from mymodule import * + +# step:A +result = myfoo() +``` + +Kale can't see inside `mymodule`, so it doesn't know that `myfoo` comes from +there. It may try to marshal `myfoo` as an input to `A` and crash. +**Solution**: use explicit imports (`from mymodule import myfoo`). Star +imports should be avoided in Kale notebooks. + +See [Troubleshooting](../user-guide/troubleshooting.md) for the wider list of runtime +issues and how to diagnose them. 
diff --git a/docs/source/concepts/index.md b/docs/source/concepts/index.md
new file mode 100644
index 000000000..02da06995
--- /dev/null
+++ b/docs/source/concepts/index.md
@@ -0,0 +1,125 @@
+# How Kale Works
+
+Kale turns an annotated Jupyter notebook into a Kubeflow Pipelines v2 pipeline
+through a small, deterministic compilation pipeline. This page gives you a
+mental model for each stage; the rest of the concepts section drills into
+details.
+
+## The big picture
+
+```
+     .ipynb notebook
+          │
+          ▼
+┌─────────────────────┐
+│  NotebookProcessor  │  parses cell tags, builds a DAG of Steps
+└─────────────────────┘
+          │
+          ▼
+┌─────────────────────┐
+│ Dependency analysis │  static AST analysis finds shared variables
+└─────────────────────┘
+          │
+          ▼
+┌─────────────────────┐
+│      Compiler       │  renders Jinja2 templates → KFP v2 DSL
+└─────────────────────┘
+          │
+          ▼
+  .kale/<notebook_name>.kale.py
+          │
+          ▼
+┌─────────────────────┐
+│       KFP SDK       │  compiles DSL → YAML IR → uploads to KFP
+└─────────────────────┘
+          │
+          ▼
+  Running pipeline on Kubeflow
+```
+
+## Stage 1 — Parsing the notebook
+
+When you run `kale --nb <notebook>`, a
+{py:class}`kale.processors.NotebookProcessor` reads the `.ipynb` file with
+`nbformat` and walks every cell. Cell tags (stored in notebook metadata under
+`tags`) tell Kale what to do with the cell:
+
+- Cells tagged `imports` or `functions` are collected and will be **prepended
+  to every pipeline step**, so every step has access to the same set of
+  modules and helpers.
+- Cells tagged `pipeline-parameters` define the KFP parameters of the
+  generated pipeline.
+- Cells tagged `step:<name>` become pipeline steps. Multiple cells can share
+  the same step name and will be concatenated in notebook order.
+- `prev:<step_name>` tags add dependency edges between steps.
+- `skip` cells are excluded entirely — useful for exploration code.
+
+The result is a {py:class}`kale.pipeline.Pipeline` object, which is a
+{py:class}`networkx.DiGraph` where nodes are {py:class}`kale.step.Step`
+instances.
+
+## Stage 2 — Finding data dependencies
+
+Having a DAG of steps is not enough. Kale also needs to know **which variables
+flow between steps** — if step `B` reads a `DataFrame` that step `A`
+produced, Kale has to save it in `A` and load it back in `B`.
+
+To figure this out, Kale uses
+{py:func}`kale.common.astutils.get_marshal_candidates`, a static-analysis
+helper that walks the cell's AST to find:
+
+- Names that are assigned in one step and read in another.
+- Names that escape function bodies via return or assignment.
+- Names introduced by the shared `imports` / `functions` cells (which are
+  **not** treated as data, because they are available everywhere).
+
+The output is a set of "marshal candidates" per step, which Kale later turns
+into save/load calls in the generated DSL.
+
+## Stage 3 — Compiling to KFP v2
+
+Once Kale has a DAG of steps, each annotated with its inputs and outputs, the
+{py:class}`kale.compiler.Compiler` renders two Jinja2 templates:
+
+1. `nb_function_template.jinja2` generates a `@kfp_dsl.component` function
+   per step. Each component:
+   - Starts with the shared imports and function definitions.
+   - Loads inputs via Kale's marshalling dispatcher.
+   - Executes the original cell source code (verbatim).
+   - Saves any outputs that downstream steps will consume.
+
+2. `pipeline_template.jinja2` wraps all components in a single
+   `@kfp_dsl.pipeline` function that wires tasks together according to the
+   dependency graph and plumbs pipeline parameters through.
+
+The result is assembled, formatted with `autopep8`, and written to
+`.kale/<notebook_name>.kale.py`. You can read it — it's plain KFP v2 DSL —
+and even hand-tweak it if you need to.
+
+## Stage 4 — Submitting to KFP
+
+When you pass `--run_pipeline`, Kale hands the generated script to
+{py:mod}`kale.common.kfputils`, which invokes the KFP SDK compiler to turn
+the DSL into YAML IR, uploads it to KFP, and starts a run via the KFP REST
+API.
The pipeline name, experiment name, and KFP host all come from your +notebook's Kale metadata or from command line overrides. + +## Why it's built this way + +Kale's compilation pipeline is intentionally a **pure, deterministic +transformation**: notebook → DSL. That means: + +- The output is just Python. No hidden runtime. You can inspect it, debug it, + version-control it, and run it without Kale installed (once the pipeline is + submitted, it's a KFP pipeline like any other). +- Cells run **as if they were in a single notebook** — the imports, functions + and parameters are available everywhere, and data passing happens behind the + scenes. +- Because the transformation is static, Kale can detect most common mistakes + at compile time instead of failing at pipeline runtime. + +## Dive deeper + +- [Cell Types & Annotations](cell-types.md) — the full tag vocabulary +- [Data Passing & Marshalling](data-passing.md) — how marshalling works and which types are supported +- [Pipeline Compilation](compilation.md) — the exact compilation pipeline, file by file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 000000000..c9fa4f1ca --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,183 @@ +# Copyright 2026 The Kubeflow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Sphinx configuration for the Kale documentation site.""" + +from __future__ import annotations + +import os +import sys + +# Make the kale package importable for autodoc. +sys.path.insert(0, os.path.abspath("../..")) + +# -- Project information ----------------------------------------------------- + +project = "Kale" +copyright = "2026, The Kubeflow Authors" +author = "The Kubeflow Authors" + +# -- General configuration --------------------------------------------------- + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "sphinx_autodoc_typehints", + "sphinx_copybutton", + "sphinx_design", + "myst_parser", +] + +templates_path = ["_templates"] +exclude_patterns: list[str] = [] + +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} + +# -- HTML output ------------------------------------------------------------- + +html_theme = "furo" +html_title = "Kale" +html_logo = "_static/kale-symbol.svg" +html_favicon = "_static/kale-symbol.svg" +html_static_path = ["_static"] +html_css_files = ["custom.css"] + +# Furo theme options. Mirrors the Kubeflow SDK docs site so that Kale's +# documentation keeps visual parity with the rest of the Kubeflow project. +html_theme_options = { + "sidebar_hide_name": False, + "navigation_with_keys": True, + "top_of_page_buttons": ["view", "edit"], + "source_repository": "https://github.com/kubeflow/kale", + "source_branch": "main", + "source_directory": "docs/source/", + # Top navigation bar, implemented via Furo's announcement slot and + # styled by _static/custom.css (matches sdk.kubeflow.org). 
+ "announcement": """ + + """, +} + +# -- Autodoc ----------------------------------------------------------------- + +autodoc_default_options = { + "members": True, + "member-order": "bysource", + "undoc-members": False, + "exclude-members": "__weakref__,__init__", + "show-inheritance": True, +} + +# Generate autosummary pages automatically (matches Kubeflow SDK setup). +autosummary_generate = True +autosummary_imported_members = True + +# Type hints: show types in the signature so the Kubeflow SDK-style CSS for +# ``dl.py dt.sig`` elements renders correctly. +autodoc_typehints = "signature" +typehints_use_signature = True +typehints_use_signature_return = True +typehints_fully_qualified = False +always_document_param_types = False +typehints_document_rtype = False + + +import typing as _typing + + +def typehints_formatter(annotation, config=None): # noqa: ARG001 + """Render the builtin ``type`` (and ``type[X]``) without a cross-reference. + + Several Kale classes expose a ``type`` attribute (``Artifact.type``, + ``VolumeConfig.type``), so a cross-reference to the builtin ``type`` + collides with them and Sphinx emits an ambiguous-target warning. Rendering + the builtin as plain literal text avoids the lookup entirely. 
+ """ + if annotation is type: + return "``type``" + if _typing.get_origin(annotation) is type: + args = _typing.get_args(annotation) + inner = ", ".join( + getattr(a, "__qualname__", None) or getattr(a, "__name__", None) or repr(a) + for a in args + ) + return f"``type[{inner}]``" + return None + + +# -- Napoleon (Google-style docstrings) -------------------------------------- + +napoleon_google_docstring = True +napoleon_numpy_docstring = False +napoleon_include_init_with_doc = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = True +napoleon_use_admonition_for_notes = True +napoleon_use_admonition_for_references = True +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True +napoleon_type_aliases = None + +# -- MyST Parser ------------------------------------------------------------- + +myst_enable_extensions = [ + "colon_fence", + "deflist", + "linkify", + "substitution", + "tasklist", +] +myst_heading_anchors = 3 + +# -- Intersphinx ------------------------------------------------------------- + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "kfp": ("https://kubeflow-pipelines.readthedocs.io/en/stable/", None), +} + +# -- Copy button ------------------------------------------------------------- + +copybutton_prompt_text = r">>> |\.\.\. |\$ " +copybutton_prompt_is_regexp = True + +# -- Suppress noisy warnings ------------------------------------------------- + +# Autodoc will frequently fail to import optional ML dependencies when Sphinx +# runs on Read the Docs; degrade those to warnings that don't fail the build. 
+nitpicky = False +suppress_warnings = ["autodoc.import_object", "config.cache"] diff --git a/docs/source/contributing.md b/docs/source/contributing.md new file mode 100644 index 000000000..8ea5a9b3a --- /dev/null +++ b/docs/source/contributing.md @@ -0,0 +1,127 @@ +# Contributing + +This page is the shorter, docs-site-friendly version of +[CONTRIBUTING.md](https://github.com/kubeflow/kale/blob/main/CONTRIBUTING.md) +in the repository. If you're about to open a PR, start here and then read +the full document for release-engineering details. + +## Before you start + +If you haven't already, skim [Architecture](architecture/index.md) — it's a quick tour +of the codebase and saves a lot of time when you need to find the right +file to change. + +## Prerequisites + +- **Python 3.11+** (3.12 is recommended). +- **uv** — installed automatically by `make dev`, or manually: + ```bash + curl -LsSf https://astral.sh/uv/install.sh | sh + ``` +- **Node.js 22+** and `jlpm` — only required if you're touching the + labextension. `jlpm` ships with JupyterLab. +- **A Kubernetes cluster** (optional) — only needed when testing + generated pipelines end-to-end. + +## Set up your environment + +```bash +git clone https://github.com/kubeflow/kale.git +cd kale + +make dev # one-time setup +make test # run backend + labextension tests +make jupyter # open JupyterLab with the extension loaded +``` + +You only need to re-run `make dev` after pulling changes that touch +`pyproject.toml` / `package.json`, or after `make clean`. + +## Make targets you'll use + +| Target | Description | +| ---------------------- | ------------------------------------------------ | +| `make dev` | One-time environment setup. | +| `make test` | Run all tests (backend + labextension). | +| `make test-backend` | Backend tests only. | +| `make test-backend-unit` | Fast unit tests only. | +| `make test-labextension` | Labextension tests only. | +| `make lint` | Run all linters. 
|
+| `make lint-backend` | Ruff check on the Python package. |
+| `make lint-labextension` | ESLint + Prettier on the TypeScript source. |
+| `make format-backend` | Auto-fix Ruff findings. |
+| `make build` | Build a wheel. |
+| `make docs` | Build the docs site (this website). |
+| `make docs-serve` | Build and serve docs locally on port 8000. |
+
+Run `make help` in the repository root for the full list.
+
+## Managing Python dependencies
+
+Edit `pyproject.toml` under the right section:
+
+- Runtime dependencies: `[project.dependencies]`
+- JupyterLab runtime: `[project.optional-dependencies.jupyter]`
+- Dev tools: `[project.optional-dependencies.dev]`
+- Documentation: `[project.optional-dependencies.docs]`
+
+Then update the lockfile with `make lock` and sync the environment with
+`uv sync --all-extras`.
+
+## Pre-commit hooks
+
+Install pre-commit hooks with:
+
+```bash
+uv run pre-commit install
+```
+
+Installed hooks:
+
+- `uv-lock` — ensure `uv.lock` is in sync.
+- `trailing-whitespace`, `end-of-file-fixer` — standard hygiene.
+- `ruff` — Python linting and formatting.
+
+## Contributing to the docs
+
+This docs site lives under `docs/source/` and is built with Sphinx + Furo
++ MyST Parser. To add or edit a page:
+
+1. Drop a new `.md` or `.rst` file into the appropriate directory under
+   `docs/source/`.
+2. Reference it from the toctree in `docs/source/index.md` so it appears
+   in the sidebar.
+3. Run `make docs` to build the site locally.
+4. Run `make docs-serve` to view it at <http://localhost:8000>.
+
+API reference pages use Sphinx autodoc via `.. automodule::`. If you're
+adding or modifying public Python API, make sure you use Google-style
+docstrings (Napoleon is enabled) and add/update the corresponding page in
+`docs/source/api/`.
+
+## Development checklist
+
+Before opening a pull request:
+
+1. `make test` — all tests pass.
+2. `make lint` — no linter findings.
+3. 
If you changed the generated KFP DSL, update the golden fixtures under + `kale/tests/assets/kfp_dsl/`. +4. If you changed public Python API, update the `docs/source/api/` pages. +5. If you added or changed a user-visible feature, update the relevant + pages under `docs/source/user-guide/` or `docs/source/concepts/`. +6. Write a clear commit message describing the change and the motivation. + +## Releasing + +Release procedures are documented in +[RELEASE.md](https://github.com/kubeflow/kale/blob/main/RELEASE.md). If you +don't have publish rights, you don't need to worry about this file. + +## Getting help + +- **GitHub issues** — [github.com/kubeflow/kale/issues](https://github.com/kubeflow/kale/issues) +- **Slack** — `#kubeflow-ml-experience` on the Kubeflow Slack workspace +- **WG meetings** — ML Experience WG on the Kubeflow community calendar + +Happy hacking! diff --git a/docs/source/getting-started/installation.md b/docs/source/getting-started/installation.md new file mode 100644 index 000000000..9500c3553 --- /dev/null +++ b/docs/source/getting-started/installation.md @@ -0,0 +1,102 @@ +# Installation + +This page walks you through the prerequisites, installation options, and a +quick sanity check for a working Kale setup. + +## Prerequisites + +| Requirement | Version | Notes | +| ------------------ | ------------------ | --------------------------------------------------------------------------------------- | +| Python | 3.11 or later | Kale uses modern typing features; older Pythons are not supported. | +| Kubeflow Pipelines | **v2.16.0+** | Older KFP servers reject the `securityContext` field Kale emits for step pods. | +| Kubernetes cluster | any | `minikube`, `kind`, Docker Desktop, or a managed cluster all work. | +| JupyterLab | 4.0+ (for the UI) | Only required if you want to use the Kale side panel inside JupyterLab. 
| + +Install KFP by following the +[official Kubeflow Pipelines installation guide](https://www.kubeflow.org/docs/components/pipelines/operator-guides/installation/). +Make sure to set `PIPELINE_VERSION=2.16.0` or later. If you are upgrading an +older environment, ensure your Python deps include `kfp[kubernetes]>=2.16.0`. + +## Install from PyPI + +```{admonition} Kale v2.0 pre-release +:class: important + +Kale v2.0 is not yet published on PyPI. Until it is, use the "Install from +source" instructions below. This section will become the recommended path +once v2.0 ships. +``` + +When v2.0 is released, you will be able to install the package directly: + +```bash +pip install "jupyterlab>=4.0.0" "kubeflow-kale[jupyter]" +jupyter lab +``` + +## Install from source + +Clone the repository and use the `make` targets provided by the project: + +```bash +git clone https://github.com/kubeflow/kale.git +cd kale + +make dev # Install Python + Node deps, build and link the labextension +make jupyter # Start JupyterLab with the Kale panel +``` + +`make dev` takes care of: + +- Installing [uv](https://github.com/astral-sh/uv) if it is not already present. +- Running `uv sync --all-extras` to create the virtual environment and install + the Python package in editable mode. +- Building the JupyterLab 4 extension and linking it so it appears in your + JupyterLab UI. +- Installing pre-commit hooks for linting. + +See [Contributing](../contributing.md) for a full breakdown of the available `make` targets +and the development workflow. 
+ +## Try Kale in Docker (no cluster required) + +If you just want to play with Kale in an isolated environment, the repo ships +with a Dockerfile based on the official Kubeflow `jupyter-scipy` image: + +```bash +make docker-build # Build Kale wheels and bake them into the image +make docker-run # Start JupyterLab on http://localhost:8889 +``` + +To also connect to a KFP cluster, follow the multi-terminal setup in +[Running Pipelines](../user-guide/running-pipelines.md). + +## Verify your installation + +1. **Start a cluster and KFP port-forward** (minikube example): + + ```bash + minikube start + kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80 + ``` + +2. **Test the CLI** against an example notebook: + + ```bash + kale --nb examples/base/candies_sharing.ipynb \ + --kfp_host http://127.0.0.1:8080 \ + --run_pipeline + ``` + + This compiles the notebook into `.kale/.kale.py`, uploads the + pipeline, and starts a run. + +3. **Test the JupyterLab extension**: + - Open JupyterLab (`make jupyter` or `jupyter lab`). + - Open any notebook from `examples/base/`. + - Click the Kale icon in the left sidebar. + - Toggle the Kale panel on — you should see cell type dropdowns appear on + each notebook cell. + +If any of these steps fail, head to [Troubleshooting](../user-guide/troubleshooting.md) — the +most common issues are covered there. diff --git a/docs/source/getting-started/quickstart.md b/docs/source/getting-started/quickstart.md new file mode 100644 index 000000000..bd837d46a --- /dev/null +++ b/docs/source/getting-started/quickstart.md @@ -0,0 +1,146 @@ +# Quickstart + +This walk-through takes you from a stock Kale install to a running pipeline +on Kubeflow Pipelines in under ten minutes, using the `candies_sharing` +example that ships with the repository. + +The recommended path is the **JupyterLab UI**: you annotate cells, compile, +and submit the pipeline without ever leaving the notebook. 
If you'd rather +drive Kale from a terminal, the [CLI flow](#advanced-cli-flow) at the bottom +of this page covers the same journey. + +## Prerequisites + +Before you start, make sure you have: + +- Kale installed, including the JupyterLab extension (see [Installation](installation.md)). +- A running Kubernetes cluster with Kubeflow Pipelines v2.16.0+ deployed. +- The KFP API reachable on `http://127.0.0.1:8080` — for a minikube setup you + can run: + ```bash + kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80 + ``` + +## 1. Launch JupyterLab with Kale + +From the repository root, start the bundled JupyterLab environment: + +```bash +make jupyter +``` + +Then open `examples/base/candies_sharing.ipynb` from the file browser. The +notebook defines a toy pipeline that demonstrates every Kale concept in the +minimum amount of code, and it ships already annotated with Kale tags. + +![JupyterLab right after opening `examples/base/candies_sharing.ipynb` — file browser on the left, notebook in the main area.](../../imgs/quickstart-01-jupyterlab.png) + +## 2. Open the Kale side panel + +Click the Kale icon in the JupyterLab left sidebar to toggle the Kale +Deployment Panel. This is the control surface you'll use for the rest of the +quickstart: it inspects cell tags, compiles the notebook, and submits runs +to Kubeflow Pipelines. + +![The Kale side panel expanded on the left, showing the pipeline metadata fields (name, description, experiment).](../../imgs/quickstart-02-kale-panel.png) + +## 3. Review the cell tags + +Each cell now has a dropdown showing its Kale cell type. The +`candies_sharing` notebook uses most of Kale's core tag types: + +- **Imports** — all `import` statements go here. Kale prepends this cell to + every step in the pipeline. +- **Pipeline Parameters** — defines values that will become KFP parameters, + tweakable at submission time. 
+- **Step** — one or more named steps, each with optional dependencies on + earlier steps (declared via `prev:`). + +![Notebook cells with the Kale tag dropdowns visible (imports / pipeline parameters / step).](../../imgs/quickstart-03-cell-tags.png) + +See [Cell Types & Annotations](../concepts/cell-types.md) for the full tag vocabulary. + +## 4. Configure the pipeline metadata + +In the Kale side panel, confirm the basics: + +- **Pipeline name** — defaults to the notebook filename. +- **Experiment** — defaults to `Default` (or the first available KFP + experiment). + +![The side panel with pipeline name, experiment, and description fields filled out.](../../imgs/quickstart-04-panel-config.png) + +## 5. Compile and run from the panel + +Click **Compile and Run** at the bottom of the Kale panel. Kale will, in +order: + +1. Parse the notebook and extract the Kale tags from cell metadata. +2. Build a pipeline DAG (Directed Acyclic Graph) from the `step` and `prev:` annotations. +3. Detect which variables need to flow between steps. +4. Generate a KFP v2 DSL Python script under `.kale/`. +5. Upload the pipeline to KFP and start a new run in the selected experiment. + +Each phase updates in place in the panel, with a link to the generated +`.kale/.kale.py` once it exists. + +![The Kale panel mid-run, with the phase indicators (parse → compile → upload → run) visible.](../../imgs/quickstart-05-compile-run.png) + +## 6. Watch the run in the KFP UI + +When the upload finishes, the panel shows a **View run** link pointing at +the Kubeflow Pipelines UI. Follow it to watch the DAG execute step-by-step; +click any step to see its logs, artifacts, and the data Kale marshalled in +and out of it. + +![The KFP UI run page with the `candies_sharing` DAG in progress or completed.](../../imgs/quickstart-06-kfp-run.png) + +## What's next? + +- Learn how Kale detects and moves data between steps in + [Data Passing & Marshalling](../concepts/data-passing.md). 
+- Explore the rest of the panel (volumes, snapshots, parameters) in
+  [Running Pipelines](../user-guide/running-pipelines.md).
+- Browse the [examples](https://github.com/kubeflow/kale/tree/main/examples) gallery for more realistic pipelines.
+
+---
+
+## Advanced: CLI flow
+
+If you'd rather drive Kale from a terminal — for example in CI, on a remote
+box without a JupyterLab install, or when scripting multi-notebook builds —
+you can do the same thing with the `kale` CLI.
+
+### Compile the notebook
+
+```bash
+kale --nb examples/base/candies_sharing.ipynb
+```
+
+Look inside the `.kale/` directory that was just created:

+
+```bash
+ls .kale/
+# candies_sharing.kale.py ← generated KFP v2 DSL
+```
+
+Open `candies_sharing.kale.py` — you'll see one `@kfp_dsl.component` function
+per step, a `@kfp_dsl.pipeline` function wiring them together, and a
+`__main__` block that you can run directly to compile the pipeline to YAML.
+
+### Compile and submit in one step
+
+Add `--run_pipeline` to compile **and** submit the pipeline to KFP in one
+shot:
+
+```bash
+kale --nb examples/base/candies_sharing.ipynb \
+    --kfp_host http://127.0.0.1:8080 \
+    --run_pipeline
+```
+
+This uploads the pipeline, creates an experiment (default:
+`Kale-Pipeline-Experiment`), and starts a run. Open the KFP UI at
+<http://127.0.0.1:8080> and navigate to **Runs** to watch it execute.
+
+See [CLI Reference](../api/cli.md) for the complete CLI reference.
diff --git a/docs/source/index.md b/docs/source/index.md
new file mode 100644
index 000000000..71d1dacd9
--- /dev/null
+++ b/docs/source/index.md
@@ -0,0 +1,155 @@
+---
+hide-toc: true
+---
+
+# Kale
+
+```{raw} html
+<div align="center">
+  <h3>
+    From Jupyter Notebook to Kubeflow Pipeline — Zero Boilerplate
+  </h3>
+</div>
+``` + +**Kale** (Kubeflow Automated pipeLines Engine) turns annotated Jupyter notebooks +into production-ready [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) +without requiring you to write a single line of KFP SDK code. + +Tag cells in your notebook, let Kale figure out the data dependencies between +them, and compile the whole thing into a KFP v2 pipeline you can run on any +Kubeflow cluster. + + +## Why Kale? + +::::{grid} 1 2 2 2 +:gutter: 3 + +:::{grid-item-card} No SDK boilerplate +Annotate cells, compile, run. Kale generates the KFP v2 DSL for you — no need +to learn components, artifacts, or Python decorators. +::: + +:::{grid-item-card} Automatic data passing +Variables flow between steps as if you were still in a single notebook. Kale's +type-aware marshalling handles numpy, pandas, scikit-learn, PyTorch, Keras, +TensorFlow, XGBoost and more. +::: + +:::{grid-item-card} JupyterLab integration +Tag cells visually, define step dependencies, and submit pipelines from the +Kale side panel inside JupyterLab 4. +::: + +:::{grid-item-card} KFP v2 native +Compiles to the modern KFP v2 pipeline DSL with full artifact support. Runs on +any compliant Kubeflow Pipelines backend. +::: +:::: + +## Get started + +::::{grid} 1 2 2 2 +:gutter: 3 + +:::{grid-item-card} {octicon}`rocket` Quickstart +:link: getting-started/quickstart +:link-type: doc + +Compile and run your first notebook on Kubeflow Pipelines in a few minutes. +::: + +:::{grid-item-card} {octicon}`book` Core Concepts +:link: concepts/index +:link-type: doc + +Understand cell annotations, data marshalling, and how Kale compiles to KFP. +::: + +:::{grid-item-card} {octicon}`tools` User Guide +:link: user-guide/annotating-notebooks +:link-type: doc + +Practical walkthroughs for annotating, parameterizing, and running pipelines. 
+::: + +:::{grid-item-card} {octicon}`code` API Reference +:link: api/pipeline +:link-type: doc + +Python API for the Pipeline, Step, Compiler and marshalling modules. +::: +:::: + +## Kale in the Kubeflow ecosystem + +Kale is part of the Kubeflow **ML Experience Working Group**, alongside the +[Kubeflow SDK](https://sdk.kubeflow.org/), [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/) +and [Kubeflow Notebooks](https://www.kubeflow.org/docs/components/notebooks/). +It lives at the notebook layer — where data scientists prototype — and bridges +the gap to the pipeline layer, where production workloads run. + +If KFP is the "how" of running ML pipelines on Kubernetes, Kale is the "what +you meant": take the notebook you already have, and turn it into a pipeline +without rewriting anything. + +## Community + +- **GitHub**: [kubeflow/kale](https://github.com/kubeflow/kale) +- **Slack**: [#kubeflow-ml-experience](https://kubeflow.slack.com/) on the Kubeflow workspace +- **Working group**: ML Experience WG meetings — see the [Kubeflow community calendar](https://www.kubeflow.org/docs/about/community/) +- **Issues & feature requests**: [github.com/kubeflow/kale/issues](https://github.com/kubeflow/kale/issues) + +```{toctree} +:hidden: +:caption: Getting Started + +why-kale +getting-started/installation +getting-started/quickstart +``` + +```{toctree} +:hidden: +:caption: Core Concepts + +concepts/index +concepts/cell-types +concepts/data-passing +concepts/compilation +``` + +```{toctree} +:hidden: +:caption: User Guide + +user-guide/annotating-notebooks +user-guide/pipeline-parameters +user-guide/running-pipelines +user-guide/troubleshooting +``` + +```{toctree} +:hidden: +:caption: Architecture + +architecture/index +``` + +```{toctree} +:hidden: +:caption: API Reference + +api/pipeline +api/step +api/compiler +api/marshal +api/cli +``` + +```{toctree} +:hidden: +:caption: Project + +roadmap +contributing +``` diff --git a/docs/source/roadmap.md 
b/docs/source/roadmap.md new file mode 100644 index 000000000..a1f7ca841 --- /dev/null +++ b/docs/source/roadmap.md @@ -0,0 +1,93 @@ +# Roadmap + +Kale is under active development again after a hiatus, with a re-energized +maintainer team and a growing roster of contributors. This page captures the +current direction in broad strokes — it is intentionally not a detailed spec. +Things will move around as the community gives feedback. + +## Where we are + +Kale **v2.0** is the headline release for this cycle. It brings the project +back in sync with the Kubeflow ecosystem: + +- Full compatibility with **Kubeflow Pipelines v2** (the KFP v2 DSL and YAML + IR, artifact model, and component spec). +- A **modernized JupyterLab 4.x extension** rewritten for the current + labextension API. +- **Python 3.11+** support across the backend. +- A cleaner, testable compiler pipeline with golden-file fixtures for the + generated KFP DSL. + +Tracking issue: [kubeflow/kale#457 — Road to 2.0](https://github.com/kubeflow/kale/issues/457). + +## What we're focused on now + +### GSoC 2026: composable notebooks + +Kale is participating in **Google Summer of Code 2026** under the Kubeflow +umbrella. The accepted project focuses on **multi-notebook coordination and +composable pipelines** — letting users build pipelines that span more than +one notebook, and composing them into larger workflows. This is a concrete, +community-driven effort landing in Kale over the coming months, and it +shapes a lot of the near-term priorities below. + +If you want to contribute, the GSoC project is a great way to get involved. +Subscribe to the GitHub milestone for that work, drop a note in the +ML Experience WG meeting, or ping us on Slack. + +## Directional themes + +These are high-level directions the maintainers are aligning around. None of +them are committed features yet; they indicate _where_ we expect Kale to go, +not a fixed schedule. 
+ +### Deeper notebook experience + +The notebook is where Kale differentiates, and we want to make that +experience richer: + +- **Incremental execution** — compile and run only the parts of a pipeline + that changed since the last run, without recomputing upstream steps. +- **Dependency visualization** — surface Kale's DAG view directly in the + side panel so you can see the pipeline shape while editing. +- **Run sweeps and experiment comparison** — easy fan-out across parameter + grids, with side-by-side metrics comparisons in the KFP UI. + +### From development to production + +- **Local execution mode** — run a Kale pipeline end-to-end on the local + machine for fast feedback, without a Kubernetes cluster in the loop. +- **Kubeflow SDK integration** — tighter alignment with the emerging + [Kubeflow SDK](https://sdk.kubeflow.org/), so that Kale-generated + pipelines can interop with hand-written SDK pipelines and components. +- **Artifact management and model registry** — better default support for + typed artifacts, model cards, and registries so that pipelines can hand + off to serving and evaluation infrastructure without glue code. + +### Community and ecosystem + +- **Documentation** — that's what this site is about. Expect the docs to + grow in scope as features land. +- **Contributor onboarding** — simpler setup, better `make` targets, + clearer contribution docs, more "good first issue" labeling. +- **Ecosystem alignment** — closer collaboration with the Kubeflow ML + Experience WG, KFP, Notebooks, and Katib maintainers. + +## There's much more to come + +The maintainers are happy to say: this is the most active Kale has been in +years, and we're treating the roadmap as a living document. Expect more +detail to appear on this page as concrete designs emerge from the WG. + +## Get involved in shaping the roadmap + +- **GitHub issues and discussions** — file issues for bugs, feature + requests, and design discussions. Label suggestions welcome. 
+- **Milestones and project boards** — see the + [milestones](https://github.com/kubeflow/kale/milestones) page for + in-flight work. +- **ML Experience Working Group meetings** — bi-weekly on the Kubeflow + community calendar. Kale roadmap updates are a recurring agenda item. +- **Slack** — `#kubeflow-ml-experience` on the Kubeflow Slack workspace. + +The best way to influence the roadmap is to show up with a use case. diff --git a/docs/source/user-guide/annotating-notebooks.md b/docs/source/user-guide/annotating-notebooks.md new file mode 100644 index 000000000..ff126520e --- /dev/null +++ b/docs/source/user-guide/annotating-notebooks.md @@ -0,0 +1,113 @@ +# Annotating Notebooks + +Kale turns cell tags into pipeline structure. You can set those tags two +ways: + +1. **From the Kale JupyterLab extension** — point-and-click, recommended for + interactive development. +2. **By editing notebook metadata directly** — useful for scripting, code + review, and when you don't have JupyterLab running. + +Both produce the same tags, so the resulting `.ipynb` looks identical either +way. + +## Using the JupyterLab extension + +After you run `make jupyter` (or start JupyterLab any other way with the +Kale extension installed), open a notebook and click the Kale icon in the +left sidebar. The Kale panel appears with a master toggle at the top. + +When you enable Kale, two things happen: + +1. Every notebook cell grows a Kale metadata row at the top showing its cell + type (Imports, Functions, Step, ...), step name, and dependencies. +2. The side panel unlocks pipeline-level settings: pipeline name, experiment + name, and description. + +### Setting a cell's type + +Click the cell type dropdown on the cell's Kale row. You'll see: + +- **Imports** — tag the cell as `imports`. +- **Functions** — tag the cell as `functions`. +- **Pipeline Parameters** — tag the cell as `pipeline-parameters`. +- **Pipeline Metrics** — tag the cell as `pipeline-metrics`. 
+- **Step** — tag the cell as a pipeline step. When you pick this, you'll be
+  prompted for the step name and given a dropdown of possible dependencies
+  (other steps in the notebook).
+- **Skip Cell** — tag the cell as `skip`.
+
+### Setting step dependencies
+
+For cells tagged as `Step`, the panel lets you pick zero or more previous
+steps. Each choice becomes a `prev:` tag. Kale validates the choices
+so you can't create cycles.
+
+### Pipeline-level settings
+
+The side panel's pipeline settings form is mapped to fields on
+{py:class}`kale.pipeline.PipelineConfig`. Changes are saved into the
+notebook's top-level metadata under the `kubeflow_notebook` key, so they
+travel with the notebook and show up on the next open.
+
+### Submitting
+
+The **Compile and Run** button invokes the Kale backend from the JupyterLab
+extension, which compiles the pipeline with exactly the same code path as
+`kale --nb ...` on the CLI, then uploads and runs it against the configured
+KFP host.
+
+## Editing metadata by hand
+
+Each cell in a notebook is JSON. Kale tags live in `metadata.tags` as a list
+of strings. A minimal step cell looks like this:
+
+```json
+{
+  "cell_type": "code",
+  "metadata": {
+    "tags": ["step:load_data"]
+  },
+  "source": [
+    "df = pd.read_csv('data.csv')\n"
+  ]
+}
+```
+
+A step with a dependency and a GPU request:
+
+```json
+{
+  "cell_type": "code",
+  "metadata": {
+    "tags": [
+      "step:train",
+      "prev:load_data",
+      "limit:nvidia.com/gpu:1",
+      "image:pytorch/pytorch:2.0-cuda12"
+    ]
+  },
+  "source": ["..."]
+}
+```
+
+Pipeline-level Kale settings live on the notebook (not on a cell) under the
+`metadata.kubeflow_notebook` key — these are the same fields the side panel
+exposes.
+
+## Organising a notebook for Kale
+
+A notebook that compiles well with Kale usually follows this order:
+
+1. **One `imports` cell** at the top with every `import` statement.
+2. **One or more `functions` cells** below with pure function and class
+   definitions.
+3. 
**One `pipeline-parameters` cell** declaring tunable inputs. +4. **A sequence of `step` cells**, each doing one logical thing, with + `prev:` tags describing the DAG. +5. **Optional `pipeline-metrics` cells** at the end of training steps to + surface accuracy / loss / etc. in the KFP UI. +6. **`skip` cells** wherever you want exploratory code to live without + affecting the pipeline. + +See the [examples](https://github.com/kubeflow/kale/tree/main/examples) gallery for notebooks that follow this pattern. diff --git a/docs/source/user-guide/pipeline-parameters.md b/docs/source/user-guide/pipeline-parameters.md new file mode 100644 index 000000000..307b69345 --- /dev/null +++ b/docs/source/user-guide/pipeline-parameters.md @@ -0,0 +1,85 @@ +# Pipeline Parameters + +Pipeline parameters let you change a pipeline's behavior at submission time +without recompiling. In Kale, they come from a dedicated cell type: + +```python +# tag: pipeline-parameters +learning_rate = 0.01 +batch_size = 128 +num_epochs = 10 +model_name = "rf" +use_gpu = False +``` + +Everything you assign in this cell becomes a top-level KFP pipeline +parameter of the matching type. + +## Supported types + +Kale infers the parameter type from the Python literal you assign. Supported +types: + +| Python literal | KFP parameter type | +| ------------------ | ------------------ | +| `int` (e.g. `10`) | `int` | +| `float` (e.g. `0.01`) | `float` | +| `str` (e.g. `"rf"`) | `str` | +| `bool` (e.g. `True`) | `bool` | + +Anything else — lists, dicts, NumPy scalars, objects — will either fail at +compile time or fall back to string encoding. Stick to the four basic types +unless you know what you're doing. 
+ +## How parameters are used in steps + +Once a parameter is declared, you can use it in any step cell as if it were +a local variable: + +```python +# tags: step:train, prev:load +model = train(df, lr=learning_rate, epochs=num_epochs) +``` + +Kale wires `learning_rate` and `num_epochs` into the `train` component's +inputs automatically, using KFP's pipeline-parameter plumbing. + +## Multiple parameter cells + +You can have more than one `pipeline-parameters` cell. Kale concatenates +them in notebook order, so the **last definition of a parameter wins**. +This is sometimes convenient for overriding defaults near where they are +used, but it can also make notebooks hard to read — prefer a single +parameters cell. + +## Overriding parameters at submission time + +When you click **Compile and Run** (or pass `--run_pipeline` on the CLI), +Kale submits a pipeline run with default parameter values. To override +them: + +- **In the KFP UI**: after the pipeline is uploaded, start a new run from + the Pipelines tab. The KFP UI will prompt you for each parameter. +- **Via the KFP SDK**: load the compiled YAML IR and call + `client.run_pipeline(..., params={...})`. This is the recommended way to + kick off runs programmatically. + +## Viewing parameters in the KFP UI + +Every run page shows the parameter values it was started with, so you can +correlate pipeline outputs with the inputs that produced them. Use this in +combination with `pipeline-metrics` cells to build lightweight experiment +tracking without needing a separate tracking service. + +## Defaults vs. placeholders + +The value you assign in the `pipeline-parameters` cell is the **default**. +It's the value used when no override is provided at submission time. So: + +```python +# tag: pipeline-parameters +batch_size = 128 # sensible default +``` + +reads naturally as "pipeline parameter `batch_size` with default 128", and +you can still override it on a per-run basis. 
diff --git a/docs/source/user-guide/running-pipelines.md b/docs/source/user-guide/running-pipelines.md new file mode 100644 index 000000000..88397377c --- /dev/null +++ b/docs/source/user-guide/running-pipelines.md @@ -0,0 +1,111 @@ +# Running Pipelines + +Once your notebook is annotated, you can compile and run it three ways: + +1. **CLI** — `kale --nb ...`, good for scripts and CI. +2. **JupyterLab extension** — interactive Compile and Run button. +3. **Compile-only** — inspect the generated KFP DSL before submitting. + +All three call into the same underlying Python API, so the behavior is +identical. + +## From the command line + +The `kale` CLI is the fastest path to a running pipeline. The core +invocation is: + +```bash +kale --nb path/to/notebook.ipynb +``` + +This compiles the notebook into `.kale/.kale.py` and exits. +To also submit the pipeline to a running KFP instance, add: + +```bash +kale --nb path/to/notebook.ipynb \ + --kfp_host http://127.0.0.1:8080 \ + --run_pipeline +``` + +### Useful CLI flags + +| Flag | Effect | +| ---------------------- | -------------------------------------------------------------- | +| `--nb` | Path to the notebook (required). | +| `--kfp_host` | KFP API endpoint for upload and run. | +| `--upload_pipeline` | Upload the pipeline without starting a run. | +| `--run_pipeline` | Upload *and* create a run. | +| `--pipeline_name` | Override the pipeline name (default comes from notebook metadata). | +| `--experiment_name` | Override the KFP experiment (default `Kale-Pipeline-Experiment`). | +| `--pipeline_description` | Set a pipeline description shown in the KFP UI. | +| `--docker_image` | Override the default base image for all steps. | +| `--debug` | Keep intermediate files and print verbose logs. | + +See [CLI Reference](../api/cli.md) for the complete list. + +## From the JupyterLab extension + +Open your notebook in JupyterLab, click the Kale icon in the left sidebar, +and toggle the Kale panel on. 
At the bottom of the panel you'll see: + +- **Pipeline Name** and **Experiment Name** — override notebook defaults. +- **Docker Image** — base image used for every step that doesn't declare + its own via an `image:` tag. +- **Compile and Save** — generate the KFP DSL only. +- **Compile and Run** — generate, upload, and start a run. + +The Deploy button streams progress through a notification area at the top +of the panel, and surfaces the KFP run URL when the run is created so you +can click straight through to the KFP UI. + +## Compile-only mode + +When you want to read or debug the generated code before sending it to KFP, +skip the `--run_pipeline` flag on the CLI (or use **Compile and Save** in +the extension). You'll end up with: + +``` +.kale/ +├── my_notebook.kale.py +└── my_notebook.yaml # KFP YAML IR (produced when running the DSL) +``` + +The `.kale.py` file is pure KFP v2 DSL. The `.yaml` file is the compiled +pipeline IR that can be manually uploaded to the KFP UI without using Kale's +"Compile and Run" button. You can: + +- Read it line by line to verify that your step dependencies, inputs, and + outputs look right. +- Run it directly (`python .kale/my_notebook.kale.py`) to reproduce KFP + compilation errors locally. +- Edit it to experiment with changes before committing them to the + notebook. + +## Monitoring runs + +Once a run is submitted, open the KFP UI and navigate to **Runs**. You can: + +- Watch step status in real time. +- Click a step to see its logs, the generated component source, and + artifact inputs/outputs. +- See pipeline parameters and pipeline metrics in the run summary. +- Compare two runs side by side from the Runs list. + +Kale doesn't add any custom tracking on top of KFP — everything runs +through the standard KFP backend, so anything you can do with a hand-rolled +KFP pipeline, you can also do with a Kale-generated one. 
+ +## Environment variables + +A few environment variables are useful when running Kale: + +| Variable | Purpose | +| ------------------------ | ------------------------------------------------------------------------------------------ | +| `KF_PIPELINES_ENDPOINT` | Default KFP API endpoint if `--kfp_host` is not set. | +| `KF_PIPELINES_UI_ENDPOINT` | KFP UI URL used when Kale renders run links. | +| `KALE_PIP_INDEX_URLS` | Comma-separated pip index URLs baked into the generated components (used for local dev). | +| `KALE_PIP_TRUSTED_HOSTS` | Trusted hosts for HTTP pip index URLs. | + +The last two are most useful when testing an unpublished version of Kale +against a local KFP cluster — see the "Testing with KFP Clusters" section +of [Contributing](../contributing.md). diff --git a/docs/source/user-guide/troubleshooting.md b/docs/source/user-guide/troubleshooting.md new file mode 100644 index 000000000..d858c05e6 --- /dev/null +++ b/docs/source/user-guide/troubleshooting.md @@ -0,0 +1,118 @@ +# Troubleshooting + +Most Kale problems fall into one of a few categories. This page covers the +common ones. For anything not listed here, open an issue on +[GitHub](https://github.com/kubeflow/kale/issues) with the notebook and the +generated `.kale/.kale.py`. + +## `ModuleNotFoundError` in a pipeline step + +You'll see this most often on your first pipeline. A step fails with: + +``` +ModuleNotFoundError: No module named 'seaborn' +``` + +even though you can clearly `import seaborn` in your notebook. + +**Cause.** Kale **does not rebuild a Docker image** for your pipeline. It +uses the base image configured in pipeline metadata (or overridden with +`--docker_image`), and it installs extra Python packages at step startup +via `packages_to_install`. That list is built from the `imports` cell only +— if you import a package in a `step` or `functions` cell, Kale won't add +it to the install list and the step will fail. 
+ +**Fix.** + +- Move every `import` statement into an `imports` cell. +- For packages the AST can't resolve to a pip name, add them to the + pipeline's base docker image (or build a custom one). + +See [Cell Types & Annotations](../concepts/cell-types.md) for details on the imports rule. + +## Pickle / marshal errors + +``` +TypeError: cannot pickle '_thread.RLock' object +``` + +or + +``` +AttributeError: Can't pickle local object 'train..inner' +``` + +**Cause.** A variable flowing between steps can't be serialized. Kale's +marshalling system handles many ML-framework types natively (numpy, pandas, +sklearn, PyTorch, Keras, TF, XGBoost), but falls back to `dill` for +anything unknown. `dill` can serialize a lot, but not: + +- Open file handles, sockets, database connections. +- Objects holding native thread locks or OS resources. +- Closures that capture local variables. + +**Fix.** + +- Don't pass the offending object between steps. Recreate it from scratch + at the start of each step that needs it, using a helper in a `functions` + cell — see [Data Passing: Non-serializable objects](../concepts/data-passing.md#non-serializable-objects). +- If the object is a model from an ML library Kale doesn't yet support, + consider contributing a new `MarshalBackend` — see + [Extending the dispatcher](../concepts/data-passing.md#extending-the-dispatcher). + +## Compiler errors from KFP + +``` +Internal compiler error: Compiler has produced Argo-incompatible workflow. +``` + +Kale's compile step hands the generated DSL to the KFP SDK's compiler, +which runs `argo lint` if `argo` is on your `PATH`. An outdated `argo` +binary can produce false positives here. + +**Fix.** Remove `argo` from your `PATH` or upgrade it to the version +recommended by your KFP installation. Kale itself does not require `argo`. 
+ +## Aliasing: a variable's value doesn't seem to propagate + +```python +# step:A +model1 = model2 = SomeModel() + +# step:B (prev: A) +model2.add_layer(SomeLayer()) + +# step:C (prev: B) +print(model1) # still the old model +``` + +**Cause.** Kale saves `model1` and `model2` separately in step A, so +mutations to `model2` in step B don't affect what C loads as `model1`. + +**Fix.** Avoid aliasing across steps. Use one name for each object. See +[Data Passing: Aliasing](../concepts/data-passing.md#aliasing). + +## Global state mutated inside a step is not visible to other steps + +Each step runs in its own container, so library configuration like +`warnings.simplefilter`, `logging.basicConfig`, or +`plt.rcParams[...]` does not persist across steps. + +**Fix.** Put all global configuration in the `imports` or `functions` +cell, so every step applies it at startup. + +## `kale` command not found after installing + +If you installed Kale inside a `uv` or virtualenv, make sure you're running +`kale` from an activated environment, or use `uv run kale --nb ...`. The +`kale` script is registered as a `[project.scripts]` entry in +`pyproject.toml`, so it only shows up inside the environment where the +package was installed. + +## Still stuck? + +- Check the generated `.kale/.kale.py` — it often reveals what Kale + thinks the pipeline looks like. +- Run with `--debug` for verbose output. +- File an issue on [GitHub](https://github.com/kubeflow/kale/issues) with + a minimal reproduction notebook. diff --git a/docs/source/why-kale.md b/docs/source/why-kale.md new file mode 100644 index 000000000..ab88c6daa --- /dev/null +++ b/docs/source/why-kale.md @@ -0,0 +1,25 @@ +# Why Kale + +```{admonition} Content in progress +:class: note + +This page will go into detail about why Kale exists, who it's for, and how it +compares to writing KFP components and pipelines by hand. The content is being +iterated on — expect a fuller write-up shortly. 
+``` + +In the meantime, here's the short version: + +- **You already have a notebook.** Kale lets you keep it. Instead of rewriting + it as a KFP pipeline, you tag cells with Kale's lightweight annotations and + let Kale compile the pipeline for you. +- **Data passing is automatic.** Kale statically analyses your code to find + which variables are shared between steps, and injects the right + save/load calls using its type-aware marshalling system. +- **It's designed for iteration.** You can go back to a pure notebook workflow + at any time — nothing Kale does is destructive or locks you in. +- **It fits into Kubeflow.** Kale compiles to KFP v2 DSL, so your pipelines + run on the same infrastructure as any other Kubeflow workload. + +For a concrete walk-through, head over to the +[Quickstart](getting-started/quickstart.md). diff --git a/kale/compiler.py b/kale/compiler.py index 8da3e7aa2..4811c7188 100644 --- a/kale/compiler.py +++ b/kale/compiler.py @@ -11,6 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Compile a Kale :class:`~kale.pipeline.Pipeline` into a Kubeflow Pipelines v2 DSL script. + +This module renders the Jinja2 templates in ``kale/templates/`` to produce a +ready-to-run KFP v2 pipeline script, formats it, and optionally hands it off to +the KFP SDK for compilation and submission. +""" import argparse import logging diff --git a/kale/marshal/backend.py b/kale/marshal/backend.py index bde25e8ba..b0d793ff0 100644 --- a/kale/marshal/backend.py +++ b/kale/marshal/backend.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Marshal dispatcher and backend base class. 
+ +Defines :class:`MarshalBackend`, the abstract base class for type-aware +serializers that Kale uses to pass data between pipeline steps, and +:class:`Dispatcher`, which routes objects to the correct backend at +save/load time. +""" import logging import os diff --git a/kale/marshal/backends.py b/kale/marshal/backends.py index 7cad523d2..781bae986 100644 --- a/kale/marshal/backends.py +++ b/kale/marshal/backends.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Built-in marshal backends. + +Concrete :class:`~kale.marshal.backend.MarshalBackend` implementations for the +Python types Kale supports out of the box: numpy arrays, pandas DataFrames, +scikit-learn estimators, PyTorch / Keras / TensorFlow models, XGBoost +boosters and DMatrices, and plain Python functions. +""" import logging diff --git a/kale/pipeline.py b/kale/pipeline.py index a327b9650..8a5d70d81 100644 --- a/kale/pipeline.py +++ b/kale/pipeline.py @@ -11,6 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Pipeline data model. + +Defines :class:`Pipeline` — a :class:`networkx.DiGraph` of :class:`~kale.step.Step` +nodes — along with the configuration classes that describe pipeline-level +settings such as the pipeline name, KFP host, volumes, and Katib experiments. +""" from collections.abc import Iterable import copy diff --git a/kale/processors/nbprocessor.py b/kale/processors/nbprocessor.py index e530adba1..132f1a2a0 100644 --- a/kale/processors/nbprocessor.py +++ b/kale/processors/nbprocessor.py @@ -11,6 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""Notebook → Pipeline conversion. + +:class:`NotebookProcessor` reads a Jupyter notebook, parses the Kale-specific +cell tags and notebook metadata, resolves static data dependencies, and +produces a ready-to-compile :class:`~kale.pipeline.Pipeline`. +""" import logging import os diff --git a/kale/step.py b/kale/step.py index 472db4adc..4e387d10c 100644 --- a/kale/step.py +++ b/kale/step.py @@ -11,6 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Pipeline step data model. + +Defines :class:`Step`, the unit of execution inside a Kale :class:`~kale.pipeline.Pipeline`, +together with :class:`StepConfig` and the :class:`PipelineParam` / :class:`Artifact` +named tuples used to describe step inputs and outputs. +""" from collections.abc import Callable import logging @@ -85,7 +91,7 @@ def add_artifact(self, artifact_name, artifact_type, is_input): Artifact_type will be either 'Dataset', 'Model', 'HTML', 'Metrics', 'ClassificationMetrics' or 'Artifact'. This will simplify tracking what should be an Input[Artifact] - or Output[Artifact]. + or Output[Artifact]. Args: artifact_name (str): Name of the artifact. 
diff --git a/pyproject.toml b/pyproject.toml index 39b44b3d4..26be76114 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,15 @@ dev = [ "pytest-asyncio", "pytest-jupyter[server]>=0.6.0", ] +docs = [ + "sphinx>=9.0", + "furo>=2025.0", + "sphinx-autodoc-typehints>=2.0", + "sphinx-copybutton>=0.5", + "myst-parser>=3.0", + "sphinx-design>=0.6", + "linkify-it-py>=2.0", +] [tool.hatch.version] path = "kale/__init__.py" diff --git a/uv.lock b/uv.lock index 9554a40a1..7c749794c 100644 --- a/uv.lock +++ b/uv.lock @@ -4,7 +4,29 @@ requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '3.14'", "python_full_version == '3.13.*'", - "python_full_version < '3.13'", + "python_full_version == '3.12.*'", + "python_full_version < '3.12'", +] + +[[package]] +name = "accessible-pygments" +version = "0.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/c1/bbac6a50d02774f91572938964c582fff4270eee73ab822a4aeea4d8b11b/accessible_pygments-0.0.5.tar.gz", hash = "sha256:40918d3e6a2b619ad424cb91e556bd3bd8865443d9f22f1dcdf79e33c8046872", size = 1377899, upload-time = "2024-05-10T11:23:10.216Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903, upload-time = "2024-05-10T11:23:08.421Z" }, +] + +[[package]] +name = "alabaster" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210, upload-time = "2024-07-26T18:15:03.762Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, ] [[package]] @@ -608,6 +630,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, ] +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + [[package]] name = "executing" version = "2.2.1" @@ -644,6 +675,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014", size = 9121, upload-time = "2021-03-11T07:16:28.351Z" }, ] +[[package]] +name = "furo" +version = "2025.12.19" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "accessible-pygments" }, + { name = "beautifulsoup4" }, + { name = "pygments" }, + { name = "sphinx", version = "9.0.4", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "sphinx-basic-ng" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ec/20/5f5ad4da6a5a27c80f2ed2ee9aee3f9e36c66e56e21c00fde467b2f8f88f/furo-2025.12.19.tar.gz", hash = "sha256:188d1f942037d8b37cd3985b955839fea62baa1730087dc29d157677c857e2a7", size = 1661473, upload-time = "2025-12-19T17:34:40.889Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/b2/50e9b292b5cac13e9e81272c7171301abc753a60460d21505b606e15cf21/furo-2025.12.19-py3-none-any.whl", hash = "sha256:bb0ead5309f9500130665a26bee87693c41ce4dbdff864dbfb6b0dae4673d24f", size = 339262, upload-time = "2025-12-19T17:34:38.905Z" }, +] + [[package]] name = "google-api-core" version = "2.30.0" @@ -813,6 +861,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "imagesize" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/e6/7bf14eeb8f8b7251141944835abd42eb20a658d89084b7e1f3e5fe394090/imagesize-2.0.0.tar.gz", hash = "sha256:8e8358c4a05c304f1fccf7ff96f036e7243a189e9e42e90851993c558cfe9ee3", size = 1773045, upload-time = "2026-03-03T14:18:29.941Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl", hash = "sha256:5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96", size = 9441, upload-time = "2026-03-03T14:18:27.892Z" }, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -1230,6 +1287,17 @@ 
dev = [ { name = "ruff" }, { name = "testfixtures" }, ] +docs = [ + { name = "furo" }, + { name = "linkify-it-py" }, + { name = "myst-parser" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "sphinx-autodoc-typehints", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx-autodoc-typehints", version = "3.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "sphinx-copybutton" }, + { name = "sphinx-design" }, +] jupyter = [ { name = "jupyter-server" }, ] @@ -1240,6 +1308,7 @@ requires-dist = [ { name = "autopep8", specifier = ">=2.0.0" }, { name = "coverage", marker = "extra == 'dev'" }, { name = "dill", specifier = ">=0.3.8" }, + { name = "furo", marker = "extra == 'docs'", specifier = ">=2025.0" }, { name = "ipykernel", specifier = ">=6.29.5" }, { name = "ipython", specifier = ">=8.30.0" }, { name = "jinja2", specifier = ">=3.0.0" }, @@ -1248,6 +1317,8 @@ requires-dist = [ { name = "jupyter-server", marker = "extra == 'jupyter'", specifier = ">=2.4.0,<3" }, { name = "kfp", extras = ["kubernetes"], specifier = ">=2.16.0" }, { name = "kubernetes", specifier = ">=30.0.0" }, + { name = "linkify-it-py", marker = "extra == 'docs'", specifier = ">=2.0" }, + { name = "myst-parser", marker = "extra == 'docs'", specifier = ">=3.0" }, { name = "nbconvert", specifier = ">=7.16.0" }, { name = "nbformat", specifier = ">=5.10.4" }, { name = "networkx", specifier = ">=3.0.0" }, @@ -1262,9 +1333,13 @@ requires-dist = [ { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "pytest-jupyter", extras = ["server"], marker = "extra == 'dev'", specifier = ">=0.6.0" }, { name = "ruff", marker = "extra == 'dev'" }, + 
{ name = "sphinx", marker = "extra == 'docs'", specifier = ">=9.0" }, + { name = "sphinx-autodoc-typehints", marker = "extra == 'docs'", specifier = ">=2.0" }, + { name = "sphinx-copybutton", marker = "extra == 'docs'", specifier = ">=0.5" }, + { name = "sphinx-design", marker = "extra == 'docs'", specifier = ">=0.6" }, { name = "testfixtures", marker = "extra == 'dev'" }, ] -provides-extras = ["dev", "jupyter"] +provides-extras = ["dev", "docs", "jupyter"] [[package]] name = "kubernetes" @@ -1296,6 +1371,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" }, ] +[[package]] +name = "linkify-it-py" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "uc-micro-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/c9/06ea13676ef354f0af6169587ae292d3e2406e212876a413bf9eece4eb23/linkify_it_py-2.1.0.tar.gz", hash = "sha256:43360231720999c10e9328dc3691160e27a718e280673d444c38d7d3aaa3b98b", size = 29158, upload-time = "2026-03-01T07:48:47.683Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/de/88b3be5c31b22333b3ca2f6ff1de4e863d8fe45aaea7485f591970ec1d3e/linkify_it_py-2.1.0-py3-none-any.whl", hash = "sha256:0d252c1594ecba2ecedc444053db5d3a9b7ec1b0dd929c8f1d74dce89f86c05e", size = 19878, upload-time = "2026-03-01T07:48:46.098Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1394,6 +1481,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, ] +[[package]] +name = 
"mdit-py-plugins" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -1412,6 +1511,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/f7/4a5e785ec9fbd65146a27b6b70b6cdc161a66f2024e4b04ac06a67f5578b/mistune-3.2.0-py3-none-any.whl", hash = "sha256:febdc629a3c78616b94393c6580551e0e34cc289987ec6c35ed3f4be42d0eee1", size = 53598, upload-time = "2025-12-23T11:36:33.211Z" }, ] +[[package]] +name = "myst-parser" +version = "5.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "jinja2" }, + { name = "markdown-it-py" }, + { name = "mdit-py-plugins" }, + { name = "pyyaml" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/33/fa/7b45eef11b7971f0beb29d27b7bfe0d747d063aa29e170d9edd004733c8a/myst_parser-5.0.0.tar.gz", hash = "sha256:f6f231452c56e8baa662cc352c548158f6a16fcbd6e3800fc594978002b94f3a", size = 98535, upload-time = "2026-01-15T09:08:18.036Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d3/ac/686789b9145413f1a61878c407210e41bfdb097976864e0913078b24098c/myst_parser-5.0.0-py3-none-any.whl", hash = "sha256:ab31e516024918296e169139072b81592336f2fef55b8986aa31c9f04b5f7211", size = 84533, upload-time = "2026-01-15T09:08:16.788Z" }, +] + [[package]] name = "nbclient" version = "0.10.4" @@ -2124,6 +2241,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] +[[package]] +name = "roman-numerals" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/41dc953bbeb056c17d5f7a519f50fdf010bd0553be2d630bc69d1e022703/roman_numerals-4.1.0.tar.gz", hash = "sha256:1af8b147eb1405d5839e78aeb93131690495fe9da5c91856cb33ad55a7f1e5b2", size = 9077, upload-time = "2025-12-17T18:25:34.381Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/54/6f679c435d28e0a568d8e8a7c0a93a09010818634c3c3907fc98d8983770/roman_numerals-4.1.0-py3-none-any.whl", hash = "sha256:647ba99caddc2cc1e55a51e4360689115551bf4476d90e8162cf8c345fe233c7", size = 7676, upload-time = "2025-12-17T18:25:33.098Z" }, +] + [[package]] name = "rpds-py" version = "0.30.0" @@ -2296,6 +2422,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "snowballstemmer" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/75/a7/9810d872919697c9d01295633f5d574fb416d47e535f258272ca1f01f447/snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895", size = 105575, upload-time = "2025-05-09T16:34:51.843Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274, upload-time = "2025-05-09T16:34:50.371Z" }, +] + [[package]] name = "soupsieve" version = "2.8.3" @@ -2305,6 +2440,195 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, ] +[[package]] +name = "sphinx" +version = "9.0.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.12'", +] +dependencies = [ + { name = "alabaster", marker = "python_full_version < '3.12'" }, + { name = "babel", marker = "python_full_version < '3.12'" }, + { name = "colorama", marker = "python_full_version < '3.12' and sys_platform == 'win32'" }, + { name = "docutils", marker = "python_full_version < '3.12'" }, + { name = "imagesize", marker = "python_full_version < '3.12'" }, + { name = "jinja2", marker = "python_full_version < '3.12'" }, + { name = "packaging", marker = "python_full_version < '3.12'" }, + { name = "pygments", marker = "python_full_version < '3.12'" }, + { name = "requests", marker = "python_full_version < '3.12'" }, + { name = "roman-numerals", marker = "python_full_version < '3.12'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.12'" }, + 
{ name = "sphinxcontrib-devhelp", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/50/a8c6ccc36d5eacdfd7913ddccd15a9cee03ecafc5ee2bc40e1f168d85022/sphinx-9.0.4.tar.gz", hash = "sha256:594ef59d042972abbc581d8baa577404abe4e6c3b04ef61bd7fc2acbd51f3fa3", size = 8710502, upload-time = "2025-12-04T07:45:27.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/3f/4bbd76424c393caead2e1eb89777f575dee5c8653e2d4b6afd7a564f5974/sphinx-9.0.4-py3-none-any.whl", hash = "sha256:5bebc595a5e943ea248b99c13814c1c5e10b3ece718976824ffa7959ff95fffb", size = 3917713, upload-time = "2025-12-04T07:45:24.944Z" }, +] + +[[package]] +name = "sphinx" +version = "9.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", +] +dependencies = [ + { name = "alabaster", marker = "python_full_version >= '3.12'" }, + { name = "babel", marker = "python_full_version >= '3.12'" }, + { name = "colorama", marker = "python_full_version >= '3.12' and sys_platform == 'win32'" }, + { name = "docutils", marker = "python_full_version >= '3.12'" }, + { name = "imagesize", marker = "python_full_version >= '3.12'" }, + { name = "jinja2", marker = "python_full_version >= '3.12'" }, + { name = "packaging", marker = "python_full_version >= '3.12'" }, + { name = "pygments", marker = "python_full_version >= '3.12'" }, + { name = "requests", marker = "python_full_version >= '3.12'" }, + { name = "roman-numerals", marker = "python_full_version >= '3.12'" }, + { name = "snowballstemmer", marker = 
"python_full_version >= '3.12'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/bd/f08eb0f4eed5c83f1ba2a3bd18f7745a2b1525fad70660a1c00224ec468a/sphinx-9.1.0.tar.gz", hash = "sha256:7741722357dd75f8190766926071fed3bdc211c74dd2d7d4df5404da95930ddb", size = 8718324, upload-time = "2025-12-31T15:09:27.646Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/f7/b1884cb3188ab181fc81fa00c266699dab600f927a964df02ec3d5d1916a/sphinx-9.1.0-py3-none-any.whl", hash = "sha256:c84fdd4e782504495fe4f2c0b3413d6c2bf388589bb352d439b2a3bb99991978", size = 3921742, upload-time = "2025-12-31T15:09:25.561Z" }, +] + +[[package]] +name = "sphinx-autodoc-typehints" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.12'", +] +dependencies = [ + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/f6/bdd93582b2aaad2cfe9eb5695a44883c8bc44572dd3c351a947acbb13789/sphinx_autodoc_typehints-3.6.1.tar.gz", hash = "sha256:fa0b686ae1b85965116c88260e5e4b82faec3687c2e94d6a10f9b36c3743e2fe", size = 37563, upload-time = "2026-01-02T15:23:46.543Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/6a/c0360b115c81d449b3b73bf74b64ca773464d5c7b1b77bda87c5e874853b/sphinx_autodoc_typehints-3.6.1-py3-none-any.whl", hash = 
"sha256:dd818ba31d4c97f219a8c0fcacef280424f84a3589cedcb73003ad99c7da41ca", size = 20869, upload-time = "2026-01-02T15:23:45.194Z" }, +] + +[[package]] +name = "sphinx-autodoc-typehints" +version = "3.10.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", +] +dependencies = [ + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/2f/6152d2e409ffaab18b397ac1cde0920dc071f9afb76125a2d496c80b9976/sphinx_autodoc_typehints-3.10.0.tar.gz", hash = "sha256:7b821a123852176b2ed4f2cb9da8db06531a15b8098a4c7350c68febb7669bd0", size = 72801, upload-time = "2026-04-09T18:05:16.389Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/e9/b75897138c5611213472b5726dbb6106ac6192dd31f219cd154bacefa498/sphinx_autodoc_typehints-3.10.0-py3-none-any.whl", hash = "sha256:2176424f9e1ce3054d9016ac16b51d4b9febffd8cad8ece3b7912b2c4646759f", size = 38703, upload-time = "2026-04-09T18:05:14.923Z" }, +] + +[[package]] +name = "sphinx-basic-ng" +version = "1.0.0b2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/0b/a866924ded68efec7a1759587a4e478aec7559d8165fac8b2ad1c0e774d6/sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9", size = 20736, upload-time = "2023-07-08T18:40:54.166Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/3c/dd/018ce05c532a22007ac58d4f45232514cd9d6dd0ee1dc374e309db830983/sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b", size = 22496, upload-time = "2023-07-08T18:40:52.659Z" }, +] + +[[package]] +name = "sphinx-copybutton" +version = "0.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/2b/a964715e7f5295f77509e59309959f4125122d648f86b4fe7d70ca1d882c/sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd", size = 23039, upload-time = "2023-04-14T08:10:22.998Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/48/1ea60e74949eecb12cdd6ac43987f9fd331156388dcc2319b45e2ebb81bf/sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e", size = 13343, upload-time = "2023-04-14T08:10:20.844Z" }, +] + +[[package]] +name = "sphinx-design" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/13/7b/804f311da4663a4aecc6cf7abd83443f3d4ded970826d0c958edc77d4527/sphinx_design-0.7.0.tar.gz", hash = "sha256:d2a3f5b19c24b916adb52f97c5f00efab4009ca337812001109084a740ec9b7a", size = 2203582, upload-time = 
"2026-01-19T13:12:53.297Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/cf/45dd359f6ca0c3762ce0490f681da242f0530c49c81050c035c016bfdd3a/sphinx_design-0.7.0-py3-none-any.whl", hash = "sha256:f82bf179951d58f55dca78ab3706aeafa496b741a91b1911d371441127d64282", size = 2220350, upload-time = "2026-01-19T13:12:51.077Z" }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053, upload-time = "2024-07-29T01:09:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300, upload-time = "2024-07-29T01:08:58.99Z" }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967, upload-time = "2024-07-29T01:09:23.417Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530, upload-time = "2024-07-29T01:09:21.945Z" }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617, upload-time = "2024-07-29T01:09:37.889Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705, upload-time = "2024-07-29T01:09:36.407Z" }, +] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787, upload-time = "2019-01-21T16:10:16.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165, upload-time = "2024-07-29T01:09:56.435Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = 
"sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743, upload-time = "2024-07-29T01:09:54.885Z" }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080, upload-time = "2024-07-29T01:10:09.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -2461,6 +2785,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, ] +[[package]] +name = "uc-micro-py" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/78/67/9a363818028526e2d4579334460df777115bdec1bb77c08f9db88f6389f2/uc_micro_py-2.0.0.tar.gz", hash = "sha256:c53691e495c8db60e16ffc4861a35469b0ba0821fe409a8a7a0a71864d33a811", size = 6611, upload-time = "2026-03-01T06:31:27.526Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/73/d21edf5b204d1467e06500080a50f79d49ef2b997c79123a536d4a17d97c/uc_micro_py-2.0.0-py3-none-any.whl", hash = "sha256:3603a3859af53e5a39bc7677713c78ea6589ff188d70f4fee165db88e22b242c", size = 6383, upload-time = "2026-03-01T06:31:26.257Z" }, +] + [[package]] 
name = "uri-template" version = "1.3.0"