Skip to content

Commit adae269

Browse files
Kaiser and claude committed
Add notebook profiler with live Rich tree display
Adds per-cell timing, output analysis, and section-grouped profiling to papermill. Sections are auto-numbered from markdown headings so the display shows "Section 1 / Sub-section 1.2" rather than raw notebook source text.

New modules:
- papermill/profile.py — build_sections(), build_profile(), profile_notebook()
- papermill/live_tree.py — LiveTreeDisplay (replaces tqdm with a Rich live tree)

New entry point:
- papermill-profile <executed.ipynb> — profile any executed notebook

Changes to existing modules:
- execute.py — live_tree=False parameter wired to execute_notebook()
- engines.py — NotebookExecutionManager accepts live_display=, calls its on_cell_start/complete/exception hooks instead of tqdm
- cli.py — --live-tree / --no-live-tree flag; 'papermill profile' command
- __init__.py — export profile_notebook, build_profile, build_sections
- pyproject.toml — optional-dependencies.rich = ["rich>=13.0"]

Tests: papermill/tests/test_profile.py (20 tests, all passing)

Usage:
    papermill notebook.ipynb out.ipynb --live-tree
    papermill-profile executed.ipynb
    python -c "from papermill import profile_notebook; print(profile_notebook('out.ipynb'))"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e4e4ddd commit adae269

8 files changed

Lines changed: 979 additions & 19 deletions

File tree

papermill/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .exceptions import PapermillException, PapermillExecutionError # noqa: F401
22
from .execute import execute_notebook # noqa: F401
33
from .inspection import inspect_notebook # noqa: F401
4+
from .profile import profile_notebook, build_profile, build_sections # noqa: F401
45
from .version import version as __version__ # noqa: F401

papermill/cli.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .execute import execute_notebook
1616
from .inspection import display_notebook_help
1717
from .iorw import NoDatesSafeLoader, read_yaml_file
18+
from .profile import profile_notebook
1819
from .version import version as papermill_version
1920

2021
click.disable_unicode_literals_warning = True
@@ -95,6 +96,14 @@ def print_papermill_version(ctx, param, value):
9596
)
9697
@click.option('--cwd', default=None, help='Working directory to run notebook in.')
9798
@click.option('--progress-bar/--no-progress-bar', default=None, help="Flag for turning on the progress bar.")
99+
@click.option(
100+
'--live-tree/--no-live-tree',
101+
default=False,
102+
help=(
103+
"Show a live Rich tree of notebook sections and per-cell timing during execution, "
104+
"replacing the tqdm progress bar. Requires: pip install 'papermill[rich]'."
105+
),
106+
)
98107
@click.option(
99108
'--log-output/--no-log-output',
100109
default=False,
@@ -158,6 +167,7 @@ def papermill(
158167
language,
159168
cwd,
160169
progress_bar,
170+
live_tree,
161171
log_output,
162172
log_level,
163173
start_timeout,
@@ -250,13 +260,63 @@ def papermill(
250260
report_mode=report_mode,
251261
cwd=cwd,
252262
execution_timeout=execution_timeout,
263+
live_tree=live_tree,
253264
)
254265
except nbclient.exceptions.DeadKernelError:
255266
# Exiting with a special exit code for dead kernels
256267
traceback.print_exc()
257268
sys.exit(138)
258269

259270

271+
# CLI entry point: profile an executed notebook, write the JSON report via
# ``profile_notebook`` and echo a human-readable summary. The docstring below is
# what click shows for ``--help``, so its wording is user-facing.
@click.command('profile', context_settings=dict(help_option_names=['-h', '--help']))
@click.argument('notebook_path')
@click.option(
    '--output', '-o', default=None,
    help='Path to write profile JSON (default: <notebook>.profile.json).',
)
def papermill_profile(notebook_path, output):
    """Profile an already-executed notebook and print a timing summary.

    NOTEBOOK_PATH must be an executed .ipynb file that contains papermill
    timing metadata (i.e. it was run via ``papermill`` or
    ``execute_notebook``).

    Writes a JSON report with per-section and per-cell durations, output
    types, bottleneck identification, and the five slowest cells.
    """
    from pathlib import Path

    # Default report path swaps the notebook's final suffix for ".profile.json"
    # (e.g. "run.ipynb" -> "run.profile.json").
    # NOTE(review): Path() collapses URL-style inputs such as "s3://bucket/nb.ipynb";
    # confirm whether remote notebooks are supported here, otherwise pass --output.
    out_path = output or str(Path(notebook_path).with_suffix('.profile.json'))
    profile = profile_notebook(notebook_path, output=out_path)

    # Only append the unit when a total is actually available; previously a
    # missing total rendered as "—s" and an explicit None as "Nones".
    total = profile.get('total_duration_s')
    total_text = '—' if total is None else f"{total}s"

    click.echo(f"\nNotebook : {profile['notebook']}")
    click.echo(f"Total : {total_text}")
    click.echo(f"Cells : {profile['n_code_cells']} code | Errors: {profile['n_errors']}")

    # The bottleneck entry is optional in the profile dict.
    if profile.get('bottleneck'):
        b = profile['bottleneck']
        click.echo(
            f"Bottleneck: [{b['cell_index']}] in «{b['section']}» "
            f"— {b['duration_s']}s ({b['pct_of_total']}%)"
        )

    click.echo("\nSections:")
    for s in profile['sections']:
        # Indent each section label by its heading level.
        indent = " " * s['level']
        click.echo(f" {indent}{s['label']:<40} {s['duration_s']:.3f}s")

    if profile.get('slowest_cells'):
        click.echo("\nSlowest cells:")
        for c in profile['slowest_cells']:
            # Preview is capped at 50 characters and padded so the columns line up.
            click.echo(
                f" [{c['index']}] {c['source_preview'][:50]:<52} "
                f"{c['duration_s']}s {','.join(c['output_types']) or '—'}"
            )

    click.echo(f"\nProfile written to: {out_path}")
319+
260320
def _resolve_type(value):
261321
if value == "True":
262322
return True

papermill/engines.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ class NotebookExecutionManager:
9696
COMPLETED = "completed"
9797
FAILED = "failed"
9898

99-
def __init__(self, nb, output_path=None, log_output=False, progress_bar=True, autosave_cell_every=30):
99+
def __init__(self, nb, output_path=None, log_output=False, progress_bar=True, autosave_cell_every=30,
100+
live_display=None):
100101
self.nb = nb
101102
self.output_path = output_path
102103
self.log_output = log_output
@@ -105,6 +106,7 @@ def __init__(self, nb, output_path=None, log_output=False, progress_bar=True, au
105106
self.autosave_cell_every = autosave_cell_every
106107
self.max_autosave_pct = 25
107108
self.last_save_time = self.now() # Not exactly true, but simplifies testing logic
109+
self.live_display = live_display # optional LiveTreeDisplay — replaces tqdm when set
108110
self.pbar = None
109111
if progress_bar:
110112
# lazy import due to implicit slow ipython import
@@ -227,10 +229,14 @@ def cell_start(self, cell, cell_index=None, **kwargs):
227229
cell.metadata.papermill["status"] = self.RUNNING
228230
cell.metadata.papermill['exception'] = False
229231

230-
# injects optional description of the current cell directly in the tqdm
231-
cell_description = self.get_cell_description(cell)
232-
if cell_description is not None and hasattr(self, 'pbar') and self.pbar:
233-
self.pbar.set_description(f"Executing {cell_description}")
232+
if self.live_display is not None:
233+
if cell_index is not None:
234+
self.live_display.on_cell_start(cell_index)
235+
else:
236+
# injects optional description of the current cell directly in the tqdm
237+
cell_description = self.get_cell_description(cell)
238+
if cell_description is not None and hasattr(self, 'pbar') and self.pbar:
239+
self.pbar.set_description(f"Executing {cell_description}")
234240

235241
self.save()
236242

@@ -246,6 +252,8 @@ def cell_exception(self, cell, cell_index=None, **kwargs):
246252
cell.metadata.papermill['exception'] = True
247253
cell.metadata.papermill['status'] = self.FAILED
248254
self.nb.metadata.papermill['exception'] = True
255+
if self.live_display is not None and cell_index is not None:
256+
self.live_display.on_cell_exception(cell_index)
249257

250258
@catch_nb_assignment
251259
def cell_complete(self, cell, cell_index=None, **kwargs):
@@ -272,7 +280,10 @@ def cell_complete(self, cell, cell_index=None, **kwargs):
272280
cell.metadata.papermill['status'] = self.COMPLETED
273281

274282
self.save()
275-
if self.pbar:
283+
if self.live_display is not None:
284+
if cell_index is not None:
285+
self.live_display.on_cell_complete(self.nb.cells[cell_index], cell_index)
286+
elif self.pbar:
276287
self.pbar.update(1)
277288

278289
@catch_nb_assignment
@@ -348,6 +359,7 @@ def execute_notebook(
348359
progress_bar=True,
349360
log_output=False,
350361
autosave_cell_every=30,
362+
live_display=None,
351363
**kwargs,
352364
):
353365
"""
@@ -364,6 +376,7 @@ def execute_notebook(
364376
progress_bar=progress_bar,
365377
log_output=log_output,
366378
autosave_cell_every=autosave_cell_every,
379+
live_display=live_display,
367380
)
368381

369382
nb_man.notebook_start()

papermill/execute.py

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def execute_notebook(
2727
start_timeout=60,
2828
report_mode=False,
2929
cwd=None,
30+
live_tree=False,
3031
**engine_kwargs,
3132
):
3233
"""Executes a single notebook locally.
@@ -61,6 +62,9 @@ def execute_notebook(
6162
Flag for whether or not to hide input.
6263
cwd : str or Path, optional
6364
Working directory to use when executing the notebook
65+
live_tree : bool, optional
66+
Show a Rich live tree of sections and per-cell timing instead of the
67+
default tqdm progress bar. Requires ``pip install 'papermill[rich]'``.
6468
**kwargs
6569
Arbitrary keyword arguments to pass to the notebook engine
6670
@@ -111,21 +115,44 @@ def execute_notebook(
111115
if not prepare_only:
112116
# Dropdown to the engine to fetch the kernel name from the notebook document
113117
kernel_name = papermill_engines.nb_kernel_name(engine_name=engine_name, nb=nb, name=kernel_name)
118+
119+
# Resolve live_tree: if requested, disable tqdm and attach the Rich display
120+
_live_display = None
121+
if live_tree:
122+
from .live_tree import LiveTreeDisplay, is_available as _rich_ok
123+
if _rich_ok():
124+
import os
125+
nb_name = os.path.basename(input_path) if isinstance(input_path, str) else "notebook.ipynb"
126+
_live_display = LiveTreeDisplay(nb, nb_name)
127+
progress_bar = False # Rich tree replaces tqdm
128+
else:
129+
logger.warning(
130+
"live_tree=True requested but 'rich' is not installed. "
131+
"Falling back to tqdm. Install with: pip install 'papermill[rich]'"
132+
)
133+
114134
# Execute the Notebook in `cwd` if it is set
115135
with chdir(cwd):
116-
nb = papermill_engines.execute_notebook_with_engine(
117-
engine_name,
118-
nb,
119-
input_path=input_path,
120-
output_path=output_path if request_save_on_cell_execute else None,
121-
kernel_name=kernel_name,
122-
progress_bar=progress_bar,
123-
log_output=log_output,
124-
start_timeout=start_timeout,
125-
stdout_file=stdout_file,
126-
stderr_file=stderr_file,
127-
**engine_kwargs,
128-
)
136+
if _live_display is not None:
137+
_live_display.start()
138+
try:
139+
nb = papermill_engines.execute_notebook_with_engine(
140+
engine_name,
141+
nb,
142+
input_path=input_path,
143+
output_path=output_path if request_save_on_cell_execute else None,
144+
kernel_name=kernel_name,
145+
progress_bar=progress_bar,
146+
log_output=log_output,
147+
start_timeout=start_timeout,
148+
stdout_file=stdout_file,
149+
stderr_file=stderr_file,
150+
live_display=_live_display,
151+
**engine_kwargs,
152+
)
153+
finally:
154+
if _live_display is not None:
155+
_live_display.stop()
129156

130157
# Check for errors first (it saves on error before raising)
131158
raise_for_execution_errors(nb, output_path)

0 commit comments

Comments
 (0)