Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Execute preprocess cells #1380

Merged
merged 9 commits into from
Sep 13, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 80 additions & 7 deletions nbconvert/preprocessors/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from typing import Optional
from nbformat import NotebookNode
from nbclient import NotebookClient, execute as _execute
from nbclient.util import run_sync
# Backwards compatability for imported name
from nbclient.exceptions import CellExecutionError

Expand Down Expand Up @@ -31,6 +34,10 @@ def __init__(self, **kw):
Preprocessor.__init__(self, nb=nb, **kw)
NotebookClient.__init__(self, nb, **kw)

def _check_assign_resources(self, resources):
if resources or not hasattr(self, 'resources'):
self.resources = resources

def preprocess(self, nb, resources=None, km=None):
"""
Preprocess notebook executing each code cell.
Expand All @@ -56,11 +63,77 @@ def preprocess(self, nb, resources=None, km=None):
resources : dictionary
Additional resources used in the conversion process.
"""
# Copied from NotebookClient init :/
self.nb = nb
self.km = km
if resources:
self.resources = resources
self.reset_execution_trackers()
# This may look wrong, but preprocess acts like an init on execution state and
MSeal marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's move all of this into docstring with an important directive so that this populates through to the docs.

# we need to capture it here again to properly reset (traitlet assignments are
MSeal marked this conversation as resolved.
Show resolved Hide resolved
# not passed). The risk is if traitlets apply any side effects for dual init,
MSeal marked this conversation as resolved.
Show resolved Hide resolved
# but it should be ok. The alternative is to copy the client's init internals
MSeal marked this conversation as resolved.
Show resolved Hide resolved
# which has already gotten out of sync with nbclient 0.5 release.
NotebookClient.__init__(self, nb, km)
self._check_assign_resources(resources)
self.execute()
return nb, resources
return self.nb, self.resources

async def async_execute_cell(
self,
cell: NotebookNode,
cell_index: int,
execution_count: Optional[int] = None,
store_history: bool = False) -> NotebookNode:
"""
Executes a single code cell.

To execute all cells see :meth:`execute`.

Parameters
----------
cell : nbformat.NotebookNode
The cell which is currently being processed.
cell_index : int
The position of the cell within the notebook object.
execution_count : int
The execution count to be assigned to the cell (default: Use kernel response)
store_history : bool
Determines if history should be stored in the kernel (default: False).
Specific to ipython kernels, which can store command histories.

Returns
-------
output : dict
The execution output payload (or None for no output).

Raises
------
CellExecutionError
If execution failed and should raise an exception, this will be raised
with defaults about the failure.

Returns
-------
cell : NotebookNode
The cell which was just processed.
"""
# Copied and intercepted to allow for custom preprocess_cell contracts to be fullfilled
self.store_history = store_history
cell, resources = self.preprocess_cell(cell, self.resources, cell_index)
if execution_count:
cell['execution_count'] = execution_count
return cell, resources

def preprocess_cell(self, cell, resources, index, **kwargs):
"""
Override if you want to apply some preprocessing to each cell.
Must return modified cell and resource dictionary.

Parameters
----------
cell : NotebookNode cell
Notebook cell being processed
resources : dictionary
Additional resources used in the conversion process. Allows
preprocessors to pass variables into the Jinja engine.
index : int
Index of the cell being processed
"""
self._check_assign_resources(resources)
cell = run_sync(NotebookClient.async_execute_cell)(self, cell, index, store_history=self.store_history)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's comment this line to unpack what is happening here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do subclasses need to call super().preprocess_cell() now? (IIRC they didn't need to before; it's fine either way, although if this has changed it would be great to document the breaking change in the release notes.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MSeal ^^ I'm happy to document the response.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably best to always call super for this preprocessor now.

Technically you don't have to so long as you call something equivilent to or exactly NotebookClient.async_execute_cell, but that's fairly complex to explain or expect given the subtlety of the multi-inheritance and overrides at play.

@willingc Documenting as such would be a good idea, happy to merge if you want to make the PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MSeal You could have called NotebookClient.execute_cell here, right?
Shouldn't this cell execution be in async_execute_cell?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, possibly. I was worried any refactors on the wrapping pattern for NotebookClient.execute_cell might make it lazy, causing in infinite loop here in the future since this overwrites async_execute_cell in this child class to allow for intercepting and running preprocess_cell... basically it's a hack to allow us to intercept the the execute_cell call from nbclient and run whatever the nbconvert inheritance tree demands.

return cell, self.resources
30 changes: 27 additions & 3 deletions nbconvert/preprocessors/tests/test_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import pytest
import nbformat

from copy import deepcopy

from ..execute import ExecutePreprocessor, executenb


Expand Down Expand Up @@ -58,19 +60,41 @@ def test_basic_execution():
fname = os.path.join(os.path.dirname(__file__), 'files', 'HelloWorld.ipynb')
with open(fname) as f:
input_nb = nbformat.read(f, 4)
output_nb, _ = preprocessor.preprocess(input_nb)
output_nb, _ = preprocessor.preprocess(deepcopy(input_nb))
assert_notebooks_equal(input_nb, output_nb)


def test_executenb():
fname = os.path.join(os.path.dirname(__file__), 'files', 'HelloWorld.ipynb')
with open(fname) as f:
input_nb = nbformat.read(f, 4)
with pytest.warns(FutureWarning):
output_nb = executenb(input_nb)
output_nb = executenb(deepcopy(input_nb))
assert_notebooks_equal(input_nb, output_nb)


def test_populate_language_info():
preprocessor = ExecutePreprocessor(kernel_name="python")
nb = nbformat.v4.new_notebook() # Certainly has no language_info.
nb, _ = preprocessor.preprocess(nb, resources={})
preprocessor.preprocess(nb, resources={})
# Should mutate input
assert 'language_info' in nb.metadata # See that a basic attribute is filled in


def test_preprocess_cell():
class CellReplacer(ExecutePreprocessor):
def preprocess_cell(self, cell, resources, index, **kwargs):
cell.source = "print('Ignored')"
super().preprocess_cell(cell, resources, index, **kwargs)

preprocessor = CellReplacer()
fname = os.path.join(os.path.dirname(__file__), 'files', 'HelloWorld.ipynb')
with open(fname) as f:
input_nb = nbformat.read(f, 4)
output_nb, _ = preprocessor.preprocess(deepcopy(input_nb))
expected_nb = deepcopy(input_nb)
for cell in expected_nb.cells:
cell.source = "print('Ignored')"
for output in cell.outputs:
output.text = 'Ignored\n'
assert_notebooks_equal(expected_nb, output_nb)