Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Execute preprocess cells #1380

Merged
merged 9 commits into from
Sep 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 87 additions & 7 deletions nbconvert/preprocessors/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from typing import Optional
from nbformat import NotebookNode
from nbclient import NotebookClient, execute as _execute
from nbclient.util import run_sync
# Backwards compatability for imported name
from nbclient.exceptions import CellExecutionError

Expand Down Expand Up @@ -31,12 +34,27 @@ def __init__(self, **kw):
Preprocessor.__init__(self, nb=nb, **kw)
NotebookClient.__init__(self, nb, **kw)

def _check_assign_resources(self, resources):
if resources or not hasattr(self, 'resources'):
self.resources = resources

def preprocess(self, nb, resources=None, km=None):
"""
Preprocess notebook executing each code cell.

The input argument *nb* is modified in-place.

Note that this function recalls NotebookClient.__init__, which may look wrong.
However since the preprocess call acts line an init on exeuction state it's expected.
Therefore, we need to capture it here again to properly reset because traitlet
assignments are not passed. There is a risk if traitlets apply any side effects for
dual init.
The risk should be manageable, and this approach minimizes side-effects relative
to other alternatives.

One alternative but rejected implementation would be to copy the client's init internals
which has already gotten out of sync with nbclient 0.5 release before nbcovnert 6.0 released.

Parameters
----------
nb : NotebookNode
Expand All @@ -56,11 +74,73 @@ def preprocess(self, nb, resources=None, km=None):
resources : dictionary
Additional resources used in the conversion process.
"""
# Copied from NotebookClient init :/
self.nb = nb
self.km = km
if resources:
self.resources = resources
self.reset_execution_trackers()
NotebookClient.__init__(self, nb, km)
self._check_assign_resources(resources)
self.execute()
return nb, resources
return self.nb, self.resources

async def async_execute_cell(
self,
cell: NotebookNode,
cell_index: int,
execution_count: Optional[int] = None,
store_history: bool = False) -> NotebookNode:
"""
Executes a single code cell.

Overwrites NotebookClient's version of this method to allow for preprocess_cell calls.

Parameters
----------
cell : nbformat.NotebookNode
The cell which is currently being processed.
cell_index : int
The position of the cell within the notebook object.
execution_count : int
The execution count to be assigned to the cell (default: Use kernel response)
store_history : bool
Determines if history should be stored in the kernel (default: False).
Specific to ipython kernels, which can store command histories.

Returns
-------
output : dict
The execution output payload (or None for no output).

Raises
------
CellExecutionError
If execution failed and should raise an exception, this will be raised
with defaults about the failure.

Returns
-------
cell : NotebookNode
The cell which was just processed.
"""
# Copied and intercepted to allow for custom preprocess_cell contracts to be fullfilled
self.store_history = store_history
cell, resources = self.preprocess_cell(cell, self.resources, cell_index)
if execution_count:
cell['execution_count'] = execution_count
return cell, resources

def preprocess_cell(self, cell, resources, index, **kwargs):
"""
Override if you want to apply some preprocessing to each cell.
Must return modified cell and resource dictionary.

Parameters
----------
cell : NotebookNode cell
Notebook cell being processed
resources : dictionary
Additional resources used in the conversion process. Allows
preprocessors to pass variables into the Jinja engine.
index : int
Index of the cell being processed
"""
self._check_assign_resources(resources)
# Because nbclient is an async library, we need to wrap the parent async call to generate a syncronous version.
cell = run_sync(NotebookClient.async_execute_cell)(self, cell, index, store_history=self.store_history)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's comment this line to unpack what is happening here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do subclasses need to call super().preprocess_cell() now? (IIRC they didn't need to before; it's fine either way, although if this has changed it would be great to document the breaking change in the release notes.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MSeal ^^ I'm happy to document the response.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably best to always call super for this preprocessor now.

Technically you don't have to so long as you call something equivilent to or exactly NotebookClient.async_execute_cell, but that's fairly complex to explain or expect given the subtlety of the multi-inheritance and overrides at play.

@willingc Documenting as such would be a good idea, happy to merge if you want to make the PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MSeal You could have called NotebookClient.execute_cell here, right?
Shouldn't this cell execution be in async_execute_cell?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, possibly. I was worried any refactors on the wrapping pattern for NotebookClient.execute_cell might make it lazy, causing in infinite loop here in the future since this overwrites async_execute_cell in this child class to allow for intercepting and running preprocess_cell... basically it's a hack to allow us to intercept the the execute_cell call from nbclient and run whatever the nbconvert inheritance tree demands.

return cell, self.resources
30 changes: 27 additions & 3 deletions nbconvert/preprocessors/tests/test_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import pytest
import nbformat

from copy import deepcopy

from ..execute import ExecutePreprocessor, executenb


Expand Down Expand Up @@ -58,19 +60,41 @@ def test_basic_execution():
fname = os.path.join(os.path.dirname(__file__), 'files', 'HelloWorld.ipynb')
with open(fname) as f:
input_nb = nbformat.read(f, 4)
output_nb, _ = preprocessor.preprocess(input_nb)
output_nb, _ = preprocessor.preprocess(deepcopy(input_nb))
assert_notebooks_equal(input_nb, output_nb)


def test_executenb():
fname = os.path.join(os.path.dirname(__file__), 'files', 'HelloWorld.ipynb')
with open(fname) as f:
input_nb = nbformat.read(f, 4)
with pytest.warns(FutureWarning):
output_nb = executenb(input_nb)
output_nb = executenb(deepcopy(input_nb))
assert_notebooks_equal(input_nb, output_nb)


def test_populate_language_info():
preprocessor = ExecutePreprocessor(kernel_name="python")
nb = nbformat.v4.new_notebook() # Certainly has no language_info.
nb, _ = preprocessor.preprocess(nb, resources={})
preprocessor.preprocess(nb, resources={})
# Should mutate input
assert 'language_info' in nb.metadata # See that a basic attribute is filled in


def test_preprocess_cell():
class CellReplacer(ExecutePreprocessor):
def preprocess_cell(self, cell, resources, index, **kwargs):
cell.source = "print('Ignored')"
return super().preprocess_cell(cell, resources, index, **kwargs)

preprocessor = CellReplacer()
fname = os.path.join(os.path.dirname(__file__), 'files', 'HelloWorld.ipynb')
with open(fname) as f:
input_nb = nbformat.read(f, 4)
output_nb, _ = preprocessor.preprocess(deepcopy(input_nb))
expected_nb = deepcopy(input_nb)
for cell in expected_nb.cells:
cell.source = "print('Ignored')"
for output in cell.outputs:
output.text = 'Ignored\n'
assert_notebooks_equal(expected_nb, output_nb)