Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/reference/papermill-cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,11 @@ Command Line options
failing execution (default: forever)
--report-mode / --no-report-mode
Flag for hiding input.
--obfuscate-sensitive-parameters / --no-obfuscate-sensitive-parameters
Flag for obfuscating sensitive parameters.
--sensitive-parameter-patterns TEXT...
List of patterns for obfuscating parameter
names in notebooks. If not provided, defaults
to papermill.utils.SENSITIVE_PARAMETER_PATTERNS.
--version Flag for displaying the version.
-h, --help Show this message and exit.
6 changes: 6 additions & 0 deletions docs/usage-cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,5 +78,11 @@ options:

--report-mode / --no-report-mode
Flag for hiding input.
--obfuscate-sensitive-parameters / --no-obfuscate-sensitive-parameters
Flag for obfuscating sensitive parameters.
--sensitive-parameter-patterns TEXT...
List of patterns for obfuscating parameter
names in notebooks. If not provided, defaults
to papermill.utils.SENSITIVE_PARAMETER_PATTERNS.
--version Flag for displaying the version.
-h, --help Show this message and exit.
21 changes: 21 additions & 0 deletions papermill/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,19 @@ def print_papermill_version(ctx, param, value):
help="Time in seconds to wait for each cell before failing execution (default: forever)",
)
@click.option('--report-mode/--no-report-mode', default=False, help="Flag for hiding input.")
@click.option(
'--obfuscate-sensitive-parameters/--no-obfuscate-sensitive-parameters',
default=True,
help="Flag for obfuscating sensitive parameters.",
)
@click.option(
'--sensitive-parameter-patterns',
multiple=True,
help=(
"List of patterns for obfuscating parameter names in notebooks."
"If not provided, defaults to papermill.utils.SENSITIVE_PARAMETER_PATTERNS."
),
)
@click.option(
'--version',
is_flag=True,
Expand Down Expand Up @@ -163,6 +176,8 @@ def papermill(
start_timeout,
execution_timeout,
report_mode,
obfuscate_sensitive_parameters,
sensitive_parameter_patterns,
stdout_file,
stderr_file,
):
Expand Down Expand Up @@ -228,6 +243,10 @@ def papermill(
for name, value in parameters_raw or []:
parameters_final[name] = value

if sensitive_parameter_patterns is not None and len(sensitive_parameter_patterns) == 0:
# If the user does not provide any patterns, we should use the default patterns
sensitive_parameter_patterns = None

if help_notebook:
sys.exit(display_notebook_help(click_ctx, notebook_path, parameters_final))

Expand All @@ -248,6 +267,8 @@ def papermill(
stderr_file=stderr_file,
start_timeout=start_timeout,
report_mode=report_mode,
obfuscate_sensitive_parameters=obfuscate_sensitive_parameters,
sensitive_parameter_patterns=sensitive_parameter_patterns,
cwd=cwd,
execution_timeout=execution_timeout,
)
Expand Down
8 changes: 8 additions & 0 deletions papermill/engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,14 @@ def execute_notebook(
nb_man.cleanup_pbar()
nb_man.notebook_complete()

# Replace the source with the obfuscated content if it is in the metadata.
for cell in nb_man.nb.cells:
if cell.get('cell_type') != 'code':
continue
if 'papermill-obfuscated-source' not in cell.metadata:
continue
cell.source = cell.metadata['papermill-obfuscated-source']
del cell.metadata['papermill-obfuscated-source']
return nb_man.nb

@classmethod
Expand Down
9 changes: 9 additions & 0 deletions papermill/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def execute_notebook(
start_timeout=60,
report_mode=False,
cwd=None,
obfuscate_sensitive_parameters=True,
sensitive_parameter_patterns=None,
**engine_kwargs,
):
"""Executes a single notebook locally.
Expand Down Expand Up @@ -61,6 +63,11 @@ def execute_notebook(
Flag for whether or not to hide input.
cwd : str or Path, optional
Working directory to use when executing the notebook
obfuscate_sensitive_parameters : bool, optional
Obfuscate sensitive parameters in the notebook. Defaults to True.
sensitive_parameter_patterns : list, optional
List of parameter patterns to obfuscate in the notebook.
Defaults to `utils.SENSITIVE_PARAMETER_PATTERNS`
**kwargs
Arbitrary keyword arguments to pass to the notebook engine

Expand Down Expand Up @@ -102,6 +109,8 @@ def execute_notebook(
kernel_name=kernel_name,
language=language,
engine_name=engine_name,
obfuscate_sensitive_parameters=obfuscate_sensitive_parameters,
sensitive_parameter_patterns=sensitive_parameter_patterns,
)

nb = prepare_notebook_metadata(nb, input_path, output_path, report_mode)
Expand Down
24 changes: 22 additions & 2 deletions papermill/parameterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .iorw import read_yaml_file
from .log import logger
from .translators import translate_parameters
from .utils import find_first_tagged_cell_index
from .utils import find_first_tagged_cell_index, obfuscate_parameters


def add_builtin_parameters(parameters):
Expand Down Expand Up @@ -64,6 +64,8 @@ def parameterize_notebook(
kernel_name=None,
language=None,
engine_name=None,
obfuscate_sensitive_parameters=True,
sensitive_parameter_patterns=None,
):
"""Assigned parameters into the appropriate place in the input notebook

Expand All @@ -77,6 +79,11 @@ def parameterize_notebook(
Flag to set report mode
comment : str, optional
Comment added to the injected cell
obfuscate_sensitive_parameters : bool, optional
Obfuscate sensitive parameters in the notebook. Defaults to True.
sensitive_parameter_patterns : list, optional
List of parameter patterns to obfuscate in the notebook.
Defaults to `utils.SENSITIVE_PARAMETER_PATTERNS`
"""
# Load from a file if 'parameters' is a string.
if isinstance(parameters, str):
Expand All @@ -95,6 +102,15 @@ def parameterize_notebook(
newcell = nbformat.v4.new_code_cell(source=param_content)
newcell.metadata['tags'] = ['injected-parameters']

if obfuscate_sensitive_parameters:
obfuscated_param_content = translate_parameters(
kernel_name,
language,
obfuscate_parameters(parameters, sensitive_parameter_patterns),
comment,
)
newcell.metadata['papermill-obfuscated-source'] = obfuscated_param_content

if report_mode:
newcell.metadata['jupyter'] = newcell.get('jupyter', {})
newcell.metadata['jupyter']['source_hidden'] = True
Expand All @@ -116,6 +132,10 @@ def parameterize_notebook(
after = nb.cells

nb.cells = before + [newcell] + after
nb.metadata.papermill['parameters'] = parameters
nb.metadata.papermill['parameters'] = (
parameters
if not obfuscate_sensitive_parameters
else obfuscate_parameters(parameters, sensitive_parameter_patterns)
)

return nb
162 changes: 162 additions & 0 deletions papermill/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ class TestCLI(unittest.TestCase):
execution_timeout=None,
report_mode=False,
cwd=None,
obfuscate_sensitive_parameters=True,
sensitive_parameter_patterns=None,
stdout_file=None,
stderr_file=None,
)
Expand Down Expand Up @@ -536,3 +538,163 @@ def test_stdout_file(tmpdir):

with open(str(stdout_file)) as fp:
assert fp.read() == f"{secret}\n"


@require_papermill_installed
def test_obfuscated_output():
    """Check that a default CLI run masks the ``token`` parameter in the output notebook.

    A one-cell notebook is piped through the CLI with ``token`` and
    ``safe_text`` parameters. The saved parameters metadata and the injected
    parameters cell must show ``token`` as ``********``, while the executed
    cell's own output still contains the real value.
    """
    token_value = str(uuid.uuid4())
    kernelspec = {'name': 'python3', 'language': 'python', 'display_name': 'python3'}
    input_nb = nbformat.v4.new_notebook(
        metadata={'kernelspec': kernelspec},
        cells=[nbformat.v4.new_code_cell('print(safe_text, "=", token)')],
    )

    cli_args = ['-', '-', '-p', 'token', token_value, '-p', 'safe_text', 'Test']
    proc = papermill_cli(cli_args, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    payload = nbformat.writes(input_nb).encode('utf-8')
    stdout_data, stderr_data = proc.communicate(input=payload)

    # Nothing should have been reported on stderr.
    assert not stderr_data

    result_nb = nbformat.reads(stdout_data.decode('utf-8'), as_version=4)

    # The recorded parameters carry the mask instead of the real token.
    assert result_nb.metadata['papermill']['parameters'] == {
        'safe_text': 'Test',
        'token': '********',
    }

    assert len(result_nb.cells) == 2
    injected_cell, printing_cell = result_nb.cells

    # The injected parameters cell shows the mask as well.
    assert injected_cell.cell_type == 'code'
    assert injected_cell.source == '# Parameters\ntoken = "********"\nsafe_text = "Test"\n'

    # Output produced by the notebook's own code is left untouched.
    assert printing_cell.cell_type == 'code'
    assert len(printing_cell.outputs) == 1
    (stream,) = printing_cell.outputs
    assert stream.output_type == 'stream'
    assert stream.text == f'Test = {token_value}\n'


@require_papermill_installed
def test_disable_output_obfuscation():
    """Check that ``--no-obfuscate-sensitive-parameters`` leaves ``token`` unmasked.

    A one-cell notebook is piped through the CLI with ``token`` and
    ``safe_text`` parameters and the opt-out flag. The saved parameters
    metadata, the injected parameters cell and the executed cell's output must
    all contain the real token value.
    """
    token_value = str(uuid.uuid4())
    kernelspec = {'name': 'python3', 'language': 'python', 'display_name': 'python3'}
    input_nb = nbformat.v4.new_notebook(
        metadata={'kernelspec': kernelspec},
        cells=[nbformat.v4.new_code_cell('print(safe_text, "=", token)')],
    )

    cli_args = [
        '-',
        '-',
        '-p',
        'token',
        token_value,
        '-p',
        'safe_text',
        'Test',
        '--no-obfuscate-sensitive-parameters',
    ]
    proc = papermill_cli(cli_args, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    payload = nbformat.writes(input_nb).encode('utf-8')
    stdout_data, stderr_data = proc.communicate(input=payload)

    # Nothing should have been reported on stderr.
    assert not stderr_data

    result_nb = nbformat.reads(stdout_data.decode('utf-8'), as_version=4)

    # With obfuscation switched off the recorded parameters keep the real token.
    assert result_nb.metadata['papermill']['parameters'] == {
        'safe_text': 'Test',
        'token': token_value,
    }

    assert len(result_nb.cells) == 2
    injected_cell, printing_cell = result_nb.cells

    # The injected parameters cell must NOT be obfuscated either.
    assert injected_cell.cell_type == 'code'
    assert injected_cell.source == f'# Parameters\ntoken = "{token_value}"\nsafe_text = "Test"\n'

    # Output produced by the notebook's own code is left untouched.
    assert printing_cell.cell_type == 'code'
    assert len(printing_cell.outputs) == 1
    (stream,) = printing_cell.outputs
    assert stream.output_type == 'stream'
    assert stream.text == f'Test = {token_value}\n'


@require_papermill_installed
def test_custom_output_obfuscation():
    """Check that ``--sensitive-parameter-patterns`` decides which parameters are masked.

    A one-cell notebook is piped through the CLI with ``token`` and
    ``safe_text`` parameters and the single pattern ``safe_text``. In the
    saved metadata and the injected parameters cell ``safe_text`` must appear
    as ``********`` while ``token`` keeps its real value.
    """
    token_value = str(uuid.uuid4())
    kernelspec = {'name': 'python3', 'language': 'python', 'display_name': 'python3'}
    input_nb = nbformat.v4.new_notebook(
        metadata={'kernelspec': kernelspec},
        cells=[nbformat.v4.new_code_cell('print(safe_text, "=", token)')],
    )

    cli_args = [
        '-',
        '-',
        '-p',
        'token',
        token_value,
        '-p',
        'safe_text',
        'Test',
        '--sensitive-parameter-patterns',
        'safe_text',
    ]
    proc = papermill_cli(cli_args, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    payload = nbformat.writes(input_nb).encode('utf-8')
    stdout_data, stderr_data = proc.communicate(input=payload)

    # Nothing should have been reported on stderr.
    assert not stderr_data

    result_nb = nbformat.reads(stdout_data.decode('utf-8'), as_version=4)

    # Only 'safe_text' matches the custom pattern, so it is masked in the
    # recorded parameters while 'token' is stored as given.
    assert result_nb.metadata['papermill']['parameters'] == {
        'safe_text': '********',
        'token': token_value,
    }

    assert len(result_nb.cells) == 2
    injected_cell, printing_cell = result_nb.cells

    # The injected parameters cell follows the same custom pattern.
    assert injected_cell.cell_type == 'code'
    assert injected_cell.source == f'# Parameters\ntoken = "{token_value}"\nsafe_text = "********"\n'

    # Output produced by the notebook's own code is left untouched.
    assert printing_cell.cell_type == 'code'
    assert len(printing_cell.outputs) == 1
    (stream,) = printing_cell.outputs
    assert stream.output_type == 'stream'
    assert stream.text == f'Test = {token_value}\n'
28 changes: 28 additions & 0 deletions papermill/tests/test_parameterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,34 @@ def test_custom_comment(self):
first_line = cell_one['source'].split('\n')[0]
self.assertEqual(first_line, '# This is a custom comment')

def test_sensitive_parameters_obfuscation(self):
    """With obfuscation on, the cell keeps the raw value and its metadata holds a masked copy.

    The cell at index 1 must still contain the real ``password`` value in its
    source, and its ``papermill-obfuscated-source`` metadata entry must name
    the parameter but show ``********`` in place of the value.
    """
    params = {'msg': 'Hello', 'password': 'secret'}
    source_nb = load_notebook_node(get_notebook_path("simple_execute.ipynb"))
    parameterized = parameterize_notebook(source_nb, params, obfuscate_sensitive_parameters=True)

    injected = parameterized.cells[1]

    # The cell source itself is not masked.
    for expected in ('password', 'secret'):
        self.assertIn(expected, injected['source'])

    # The masked rendering is stored alongside it in the cell metadata.
    self.assertIn('papermill-obfuscated-source', injected['metadata'])
    masked_source = injected['metadata']['papermill-obfuscated-source']
    self.assertIn('password', masked_source)
    self.assertNotIn('secret', masked_source)
    self.assertIn('********', masked_source)

def test_sensitive_parameters_obfuscation_disabled(self):
    """With obfuscation off, no masked copy is attached to the cell at index 1.

    The cell source must contain the real ``password`` value and the cell
    metadata must not have a ``papermill-obfuscated-source`` entry.
    """
    params = {'msg': 'Hello', 'password': 'secret'}
    source_nb = load_notebook_node(get_notebook_path("simple_execute.ipynb"))
    parameterized = parameterize_notebook(source_nb, params, obfuscate_sensitive_parameters=False)

    injected = parameterized.cells[1]

    # The raw parameter name and value are both present in the source.
    for expected in ('password', 'secret'):
        self.assertIn(expected, injected['source'])

    # No obfuscated rendering was recorded.
    self.assertNotIn('papermill-obfuscated-source', injected['metadata'])


class TestBuiltinParameters(unittest.TestCase):
def test_add_builtin_parameters_keeps_provided_parameters(self):
Expand Down
Loading