Skip to content

Commit e555d27

Browse files
authored
robust notebook check script (#19)
1 parent d21b4e6 commit e555d27

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed

scripts/check-and-format-notebooks.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,48 @@
1717
def process_nb(nb: nbformat.NotebookNode, working_dir: Union[str, Path]) -> nbformat.NotebookNode:
    """Execute cells in nb using working_dir as the working directory for imports, modifying the
    notebook in place (in memory).

    Outputs already stored on code cells are discarded before execution, and adjacent stream
    outputs with the same name are merged afterwards.

    Args:
        nb: The notebook to execute; it is mutated in place.
        working_dir: Directory passed to the executor as the ``metadata.path`` resource.

    Returns:
        The same ``nb`` object that was passed in.
    """
    # Clear existing outputs before executing the notebook so nothing stale is carried over.
    for cell in nb.cells:
        if cell.cell_type == "code":
            cell.outputs = []

    ep = ExecutePreprocessor(timeout=600)
    ep.preprocess(nb, {"metadata": {"path": working_dir}})

    # Merge adjacent text outputs after executing the notebook; where the kernel splits a
    # stream into chunks is not deterministic.
    for cell in nb.cells:
        merge_adjacent_text_outputs(cell)
    return nb
2330

2431

32+
def merge_adjacent_text_outputs(cell: nbformat.NotebookNode) -> nbformat.NotebookNode:
    """Merges adjacent text stream outputs to avoid non-deterministic splits in output.

    Consecutive outputs whose ``output_type`` is ``"stream"`` and whose ``name`` matches are
    collapsed into the first output of the run by concatenating their ``text``. Any other
    output, or a stream with a different name, ends the run. Cells that are not code cells
    are returned untouched.

    Args:
        cell: The notebook cell to process; a code cell is mutated in place.

    Returns:
        The same ``cell`` object that was passed in.
    """
    if cell.cell_type != "code":
        return cell

    merged = []
    for output in cell.outputs:
        previous = merged[-1] if merged else None
        extends_previous_stream = (
            previous is not None
            and output.output_type == "stream"
            and previous.output_type == "stream"
            and previous.name == output.name
        )
        if extends_previous_stream:
            # Fold this chunk into the stream output that is already kept.
            previous.text += output.text
        else:
            merged.append(output)

    cell.outputs = merged
    return cell
60+
61+
2562
def nb_paths(root_path: Union[str, Path]) -> List[Path]:
2663
"""Fetches all .ipynb filenames that belong to subdirectories of root_path (1 level deep) with
2764
'notebooks' in the name."""
@@ -91,6 +128,7 @@ def to_results_str(fns: List[Path], nonmatching_nbs: List[Path]) -> Tuple[str, s
91128
nonmatching_nbs = []
92129
fns = notebooks if notebooks else nb_paths(root_path)
93130
for fn in fns:
131+
print(f"{'checking' if check else 'processing'} {fn}")
94132
nb = read_notebook(fn)
95133
modified_nb = deepcopy(nb)
96134
process_nb(modified_nb, root_path)

0 commit comments

Comments
 (0)