|
def process_nb(nb: nbformat.NotebookNode, working_dir: Union[str, Path]) -> nbformat.NotebookNode:
    """Run the cells of nb with working_dir as the working directory for imports.

    The notebook object is updated in memory and the same object is returned.
    """
    # Start from a clean slate: drop whatever outputs the code cells already carry.
    code_cells = [candidate for candidate in nb.cells if candidate.cell_type == "code"]
    for code_cell in code_cells:
        code_cell.outputs = []

    executor = ExecutePreprocessor(timeout=600)
    resources = {"metadata": {"path": working_dir}}
    executor.preprocess(nb, resources)

    # Collapse adjacent stream outputs once execution has finished.
    for executed_cell in nb.cells:
        merge_adjacent_text_outputs(executed_cell)

    return nb
23 | 30 |
|
24 | 31 |
|
def merge_adjacent_text_outputs(cell: nbformat.NotebookNode) -> nbformat.NotebookNode:
    """Merges adjacent text stream outputs to avoid non-deterministic splits in output.

    Consecutive outputs whose output_type is "stream" and whose name is equal are collapsed into
    the first output of the run, with their texts concatenated in order. Any other output ends
    the current run and is kept unchanged. A cell whose cell_type is not "code" is returned
    untouched. The cell is modified in place and is also returned.

    NOTE(review): joining assumes each stream output's text is a single string -- confirm that no
    caller passes the list-of-lines form.
    """
    if cell.cell_type != "code":
        return cell

    merged = []
    # Text chunks of the stream run whose first output is merged[-1]; empty when no run is open.
    run_texts = []

    for output in cell.outputs:
        is_stream = output.output_type == "stream"
        if is_stream and run_texts and merged[-1].name == output.name:
            # Same stream as the open run: remember the chunk, drop the separate output.
            run_texts.append(output.text)
            continue

        # The open run (if any) ends here. Join once rather than growing the text with repeated
        # "+=", which re-copies the accumulated string for every chunk.
        if len(run_texts) > 1:
            merged[-1].text = "".join(run_texts)
        run_texts = [output.text] if is_stream else []
        merged.append(output)

    if len(run_texts) > 1:
        merged[-1].text = "".join(run_texts)

    cell.outputs = merged
    return cell
| 60 | + |
| 61 | + |
25 | 62 | def nb_paths(root_path: Union[str, Path]) -> List[Path]: |
26 | 63 | """Fetches all .ipynb filenames that belong to subdirectories of root_path (1 level deep) with |
27 | 64 | 'notebooks' in the name.""" |
@@ -91,6 +128,7 @@ def to_results_str(fns: List[Path], nonmatching_nbs: List[Path]) -> Tuple[str, s |
91 | 128 | nonmatching_nbs = [] |
92 | 129 | fns = notebooks if notebooks else nb_paths(root_path) |
93 | 130 | for fn in fns: |
| 131 | + print(f"{'checking' if check else 'processing'} {fn}") |
94 | 132 | nb = read_notebook(fn) |
95 | 133 | modified_nb = deepcopy(nb) |
96 | 134 | process_nb(modified_nb, root_path) |
|
0 commit comments