|
14 | 14 | outputs-container holds an error, then decompose the parent |
15 | 15 | div[data-name="outputs-container"] so nothing is left behind. |
16 | 16 |
|
| 17 | +JB2/MyST flattens output into slug-based directories rather than mirroring |
| 18 | +the input path structure. We therefore scan every index.html under the |
| 19 | +build output tree instead of constructing paths from materials.yml. |
| 20 | +
|
17 | 21 | Run as: python parse_html_for_errors_v2.py student |
18 | 22 | """ |
19 | 23 |
|
20 | 24 | import os |
21 | 25 | import sys |
22 | | -import yaml |
23 | 26 | from bs4 import BeautifulSoup |
24 | 27 |
|
25 | | -ARG = sys.argv[1] # "student" or "instructor" |
| 28 | +sys.argv[1] # "student" or "instructor" — required (IndexError if omitted) but its value is not used (kept for compat) |
26 | 29 |
|
27 | 30 | ERROR_STRINGS = ["NotImplementedError", "NameError"] |
28 | 31 |
|
| 32 | +HTML_ROOT = "book/_build/html" |
29 | 33 |
|
30 | | -def main(): |
31 | | - with open("tutorials/materials.yml") as fh: |
32 | | - materials = yaml.load(fh, Loader=yaml.FullLoader) |
33 | 34 |
|
34 | | - html_directory = "book/_build/html/" |
| 35 | +def main(): |
35 | 36 | total_removed = 0 |
36 | | - |
37 | | - for m in materials: |
38 | | - name = f"{m['day']}_{''.join(m['name'].split())}" |
39 | | - |
40 | | - notebook_paths = [] |
41 | | - if os.path.exists(f"tutorials/{name}/{m['day']}_Intro.ipynb"): |
42 | | - notebook_paths.append( |
43 | | - f"{html_directory}/tutorials/{name}/{ARG}/{m['day']}_Intro.html" |
44 | | - ) |
45 | | - notebook_paths += [ |
46 | | - f"{html_directory}/tutorials/{name}/{ARG}/{m['day']}_Tutorial{i + 1}.html" |
47 | | - for i in range(m["tutorials"]) |
48 | | - ] |
49 | | - if os.path.exists(f"tutorials/{name}/{m['day']}_Outro.ipynb"): |
50 | | - notebook_paths.append( |
51 | | - f"{html_directory}/tutorials/{name}/{ARG}/{m['day']}_Outro.html" |
52 | | - ) |
53 | | - |
54 | | - for html_path in notebook_paths: |
55 | | - if not os.path.exists(html_path): |
56 | | - print(f" Warning: {html_path} not found, skipping") |
| 37 | + files_touched = 0 |
| 38 | + |
| 39 | + if not os.path.isdir(HTML_ROOT): |
| 40 | + print( |
| 41 | + f"ERROR: HTML output directory not found: {HTML_ROOT!r} (cwd={os.getcwd()!r})" |
| 42 | + ) |
| 43 | + sys.exit(1) |
| 44 | + |
| 45 | + all_index_files = [] |
| 46 | + for dirpath, _dirnames, filenames in os.walk(HTML_ROOT): |
| 47 | + for fname in filenames: |
| 48 | + if fname == "index.html": |
| 49 | + all_index_files.append(os.path.join(dirpath, fname)) |
| 50 | + print(f"Found {len(all_index_files)} index.html files under {HTML_ROOT}") |
| 51 | + |
| 52 | + for dirpath, _dirnames, filenames in os.walk(HTML_ROOT): |
| 53 | + for fname in filenames: |
| 54 | + if fname != "index.html": |
57 | 55 | continue |
| 56 | + html_path = os.path.join(dirpath, fname) |
58 | 57 |
|
59 | 58 | with open(html_path, encoding="utf-8") as f: |
60 | 59 | contents = f.read() |
61 | 60 |
|
62 | 61 | parsed_html = BeautifulSoup(contents, features="html.parser") |
63 | 62 | removed = strip_error_divs(parsed_html) |
64 | | - total_removed += removed |
65 | 63 |
|
66 | 64 | # Put solution figures in center (matches JB1 behaviour) |
67 | 65 | for img in parsed_html.find_all("img", alt=True): |
68 | 66 | if img["alt"] == "Solution hint": |
69 | 67 | img["align"] = "center" |
70 | 68 | img["class"] = "align-center" |
71 | 69 |
|
72 | | - with open(html_path, "w", encoding="utf-8") as f: |
73 | | - f.write(str(parsed_html)) |
74 | | - |
75 | 70 | if removed: |
76 | | - print( |
77 | | - f" Stripped {removed} error div(s) from {os.path.basename(html_path)}" |
78 | | - ) |
79 | | - |
80 | | - print(f"Done. Removed {total_removed} error output div(s) total.") |
| 71 | + total_removed += removed |
| 72 | + files_touched += 1 |
| 73 | + with open(html_path, "w", encoding="utf-8") as f: |
| 74 | + f.write(str(parsed_html)) |
| 75 | + print(f" Stripped {removed} error div(s) from {html_path}") |
| 76 | + |
| 77 | + print( |
| 78 | + f"Done. Removed {total_removed} error output div(s) from {files_touched} file(s)." |
| 79 | + ) |
81 | 80 |
|
82 | 81 |
|
83 | 82 | def strip_error_divs(parsed_html): |
|
0 commit comments