jupyter · HaudinFlorence · Sep 16, 2024 · Sep 16, 2024 · Sep 16, 2024 · Sep 17, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1634,6 +1634,7 @@ raw template
 {%- endblock in_prompt -%}
     """
 
+
 exporter_attr = AttrExporter()
 output_attr, _ = exporter_attr.from_notebook_node(nb)
 assert "raw template" in output_attr

diff --git a/nbconvert/exporters/html.py b/nbconvert/exporters/html.py
@@ -27,7 +27,11 @@
 from nbformat import NotebookNode
 
 from nbconvert.filters.highlight import Highlight2HTML
-from nbconvert.filters.markdown_mistune import IPythonRenderer, MarkdownWithMath
+from nbconvert.filters.markdown_mistune import (
+    IPythonRenderer,
+    MarkdownWithMath,
+    extract_titles_from_notebook_node,
+)
 from nbconvert.filters.widgetsdatatypefilter import WidgetsDataTypeFilter
 from nbconvert.utils.iso639_1 import iso639_1
 
@@ -355,6 +359,7 @@ def resources_include_url(name):
             return markupsafe.Markup(src)
 
         resources = super()._init_resources(resources)
+
         resources["theme"] = self.theme
         resources["include_css"] = resources_include_css
         resources["include_lab_theme"] = resources_include_lab_theme
@@ -370,4 +375,5 @@ def resources_include_url(name):
         resources["should_sanitize_html"] = self.sanitize_html
         resources["language_code"] = self.language_code
         resources["should_not_encode_svg"] = self.skip_svg_encoding
+        resources["extract_titles_from_nodebook_node"] = extract_titles_from_notebook_node
         return resources
diff --git a/nbconvert/exporters/templateexporter.py b/nbconvert/exporters/templateexporter.py
@@ -207,6 +207,9 @@ def default_config(self):
     enable_async = Bool(False, help="Enable Jinja async template execution").tag(
         affects_environment=True
     )
+    include_tableofcontents = Bool(
+        False, allow_none=True, help="Enable to include a table of contents"
+    ).tag(config=True, affects_template=True)
 
     _last_template_file = ""
     _raw_template_key = "<memory>"
@@ -684,4 +687,5 @@ def get_prefix_root_dirs(self):
     def _init_resources(self, resources):
         resources = super()._init_resources(resources)
         resources["deprecated"] = deprecated
+        resources["include_tableofcontents"] = self.include_tableofcontents
         return resources
diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
@@ -12,6 +12,8 @@
 from typing import TYPE_CHECKING, Any, ClassVar, Dict, Iterable, Match, Optional, Protocol, Tuple
 
 import bs4
+import mistune
+from nbformat import NotebookNode
 from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexer import Lexer
@@ -62,7 +64,7 @@ def __call__(self, markdown: "Markdown") -> None:
     MISTUNE_V3_ATX = False
 
     def import_plugin(name: str) -> "Plugin":  # type: ignore[misc]
-        """Simple implementation of Mistune V3's import_plugin for V2."""
+        """Simple implementation of Mistune V3's import_plugin for V2."""
         return PLUGINS[name]  # type: ignore[no-any-return]
 
 
@@ -71,7 +73,7 @@ class InvalidNotebook(Exception):
 
 
 def _dotall(pattern: str) -> str:
-    """Makes the '.' special character match any character inside the pattern, including a newline.
+    """Makes the "." special character match any character inside the pattern, including a newline.
 
     This is implemented with the inline flag `(?s:...)` and is equivalent to using `re.DOTALL`.
     It is useful for LaTeX environments, where line breaks may be present.
@@ -86,7 +88,7 @@ class MathBlockParser(BlockParser):
         order to avoid other block level rules splitting math sections apart.
 
         It works by matching each multiline math environment as a single paragraph,
-        so that other rules don't think each section is its own paragraph. Inline
+        so that other rules don't think each section is its own paragraph. Inline
         is ignored here.
         """
 
@@ -214,7 +216,7 @@ class MathBlockParser(BlockParser):  # type: ignore[no-redef]
             re.DOTALL,
         )
 
-        # Regex for header that doesn't require space after '#'
+        # Regex for header that doesn't require space after "#"
         AXT_HEADING = re.compile(r" {0,3}(#{1,6})(?!#+)(?: *\n+|([^\n]*?)(?:\n+|\s+?#+\s*\n+))")
 
         # Multiline math must be searched before other rules
@@ -255,7 +257,7 @@ class MathInlineParser(InlineParser):  # type: ignore[no-redef]
 
         def parse_block_math_tex(self, m: Match[str], state: Any) -> Tuple[str, str]:
             """Parse block text math."""
-            # sometimes the Scanner keeps the final '$$', so we use the
+            # sometimes the Scanner keeps the final "$$", so we use the
             # full matched string and remove the math markers
             text = m.group(0)[2:-2]
             return "block_math", text
@@ -450,7 +452,7 @@ def _html_embed_images(self, html: str) -> str:
         parsed_html = bs4.BeautifulSoup(html, features="html.parser")
         imgs: bs4.ResultSet[bs4.Tag] = parsed_html.find_all("img")
 
-        # Replace img tags's sources by base64 dataurls
+        # Replace the img tags' sources by base64 data URLs
         for img in imgs:
             src = img.attrs.get("src")
             if src is None:
@@ -476,7 +478,7 @@ class MarkdownWithMath(Markdown):
         "def_list",
     )
 
-    def __init__(
+    def nb__(
         self,
         renderer: HTMLRenderer,
         block: Optional[BlockParser] = None,
@@ -504,3 +506,34 @@ def render(self, source: str) -> str:
 def markdown2html_mistune(source: str) -> str:
     """Convert a markdown string to HTML using mistune"""
     return MarkdownWithMath(renderer=IPythonRenderer(escape=False)).render(source)
+
+
+def extract_titles_from_notebook_node(nb: NotebookNode):
+    """Collect the headings found in the markdown cells of a notebook.
+
+    Every markdown cell of ``nb`` is rendered with ``mistune.html`` and the ``h1``-``h6``
+    tags are read back from the combined HTML. The result is a list with one
+    ``[heading_html, level, href]`` entry per heading, where ``href`` is ``"#"`` followed
+    by the heading text with surrounding whitespace removed and spaces turned into ``-``.
+    """
+    html_chunks = []
+    for cell in nb.cells:
+        if cell.cell_type != "markdown":
+            continue
+        rendered = mistune.html(cell.source)
+        if isinstance(rendered, list):
+            # Non-string renderer output: stringify the items, one per line.
+            rendered = "\n".join(str(item) for item in rendered)
+        if isinstance(rendered, str):
+            html_chunks.append(rendered + "\n")
+
+    # Parse once, after joining, instead of growing a string cell by cell.
+    soup = bs4.BeautifulSoup("".join(html_chunks), "html.parser")
+    titles_array = []
+    for heading in soup.select("h1, h2, h3, h4, h5, h6"):
+        header_id = heading.get_text().strip().replace(" ", "-")
+        # Set the id before serialising so the returned HTML carries the anchor.
+        heading["id"] = header_id
+        # heading.name is "h1".."h6"; its second character is the level.
+        titles_array.append([str(heading), int(heading.name[1]), "#" + header_id])
+    return titles_array
diff --git a/nbconvert/nbconvertapp.py b/nbconvert/nbconvertapp.py
@@ -186,6 +186,10 @@ def validate(self, obj, value):
             },
             """Whether the HTML in Markdown cells and cell outputs should be sanitized..""",
         ),
+        "toc": (
+            {"TemplateExporter": {"include_tableofcontents": True}},
+            "Generate a table of contents in the output (only compatible with HTML and Latex exporters)",
+        ),
     }
 )
 
@@ -675,5 +679,6 @@ def _default_export_format(self):
 # Main entry point
 # -----------------------------------------------------------------------------
 
+
 main = launch_new_instance = NbConvertApp.launch_instance
 dejavu_main = DejavuApp.launch_instance
diff --git a/share/templates/lab/base.html.j2 b/share/templates/lab/base.html.j2
@@ -2,6 +2,7 @@
 {% from 'celltags.j2' import celltags %}
 {% from 'cell_id_anchor.j2' import cell_id_anchor %}
 
+
 {% block codecell %}
 {%- if not cell.outputs -%}
 {%- set no_output_class="jp-mod-noOutputs" -%}

diff --git a/share/templates/lab/index.html.j2 b/share/templates/lab/index.html.j2
@@ -100,6 +100,56 @@ a.anchor-link {
     display: block;
   }
 }
+/* Table of Contents for the html exporter */
+.jp-RenderedHTMLTOC-Title {
+  font-family: var(--jp-content-font-family);
+  font-size: 14px;
+  margin: 16px 0;
+  padding-left: 64px;
+  font-weight: bold;
+}
+
+.jp-RenderedHTMLTOC-Item-h1 {
+  font-family: var(--jp-content-font-family);
+  font-size: 14px;
+  margin: 0;
+  padding-left: 88px;
+}
+
+.jp-RenderedHTMLTOC-Item-h2 {
+  font-family: var(--jp-content-font-family);
+  font-size: 12px;
+  margin: 4px;
+  padding-left: 112px;
+}
+
+.jp-RenderedHTMLTOC-Item-h3 {
+  font-family: var(--jp-content-font-family);
+  font-size:10px;
+  margin: 4px;
+  padding-left: 136px;
+}
+
+.jp-RenderedHTMLTOC-Item-h4 {
+  font-family: var(--jp-content-font-family);
+  font-size: 8px;
+  margin: 4px;
+  padding-left: 160px;
+}
+
+.jp-RenderedHTMLTOC-Item-h5 {
+  font-family: var(--jp-content-font-family);
+  font-size: 7px;
+  margin: 4px;
+  padding-left: 184px;
+}
+
+.jp-RenderedHTMLTOC-Item-h6 {
+  font-family: var(--jp-content-font-family);
+  font-size: 6px;
+  margin: 2px;
+  padding-left: 208px;
+}
 </style>
 
 {% endblock notebook_css %}
@@ -126,6 +176,34 @@ a.anchor-link {
 <body class="jp-Notebook" data-jp-theme-light="true" data-jp-theme-name="JupyterLab Light">
 {% endif %}
 <main>
+{%- block tableofcontents -%}
+{#-
+  Optional table of contents for the HTML export, rendered only when
+  resources.include_tableofcontents is truthy. Each item returned by the
+  helper is unpacked as (header, level, href): header is the heading's HTML,
+  level is the heading depth (it selects the jp-RenderedHTMLTOC-Item-hN
+  rule) and href is the "#..." anchor the entry links to.
+  NOTE: the resources key is spelled "nodebook" (sic); it has to stay
+  identical to the key under which the exporter registers the helper.
+  NOTE(review): header is emitted through "safe", i.e. without escaping;
+  confirm that is acceptable when HTML sanitization is requested.
+-#}
+{%- if resources.include_tableofcontents -%}
+{%- set tableofcontents= resources.extract_titles_from_nodebook_node(nb) -%}
+<div class="jp-RenderedHTMLTOC-Title">Table of contents</div>
+{%- for item in tableofcontents -%}
+{%- set (header, level, href) = item -%}
+{#- level is expected to be 1-6 here, so it is interpolated into the class
+    name directly. href is quoted and escaped so that an anchor containing
+    ">", quotes or whitespace cannot end the attribute early. -#}
+<div class="jp-RenderedHTMLCommon jp-RenderedHTMLTOC-Item-h{{ level }}">
+<a href="{{ href | e }}">
+{{header | safe}}
+</a>
+</div>
+{%- endfor -%}
+{%- endif -%}
+{% endblock tableofcontents %}
 {%- endblock body_header -%}
 
 {% block body_footer %}

diff --git a/share/templates/latex/base.tex.j2 b/share/templates/latex/base.tex.j2
@@ -232,11 +232,17 @@ override this.-=))
 ((* endblock header *))
 
 ((* block body *))
+
 \begin{document}
     ((* block predoc *))
     ((* block maketitle *))\maketitle((* endblock maketitle *))
     ((* block abstract *))((* endblock abstract *))
     ((* endblock predoc *))
+    ((* block tableofcontents *))
+      ((* if resources.include_tableofcontents *))((= emit the TOC only when resources.include_tableofcontents is set =))
+        \tableofcontents
+      ((* endif *))
+    ((* endblock tableofcontents *))
 
     ((( super() )))