Skip to content

Commit 748a286

Browse files
committed
Allow up to 6 levels of headers in the table of content and optional ignoring of h1 headers
Any number of levels is supported, but since HTML officially defines only 6 heading levels (`h1` ... `h6`), an artificial limit was added. This change affects two options: `toc_level` and `ordered_chapter_level`. Both can be set to a value between 0 and 6 (inclusive), where 0 means the feature is disabled. If either option is set to a value greater than 6, then 6 levels of headers are processed and a warning is emitted. This change also adds the option `ignore_top_header` to exclude h1 headers from the table of contents and numbering.
1 parent ccf20e5 commit 748a286

File tree

3 files changed

+122
-82
lines changed

3 files changed

+122
-82
lines changed

README.md

+10-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ This plugin is inspired by [MkDocs PDF Export Plugin][mkdocs-pdf-export-plugin].
1111
## Features
1212

1313
* Cover and Table of Contents integrated in the PDF
14-
* Automatically numbers on heading(h1-h3).
14+
* Automatically numbers headings (h1-h6).
1515
* Shift down sub-page headings level.
1616
* using [WeasyPrint][weasyprint].
1717

@@ -80,6 +80,7 @@ plugins:
8080
#excludes_children:
8181
# - 'release-notes/:upgrading'
8282
# - 'release-notes/:changelog'
83+
#ignore_top_header: false
8384
#
8485
#exclude_pages:
8586
# - 'bugs/'
@@ -181,19 +182,25 @@ plugins:
181182
182183
* `toc_level`
183184
184-
Set the level of _Table of Content_. This value is enabled in the range of from `1` to `3`.
185+
Set the level of _Table of Content_. Valid values range from `1` to `6`.
185186
**default**: `3`
186187
187188
* `ordered_chapter_level`
188189
189-
Set the level of heading number addition. This value is enabled in the range of from `1` to `3`.
190+
Set the level of heading number addition. Valid values range from `1` to `6`.
190191
**default**: `3`
191192
192193
* `excludes_children`
193194
194195
Set the page `id` of `nav` url. If the `id` matches in this list, it will be excluded from the heading number addition and table of contents.
195196
**default**: `[]`
196197
198+
* `ignore_top_header`
199+
200+
Set this value to `true` to exclude `h1` headers from being numbered (`ordered_chapter_level`) and
201+
included in the _Table of Content_ (`toc_level`).
202+
**default**: `false`
203+
197204
##### for Page
198205
199206
* `exclude_pages`

mkdocs_with_pdf/options.py

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class Options(object):
3535
('toc_level', config_options.Type(int, default=2)),
3636
('ordered_chapter_level', config_options.Type(int, default=3)),
3737
('excludes_children', config_options.Type(list, default=[])),
38+
('ignore_top_header', config_options.Type(bool, default=False)),
3839

3940
('exclude_pages', config_options.Type(list, default=[])),
4041
('convert_iframe', config_options.Type(list, default=[])),
@@ -81,6 +82,7 @@ def __init__(self, local_config, config, logger: logging):
8182
self.toc_level = local_config['toc_level']
8283
self.ordered_chapter_level = local_config['ordered_chapter_level']
8384
self.excludes_children = local_config['excludes_children']
85+
self.ignore_top_header = local_config['ignore_top_header']
8486

8587
# Page
8688
self.exclude_pages = local_config['exclude_pages']

mkdocs_with_pdf/toc.py

+110-79
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,44 @@
1-
from bs4 import PageElement, Tag
1+
from dataclasses import dataclass, field
2+
from typing import Any, List, Tuple
3+
from bs4 import BeautifulSoup, Tag
24

35
from .options import Options
46
from .utils.soup_util import clone_element
57

68

7-
def make_indexes(soup: PageElement, options: Options) -> None:
9+
_MAX_HEADER_LEVEL = 6 # <h1> ... <h6>
10+
11+
12+
@dataclass
13+
class _HeaderTree:
14+
""" Normalized tree of document headers. Missed levels have `element` set to `None` """
15+
element: Tag | None
16+
subheaders: List['_HeaderTree'] = field(default_factory=list)
17+
18+
19+
def make_indexes(soup: BeautifulSoup, options: Options) -> None:
820
""" Generate ordered chapter number and TOC of document.
921
1022
Arguments:
1123
soup {BeautifulSoup} -- DOM object of Document.
1224
options {Options} -- The options of this sequence.
1325
"""
1426

15-
# Step 1: (re)ordered headdings
27+
# Step 1: (re)ordered headings
1628
_inject_heading_order(soup, options)
1729

1830
# Step 2: generate toc page
19-
level = options.toc_level
20-
if level < 1 or level > 3:
31+
start_level = 1 if options.ignore_top_header else 0
32+
stop_level = options.toc_level
33+
if stop_level <= start_level:
2134
return
35+
if stop_level > _MAX_HEADER_LEVEL:
36+
options.logger.warning(f'Ignore `toc_level` value {stop_level}. Use max possible {_MAX_HEADER_LEVEL} instead')
37+
stop_level = _MAX_HEADER_LEVEL
2238

23-
options.logger.info(
24-
f'Generate a table of contents up to heading level {level}.')
39+
options.logger.info(f'Generate a table of contents from h{start_level + 1} to h{stop_level}')
2540

26-
h1li = None
27-
h2ul = h2li = h3ul = None
28-
exclude_lv2 = exclude_lv3 = False
29-
30-
def makeLink(h: Tag) -> Tag:
41+
def make_link(h: Tag) -> Tag:
3142
li = soup.new_tag('li')
3243
ref = h.get('id', '')
3344
a = soup.new_tag('a', href=f'#{ref}')
@@ -40,98 +51,118 @@ def makeLink(h: Tag) -> Tag:
4051
options.logger.debug(f"| [{h.get_text(separator=' ')}]({ref})")
4152
return li
4253

54+
def create_toc(headers: List[_HeaderTree], parent: Tag):
55+
ul_tag = soup.new_tag('ul')
56+
parent.append(ul_tag)
57+
for header in headers:
58+
if header.element is not None:
59+
link_tag = make_link(header.element)
60+
else:
61+
options.logger.warning(f'Adding missed header to TOC')
62+
link_tag = soup.new_tag('li')
63+
ul_tag.append(link_tag)
64+
if len(header.subheaders) > 0:
65+
create_toc(header.subheaders, link_tag)
66+
67+
top_headers = _collect_headers(soup, options, start_level, stop_level)
68+
4369
toc = soup.new_tag('article', id='doc-toc')
4470
title = soup.new_tag('h1')
4571
title.append(soup.new_string(options.toc_title))
4672
toc.append(title)
4773

48-
h1ul = soup.new_tag('ul')
49-
toc.append(h1ul)
50-
51-
headings = soup.find_all(['h1', 'h2', 'h3'])
52-
for h in headings:
74+
create_toc(top_headers, toc)
75+
soup.body.insert(0, toc)
5376

54-
if h.name == 'h1':
5577

56-
h1li = makeLink(h)
57-
h1ul.append(h1li)
58-
h2ul = h2li = h3ul = None
78+
def _set_list_elements(l: List[Any], value: Any, start: int, end: int | None = None) -> None:
79+
for i in range(start, end if end is not None else len(l)):
80+
l[i] = value
5981

60-
exclude_lv2 = _is_exclude(h.get('id', None), options)
6182

62-
elif not exclude_lv2 and h.name == 'h2' and level >= 2:
83+
def _collect_headers(soup: BeautifulSoup, options: Options, start_level: int, stop_level: int) -> List[_HeaderTree]:
84+
"""Collect document headers.
85+
Retuns a list of top headers with their subheaders
86+
Levels are counted from zero i.e. zero level corresponds to h1
87+
"""
88+
assert 0 <= start_level < stop_level
89+
assert 0 < stop_level <= _MAX_HEADER_LEVEL
6390

64-
if not h2ul:
65-
h2ul = soup.new_tag('ul')
66-
h1li.append(h2ul)
67-
h2li = makeLink(h)
68-
h2ul.append(h2li)
69-
h3ul = None
91+
top_headers: List[_HeaderTree] = []
7092

71-
exclude_lv3 = _is_exclude(h.get('id', None), options)
93+
header_levels: List[_HeaderTree | None] = [None] * stop_level
94+
exclude_levels: List[bool] = [False] * stop_level
7295

73-
elif not exclude_lv2 and not exclude_lv3 \
74-
and h.name == 'h3' and level >= 3:
96+
html_headers = soup.find_all([f'h{i + 1}' for i in range(start_level, stop_level)])
97+
for h in html_headers:
98+
level = int(h.name[1:]) - 1
7599

76-
if not h2li:
77-
continue
78-
if not h3ul:
79-
h3ul = soup.new_tag('ul')
80-
h2li.append(h3ul)
81-
h3li = makeLink(h)
82-
h3ul.append(h3li)
100+
exclude_levels[level] = _is_exclude(h.get('id', None), options)
101+
_set_list_elements(exclude_levels, False, level + 1)
83102

84-
else:
103+
if any(exclude_levels[:level]):
85104
continue
86-
pass
87-
88-
soup.body.insert(0, toc)
89105

106+
header = _HeaderTree(h)
90107

91-
def _inject_heading_order(soup: Tag, options: Options):
92-
93-
level = options.ordered_chapter_level
94-
if level < 1 or level > 3:
95-
return
96-
97-
options.logger.info(f'Number headings up to level {level}.')
98-
99-
h1n = h2n = h3n = 0
100-
exclude_lv2 = exclude_lv3 = False
101-
102-
headings = soup.find_all(['h1', 'h2', 'h3'])
103-
for h in headings:
104-
105-
if h.name == 'h1':
106-
107-
h1n += 1
108-
h2n = h3n = 0
109-
prefix = f'{h1n}. '
110-
111-
exclude_lv2 = _is_exclude(h.get('id', None), options)
108+
if level == start_level:
109+
top_headers.append(header)
110+
else:
111+
parent_header = header_levels[level - 1]
112+
if parent_header is None:
113+
# Add skipped levels
114+
for i in range(start_level, level):
115+
if header_levels[i] is not None:
116+
continue
112117

113-
elif not exclude_lv2 and h.name == 'h2' and level >= 2:
118+
missed_header = _HeaderTree(None)
119+
if i == start_level:
120+
top_headers.append(missed_header)
121+
else:
122+
parent_header = header_levels[i - 1]
123+
assert parent_header is not None
124+
parent_header.subheaders.append(missed_header)
125+
header_levels[i] = missed_header
114126

115-
h2n += 1
116-
h3n = 0
117-
prefix = f'{h1n}.{h2n} '
127+
parent_header = header_levels[level - 1]
118128

119-
exclude_lv3 = _is_exclude(h.get('id', None), options)
129+
assert parent_header is not None
130+
parent_header.subheaders.append(header)
120131

121-
elif not exclude_lv2 and not exclude_lv3 \
122-
and h.name == 'h3' and level >= 3:
132+
header_levels[level] = header
133+
_set_list_elements(header_levels, None, level + 1)
123134

124-
h3n += 1
125-
prefix = f'{h1n}.{h2n}.{h3n} '
135+
return top_headers
126136

127-
else:
128-
continue
129137

130-
options.logger.debug(f"| [{prefix} {h.text}]({h.get('id', '(none)')})")
138+
def _inject_heading_order(soup: BeautifulSoup, options: Options) -> None:
139+
start_level = 1 if options.ignore_top_header else 0
140+
stop_level = options.ordered_chapter_level
141+
if stop_level <= start_level:
142+
return
143+
if stop_level > _MAX_HEADER_LEVEL:
144+
options.logger.warning(f'Ignore `ordered_chapter_level` value {stop_level}. Use max possible {_MAX_HEADER_LEVEL} instead')
145+
stop_level = _MAX_HEADER_LEVEL
146+
147+
options.logger.info(f'Number headers from h{start_level + 1} to h{stop_level}')
148+
149+
def inject_order(headers: List[_HeaderTree], numbers_prefix: List[int] = []):
150+
assert len(numbers_prefix) < _MAX_HEADER_LEVEL
151+
for i, header in enumerate(headers):
152+
prefix = numbers_prefix + [i + 1]
153+
prefix_str = '.'.join(str(n) for n in prefix)
154+
if header.element is not None:
155+
options.logger.debug(f"| [{prefix_str} {header.element}]({header.element.get('id', '(none)')})")
156+
nm_tag = soup.new_tag('span', **{'class': 'pdf-order'})
157+
nm_tag.append(prefix_str + ' ')
158+
header.element.insert(0, nm_tag)
159+
else:
160+
options.logger.warning(f'Assigned number for a missed header {prefix_str}')
161+
if len(header.subheaders) > 0:
162+
inject_order(header.subheaders, prefix)
131163

132-
nm_tag = soup.new_tag('span', **{'class': 'pdf-order'})
133-
nm_tag.append(prefix)
134-
h.insert(0, nm_tag)
164+
top_headers = _collect_headers(soup, options, start_level, stop_level)
165+
inject_order(top_headers)
135166

136167

137168
def _is_exclude(url: str, options: Options) -> bool:

0 commit comments

Comments
 (0)