Skip to content

Commit bdcce30

Browse files
katrinabrock, lcougnaud, yihui
authored
Allow split_by to accept numeric values (#1490)
Fixed version of #1346.
Co-authored-by: Laure Cougnaud <[email protected]>
Co-authored-by: Yihui Xie <[email protected]>
1 parent f7b9271 commit bdcce30

File tree

8 files changed

+308
-71
lines changed

8 files changed

+308
-71
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: bookdown
22
Type: Package
33
Title: Authoring Books and Technical Documents with R Markdown
4-
Version: 0.42.1
4+
Version: 0.42.2
55
Authors@R: c(
66
person("Yihui", "Xie", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0003-0645-5666")),
77
person("Christophe", "Dervieux", , "[email protected]", role = c("ctb"),

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# CHANGES IN bookdown VERSION 0.43
22

3+
- Support `split_by` for section levels higher than `2` (i.e., levels deeper than `##` sections, specified as `3` to `6`) in `gitbook` (thanks, @katrinabrock #1490, @lcougnaud #1346 #1347).
4+
35
# CHANGES IN bookdown VERSION 0.42
46

57
- New option in `gitbook`'s font settings menu to control line spacing (thanks, @hayden-MB, #1479).

R/gitbook.R

+4-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ gitbook = function(
2525
fig_caption = TRUE, number_sections = TRUE, self_contained = FALSE,
2626
anchor_sections = TRUE, lib_dir = 'libs', global_numbering = !number_sections,
2727
pandoc_args = NULL, extra_dependencies = list(), ..., template = 'default',
28-
split_by = c('chapter', 'chapter+number', 'section', 'section+number', 'rmd', 'none'),
28+
split_by = c('chapter', 'section', '0', '1', '2', '3', '4', '5', '6', 'chapter+number',
29+
'section+number', '0+number', '1+number', '2+number', '3+number',
30+
'4+number', '5+number', '6+number', 'rmd', 'none') ,
2931
split_bib = TRUE, config = list(), table_css = TRUE, code_folding = c("none", "show", "hide")
3032
) {
3133
gb_config = config
@@ -53,6 +55,7 @@ gitbook = function(
5355
template = template, pandoc_args = pandoc_args2(pandoc_args), ...
5456
)
5557
config$pandoc$lua_filters = append(config$pandoc$lua_filters, lua_filters)
58+
split_by = as.character(split_by)
5659
split_by = match.arg(split_by)
5760
post = config$post_processor # in case a post processor have been defined
5861
config$post_processor = function(metadata, input, output, clean, verbose) {

R/html.R

+97-48
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,17 @@
1616
#' @param split_by How to name the HTML output files from the book: \code{rmd}
1717
#' uses the base filenames of the input Rmd files to create the HTML
1818
#' filenames, e.g. generate \file{chapter1.html} for \file{chapter1.Rmd};
19-
#' \code{none} means do not split the HTML file (the book will be a single
20-
#' HTML file); \code{chapter} means split the file by the first-level headers;
21-
#' \code{section} means the second-level headers. For \code{chapter} and
22-
#' \code{section}, the HTML filenames will be determined by the header ID's,
23-
#' e.g. the filename for the first chapter with a chapter title \code{#
24-
#' Introduction} will be \file{introduction.html}; for \code{chapter+number}
25-
#' and \code{section+number}, the chapter/section numbers will be prepended to
26-
#' the HTML filenames, e.g. \file{1-introduction.html} and
27-
#' \file{2-1-literature.html}.
19+
#' \code{none} or \code{"0"} means do not split the HTML file (the book will be
20+
#' a single HTML file); \code{chapter} or \code{"1"} means split the file by
21+
#' the first-level headers; \code{section} or \code{"2"} means the second-level
22+
#' headers; \code{"3"} to \code{"6"} mean split the file by the third- to sixth-level
23+
#' headers. For \code{chapter}, \code{section} and \code{"1"}-\code{"6"}, the
24+
#' HTML filenames will be determined by the header ID's, e.g. the filename
25+
#' for the first chapter with a chapter title \code{# Introduction} will be
26+
#' \file{introduction.html}; for \code{"chapter+number"}, \code{"section+number"}
27+
#' and \code{"[1-6]+number"}, the chapter/section (and deeper-level section)
28+
#' numbers will be prepended to the HTML filenames, e.g.
29+
#' \file{1-introduction.html} and \file{2-1-literature.html}.
2830
#' @param split_bib Whether to split the bibliography onto separate pages where
2931
#' the citations are actually used.
3032
#' @param page_builder A function to combine different parts of a chapter into a
@@ -54,13 +56,16 @@ html_chapters = function(
5456
template = bookdown_file('templates/default.html'),
5557
global_numbering = !number_sections, pandoc_args = NULL, ...,
5658
base_format = rmarkdown::html_document, split_bib = TRUE, page_builder = build_chapter,
57-
split_by = c('section+number', 'section', 'chapter+number', 'chapter', 'rmd', 'none')
59+
split_by = c('chapter', 'section', '0', '1', '2', '3', '4', '5', '6', 'chapter+number',
60+
'section+number', '0+number', '1+number', '2+number', '3+number',
61+
'4+number', '5+number', '6+number', 'rmd', 'none')
5862
) {
5963
config = get_base_format(base_format, list(
6064
toc = toc, number_sections = number_sections, fig_caption = fig_caption,
6165
self_contained = FALSE, lib_dir = lib_dir,
6266
template = template, pandoc_args = pandoc_args2(pandoc_args), ...
6367
))
68+
split_by = as.character(split_by)
6469
split_by = match.arg(split_by)
6570
post = config$post_processor # in case a post processor have been defined
6671
config$post_processor = function(metadata, input, output, clean, verbose) {
@@ -257,12 +262,15 @@ split_chapters = function(
257262
) {
258263

259264
use_rmd_names = split_by == 'rmd'
260-
split_level = switch(
261-
split_by, none = 0, chapter = 1, `chapter+number` = 1,
262-
section = 2, `section+number` = 2, rmd = 1
263-
)
264265

265-
if (!(split_level %in% 0:2)) stop('split_level must be 0, 1, or 2')
266+
split_level = sub('[+]number$', '', split_by)
267+
split_level = switch(split_level,
268+
none = 0,
269+
chapter = 1,
270+
section = 2,
271+
rmd = 1,
272+
as.numeric(split_level)
273+
)
266274

267275
x = read_utf8(output)
268276
x = clean_html_tags(x)
@@ -280,41 +288,75 @@ split_chapters = function(
280288
# restore_appendix_html erase the section ids of the hidden PART or APPENDIX
281289
# sections.
282290
if (split_level > 1) {
291+
283292
body = x[(i5 + 1):(i6 - 1)]
284-
h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', body) + i5
285-
h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', body) + i5
286-
h12 = setNames(c(h1, h2), rep(c('h1', 'h2'), c(length(h1), length(h2))))
287-
if (length(h12) > 0 && h12[1] != i5 + 1) stop(
293+
294+
i_sections = grep(
295+
paste0(
296+
'^<div (id="[^"]+" )?class="section level(',
297+
paste(seq_len(split_level), collapse = '|'
298+
),
299+
')("| )'),
300+
body
301+
) + i5
302+
303+
names(i_sections) = sub('^<div (id="[^"]+" )?class="section level([[:digit:]])("| ).*',"\\2", body[i_sections - i5])
304+
# heading indices
305+
i_sections = sort(i_sections)
306+
# heading levels
307+
l_sections = as.numeric(names(i_sections))
308+
309+
if (length(i_sections) > 0 && (
310+
i_sections[1] != i5 + 1 || !l_sections[1] %in% 1:2
311+
)) stop(
288312
'The document must start with a first (#) or second level (##) heading'
289313
)
290-
h12 = sort(h12)
291-
if (length(h12) > 1) {
292-
n12 = names(h12)
293-
# h2 that immediately follows h1
294-
i = h12[n12 == 'h2' & c('h2', head(n12, -1)) == 'h1'] - 1
295-
# close the h1 section early with </div>
296-
# reg_chap and sec_num must take this into account so that cross reference
297-
# works when split by section. (#849)
298-
if (length(i)) x[i] = paste0(x[i], '\n</div>')
299-
# h1 that immediately follows h2 but not the first h1
300-
i = n12 == 'h1' & c('h1', head(n12, -1)) == 'h2'
301-
if (any(i) && n12[1] == 'h2') i[which(n12 == 'h1')[1]] = FALSE
302-
i = h12[i] - 1
303-
# need to comment out the </div> corresponding to the last <h1> in the body
304-
if (tail(n12, 1) == 'h2' && any(n12 == 'h1')) {
305-
for (j in (i6 - 1):(tail(h12, 1))) {
306-
# the line j should close h1, and j - 1 should close h2
314+
315+
if (length(i_sections) > 1) {
316+
pre_split_level = split_level - 1
317+
# h[X-1] that immediately follows h[X] but not the first h1
318+
d_sections = diff(l_sections)
319+
320+
# in case next section is X > 2, remove multiple </div>
321+
i = c()
322+
i_add = c()
323+
for (j in seq_along(d_sections)){
324+
if (d_sections[j] == 0) next
325+
if (d_sections[j] > 0) {
326+
# </div>s to add (close at the end of the page)
327+
i_add = c(i_add, i_sections[j + 1] - 1)
328+
}
329+
if (d_sections[j] < 0) {
330+
# </div>s to delete (remove from later in the doc)
331+
page_breakpoint = i_sections[j + 1] - 1
332+
# get the last instance of a level(j+1) or higher
333+
# this is the area over which we need to remove div closes
334+
j_prev_head = max(tail(which(l_sections[1:j]>=l_sections[j+1]), 1), 1)
335+
# count how many different levels are in that area
336+
# this is the number of divs we need to close
337+
n_div_to_delete = length(unique(l_sections[j_prev_head:j+1])) - 1
338+
i = c(i, seq(page_breakpoint - n_div_to_delete, page_breakpoint))
339+
}
340+
}
341+
if (length(i_add)) x[i_add] = paste0(x[i_add], '\n</div>')
342+
i = setdiff(i, i_sections[l_sections == 1][1])
343+
if (length(i) && l_sections[1] == split_level) i = setdiff(i, i_sections[which(l_sections == pre_split_level)][1])
344+
345+
# need to comment out the </div> corresponding to the last <h2> in the body
346+
if (tail(l_sections, 1) == split_level && any(l_sections == pre_split_level)) {
347+
for (j in (i6 - 1):(tail(i_sections, 1))) {
348+
# the line j should close h2, and j - 1 should close h1
307349
if (all(x[j - 0:1] == '</div>')) break
308350
}
309351
i = c(i, j)
310352
}
311-
for (j in i) {
312-
# the i-th lines should be the closing </div> for h1
313-
if (x[j] != '</div>') stop(
314-
'Something wrong with the HTML output. The line ', x[j],
315-
' is supposed to be </div>'
316-
)
317-
}
353+
for (j in i) {
354+
# the i-th lines should be the closing </div>
355+
if (!grepl('</div>', x[j])) stop(
356+
'Something wrong with the HTML output. The line ', x[j],
357+
' is supposed to be </div>'
358+
)
359+
}
318360
x[i] = paste('<!--', x[i], '-->') # remove the extra </div> of h1
319361
}
320362
}
@@ -378,15 +420,22 @@ split_chapters = function(
378420
idx = c(1, idx[-n])
379421
}
380422
} else {
381-
h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', html_body)
382-
h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', html_body)
383-
idx2 = if (split_level == 1) h1 else if (split_level == 2) sort(c(h1, h2))
423+
reg_level = paste(seq_len(split_level), collapse = '')
424+
idx2 = if (split_level >= 1) {
425+
use_rmd_names = split_by == 'rmd'
426+
sort(grep(
427+
paste0('^<div (id="[^"]+" )?class="section level[', reg_level, ']("| )'),
428+
html_body
429+
))
430+
}
384431
n = length(idx2)
385432
nms_chaps = if (length(idx)) {
386433
vapply(idx2, character(1), FUN = function(i) head(nms[idx > i], 1))
387434
}
388435
reg_id = '^<div id="([^"]+)".*$'
389-
reg_num = '^(<h[12]><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[12]>).*$'
436+
reg_num = paste0('^(<h[', reg_level,
437+
']><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[', reg_level, ']>).*$'
438+
)
390439
nms = vapply(idx2, character(1), FUN = function(i) {
391440
x1 = html_body[i]; x2 = html_body[i + 1]
392441
id = if (grepl(reg_id, x1)) gsub(reg_id, '\\1', x1)
@@ -838,7 +887,7 @@ restore_ref_links = function(x, regexp, tags, txts, alt = TRUE) {
838887

839888
# add automatic identifiers to those section headings without ID's
840889
add_section_ids = function(content) {
841-
r = '^(<div)( class="section level[1-6].+)$'
890+
r = '^(<div).*(class="section level[1-6].+)$'
842891
for (i in grep(r, content)) {
843892
if (grepl('id=".+"', content[i])) next # the id exists
844893
h = content[i + 1]

man/gitbook.Rd

+14-10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/html_chapters.Rd

+14-10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/test-rmd.R

-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ if (Sys.getenv('NOT_CRAN') == 'true') local({
1010
for (f in list.files('rmd', '[.]Rmd$', full.names = TRUE)) {
1111
rmarkdown::render(f, envir = globalenv(), quiet = TRUE)
1212
}
13-
1413
validate_html(list.files("rmd", ".html$", full.names = TRUE))
1514

1615
# split by section works correctly

0 commit comments

Comments
 (0)