Skip to content

Fixed version of #1346: Allow split_by to accept numeric values #1490

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: bookdown
Type: Package
Title: Authoring Books and Technical Documents with R Markdown
Version: 0.42.1
Version: 0.42.2
Authors@R: c(
person("Yihui", "Xie", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0003-0645-5666")),
person("Christophe", "Dervieux", , "[email protected]", role = c("ctb"),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# CHANGES IN bookdown VERSION 0.43

- Support `split_by` for section levels deeper than `2` (i.e., beyond `##` sections) in `gitbook` (thanks, @katrinabrock #1490, @lcougnaud #1346 #1347).

# CHANGES IN bookdown VERSION 0.42

- New option in `gitbook`'s font settings menu to control line spacing (thanks, @hayden-MB, #1479).
Expand Down
5 changes: 4 additions & 1 deletion R/gitbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ gitbook = function(
fig_caption = TRUE, number_sections = TRUE, self_contained = FALSE,
anchor_sections = TRUE, lib_dir = 'libs', global_numbering = !number_sections,
pandoc_args = NULL, extra_dependencies = list(), ..., template = 'default',
split_by = c('chapter', 'chapter+number', 'section', 'section+number', 'rmd', 'none'),
split_by = c('chapter', 'section', '0', '1', '2', '3', '4', '5', '6', 'chapter+number',
'section+number', '0+number', '1+number', '2+number', '3+number',
'4+number', '5+number', '6+number', 'rmd', 'none') ,
split_bib = TRUE, config = list(), table_css = TRUE, code_folding = c("none", "show", "hide")
) {
gb_config = config
Expand Down Expand Up @@ -53,6 +55,7 @@ gitbook = function(
template = template, pandoc_args = pandoc_args2(pandoc_args), ...
)
config$pandoc$lua_filters = append(config$pandoc$lua_filters, lua_filters)
split_by = as.character(split_by)
split_by = match.arg(split_by)
post = config$post_processor # in case a post processor have been defined
config$post_processor = function(metadata, input, output, clean, verbose) {
Expand Down
145 changes: 97 additions & 48 deletions R/html.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,17 @@
#' @param split_by How to name the HTML output files from the book: \code{rmd}
#' uses the base filenames of the input Rmd files to create the HTML
#' filenames, e.g. generate \file{chapter1.html} for \file{chapter1.Rmd};
#' \code{none} means do not split the HTML file (the book will be a single
#' HTML file); \code{chapter} means split the file by the first-level headers;
#' \code{section} means the second-level headers. For \code{chapter} and
#' \code{section}, the HTML filenames will be determined by the header ID's,
#' e.g. the filename for the first chapter with a chapter title \code{#
#' Introduction} will be \file{introduction.html}; for \code{chapter+number}
#' and \code{section+number}, the chapter/section numbers will be prepended to
#' the HTML filenames, e.g. \file{1-introduction.html} and
#' \file{2-1-literature.html}.
#' \code{none} or \code{"0"} means do not split the HTML file (the book will be
#' a single HTML file); \code{chapter} or \code{"1"} means split the file by
#' the first-level headers; \code{section} or \code{"2"} means split the file
#' by the second-level headers; \code{"3"} to \code{"6"} mean split the file
#' by the third- to sixth-level headers, respectively. For \code{chapter},
#' \code{section} and \code{"1"} to \code{"6"}, the HTML filenames will be
#' determined by the header ID's, e.g. the filename for the first chapter with
#' a chapter title \code{# Introduction} will be \file{introduction.html}; for
#' \code{"chapter+number"}, \code{"section+number"} and \code{"1+number"} to
#' \code{"6+number"}, the chapter/section (and deeper-level section) numbers
#' will be prepended to the HTML filenames, e.g. \file{1-introduction.html}
#' and \file{2-1-literature.html}.
#' @param split_bib Whether to split the bibliography onto separate pages where
#' the citations are actually used.
#' @param page_builder A function to combine different parts of a chapter into a
Expand Down Expand Up @@ -54,13 +56,16 @@ html_chapters = function(
template = bookdown_file('templates/default.html'),
global_numbering = !number_sections, pandoc_args = NULL, ...,
base_format = rmarkdown::html_document, split_bib = TRUE, page_builder = build_chapter,
split_by = c('section+number', 'section', 'chapter+number', 'chapter', 'rmd', 'none')
split_by = c('chapter', 'section', '0', '1', '2', '3', '4', '5', '6', 'chapter+number',
'section+number', '0+number', '1+number', '2+number', '3+number',
'4+number', '5+number', '6+number', 'rmd', 'none')
) {
config = get_base_format(base_format, list(
toc = toc, number_sections = number_sections, fig_caption = fig_caption,
self_contained = FALSE, lib_dir = lib_dir,
template = template, pandoc_args = pandoc_args2(pandoc_args), ...
))
split_by = as.character(split_by)
split_by = match.arg(split_by)
post = config$post_processor # in case a post processor have been defined
config$post_processor = function(metadata, input, output, clean, verbose) {
Expand Down Expand Up @@ -257,12 +262,15 @@ split_chapters = function(
) {

use_rmd_names = split_by == 'rmd'
split_level = switch(
split_by, none = 0, chapter = 1, `chapter+number` = 1,
section = 2, `section+number` = 2, rmd = 1
)

if (!(split_level %in% 0:2)) stop('split_level must be 0, 1, or 2')
split_level = sub('[+]number$', '', split_by)
split_level = switch(split_level,
none = 0,
chapter = 1,
section = 2,
rmd = 1,
as.numeric(split_level)
)

x = read_utf8(output)
x = clean_html_tags(x)
Expand All @@ -280,41 +288,75 @@ split_chapters = function(
# restore_appendix_html erase the section ids of the hidden PART or APPENDIX
# sections.
if (split_level > 1) {

body = x[(i5 + 1):(i6 - 1)]
h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', body) + i5
h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', body) + i5
h12 = setNames(c(h1, h2), rep(c('h1', 'h2'), c(length(h1), length(h2))))
if (length(h12) > 0 && h12[1] != i5 + 1) stop(

i_sections = grep(
paste0(
'^<div (id="[^"]+" )?class="section level(',
paste(seq_len(split_level), collapse = '|'
),
')("| )'),
body
) + i5

names(i_sections) = sub('^<div (id="[^"]+" )?class="section level([[:digit:]])("| ).*',"\\2", body[i_sections - i5])
# heading indices
i_sections = sort(i_sections)
# heading levels
l_sections = as.numeric(names(i_sections))

if (length(i_sections) > 0 && (
i_sections[1] != i5 + 1 || !l_sections[1] %in% 1:2
)) stop(
'The document must start with a first (#) or second level (##) heading'
)
h12 = sort(h12)
if (length(h12) > 1) {
n12 = names(h12)
# h2 that immediately follows h1
i = h12[n12 == 'h2' & c('h2', head(n12, -1)) == 'h1'] - 1
# close the h1 section early with </div>
# reg_chap and sec_num must take this into account so that cross reference
# works when split by section. (#849)
if (length(i)) x[i] = paste0(x[i], '\n</div>')
# h1 that immediately follows h2 but not the first h1
i = n12 == 'h1' & c('h1', head(n12, -1)) == 'h2'
if (any(i) && n12[1] == 'h2') i[which(n12 == 'h1')[1]] = FALSE
i = h12[i] - 1
# need to comment out the </div> corresponding to the last <h1> in the body
if (tail(n12, 1) == 'h2' && any(n12 == 'h1')) {
for (j in (i6 - 1):(tail(h12, 1))) {
# the line j should close h1, and j - 1 should close h2

if (length(i_sections) > 1) {
pre_split_level = split_level - 1
# h[X-1] that immediately follows h[X] but not the first h1
d_sections = diff(l_sections)

# in case next section is X > 2, remove multiple </div>
i = c()
i_add = c()
for (j in seq_along(d_sections)){
if (d_sections[j] == 0) next
if (d_sections[j] > 0) {
# </div>s to add (close at the end of the page)
i_add = c(i_add, i_sections[j + 1] - 1)
}
if (d_sections[j] < 0) {
# </div>s to delete (remove from later in the doc)
page_breakpoint = i_sections[j + 1] - 1
# get the last instance of a level(j+1) or higher
# this is the area over which we need to remove div closes
j_prev_head = max(tail(which(l_sections[1:j]>=l_sections[j+1]), 1), 1)
# count how many different levels are in that area
# this is the number of divs we need to close
n_div_to_delete = length(unique(l_sections[j_prev_head:j+1])) - 1
i = c(i, seq(page_breakpoint - n_div_to_delete, page_breakpoint))
}
}
if (length(i_add)) x[i_add] = paste0(x[i_add], '\n</div>')
i = setdiff(i, i_sections[l_sections == 1][1])
if (length(i) && l_sections[1] == split_level) i = setdiff(i, i_sections[which(l_sections == pre_split_level)][1])

# need to comment out the </div> corresponding to the last <h2> in the body
if (tail(l_sections, 1) == split_level && any(l_sections == pre_split_level)) {
for (j in (i6 - 1):(tail(i_sections, 1))) {
# the line j should close h2, and j - 1 should close h1
if (all(x[j - 0:1] == '</div>')) break
}
i = c(i, j)
}
for (j in i) {
# the i-th lines should be the closing </div> for h1
if (x[j] != '</div>') stop(
'Something wrong with the HTML output. The line ', x[j],
' is supposed to be </div>'
)
}
for (j in i) {
# the i-th lines should be the closing </div>
if (!grepl('</div>', x[j])) stop(
'Something wrong with the HTML output. The line ', x[j],
' is supposed to be </div>'
)
}
x[i] = paste('<!--', x[i], '-->') # remove the extra </div> of h1
}
}
Expand Down Expand Up @@ -378,15 +420,22 @@ split_chapters = function(
idx = c(1, idx[-n])
}
} else {
h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', html_body)
h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', html_body)
idx2 = if (split_level == 1) h1 else if (split_level == 2) sort(c(h1, h2))
reg_level = paste(seq_len(split_level), collapse = '')
idx2 = if (split_level >= 1) {
use_rmd_names = split_by == 'rmd'
sort(grep(
paste0('^<div (id="[^"]+" )?class="section level[', reg_level, ']("| )'),
html_body
))
}
n = length(idx2)
nms_chaps = if (length(idx)) {
vapply(idx2, character(1), FUN = function(i) head(nms[idx > i], 1))
}
reg_id = '^<div id="([^"]+)".*$'
reg_num = '^(<h[12]><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[12]>).*$'
reg_num = paste0('^(<h[', reg_level,
']><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[', reg_level, ']>).*$'
)
nms = vapply(idx2, character(1), FUN = function(i) {
x1 = html_body[i]; x2 = html_body[i + 1]
id = if (grepl(reg_id, x1)) gsub(reg_id, '\\1', x1)
Expand Down Expand Up @@ -838,7 +887,7 @@ restore_ref_links = function(x, regexp, tags, txts, alt = TRUE) {

# add automatic identifiers to those section headings without ID's
add_section_ids = function(content) {
r = '^(<div)( class="section level[1-6].+)$'
r = '^(<div).*(class="section level[1-6].+)$'
for (i in grep(r, content)) {
if (grepl('id=".+"', content[i])) next # the id exists
h = content[i + 1]
Expand Down
24 changes: 14 additions & 10 deletions man/gitbook.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 14 additions & 10 deletions man/html_chapters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion tests/test-rmd.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ if (Sys.getenv('NOT_CRAN') == 'true') local({
for (f in list.files('rmd', '[.]Rmd$', full.names = TRUE)) {
rmarkdown::render(f, envir = globalenv(), quiet = TRUE)
}

validate_html(list.files("rmd", ".html$", full.names = TRUE))

# split by section works correctly
Expand Down
Loading