rstudio · yihui · Apr 4, 2025 · Dec 11, 2024 · Jan 2, 2025 · Feb 5, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: bookdown
 Type: Package
 Title: Authoring Books and Technical Documents with R Markdown
-Version: 0.42.1
+Version: 0.42.2
 Authors@R: c(
     person("Yihui", "Xie", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0003-0645-5666")),
     person("Christophe", "Dervieux", , "[email protected]", role = c("ctb"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
 # CHANGES IN bookdown VERSION 0.43
 
+- Support `split_by` for section level higher than `2` (i.e., `##` sections) in `gitbook` (thanks, @katrinabrock #1490, @lcougnaud #1346 #1347).
+
 # CHANGES IN bookdown VERSION 0.42
 
 - New option in `gitbook`'s font settings menu to control line spacing (thanks, @hayden-MB, #1479).

diff --git a/R/gitbook.R b/R/gitbook.R
@@ -25,7 +25,9 @@ gitbook = function(
   fig_caption = TRUE, number_sections = TRUE, self_contained = FALSE,
   anchor_sections = TRUE, lib_dir = 'libs', global_numbering = !number_sections,
   pandoc_args = NULL, extra_dependencies = list(), ..., template = 'default',
-  split_by = c('chapter', 'chapter+number', 'section', 'section+number', 'rmd', 'none'),
+  split_by = c('chapter', 'section', '0', '1', '2', '3', '4', '5', '6', 'chapter+number',
+               'section+number', '0+number', '1+number', '2+number', '3+number',
+               '4+number', '5+number', '6+number', 'rmd', 'none') ,
   split_bib = TRUE, config = list(), table_css = TRUE, code_folding = c("none", "show", "hide")
 ) {
   gb_config = config
@@ -53,6 +55,7 @@ gitbook = function(
     template = template, pandoc_args = pandoc_args2(pandoc_args), ...
   )
   config$pandoc$lua_filters = append(config$pandoc$lua_filters, lua_filters)
+  split_by = as.character(split_by)
   split_by = match.arg(split_by)
   post = config$post_processor  # in case a post processor have been defined
   config$post_processor = function(metadata, input, output, clean, verbose) {

diff --git a/R/html.R b/R/html.R
@@ -16,15 +16,17 @@
 #' @param split_by How to name the HTML output files from the book: \code{rmd}
 #'   uses the base filenames of the input Rmd files to create the HTML
 #'   filenames, e.g. generate \file{chapter1.html} for \file{chapter1.Rmd};
-#'   \code{none} means do not split the HTML file (the book will be a single
-#'   HTML file); \code{chapter} means split the file by the first-level headers;
-#'   \code{section} means the second-level headers. For \code{chapter} and
-#'   \code{section}, the HTML filenames will be determined by the header ID's,
-#'   e.g. the filename for the first chapter with a chapter title \code{#
-#'   Introduction} will be \file{introduction.html}; for \code{chapter+number}
-#'   and \code{section+number}, the chapter/section numbers will be prepended to
-#'   the HTML filenames, e.g. \file{1-introduction.html} and
-#'   \file{2-1-literature.html}.
+#'   \code{none} or \code{"0"} means do not split the HTML file (the book will be
+#'   a single HTML file); \code{chapter} or \code{"1"} means split the file by
+#'   the first-level headers; \code{section} or \code{"2"} means the second-level
+#'   headers, \code{"3"}-\code{"6"} means split the file by the [3-6]-level
+#'   headers. For \code{chapter}, \code{section} and \code{"1"}-\code{"6"}, the
+#'   HTML filenames will be determined by the header ID's, e.g. the filename
+#'   for the first chapter with a chapter title \code{# Introduction} will be
+#'   \file{introduction.html}; for \code{"chapter+number"}, \code{"section+number"}
+#'   and \code{"[1-6]+number"} the chapter/section (and higher level section)
+#'   numbers will be prepended to the HTML filenames, e.g.
+#'   \file{1-introduction.html} and \file{2-1-literature.html}.
 #' @param split_bib Whether to split the bibliography onto separate pages where
 #'   the citations are actually used.
 #' @param page_builder A function to combine different parts of a chapter into a
@@ -54,13 +56,16 @@ html_chapters = function(
   template = bookdown_file('templates/default.html'),
   global_numbering = !number_sections, pandoc_args = NULL, ...,
   base_format = rmarkdown::html_document, split_bib = TRUE, page_builder = build_chapter,
-  split_by = c('section+number', 'section', 'chapter+number', 'chapter', 'rmd', 'none')
+  split_by = c('chapter', 'section', '0', '1', '2', '3', '4', '5', '6', 'chapter+number',
+               'section+number', '0+number', '1+number', '2+number', '3+number',
+               '4+number', '5+number', '6+number', 'rmd', 'none')
 ) {
   config = get_base_format(base_format, list(
     toc = toc, number_sections = number_sections, fig_caption = fig_caption,
     self_contained = FALSE, lib_dir = lib_dir,
     template = template, pandoc_args = pandoc_args2(pandoc_args), ...
   ))
+  split_by = as.character(split_by)
   split_by = match.arg(split_by)
   post = config$post_processor  # in case a post processor have been defined
   config$post_processor = function(metadata, input, output, clean, verbose) {
@@ -257,12 +262,15 @@ split_chapters = function(
 ) {
 
   use_rmd_names = split_by == 'rmd'
-  split_level = switch(
-    split_by, none = 0, chapter = 1, `chapter+number` = 1,
-    section = 2, `section+number` = 2, rmd = 1
-  )
 
-  if (!(split_level %in% 0:2)) stop('split_level must be 0, 1, or 2')
+  split_level = sub('[+]number$', '', split_by)
+  split_level = switch(split_level,
+    none = 0,
+    chapter = 1,
+    section = 2,
+    rmd = 1,
+    as.numeric(split_level)
+  )
 
   x = read_utf8(output)
   x = clean_html_tags(x)
@@ -280,41 +288,75 @@ split_chapters = function(
   # restore_appendix_html erase the section ids of the hidden PART or APPENDIX
   # sections.
   if (split_level > 1) {
+
     body = x[(i5 + 1):(i6 - 1)]
-    h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', body) + i5
-    h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', body) + i5
-    h12 = setNames(c(h1, h2), rep(c('h1', 'h2'), c(length(h1), length(h2))))
-    if (length(h12) > 0 && h12[1] != i5 + 1) stop(
+
+    i_sections = grep(
+      paste0(
+        '^<div (id="[^"]+" )?class="section level(',
+        paste(seq_len(split_level), collapse = '|'
+      ),
+     ')("| )'),
+      body
+    ) + i5
+
+    names(i_sections) = sub('^<div (id="[^"]+" )?class="section level([[:digit:]])("| ).*',"\\2", body[i_sections - i5])
+    # heading indices
+    i_sections = sort(i_sections)
+    # heading levels
+    l_sections = as.numeric(names(i_sections))
+
+    if (length(i_sections) > 0 && (
+      i_sections[1] != i5 + 1 || !l_sections[1] %in% 1:2
+    )) stop(
       'The document must start with a first (#) or second level (##) heading'
     )
-    h12 = sort(h12)
-    if (length(h12) > 1) {
-      n12 = names(h12)
-      # h2 that immediately follows h1
-      i = h12[n12 == 'h2' & c('h2', head(n12, -1)) == 'h1'] - 1
-      # close the h1 section early with </div>
-      # reg_chap and sec_num must take this into account so that cross reference
-      # works when split by section. (#849)
-      if (length(i)) x[i] = paste0(x[i], '\n</div>')
-      # h1 that immediately follows h2 but not the first h1
-      i = n12 == 'h1' & c('h1', head(n12, -1)) == 'h2'
-      if (any(i) && n12[1] == 'h2') i[which(n12 == 'h1')[1]] = FALSE
-      i = h12[i] - 1
-      # need to comment out the </div> corresponding to the last <h1> in the body
-      if (tail(n12, 1) == 'h2' && any(n12 == 'h1')) {
-        for (j in (i6 - 1):(tail(h12, 1))) {
-          # the line j should close h1, and j - 1 should close h2
+
+    if (length(i_sections) > 1) {
+      pre_split_level = split_level - 1
+      # h[X-1] that immediately follows h[X] but not the first h1
+      d_sections = diff(l_sections)
+
+      # in case next section is X > 2, remove multiple </div>
+      i = c()
+      i_add = c()
+      for (j in seq_along(d_sections)){
+        if (d_sections[j] == 0) next
+        if (d_sections[j] > 0) {
+          # </div>s to add (close at the end of the page)
+          i_add = c(i_add, i_sections[j + 1] - 1)
+        }
+        if (d_sections[j] < 0) {
+          # </div>s to delete (remove from later in the doc)
+          page_breakpoint = i_sections[j + 1] - 1
+          # get the last instance of a level(j+1) or higher
+          # this is the area over which we need to remove div closes
+          j_prev_head = max(tail(which(l_sections[1:j]>=l_sections[j+1]), 1), 1)
+          # count how many different levels are in that area
+          # this is the number of divs we need to close
+          n_div_to_delete = length(unique(l_sections[j_prev_head:j+1])) - 1
+          i = c(i, seq(page_breakpoint - n_div_to_delete, page_breakpoint))
+        }
+      }
+      if (length(i_add)) x[i_add] = paste0(x[i_add], '\n</div>')
+      i = setdiff(i, i_sections[l_sections == 1][1])
+      if (length(i) && l_sections[1] == split_level) i = setdiff(i, i_sections[which(l_sections == pre_split_level)][1])
+
+      # need to comment out the </div> corresponding to the last <h2> in the body
+      if (tail(l_sections, 1) == split_level && any(l_sections == pre_split_level)) {
+        for (j in (i6 - 1):(tail(i_sections, 1))) {
+          # the line j should close h2, and j - 1 should close h1
           if (all(x[j - 0:1] == '</div>')) break
         }
         i = c(i, j)
       }
-      for (j in i) {
-        # the i-th lines should be the closing </div> for h1
-        if (x[j] != '</div>') stop(
-          'Something wrong with the HTML output. The line ', x[j],
-          ' is supposed to be </div>'
-        )
-      }
+     for (j in i) {
+       # the i-th lines should be the closing </div>
+       if (!grepl('</div>', x[j])) stop(
+         'Something wrong with the HTML output. The line ', x[j],
+         ' is supposed to be </div>'
+       )
+     }
       x[i] = paste('<!--', x[i], '-->')  # remove the extra </div> of h1
     }
   }
@@ -378,15 +420,22 @@ split_chapters = function(
       idx = c(1, idx[-n])
     }
   } else {
-    h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', html_body)
-    h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', html_body)
-    idx2 = if (split_level == 1) h1 else if (split_level == 2) sort(c(h1, h2))
+    reg_level = paste(seq_len(split_level), collapse = '')
+    idx2 = if (split_level >= 1) {
+      use_rmd_names = split_by == 'rmd'
+      sort(grep(
+        paste0('^<div (id="[^"]+" )?class="section level[', reg_level, ']("| )'),
+        html_body
+      ))
+    }
     n = length(idx2)
     nms_chaps = if (length(idx)) {
       vapply(idx2, character(1), FUN = function(i) head(nms[idx > i], 1))
     }
     reg_id = '^<div id="([^"]+)".*$'
-    reg_num = '^(<h[12]><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[12]>).*$'
+    reg_num = paste0('^(<h[', reg_level,
+      ']><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[', reg_level, ']>).*$'
+    )
     nms = vapply(idx2, character(1), FUN = function(i) {
       x1 = html_body[i]; x2 = html_body[i + 1]
       id = if (grepl(reg_id, x1)) gsub(reg_id, '\\1', x1)
@@ -838,7 +887,7 @@ restore_ref_links = function(x, regexp, tags, txts, alt = TRUE) {
 
 # add automatic identifiers to those section headings without ID's
 add_section_ids = function(content) {
-  r = '^(<div)( class="section level[1-6].+)$'
+  r = '^(<div).*(class="section level[1-6].+)$'
   for (i in grep(r, content)) {
     if (grepl('id=".+"', content[i])) next  # the id exists
     h = content[i + 1]

diff --git a/man/gitbook.Rd b/man/gitbook.Rd
diff --git a/man/html_chapters.Rd b/man/html_chapters.Rd
diff --git a/tests/test-rmd.R b/tests/test-rmd.R
@@ -10,7 +10,6 @@ if (Sys.getenv('NOT_CRAN') == 'true') local({
   for (f in list.files('rmd', '[.]Rmd$', full.names = TRUE)) {
     rmarkdown::render(f, envir = globalenv(), quiet = TRUE)
   }
-
   validate_html(list.files("rmd", ".html$", full.names = TRUE))
 
   # split by section works correctly