16
16
# ' @param split_by How to name the HTML output files from the book: \code{rmd}
17
17
# ' uses the base filenames of the input Rmd files to create the HTML
18
18
# ' filenames, e.g. generate \file{chapter1.html} for \file{chapter1.Rmd};
19
- # ' \code{none} means do not split the HTML file (the book will be a single
20
- # ' HTML file); \code{chapter} means split the file by the first-level headers;
21
- # ' \code{section} means the second-level headers. For \code{chapter} and
22
- # ' \code{section}, the HTML filenames will be determined by the header ID's,
23
- # ' e.g. the filename for the first chapter with a chapter title \code{#
24
- # ' Introduction} will be \file{introduction.html}; for \code{chapter+number}
25
- # ' and \code{section+number}, the chapter/section numbers will be prepended to
26
- # ' the HTML filenames, e.g. \file{1-introduction.html} and
27
- # ' \file{2-1-literature.html}.
19
+ # ' \code{none} or \code{"0"} means do not split the HTML file (the book will be
20
+ # ' a single HTML file); \code{chapter} or \code{"1"} means split the file by
21
+ # ' the first-level headers; \code{section} or \code{"2"} means the second-level
22
+ # ' headers; \code{"3"} to \code{"6"} mean split the file by the level 3 to 6
23
+ # ' headers. For \code{chapter}, \code{section} and \code{"1"}-\code{"6"}, the
24
+ # ' HTML filenames will be determined by the header ID's, e.g. the filename
25
+ # ' for the first chapter with a chapter title \code{# Introduction} will be
26
+ # ' \file{introduction.html}; for \code{"chapter+number"}, \code{"section+number"}
27
+ # ' and \code{"[1-6]+number"} the chapter/section (and higher level section)
28
+ # ' numbers will be prepended to the HTML filenames, e.g.
29
+ # ' \file{1-introduction.html} and \file{2-1-literature.html}.
28
30
# ' @param split_bib Whether to split the bibliography onto separate pages where
29
31
# ' the citations are actually used.
30
32
# ' @param page_builder A function to combine different parts of a chapter into a
@@ -54,13 +56,16 @@ html_chapters = function(
54
56
template = bookdown_file(' templates/default.html' ),
55
57
global_numbering = ! number_sections , pandoc_args = NULL , ... ,
56
58
base_format = rmarkdown :: html_document , split_bib = TRUE , page_builder = build_chapter ,
57
- split_by = c(' section+number' , ' section' , ' chapter+number' , ' chapter' , ' rmd' , ' none' )
59
+ split_by = c(' chapter' , ' section' , ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' chapter+number' ,
60
+ ' section+number' , ' 0+number' , ' 1+number' , ' 2+number' , ' 3+number' ,
61
+ ' 4+number' , ' 5+number' , ' 6+number' , ' rmd' , ' none' )
58
62
) {
59
63
config = get_base_format(base_format , list (
60
64
toc = toc , number_sections = number_sections , fig_caption = fig_caption ,
61
65
self_contained = FALSE , lib_dir = lib_dir ,
62
66
template = template , pandoc_args = pandoc_args2(pandoc_args ), ...
63
67
))
68
+ split_by = as.character(split_by )
64
69
split_by = match.arg(split_by )
65
70
post = config $ post_processor # in case a post processor have been defined
66
71
config $ post_processor = function (metadata , input , output , clean , verbose ) {
@@ -257,12 +262,15 @@ split_chapters = function(
257
262
) {
258
263
259
264
use_rmd_names = split_by == ' rmd'
260
- split_level = switch (
261
- split_by , none = 0 , chapter = 1 , `chapter+number` = 1 ,
262
- section = 2 , `section+number` = 2 , rmd = 1
263
- )
264
265
265
- if (! (split_level %in% 0 : 2 )) stop(' split_level must be 0, 1, or 2' )
266
+ split_level = sub(' [+]number$' , ' ' , split_by )
267
+ split_level = switch (split_level ,
268
+ none = 0 ,
269
+ chapter = 1 ,
270
+ section = 2 ,
271
+ rmd = 1 ,
272
+ as.numeric(split_level )
273
+ )
266
274
267
275
x = read_utf8(output )
268
276
x = clean_html_tags(x )
@@ -280,41 +288,75 @@ split_chapters = function(
280
288
# restore_appendix_html erases the section ids of the hidden PART or APPENDIX
281
289
# sections.
282
290
if (split_level > 1 ) {
291
+
283
292
body = x [(i5 + 1 ): (i6 - 1 )]
284
- h1 = grep(' ^<div (id="[^"]+" )?class="section level1("| )' , body ) + i5
285
- h2 = grep(' ^<div (id="[^"]+" )?class="section level2("| )' , body ) + i5
286
- h12 = setNames(c(h1 , h2 ), rep(c(' h1' , ' h2' ), c(length(h1 ), length(h2 ))))
287
- if (length(h12 ) > 0 && h12 [1 ] != i5 + 1 ) stop(
293
+
294
+ i_sections = grep(
295
+ paste0(
296
+ ' ^<div (id="[^"]+" )?class="section level(' ,
297
+ paste(seq_len(split_level ), collapse = ' |'
298
+ ),
299
+ ' )("| )' ),
300
+ body
301
+ ) + i5
302
+
303
+ names(i_sections ) = sub(' ^<div (id="[^"]+" )?class="section level([[:digit:]])("| ).*' ," \\ 2" , body [i_sections - i5 ])
304
+ # heading indices
305
+ i_sections = sort(i_sections )
306
+ # heading levels
307
+ l_sections = as.numeric(names(i_sections ))
308
+
309
+ if (length(i_sections ) > 0 && (
310
+ i_sections [1 ] != i5 + 1 || ! l_sections [1 ] %in% 1 : 2
311
+ )) stop(
288
312
' The document must start with a first (#) or second level (##) heading'
289
313
)
290
- h12 = sort(h12 )
291
- if (length(h12 ) > 1 ) {
292
- n12 = names(h12 )
293
- # h2 that immediately follows h1
294
- i = h12 [n12 == ' h2' & c(' h2' , head(n12 , - 1 )) == ' h1' ] - 1
295
- # close the h1 section early with </div>
296
- # reg_chap and sec_num must take this into account so that cross reference
297
- # works when split by section. (#849)
298
- if (length(i )) x [i ] = paste0(x [i ], ' \n </div>' )
299
- # h1 that immediately follows h2 but not the first h1
300
- i = n12 == ' h1' & c(' h1' , head(n12 , - 1 )) == ' h2'
301
- if (any(i ) && n12 [1 ] == ' h2' ) i [which(n12 == ' h1' )[1 ]] = FALSE
302
- i = h12 [i ] - 1
303
- # need to comment out the </div> corresponding to the last <h1> in the body
304
- if (tail(n12 , 1 ) == ' h2' && any(n12 == ' h1' )) {
305
- for (j in (i6 - 1 ): (tail(h12 , 1 ))) {
306
- # the line j should close h1, and j - 1 should close h2
314
+
315
+ if (length(i_sections ) > 1 ) {
316
+ pre_split_level = split_level - 1
317
+ # h[X-1] that immediately follows h[X] but not the first h1
318
+ d_sections = diff(l_sections )
319
+
320
+ # in case next section is X > 2, remove multiple </div>
321
+ i = c()
322
+ i_add = c()
323
+ for (j in seq_along(d_sections )){
324
+ if (d_sections [j ] == 0 ) next
325
+ if (d_sections [j ] > 0 ) {
326
+ # </div>s to add (close at the end of the page)
327
+ i_add = c(i_add , i_sections [j + 1 ] - 1 )
328
+ }
329
+ if (d_sections [j ] < 0 ) {
330
+ # </div>s to delete (remove from later in the doc)
331
+ page_breakpoint = i_sections [j + 1 ] - 1
332
+ # get the last instance of a level(j+1) or higher
333
+ # this is the area over which we need to remove div closes
334
+ j_prev_head = max(tail(which(l_sections [1 : j ]> = l_sections [j + 1 ]), 1 ), 1 )
335
+ # count how many different levels are in that area
336
+ # this is the number of closing </div>s we need to delete
337
+ n_div_to_delete = length(unique(l_sections [j_prev_head : j + 1 ])) - 1
338
+ i = c(i , seq(page_breakpoint - n_div_to_delete , page_breakpoint ))
339
+ }
340
+ }
341
+ if (length(i_add )) x [i_add ] = paste0(x [i_add ], ' \n </div>' )
342
+ i = setdiff(i , i_sections [l_sections == 1 ][1 ])
343
+ if (length(i ) && l_sections [1 ] == split_level ) i = setdiff(i , i_sections [which(l_sections == pre_split_level )][1 ])
344
+
345
+ # need to comment out the </div> corresponding to the last <h2> in the body
346
+ if (tail(l_sections , 1 ) == split_level && any(l_sections == pre_split_level )) {
347
+ for (j in (i6 - 1 ): (tail(i_sections , 1 ))) {
348
+ # the line j should close h2, and j - 1 should close h1
307
349
if (all(x [j - 0 : 1 ] == ' </div>' )) break
308
350
}
309
351
i = c(i , j )
310
352
}
311
- for (j in i ) {
312
- # the i-th lines should be the closing </div> for h1
313
- if (x [ j ] != ' </div>' ) stop(
314
- ' Something wrong with the HTML output. The line ' , x [j ],
315
- ' is supposed to be </div>'
316
- )
317
- }
353
+ for (j in i ) {
354
+ # the i-th lines should be the closing </div>
355
+ if (! grepl( ' </div>' , x [ j ]) ) stop(
356
+ ' Something wrong with the HTML output. The line ' , x [j ],
357
+ ' is supposed to be </div>'
358
+ )
359
+ }
318
360
x [i ] = paste(' <!--' , x [i ], ' -->' ) # remove the extra </div> of h1
319
361
}
320
362
}
@@ -378,15 +420,22 @@ split_chapters = function(
378
420
idx = c(1 , idx [- n ])
379
421
}
380
422
} else {
381
- h1 = grep(' ^<div (id="[^"]+" )?class="section level1("| )' , html_body )
382
- h2 = grep(' ^<div (id="[^"]+" )?class="section level2("| )' , html_body )
383
- idx2 = if (split_level == 1 ) h1 else if (split_level == 2 ) sort(c(h1 , h2 ))
423
+ reg_level = paste(seq_len(split_level ), collapse = ' ' )
424
+ idx2 = if (split_level > = 1 ) {
425
+ use_rmd_names = split_by == ' rmd'
426
+ sort(grep(
427
+ paste0(' ^<div (id="[^"]+" )?class="section level[' , reg_level , ' ]("| )' ),
428
+ html_body
429
+ ))
430
+ }
384
431
n = length(idx2 )
385
432
nms_chaps = if (length(idx )) {
386
433
vapply(idx2 , character (1 ), FUN = function (i ) head(nms [idx > i ], 1 ))
387
434
}
388
435
reg_id = ' ^<div id="([^"]+)".*$'
389
- reg_num = ' ^(<h[12]><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[12]>).*$'
436
+ reg_num = paste0(' ^(<h[' , reg_level ,
437
+ ' ]><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[' , reg_level , ' ]>).*$'
438
+ )
390
439
nms = vapply(idx2 , character (1 ), FUN = function (i ) {
391
440
x1 = html_body [i ]; x2 = html_body [i + 1 ]
392
441
id = if (grepl(reg_id , x1 )) gsub(reg_id , ' \\ 1' , x1 )
@@ -838,7 +887,7 @@ restore_ref_links = function(x, regexp, tags, txts, alt = TRUE) {
838
887
839
888
# add automatic identifiers to those section headings without ID's
840
889
add_section_ids = function (content ) {
841
- r = ' ^(<div)( class="section level[1-6].+)$'
890
+ r = ' ^(<div).*( class="section level[1-6].+)$'
842
891
for (i in grep(r , content )) {
843
892
if (grepl(' id=".+"' , content [i ])) next # the id exists
844
893
h = content [i + 1 ]
0 commit comments