-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathqpdf.R
More file actions
101 lines (94 loc) · 3.24 KB
/
qpdf.R
File metadata and controls
101 lines (94 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#' Split, Combine and Compress PDF Files
#'
#' Content-preserving transformations of PDF files. Note that qpdf does not
#' read actual content from PDF files: to extract text and data you need the
#' [pdftools](https://docs.ropensci.org/pdftools/) package.
#'
#' Currently this package provides the following wrappers:
#'
#' - [pdf_length]: show the number of pages in a pdf
#' - [pdf_split]: split a single pdf into separate files, one for each page
#' - [pdf_subset]: create a new pdf with a subset of the input pages
#' - [pdf_combine]: join several pdf files into one
#' - [pdf_compress]: compress or linearize a pdf file
#'
#' These functions do not modify the `input` file: instead they create
#' new output file(s) and return the path(s) to these newly created files.
#'
#' @export
#' @name qpdf
#' @rdname qpdf
#' @useDynLib qpdf
#' @importFrom Rcpp sourceCpp
#' @importFrom askpass askpass
#' @param input path or url to the input pdf file. For `pdf_combine` this
#' may be a vector with several files.
#' @param output base path of the output file(s)
#' @param password string with password to open pdf file
#' @examples # extract some pages
#' pdf_file <- file.path(tempdir(), "output.pdf")
#' pdf_subset('https://cran.r-project.org/doc/manuals/r-release/R-intro.pdf',
#' pages = 1:3, output = pdf_file)
#' pdf_length(pdf_file)
#' unlink(pdf_file)
pdf_split <- function(input, output = NULL, password = ""){
  # Split `input` into separate files by delegating to cpp_pdf_split().
  #
  # input:    path or url of the pdf; resolved to a normalised local path
  #           by get_input().
  # output:   base path for the generated files. When empty it defaults to
  #           the resolved input path with a trailing ".pdf" removed.
  # password: forwarded unchanged to cpp_pdf_split().
  #
  # Returns whatever cpp_pdf_split() returns.
  input <- get_input(input)
  if(!length(output))
    output <- sub("\\.pdf$", "", input)
  # Normalise the base path the same way pdf_subset(), pdf_combine() and
  # pdf_compress() do, so that e.g. a leading "~" is expanded before the
  # path is handed to the compiled code.
  output <- normalizePath(output, mustWork = FALSE)
  cpp_pdf_split(input, output, password)
}
#' @export
#' @rdname qpdf
pdf_length <- function(input, password = ""){
  # Resolve `input` (path or url) to a normalised local file, then ask the
  # compiled helper for the result, forwarding `password` unchanged.
  local_file <- get_input(input)
  n_pages <- cpp_pdf_length(local_file, password)
  n_pages
}
#' @export
#' @rdname qpdf
#' @param pages a vector with page numbers to select. Negative numbers
#' mean removing those pages (same as R indexing)
pdf_subset <- function(input, pages = 1, output = NULL, password = ""){
  # Write the selected pages of `input` to a new pdf via cpp_pdf_select().
  #
  # input:    path or url of the pdf; resolved by get_input().
  # pages:    index vector applied to seq_len(<number of pages>), so the
  #           usual R indexing rules (including negative indices) apply.
  # output:   path of the new file. When empty it defaults to the resolved
  #           input path with a trailing ".pdf" replaced by "_output.pdf".
  # password: used both for counting the pages and for the selection.
  #
  # Returns whatever cpp_pdf_select() returns. Signals an error when the
  # selection is empty or refers to pages that do not exist.
  input <- get_input(input)
  if(!length(output))
    output <- sub("\\.pdf$", "_output.pdf", input)
  output <- normalizePath(output, mustWork = FALSE)
  # Forward the password: without it the page count would be requested
  # with an empty password even though the caller supplied one.
  size <- pdf_length(input, password = password)
  # Indexing past `size` yields NA; an empty selection yields length zero.
  pages <- seq_len(size)[pages]
  if(anyNA(pages) || !length(pages))
    stop("Selected pages out of range")
  cpp_pdf_select(input, output, pages, password)
}
#' @export
#' @rdname qpdf
pdf_combine <- function(input, output = NULL, password = ""){
  # Resolve every input through get_input_multi() and hand the files to
  # cpp_pdf_combine() together with the normalised output path.
  files <- get_input_multi(input)
  if(length(output) == 0L){
    # No destination given: derive it from the first input file.
    target <- sub("\\.pdf$", "_combined.pdf", files[1])
  } else {
    target <- output
  }
  target <- normalizePath(target, mustWork = FALSE)
  cpp_pdf_combine(files, target, password)
}
#' @export
#' @rdname qpdf
#' @param linearize enable pdf linearization (streamable pdf)
pdf_compress <- function(input, output = NULL, linearize = FALSE, password = ""){
  # Resolve `input`, pick the destination and delegate to
  # cpp_pdf_compress(); `linearize` and `password` are forwarded unchanged.
  source_file <- get_input(input)
  if(length(output) == 0L){
    # No destination given: reuse the input path with an "_output" suffix.
    target <- sub("\\.pdf$", "_output.pdf", source_file)
  } else {
    target <- output
  }
  target <- normalizePath(target, mustWork = FALSE)
  cpp_pdf_compress(source_file, target, linearize, password)
}
password_callback <- function(...){
  # Pass all arguments on to askpass::askpass() and collapse whatever it
  # returns into one string; an empty result collapses to "".
  answer <- askpass::askpass(...)
  paste(answer, collapse = "")
}
get_input <- function(path){
  # Resolve `path` to the normalised path of an existing local file.
  #
  # path: exactly one local path or http(s) url. A url is downloaded into
  #       the session temporary directory first.
  #
  # Returns the normalised path. Signals an error when `path` does not have
  # length one or when the resulting file does not exist.
  if(length(path) != 1)
    stop("input should contain exactly one file")
  if(grepl("^https?://", path)){
    # Drop any query string or fragment before deriving the local file name:
    # something like "doc.pdf?dl=1" is not a valid file name on every
    # platform, and it hides the ".pdf" suffix that the default output
    # names in this file are derived from.
    filename <- basename(sub("[?#].*$", "", path))
    tmp <- file.path(tempdir(), filename)
    curl::curl_download(path, tmp)
    path <- tmp
  }
  normalizePath(path, mustWork = TRUE)
}
get_input_multi <- function(path){
  # Run get_input() on every element of `path`; vapply() insists on exactly
  # one character string per element.
  resolved <- vapply(X = path, FUN = get_input, FUN.VALUE = "", USE.NAMES = TRUE)
  resolved
}