Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ poppler_pdf_data <- function(x, get_font_info, opw, upw) {
.Call('_pdftools_poppler_pdf_data', PACKAGE = 'pdftools', x, get_font_info, opw, upw)
}

poppler_pdf_text <- function(x, opw, upw) {
.Call('_pdftools_poppler_pdf_text', PACKAGE = 'pdftools', x, opw, upw)
poppler_pdf_text <- function(x, opw, upw, raw = FALSE) {
.Call('_pdftools_poppler_pdf_text', PACKAGE = 'pdftools', x, opw, upw, raw)
}

poppler_pdf_pagesize <- function(x, opw, upw) {
Expand Down
5 changes: 3 additions & 2 deletions R/tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ pdf_info <- function(pdf, opw = "", upw = "") {
}

#' @rdname pdftools
#' @param raw if TRUE, text is kept in content stream (raw) order instead of the physical page layout. Default: FALSE.
#' @export
pdf_text <- function(pdf, opw = "", upw = "") {
poppler_pdf_text(loadfile(pdf), opw, upw)
pdf_text <- function(pdf, opw = "", upw = "", raw = FALSE) {
poppler_pdf_text(loadfile(pdf), opw, upw, raw)
}

#' @rdname pdftools
Expand Down
4 changes: 3 additions & 1 deletion man/pdftools.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,16 @@ BEGIN_RCPP
END_RCPP
}
// poppler_pdf_text
CharacterVector poppler_pdf_text(RawVector x, std::string opw, std::string upw);
RcppExport SEXP _pdftools_poppler_pdf_text(SEXP xSEXP, SEXP opwSEXP, SEXP upwSEXP) {
CharacterVector poppler_pdf_text(RawVector x, std::string opw, std::string upw, bool raw);
RcppExport SEXP _pdftools_poppler_pdf_text(SEXP xSEXP, SEXP opwSEXP, SEXP upwSEXP, SEXP rawSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< RawVector >::type x(xSEXP);
Rcpp::traits::input_parameter< std::string >::type opw(opwSEXP);
Rcpp::traits::input_parameter< std::string >::type upw(upwSEXP);
rcpp_result_gen = Rcpp::wrap(poppler_pdf_text(x, opw, upw));
Rcpp::traits::input_parameter< bool >::type raw(rawSEXP);
rcpp_result_gen = Rcpp::wrap(poppler_pdf_text(x, opw, upw, raw));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -175,7 +176,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_pdftools_get_poppler_config", (DL_FUNC) &_pdftools_get_poppler_config, 0},
{"_pdftools_poppler_pdf_info", (DL_FUNC) &_pdftools_poppler_pdf_info, 3},
{"_pdftools_poppler_pdf_data", (DL_FUNC) &_pdftools_poppler_pdf_data, 4},
{"_pdftools_poppler_pdf_text", (DL_FUNC) &_pdftools_poppler_pdf_text, 3},
{"_pdftools_poppler_pdf_text", (DL_FUNC) &_pdftools_poppler_pdf_text, 4},
{"_pdftools_poppler_pdf_pagesize", (DL_FUNC) &_pdftools_poppler_pdf_pagesize, 3},
{"_pdftools_poppler_pdf_fonts", (DL_FUNC) &_pdftools_poppler_pdf_fonts, 3},
{"_pdftools_poppler_pdf_files", (DL_FUNC) &_pdftools_poppler_pdf_files, 3},
Expand Down
4 changes: 2 additions & 2 deletions src/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,13 +260,13 @@ List poppler_pdf_data (RawVector x, bool get_font_info, std::string opw, std::st
}

// [[Rcpp::export]]
CharacterVector poppler_pdf_text (RawVector x, std::string opw, std::string upw) {
CharacterVector poppler_pdf_text (RawVector x, std::string opw, std::string upw, bool raw = false) {
std::unique_ptr<poppler::document> doc(read_raw_pdf(x, opw, upw));
CharacterVector out(doc->pages());
for(int i = 0; i < doc->pages(); i++){
std::unique_ptr<poppler::page> p(doc->create_page(i));
if(!p) continue; //missing page
page::text_layout_enum show_text_layout = page::physical_layout;
page::text_layout_enum show_text_layout = raw ? page::raw_order_layout : page::physical_layout;

/* media_box includes text in margins: https://github.com/ropensci/pdftools/issues/67 */
rectf target(p->page_rect(media_box));
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/test-reading.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ test_that("reading password protected pdf", {

# Get text with password
expect_equal(4, length(pdf_text("pdf-example-password.original.pdf", upw = "test")))
expect_equal(4, length(pdf_text("pdf-example-password.original.pdf", upw = "test", raw = TRUE)))
expect_false(pdf_info("pdf-example-password.original.pdf", upw = "test")$locked)

# Reading 'encrypted' file
Expand Down
Loading