Skip to content

Use data.table subsetting to avoid copy of large objects #180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(partition_from_parts,data.table)
S3method(partition_from_parts,default)
S3method(repr_geojson,SpatialCollections)
S3method(repr_geojson,SpatialGrid)
S3method(repr_geojson,SpatialGridDataFrame)
Expand Down
56 changes: 46 additions & 10 deletions R/repr_matrix_df.r
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,20 @@ onload_chars <- function() {
chars$times_s <- .char_fallback('\u00D7', 'x')
}

arr_partition <- function(a, rows, cols) {
stopifnot(rows >= 2L, cols >= 2L)
# create sequences of indices to bisect rows and columns
part_r <- partition(nrow(a), rows)
part_c <- partition(ncol(a), cols)
# assign a list of parts that can be coerced to strings
#' Assign a list of parts that can be coerced to strings
#' @noRd
partition_from_parts <- function(a, part_r, part_c) {
UseMethod("partition_from_parts")
}

#' @export
partition_from_parts.default <- function(a, part_r, part_c) {
if (!is.null(part_r) && !is.null(part_c)) {
structure(list(
ul = a[part_r$start, part_c$start], ll = a[part_r$end, part_c$start],
ur = a[part_r$start, part_c$end ], lr = a[part_r$end, part_c$end ]),
ul = a[part_r$start, part_c$start, drop = FALSE],
ll = a[part_r$end , part_c$start, drop = FALSE],
ur = a[part_r$start, part_c$end, drop = FALSE],
lr = a[part_r$end , part_c$end, drop = FALSE]),
omit = 'both')
} else if (!is.null(part_r)) {
structure(list(
Expand All @@ -61,6 +63,40 @@ arr_partition <- function(a, rows, cols) {
}
}

#' @export
partition_from_parts.data.table <- function(a, part_r, part_c) {
if (!is.null(part_r) && !is.null(part_c)) {
structure(list(
ul = a[part_r$start, part_c$start, with = FALSE],
ll = a[part_r$end , part_c$start, with = FALSE],
ur = a[part_r$start, part_c$end, with = FALSE],
lr = a[part_r$end , part_c$end, with = FALSE]),
omit = 'both')
} else if (!is.null(part_r)) {
structure(list(
upper = a[part_r$start, , with = FALSE],
lower = a[part_r$end, , with = FALSE]),
omit = 'rows')
} else if (!is.null(part_c)) {
structure(list(
left = a[, part_c$start, with = FALSE],
right = a[, part_c$end, with = FALSE]),
omit = 'cols')
} else {
structure(list(full = a), omit = 'none')
}
}

arr_partition <- function(a, rows, cols) {
stopifnot(rows >= 2L, cols >= 2L)

# create sequences of indices to bisect rows and columns
part_r <- partition(nrow(a), rows)
part_c <- partition(ncol(a), cols)

partition_from_parts(a, part_r, part_c)
}

# unpack tibble and coerce to data.frame
arr_part_unpack_tbl <- function(tbl) {
tbl_col_format <- function(col, prefix = '') {
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test_repr_array_df.r
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,34 @@ test_that('data.frame with list columns can be displayed', {
expect_identical(repr_html(data.table::as.data.table(df)), sub('data\\.frame','data.table',expected))
}
})

test_that('forced-narrow inputs work', {
withr::local_options(repr.matrix.max.rows = 2L, repr.matrix.max.cols = 2L)
df <- data.frame(a = 1:3, b = 4:6, c = 7:9)
expect_silent(repr_text(df))
expect_identical(
# Scrub non-ASCII characters to make the test platform-agnostic.
gsub("[^a-zA-Z0-9.&;<>= '\"/:\n\t]", "*", repr_html(df)),
"<table class=\"dataframe\">
<caption>A data.frame: 3 * 3</caption>
<thead>
\t<tr><th scope=col>a</th><th scope=col>*</th><th scope=col>c</th></tr>
\t<tr><th scope=col>&lt;int&gt;</th><th scope=col>*</th><th scope=col>&lt;int&gt;</th></tr>
</thead>
<tbody>
\t<tr><td>1</td><td>*</td><td>7</td></tr>
\t<tr><td>*</td><td>*</td><td>*</td></tr>
\t<tr><td>3</td><td>*</td><td>9</td></tr>
</tbody>
</table>
")
})

test_that('data.table and data.frame elision is the same', {
skip_if_not_installed('data.table')
withr::local_options(list(repr.matrix.max.rows = 10L, repr.matrix.max.cols = 10L))
DF <- data.frame(matrix(rnorm(100L*100L), 100L, 100L))
expect_identical(repr_text(DF), repr_text(data.table::as.data.table(DF)))
expect_identical(repr_text(DF[1:10, ]), repr_text(data.table::as.data.table(DF[1:10, ])))
expect_identical(repr_text(DF[1:10, 1:10]), repr_text(data.table::as.data.table(DF[1:10, 1:10])))
})