Skip to content

Commit c8dca53

Browse files
authored
Merge pull request #763 from SebKrantz/development
Development
2 parents ab8a2c3 + 19a5181 commit c8dca53

File tree

5 files changed

+24
-12
lines changed

5 files changed

+24
-12
lines changed

DESCRIPTION

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: collapse
22
Title: Advanced and Fast Data Transformation
3-
Version: 2.1.1
4-
Date: 2025-04-14
3+
Version: 2.1.1.9000
4+
Date: 2025-04-16
55
Authors@R: c(
66
person("Sebastian", "Krantz", role = c("aut", "cre"),
77
email = "[email protected]",

NEWS.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# collapse 2.1.1.9000
2+
3+
* `na_insert` has a new argument `set` to insert the missing values by reference (modifying `X` in place).
4+
15
# collapse 2.1.1
26

37
* `alloc(list(1), 2)` now gives `list(1, 1)` instead of `list(list(1), list(1))`, which can still be generated with `alloc(list(1), 2, simplify = FALSE)`. This change also affects `ftransform()`/`fmutate()`, making, e.g., `fmutate(data, y = list(1))` consistent with `dplyr::mutate(data, y = list(1))`. Thanks @MattAFiedler (#753).

R/small_helper.R

+11-4
Original file line numberDiff line numberDiff line change
@@ -445,17 +445,24 @@ na_omit <- function(X, cols = NULL, na.attr = FALSE, prop = 0, ...) {
445445
res
446446
}
447447

448-
na_insert <- function(X, prop = 0.1, value = NA) {
448+
na_insert <- function(X, prop = 0.1, value = NA, set = FALSE) {
449449
if(is.list(X)) {
450450
n <- fnrow(X)
451451
nmiss <- floor(n * prop)
452-
res <- duplAttributes(lapply(unattrib(X), function(y) `[<-`(y, sample.int(n, nmiss), value = value)), X)
452+
if(set) {
453+
lapply(unattrib(X), function(y) scv(y, sample.int(n, nmiss), value, TRUE))
454+
return(invisible(X))
455+
}
456+
res <- duplAttributes(lapply(unattrib(X), function(y) scv(y, sample.int(n, nmiss), value)), X)
453457
return(if(inherits(X, "data.table")) alc(res) else res)
454458
}
455459
if(!is.atomic(X)) stop("X must be an atomic vector, array or data.frame")
456460
l <- length(X)
457-
X[sample.int(l, floor(l * prop))] <- value
458-
X
461+
if(set) {
462+
scv(X, sample.int(l, floor(l * prop)), value, TRUE)
463+
return(invisible(X))
464+
}
465+
return(scv(X, sample.int(l, floor(l * prop)), value))
459466
}
460467

461468
fdapply <- function(X, FUN, ...) duplAttributes(lapply(`attributes<-`(X, NULL), FUN, ...), X)

man/efficient-programming.Rd

+3-3
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ na_focb(x, set = FALSE) # (by reference). These also support lists (NULL/emp
6969
na_omit(X, cols = NULL, # Faster na.omit for matrices and data frames,
7070
na.attr = FALSE, # can use selected columns to check, attach indices,
7171
prop = 0, ...) # and remove cases with a proportion of values missing
72-
na_insert(X, prop = 0.1, # Insert missing values at random
73-
value = NA)
72+
na_insert(X, prop = 0.1, # Insert missing values at random (by reference)
73+
value = NA, set = FALSE)
7474
missing_cases(X, cols=NULL, # The opposite of complete.cases(), faster for DF's.
7575
prop = 0, count = FALSE) # See also kit::panyNA(), kit::pallNA(), kit::pcountNA()
7676
vlengths(X, use.names=TRUE) # Faster lengths() and nchar() (in C, no method dispatch)
@@ -88,7 +88,7 @@ cinv(x) # Choleski (fast) inverse of symmetric PD matrix, e.
8888
%- maybe also 'usage' for other objects documented here.
8989
\arguments{
9090
\item{X, V, R}{a vector, matrix or data frame.}
91-
\item{x, v}{a (atomic) vector or matrix (\code{na_rm} also supports lists).}
91+
\item{x, v}{a (atomic) vector or matrix (\code{na_rm}/\code{locf}/\code{focb} also support lists).}
9292
\item{value}{a single value of any (atomic) vector type. For \code{whichv} it can also be a \code{length(x)} vector.}
9393
\item{invert}{logical. \code{TRUE} considers elements \code{x != value}.}
9494
\item{set}{logical. \code{TRUE} transforms \code{x} (or \code{X} for \code{na_insert}) by reference.}

src/kit_dup.c

+4-3
Original file line numberDiff line numberDiff line change
@@ -128,20 +128,22 @@ SEXP dupVecIndex(SEXP x) {
128128
} break;
129129
case REALSXP: {
130130
const double *restrict px = REAL(x);
131+
// size_t offset;
131132
union uno tpv;
132133
for (int i = 0; i != n; ++i) {
133134
tpv.d = px[i]; // R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN : px[i]);
134135
id = HASH(tpv.u[0] + tpv.u[1], K);
135-
// Double hashing idea: not faster!
136+
// // Double hashing idea: not faster!
136137
// if(h[id]) {
137138
// if(REQUAL(px[h[id]-1], px[i])) {
138139
// pans_i[i] = pans_i[h[id]-1]; // h[id];
139140
// continue;
140141
// }
141-
// offset = (id / M) + 1;
142+
// offset = HASH(tpv.u[0] * tpv.u[1], K) / M + 1;
142143
// // if(offset == 0) offset = 1;
143144
// id += offset;
144145
// id %= M;
146+
// // if(id >= M) id = 0;
145147
// while(h[id]) {
146148
// if(REQUAL(px[h[id]-1], px[i])) {
147149
// pans_i[i] = pans_i[h[id]-1]; // h[id];
@@ -152,7 +154,6 @@ SEXP dupVecIndex(SEXP x) {
152154
// // if(id >= M) id = 0;
153155
// }
154156
// }
155-
//
156157
while(h[id]) {
157158
if(REQUAL(px[h[id]-1], px[i])) {
158159
pans_i[i] = pans_i[h[id]-1]; // h[id];

0 commit comments

Comments
 (0)