diff --git a/R/transpose_list.R b/R/transpose_list.R index f5501a2..416ffaf 100644 --- a/R/transpose_list.R +++ b/R/transpose_list.R @@ -1,4 +1,12 @@ #' @useDynLib jsonlite C_transpose_list transpose_list <- function(x, names) { - .Call(C_transpose_list, x, names) + # Sort names before entering C, allowing for a binary search + LC_COLLATE <- "LC_COLLATE" + collate_before <- Sys.getlocale(LC_COLLATE) + on.exit(Sys.setlocale(LC_COLLATE, collate_before)) + Sys.setlocale(LC_COLLATE, "C") + sorted_names <- sort(names) + + transposed <- .Call(C_transpose_list, x, sorted_names) + transposed[match(names, sorted_names)] } diff --git a/src/transpose_list.c b/src/transpose_list.c index 9607a38..677af8a 100644 --- a/src/transpose_list.c +++ b/src/transpose_list.c @@ -1,26 +1,53 @@ #include #include +// names is assumed to be sorted, to make names matching faster +// by using a binary search SEXP C_transpose_list(SEXP x, SEXP names) { size_t ncol = Rf_length(names); size_t nrow = Rf_length(x); SEXP out = PROTECT(allocVector(VECSXP, ncol)); + + // Allocate output for(size_t i = 0; i < ncol; i++){ - const char * targetname = CHAR(STRING_ELT(names, i)); SEXP col = PROTECT(allocVector(VECSXP, nrow)); - for(size_t j = 0; j < nrow; j++){ - //search for 'targetname' in each record j - SEXP list = VECTOR_ELT(x, j); - SEXP listnames = getAttrib(list, R_NamesSymbol); - for(size_t k = 0; k < Rf_length(listnames); k++){ - if(!strcmp(CHAR(STRING_ELT(listnames, k)), targetname)){ + SET_VECTOR_ELT(out, i, col); + UNPROTECT(1); + } + + // Find and save all elements in their transposed place + for(size_t j = 0; j < nrow; j++){ + SEXP list = VECTOR_ELT(x, j); + SEXP listnames = getAttrib(list, R_NamesSymbol); + size_t listlength = Rf_length(listnames); + + for(size_t k = 0; k < listlength; k++){ + const char * listname = CHAR(STRING_ELT(listnames, k)); + + // Binary search for a name match + size_t low = 0; + size_t high = ncol - 1; + size_t mid; + while(low <= high){ + mid = (low + high) / 2; + const char * targetname = CHAR(STRING_ELT(names, mid)); + + int strcmp_result = strcmp(listname, targetname); + if(strcmp_result == 0){ + // Match! + SEXP col = VECTOR_ELT(out, mid); SET_VECTOR_ELT(col, j, VECTOR_ELT(list, k)); break; + } else if (strcmp_result > 0){ + low = mid + 1; + } else { + if (mid == 0) { + break; + } + high = mid - 1; } } } - SET_VECTOR_ELT(out, i, col); - UNPROTECT(1); } //setAttrib(out, R_NamesSymbol, names); UNPROTECT(1); diff --git a/tests/testthat/test-simplifyDataFrame.R b/tests/testthat/test-simplifyDataFrame.R new file mode 100644 index 0000000..c8d07af --- /dev/null +++ b/tests/testthat/test-simplifyDataFrame.R @@ -0,0 +1,33 @@ +context("simplifyDataFrame") + +test_that("simplifyDataFrame() works", { + source <- list( + list(a = 11, b = 12), + list(d = 24), + list(a = 31, c = 33) + ) + + actual <- simplifyDataFrame(source, flatten = TRUE) + + # Check that column order is preserved as discovered in the data + expect_equal(colnames(actual), c("a", "b", "d", "c")) + + expect_row_equals <- function(number, expected) { + expect_equal( + as.numeric(actual[number, ]), + expected + ) + } + # a b d c + expect_row_equals(1, c(11, 12, NA, NA)) + expect_row_equals(2, c(NA, NA, 24, NA)) + expect_row_equals(3, c(31, NA, NA, 33)) +}) + +test_that("transpose_list() does not change locale", { + locale_before <- Sys.getlocale() + transpose_list(list(a = 1), c("a")) + locale_after <- Sys.getlocale() + + expect_equal(locale_before, locale_after) +})