fstpackage
diff --git a/‎.Rbuildignore‎
Lines changed: 4 additions & 1 deletion b/‎.Rbuildignore‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.travis.yml‎
Lines changed: 11 additions & 7 deletions b/‎.travis.yml‎
Lines changed: 11 additions & 7 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 4 additions & 3 deletions b/‎DESCRIPTION‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 2 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 30 additions & 10 deletions b/‎NEWS.md‎
Lines changed: 30 additions & 10 deletions
diff --git a/‎R/RcppExports.R‎
Lines changed: 4 additions & 0 deletions b/‎R/RcppExports.R‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎R/fst.R‎
Lines changed: 32 additions & 16 deletions b/‎R/fst.R‎
Lines changed: 32 additions & 16 deletions
@@ -4,8 +4,11 @@
 \.dll$
 \.a$
 \.Rmd$
+LZ4/LICENSE$
 \.md$
+^docs$
 \.png$
 \.yml$
 dataset\.fst$
-res - readme\.fst$
+^res - readme\.fst$
+^_pkgdown\.yml$
@@ -16,3 +16,4 @@
 *.txt
 *.zip
 .Rproj.user
+*.TMP
@@ -11,13 +11,6 @@ os:
   - linux
   - osx
 
-matrix:
-  exclude:
-  - r: release
-    os: osx
-  - r: devel
-    os: osx
-
 r_packages:
   - covr
   - lintr
@@ -26,6 +19,17 @@ r_packages:
   - testthat
   - data.table
 
+matrix:
+  exclude:
+  - r: release
+    os: osx
+  - r: devel
+    os: osx
+
+addons:
+  apt:
+    update: true
+
 after_success:
   - Rscript -e 'library(covr); codecov(quiet = FALSE)'
 
 
@@ -5,8 +5,8 @@ Description: Multithreaded serialization of compressed data frames using the
     'fst' format. The 'fst' format allows for random access of stored data and
     compression with the LZ4 and ZSTD compressors created by Yann Collet. The ZSTD
     compression library is owned by Facebook Inc.
-Version: 0.8.4
-Date: 2018-01-25
+Version: 0.8.6
+Date: 2018-05-15
 Authors@R: c(
     person("Mark", "Klik", email = "[email protected]", role = c("aut", "cre", "cph")),
     person("Yann", "Collet", role = c("ctb", "cph"),
@@ -25,7 +25,8 @@ Suggests:
     bit64,
     data.table,
     lintr,
-    nanotime
+    nanotime,
+    crayon
 License: AGPL-3 | file LICENSE
 Copyright: This package includes sources from the LZ4 library written
     by Yann Collet, sources of the ZSTD library owned by Facebook, Inc.
 
@@ -25,6 +25,8 @@ export(write.fst)
 export(write_fst)
 importFrom(Rcpp,sourceCpp)
 importFrom(parallel,detectCores)
+importFrom(utils,capture.output)
 importFrom(utils,packageVersion)
 importFrom(utils,str)
+importFrom(utils,tail)
 useDynLib(fst, .registration = TRUE)
@@ -1,15 +1,35 @@
 
-**If you are viewing this file on CRAN, please check latest news on GitHub [here](https://github.com/fstpackage/fst/blob/develop/NEWS.md).**
+# fst 0.8.6
 
-### Changes in v0.8.4
+Version 0.8.6 of the `fst` package brings clearer printing of `fst_table` objects. It also includes optimizations for controlling the number of threads used by the package during reads and writes and after a fork has ended. The `LZ4` and `ZSTD` compression libraries are updated to their latest (and fastest) releases. UTF-8 encoded column names are now correctly stored in the `fst` format.
+
+## New features
+
+* More advanced printing generic of the `fst_table` reference object, showing column types, (possible) keys, and the table header and footer data (issue #131, thanks @renkun-ken for reporting and discussions).
+
+* User has more control over the number of threads used by fst. Option 'fst_threads' can now be used to initialize the number of threads when the package is first loaded (issue #132, thanks to @karldw for the pull request).
+
+* Option 'fst_restore_after_fork' can be used to select the threading behaviour after a fork has ended. Like the `data.table` package, `fst` switches back to a single thread when a fork is detected (using OpenMP in a fork can lead to problems). Unlike `data.table`, the `fst` package restores the number of threads to its previous setting when the fork ends. If this leads to unexpected problems, the user can set the 'fst_restore_after_fork' option to FALSE to disable that.
+    
+## Bugs solved
+
+* Character encoding of column names correctly stored in the `fst` format (issue #144, thanks @shrektan for reporting and discussions).
+
+## Documentation
+
+* Improved accuracy of fst_table documentation regarding random row access (issue #143, thanks @martinblostein for pointing out the lack of clarity).
+
+* Improved documentation on background threads during `write_fst()` and `read_fst()` (issue #121, thanks @krlmlr for suggestions and discussion)
+
+# fst 0.8.4
 
 The v0.8.4 release brings a `data.frame` interface to the `fst` package. Column and row selection can now be done directly from the `[` operator. In addition, it fixes some issues and prepares the package for the next build toolchain of CRAN.
 
-#### New features
+## New features
 
 * A `data.frame` interface was added to the package. The user can create a reference object to a `fst` file with method `fst`. That reference can be used like a `data.frame` and will automatically make column- and row- selections in the referenced `fst` file.
 
-#### Bugs solved
+## Bugs solved
 
 * Build issues with the dev build of R have been fixed. In particular, `fst` now builds correctly with the Clang 6.0 toolchain which will be released by CRAN shortly (thanks @kevinushey for reporting the problem and CRAN maintainers for the advance warning).
 
@@ -19,14 +39,14 @@ The v0.8.4 release brings a `data.frame` interface to the `fst` package. Column
 
 * An error was fixed where using `fst` as a dependency in another package and building that package in RStudio, crashed RStudio. The problem was that RStudio uses a fork to build or document a package. That fork made `fst` use OpenMP library methods, which leads to crashes on macOS. After the fix, no calls to any OpenMP library method are now made from `fst` when it's run from a forked process (issue #100 and issue #109, thanks to @eipi10, @PeteHaitch, @kevinushey, @thierrygosselin, @xiaodaigh and @jzzcutler for reporting the problem and help fix it).
 
-#### Documentation
+## Documentation
 
 * Documentation for method `write_fst` was improved (issue #123, thanks @krlmlr for reporting and submitting a pull request).
 
 
-### Changes in v0.8.2
+# fst 0.8.2
 
-#### New features
+## New features
 
 * Package `fst` has support for multi-threading using OpenMP. Compression, decompression and disk IO have been largely parallelized for (much) improved performance.
 
@@ -60,7 +80,7 @@ The v0.8.4 release brings a `data.frame` interface to the `fst` package. Column
 
 * The core C++ code with the API to read and write `fst` files, and use compression and hashing now lives in a separate library called [`fstlib`](https://github.com/fstpackage/fstlib). Although not visible to the user, this is a major development allowing `fst` to be implemented for other languages than `R` (with comparable performance).
 
-#### Bugs solved
+## Bugs solved
 
 * Tilde-expansion in `write_fst` not correctly processed. _Thanks @HughParsonage, @PoGibas._
 
@@ -76,11 +96,11 @@ The v0.8.4 release brings a `data.frame` interface to the `fst` package. Column
 
 * Stack imbalance warnings under certain conditions. _Thanks @ryankennedyio_
 
-#### Benchmarks
+## Benchmarks
 
 Thanks to @mattdowle, @st-pasha, @phillc73 for valuable discussions on `fst` benchmarks and how to accurately perform (and present) them.
 
-#### Additional credits
+## Additional credits
 
 * Special thanks to @arunsrinivasan for a lot of valuable discussions on the future direction of the `fst` package, I hope `fst` may continue to benefit from your experience!
 
 
@@ -37,3 +37,7 @@ hasopenmp <- function() {
     .Call(`_fst_hasopenmp`)
 }
 
+restore_after_fork <- function(restore) {  # thin R wrapper: forwards `restore` unchanged to the native routine `_fst_restore_after_fork`
+    invisible(.Call(`_fst_restore_after_fork`, restore))  # invisible() keeps the native call's result from auto-printing; NOTE(review): `restore` is presumably a logical flag mirroring the 'fst_restore_after_fork' option - confirm against the C++ side
+}
+
@@ -22,41 +22,44 @@
 
 #' Read and write fst files.
 #'
-#' Read and write data frames from and to a fast-storage (fst) file.
+#' Read and write data frames from and to a fast-storage (`fst`) file.
 #' Allows for compression and (file level) random access of stored data, even for compressed datasets.
-#' When using a \code{data.table} object for \code{x}, the key (if any) is preserved,
+#' Multiple threads are used to obtain high (de-)serialization speeds but all background threads are
+#' re-joined before `write_fst` and `read_fst` return (reads and writes are stable).
+#' When using a `data.table` object for `x`, the key (if any) is preserved,
 #' allowing storage of sorted data.
-#' Methods \code{read_fst} and \code{write_fst} are equivalent to \code{read.fst} and \code{write.fst} (but the
+#' Methods `read_fst` and `write_fst` are equivalent to `read.fst` and `write.fst` (but the
 #' former syntax is preferred).
 #'
 #' @param x a data frame to write to disk
 #' @param path path to fst file
 #' @param compress value in the range 0 to 100, indicating the amount of compression to use.
-#'   Lower values mean larger file sizes.
-#' @param uniform_encoding If TRUE, all character vectors will be assumed to have elements with equal encoding.
+#' Lower values mean larger file sizes. The default compression is set to 50.
+#' @param uniform_encoding If `TRUE`, all character vectors will be assumed to have elements with equal encoding.
 #' The encoding (latin1, UTF8 or native) of the first non-NA element will be used as encoding for the whole column.
 #' This will be a correct assumption for most use cases.
-#' If \code{uniform.encoding} is set to FALSE, no such assumption will be made and all elements will be converted
+#' If `uniform_encoding` is set to `FALSE`, no such assumption will be made and all elements will be converted
 #' to the same encoding. The latter is a relatively expensive operation and will reduce write performance for
 #' character columns.
-#' @return \code{read_fst} returns a data frame with the selected columns and rows. \code{read_fst}
-#' invisibly returns \code{x} (so you can use this function in a pipeline).
+#' @return `read_fst` returns a data frame with the selected columns and rows. `write_fst`
+#' invisibly returns `x` (so you can use this function in a pipeline).
 #' @examples
 #' # Sample dataset
 #' x <- data.frame(A = 1:10000, B = sample(c(TRUE, FALSE, NA), 10000, replace = TRUE))
 #'
-#' # Uncompressed
-#' write_fst(x, "dataset.fst")  # filesize: 41 KB
-#' y <- read_fst("dataset.fst") # read uncompressed data
+#' # Default compression
+#' write_fst(x, "dataset.fst")  # filesize: 17 KB
+#' y <- read_fst("dataset.fst") # read fst file
 #'
-#' # Compressed
+#' # Maximum compression
 #' write_fst(x, "dataset.fst", 100)  # fileSize: 4 KB
-#' y <- read_fst("dataset.fst") # read compressed data
+#' y <- read_fst("dataset.fst") # read fst file
 #'
 #' # Random access
 #' y <- read_fst("dataset.fst", "B") # read selection of columns
 #' y <- read_fst("dataset.fst", "A", 100, 200) # read selection of columns and rows
 #' @export
+#' @md
 write_fst <- function(x, path, compress = 50, uniform_encoding = TRUE) {
   if (!is.character(path)) stop("Please specify a correct path.")
 
@@ -156,7 +159,7 @@ print.fstmetadata <- function(x, ...) {
 #'
 #' @export
 read_fst <- function(path, columns = NULL, from = 1, to = NULL, as.data.table = FALSE, old_format = FALSE) {
-  fileName <- normalizePath(path, mustWork = TRUE)
+  fileName <- normalizePath(path, mustWork = FALSE)
 
   if (!is.null(columns)) {
     if (!is.character(columns)) {
@@ -200,8 +203,21 @@ read_fst <- function(path, columns = NULL, from = 1, to = NULL, as.data.table =
     return(res)
   }
 
-  as.data.frame(res$resTable, row.names = NULL, stringsAsFactors = FALSE,
-    optional = TRUE)
+  # use setters from data.table to improve performance
+  if (requireNamespace("data.table")) {
+
+    data.table::setattr(res$resTable, "class", "data.frame")
+    data.table::setattr(res$resTable, "row.names", 1:length(res$resTable[[1]]))
+
+    return(res$resTable)
+  }
+
+  res_table <- res$resTable
+
+  class(res_table) <- "data.frame"
+  attr(res_table, "row.names") <- 1:length(res$resTable[[1]])
+
+  res_table
 }
-Original file line number
+Diff line change
 *.txt
 *.zip
 .Rproj.user
 +*.TMP
Original file line number	Diff line number	Diff line change
`@@ -37,3 +37,7 @@ hasopenmp <- function() {`
`37`	`37`	.Call(`_fst_hasopenmp`)
`38`	`38`	`}`
`39`	`39`
	`40`	`+restore_after_fork <- function(restore) {`
	`41`	+ invisible(.Call(`_fst_restore_after_fork`, restore))
	`42`	`+}`
	`43`	`+`