Open
Description
Hi, there's a bug in the dev version: the UTF-8 encoding of a character column is not preserved through a write_fst()/read_fst() round trip if the first string in the column is ASCII. See the reproducible example below, thanks.
# Reprex: check whether the UTF-8 encoding mark of a character column
# survives a write_fst()/read_fst() round trip, depending on element order.
library(fst)
library(data.table)
# Case 1: the ASCII string comes first, the non-ASCII string second.
dt1 <- data.table(
a = enc2utf8(c("english", "中文"))
)
# Case 2: the same two strings, but with the non-ASCII string first.
dt2 <- data.table(
a = enc2utf8(c("中文", "english"))
)
# Round-trip each table through its own temporary .fst file.
file1 <- tempfile(fileext = ".fst")
file2 <- tempfile(fileext = ".fst")
write_fst(dt1, file1)
write_fst(dt2, file2)
res1 <- read_fst(file1)
res2 <- read_fst(file2)
# Case 1 comes back garbled: row 2 below is not the string that was written.
# NOTE(review): presumably the UTF-8 bytes are being displayed in the
# session's native code page (936 per the session info) -- confirm.
print(res1)
#> a
#> 1 english
#> 2 涓枃
# Case 2 comes back intact.
print(res2)
#> a
#> 1 中文
#> 2 english
# Both elements of res1$a are unmarked, including the non-ASCII one.
Encoding(res1$a)
#> [1] "unknown" "unknown"
# res2$a keeps the "UTF-8" mark on its non-ASCII element. "unknown" for
# "english" is expected in both cases: R does not mark pure-ASCII strings.
Encoding(res2$a)
#> [1] "UTF-8" "unknown"
# Workaround: re-declaring the encoding by hand makes row 2 print correctly
# again, which suggests the bytes were stored intact and only the encoding
# mark was lost on the round trip.
Encoding(res1$a) <- "UTF-8"
print(res1)
#> a
#> 1 english
#> 2 中文
Created on 2018-05-22 by the reprex package (v0.2.0).
Session info
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#> setting value
#> version R version 3.4.4 (2018-03-15)
#> os Windows 7 x64 SP 1
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate Chinese (Simplified)_People's Republic of China.936
#> tz Asia/Taipei
#> date 2018-05-22
#>
#> ─ Packages ──────────────────────────────────────────────────────────────
#> package * version date source
#> assertthat 0.2.0 2017-04-11 CRAN (R 3.4.4)
#> backports 1.1.2 2017-12-13 CRAN (R 3.4.3)
#> cli 1.0.0 2017-11-05 CRAN (R 3.4.4)
#> clisymbols 1.2.0 2017-05-21 CRAN (R 3.4.4)
#> crayon 1.3.4 2017-09-16 CRAN (R 3.4.4)
#> data.table * 1.11.0 2018-05-01 CRAN (R 3.4.4)
#> desc 1.2.0 2018-05-01 CRAN (R 3.4.4)
#> devtools 1.13.5.9000 2018-05-10 local
#> digest 0.6.15 2018-01-28 CRAN (R 3.4.3)
#> evaluate 0.10.1 2017-06-24 CRAN (R 3.4.1)
#> fst * 0.8.6 2018-05-22 Github (fstpackage/fst@6eeb3e3)
#> htmltools 0.3.6.9000 2018-05-07 local
#> knitr 1.20 2018-02-20 CRAN (R 3.4.4)
#> magrittr 1.5 2014-11-22 CRAN (R 3.4.4)
#> memoise 1.1.0 2017-04-21 CRAN (R 3.4.4)
#> pkgbuild 0.0.0.9000 2017-12-06 Github (r-lib/pkgbuild@ce7f6d1)
#> pkgload 0.0.0.9000 2017-12-06 Github (r-lib/pkgload@70eaef8)
#> R6 2.2.2 2017-06-17 CRAN (R 3.4.4)
#> Rcpp 0.12.16 2018-03-13 CRAN (R 3.4.4)
#> rlang 0.2.0 2018-02-20 CRAN (R 3.4.3)
#> rmarkdown 1.9 2018-03-01 CRAN (R 3.4.4)
#> rprojroot 1.3-2 2018-01-03 CRAN (R 3.4.3)
#> sessioninfo 1.0.1.9000 2017-12-06 Github (r-lib/sessioninfo@c871d01)
#> stringi 1.1.7 2018-03-12 CRAN (R 3.4.4)
#> stringr 1.3.0 2018-02-19 CRAN (R 3.4.3)
#> testthat 2.0.0 2017-12-13 CRAN (R 3.4.4)
#> usethis 1.3.0 2018-02-24 CRAN (R 3.4.4)
#> withr 2.1.2 2018-03-15 CRAN (R 3.4.4)
#> yaml 2.1.19 2018-05-01 CRAN (R 3.4.4)