Skip to content

Commit

Permalink
Add zstd support to tar() and untar()
Browse files Browse the repository at this point in the history
Other tar-related updates.


git-svn-id: https://svn.r-project.org/R/trunk@87605 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information
ripley committed Jan 20, 2025
1 parent e539753 commit 18683f9
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 54 deletions.
9 changes: 8 additions & 1 deletion doc/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@
\item New function \code{use()} to use packages in R scripts with
full control over what gets added to the search path. (Actually
already available since \R 4.4.0.)
\item There is some support for \command{zstd} compression of
tarballs in \code{tar()} and \code{untar()}. (This depends on OS
support of \code{libzstd} or by \command{tar}.)
}
}
Expand Down Expand Up @@ -358,8 +362,11 @@
\item \code{R CMD check} with a true value for environment variable
\env{_R_CHECK_BASHISMS_} checks more thoroughly, including for
\command{bash} scripts and components of
\command{bash} scripts and bashisms in components of
\command{autoconf}-generated \command{configure} scripts.
\item \code{R CMD build} now supports \option{--compression =
zstd} on platforms with sufficient support for \command{zstd}.
}
}
Expand Down
7 changes: 4 additions & 3 deletions src/library/tools/R/build.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ inRbuildignore <- function(files, pkgdir) {
' "no" (default), "qpdf", "gs", "gs+qpdf", "both"',
" --compact-vignettes same as --compact-vignettes=qpdf",
" --compression= type of compression to be used on tarball:",
' "gzip" (default), "none", "bzip2", "xz"',
' "gzip" (default), "none", "bzip2", "xz", "zstd"',
" --md5 add MD5 sums",
" --log log to file 'pkg-00build.log' when processing ",
" the pkgdir with basename 'pkg'",
Expand Down Expand Up @@ -930,7 +930,7 @@ inRbuildignore <- function(files, pkgdir) {
install_dependencies <- "most"
} else if (substr(a, 1, 14) == "--compression=") {
compression <- match.arg(substr(a, 15, 1000),
c("none", "gzip", "bzip2", "xz"))
c("none", "gzip", "bzip2", "xz", "zstd"))
} else if (substr(a, 1, 7) == "--user=") {
user <- substr(a, 8, 64)
} else if (startsWith(a, "-")) {
Expand Down Expand Up @@ -1206,7 +1206,8 @@ inRbuildignore <- function(files, pkgdir) {

## Finalize
ext <- switch(compression,
"none"="", "gzip"= ".gz", "bzip2" = ".bz2", "xz" = ".xz")
"none"="", "gzip"= ".gz", "bzip2" = ".bz2",
"xz" = ".xz", "zstd" = ".zst")
filename <- paste0(pkgname, "_", desc["Version"], ".tar", ext)
filepath <- file.path(startdir, filename)
## NB: ../../../../tests/reg-packages.R relies on this exact format!
Expand Down
22 changes: 16 additions & 6 deletions src/library/utils/R/tar.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# File src/library/utils/R/tar.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2023 The R Core Team
# Copyright (C) 1995-2025 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -50,8 +50,9 @@ untar <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
if (!missing(compressed))
warning("untar(compressed=) is deprecated", call. = FALSE, domain = NA)
if (is.character(compressed)) {
cflag <- switch(match.arg(compressed, c("gzip", "bzip2", "xz")),
"gzip" = "z", "bzip2" = "j", "xz" = "J")
cflag <- switch(match.arg(compressed, c("gzip", "bzip2", "xz", "zstd")),
"gzip" = "z", "bzip2" = "j", "xz" = "J",
"zstd" = "-zstd")
} else if (is.logical(compressed)) {
if (is.na(compressed) && support_old_tars) {
magic <- readBin(tarfile, "raw", n = 6L)
Expand All @@ -60,6 +61,7 @@ untar <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
else if(rawToChar(magic[1:3]) == "BZh") cflag <- "j"
## (https://tukaani.org/xz/xz-file-format.txt)
else if(all(magic[1:6] == c(0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00))) cflag <- "J"
else if(all(magic[1:4] == c(0x28, 0xb5, 0x2f, 0xfd))) cflag <- "-zstd"
} else if (isTRUE(compressed)) cflag <- "z"
} else stop("'compressed' must be logical or character")

Expand All @@ -82,6 +84,12 @@ untar <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
tarfile <- "-"
cflag <- ""
} else stop(sprintf("No %s command found", sQuote("xz")))
if (cflag == "-zstd")
if (nzchar(Sys.which("zstd"))) {
TAR <- paste("zstd -dc", shQuote(tarfile), "|", TAR)
tarfile <- "-"
cflag <- ""
} else stop(sprintf("No %s command found", sQuote("zstd")))
}

if (list) {
Expand Down Expand Up @@ -368,7 +376,7 @@ untar2 <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
}

tar <- function(tarfile, files = NULL,
compression = c("none", "gzip", "bzip2", "xz"),
compression = c("none", "gzip", "bzip2", "xz", "zstd"),
compression_level = 6, tar = Sys.getenv("tar"),
extra_flags = "")
{
Expand All @@ -391,7 +399,8 @@ tar <- function(tarfile, files = NULL,
"none" = "-cf",
"gzip" = "-zcf",
"bzip2" = "-jcf",
"xz" = "-Jcf")
"xz" = "-Jcf",
"zstd" = "--zstd -cf")

if (grepl("darwin", R.version$os)) {
## Precaution for macOS to omit resource forks
Expand Down Expand Up @@ -430,7 +439,8 @@ tar <- function(tarfile, files = NULL,
"none" = file(tarfile, "wb"),
"gzip" = gzfile(tarfile, "wb", compression = compression_level),
"bzip2" = bzfile(tarfile, "wb", compression = compression_level),
"xz" = xzfile(tarfile, "wb", compression = compression_level))
"xz" = xzfile(tarfile, "wb", compression = compression_level),
"zstd" = zstdfile(tarfile, "wb", compression = compression_level))
on.exit(close(con))
} else if(inherits(tarfile, "connection")) con <- tarfile
else stop("'tarfile' must be a character string or a connection")
Expand Down
41 changes: 26 additions & 15 deletions src/library/utils/man/tar.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
}
\usage{
tar(tarfile, files = NULL,
compression = c("none", "gzip", "bzip2", "xz"),
compression = c("none", "gzip", "bzip2", "xz", "zstd"),
compression_level = 6, tar = Sys.getenv("tar"),
extra_flags = "")
}
Expand All @@ -30,7 +30,8 @@ tar(tarfile, files = NULL,
be used (default none). Can be abbreviated.}

\item{compression_level}{integer: the level of compression. Only used
for the internal method.}
for the internal method: see the help for \code{\link{gzfile}} for
possible values.}

\item{tar}{character string: the path to the command to be used. If
the command itself contains spaces it needs to be quoted (e.g., by
Expand Down Expand Up @@ -66,7 +67,7 @@ tar(tarfile, files = NULL,
For GNU \command{tar},
\option{--format=ustar} forces a more portable format. (The default is
set at compilation and will be shown at the end of the output from
\command{tar --help}: for version 1.34 \sQuote{out-of-the-box} it is
\command{tar --help}: for version 1.35 \sQuote{out-of-the-box} it is
\option{--format=gnu}, but the manual says the intention is to change
%% https://www.gnu.org/software/tar/manual/tar.html#Formats
to \option{--format=posix} which is the same as \code{pax} --
Expand All @@ -78,7 +79,7 @@ tar(tarfile, files = NULL,

%% This uses -T, not supported by Solaris nor Heirloom Toolchest.
One issue which can cause an external command to fail is a command
line too long for the system shell: as from \R 3.5.0 this is worked
line too long for the system shell: this is worked
around if the external command is detected to be GNU \command{tar} or
\I{libarchive} \command{tar} (aka \command{bsdtar}).

Expand Down Expand Up @@ -169,25 +170,35 @@ tar(tarfile, files = NULL,
\section{Compression}{
When an external \command{tar} command is used, compressing the tar
archive requires that \command{tar} supports the \option{-z},
\option{-j} or \option{-J} flag, and may require the appropriate
command (\command{gzip}, \command{bzip2} or \command{xz}) to be
available. For GNU \command{tar}, further compression programs can be
specified by e.g.\sspace{}\code{extra_flags = "-I lz4"} or
\code{"--zstd"}, \code{"--lzip"} or \code{"--lzop"} in argument
\code{extra_flags}. Some versions of \command{bsdtar} accept options
such as \option{--zstd}, \option{--lz4}, \option{--lzop} and
\option{--lrzip} or an external compressor \emph{via}
\option{--use-compress-program lz4}: these could be supplied in
\code{extra_flags}.
\option{-j}, \option{-J} or \option{--zstd} flag, and may require the
appropriate command (\command{gzip}, \command{bzip2}, \command{xz} or
\command{zstd}) to be available. For GNU \command{tar}, further
compression programs can be specified by
e.g.\sspace{}\code{extra_flags = "-I lz4"} or \code{"--lzip"} or
\code{"--lzop"} in argument \code{extra_flags}. Some versions of
\command{bsdtar} accept options such as \option{--lz4},
\option{--lzop} and \option{--lrzip} or an external compressor
\emph{via} \option{--use-compress-program lz4}: these could be
supplied in \code{extra_flags}.

\I{NetBSD} prior to 8.0 used flag \option{--xz} rather than \option{-J},
so this should be used \emph{via} \code{extra_flags = "--xz"} rather
than \code{compression = "xz"}. The commands from \I{OpenBSD} and the
\I{Heirloom Toolchest} are not documented to support \command{xz}.
\I{Heirloom Toolchest} are not documented to support \command{xz} nor
\command{zstd}.

The \command{tar} program in recent macOS (e.g.\sspace{}15.2) does
support \command{zstd} compression \emph{via} an
external command, but Apple does not supply one.

The \command{tar} programs in commercial Unixen such as \I{AIX} and
Solaris do not support compression.

GNU \command{tar} added support in version 1.22 for \command{xz}
compression and in version 1.31 for \command{zstd} compression.
\command{bsdtar} added support for \command{xz} in 2019 and for
\command{zstd} in 2020.

Neither the internal nor the known external \command{tar} commands
support parallel compression --- but this function can be used to write
an uncompressed tarball which can then be compressed in parallel, for
Expand Down
68 changes: 39 additions & 29 deletions src/library/utils/man/untar.Rd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
% File src/library/utils/man/untar.Rd
% Part of the R package, https://www.R-project.org
% Copyright 2009-2019 R Core Team
% Copyright 2009-2025 R Core Team
% Distributed under GPL 2 or later

\name{untar}
Expand Down Expand Up @@ -40,13 +40,14 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",

\item{compressed}{(Deprecated in favour of auto-detection, used only
for an external \command{tar} command.) Logical or character
string. Values \code{"gzip"}, \code{"bzip2"} and \code{"xz"} select
that form of compression (and may be abbreviated to the first
letter). \code{TRUE} indicates \command{gzip} compression,
\code{FALSE} no known compression, and \code{NA} (the default)
indicates that the type is to be inferred from the file header.

The external command may ignore the selected compression type but
string. Values \code{"gzip"}, \code{"bzip2"}, \code{"xz"} and
\code{"zstd"} select that form of compression (and may be
abbreviated to the first letter). \code{TRUE} indicates
\command{gzip} compression, \code{FALSE} no known compression, and
\code{NA} (the default) indicates that the type is to be inferred
from the file header.

The external command may ignore the selected compression type and
detect a type automagically.
}

Expand Down Expand Up @@ -85,9 +86,9 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
}

\item{tar}{character string: the path to the command to be used or
\code{"internal"}. If the command itself contains spaces it needs
to be quoted -- but \code{tar} can also contain flags separated from
the command by spaces.}
\code{"internal"} or \code{""}. If the command itself contains
spaces it needs to be quoted -- but \code{tar} can also contain
flags separated from the command by spaces.}
}

\details{
Expand All @@ -107,16 +108,24 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
\item{GNU tar:}{Modern GNU \command{tar} versions support
compressed archives and since 1.15 are able to detect the type of
compression automatically: version 1.22 added support for
\command{xz} compression.
\command{xz} compression and version 1.31 for \command{zstd}
compression.

On a Unix-alike, \command{configure} will set environment variable
\env{TAR}, preferring GNU tar if found.}

%% bsdtar had it in FreeBSD 5.3 (2004)
\item{\code{bsdtar}:}{macOS 10.6 and later (and FreeBSD and some
other OSes) have a \command{tar}
from the \I{libarchive} project which detects all three forms
of compression automagically (even if undocumented in macOS).}
other OSes) have a \command{tar} from the \I{libarchive} project
which detects known-to-it forms of compression automagically.
However, this may rely on an external command being available: macOS
has a tar which knows about \code{zstd} compression, but relies
on a \command{zstd} command which it does not supply.

This added support for \command{xz} in 2019 and for \command{zstd}
in 2020 (if the appropriate library or external program is
available).
}

\item{NetBSD:}{It is undocumented if \I{NetBSD}'s \command{tar} can
detect compression automagically: for versions before 8 the flag
Expand All @@ -132,22 +141,23 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",

\item{Heirloom Toolchest:}{This \command{tar} does automagically
detect \command{gzip} and \command{bzip2} compression (undocumented)
but has no support for \command{xz} compression.}
but had no support for \command{xz} nor \command{zstd} compression.}

\item{Older support:}{Environment variable \env{R_GZIPCMD} gives the
command to decompress \command{gzip} files, and
\env{R_BZIPCMD} for \command{bzip2} files. (On Unix-alikes
these are set at installation if found.) \command{xz} is used if
available: if not decompression is expected to fail.}
these are set at installation if found.) An external program called
\command{xz} or \command{zstd} is used if available: if not
decompression is expected to fail.}
}

Arguments \code{compressed}, \code{extras} and \code{verbose} are only
used when an external \command{tar} is used.

Some external \command{tar} commands will detect some of
\command{lrzip}, \command{lzma}, \command{lz4}, \command{lzop} and
\command{zstd} compression in addition to \command{gzip},
\command{bzip2} and \command{xz}. (For some external \command{tar}
\command{lrzip}, \command{lzma}, \command{lz4} and \command{lzop}
compression in addition to \command{gzip}, \command{bzip2},
\command{xz} and \command{zstd}. (For some external \command{tar}
commands, compressed tarfiles can only be read if the appropriate
utility program is available.) For GNU \command{tar}, further
(de)compression programs can be specified by e.g.\sspace{}\code{extras
Expand All @@ -162,14 +172,14 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
linking operation fails (as it may on a FAT file system), a file copy
is tried. Since it uses \code{\link{gzfile}} to read a file it can
handle files compressed by any of the methods that function can
handle: at least \command{compress}, \command{gzip}, \command{bzip2}
and \command{xz} compression, and some types of \command{lzma}
compression. It does not guard against restoring absolute file paths,
as some \command{tar} implementations do. It will create the parent
directories for directories or files in the archive if necessary. It
handles the \I{USTAR}/POSIX, GNU and \command{pax} ways of handling file
paths of more than 100 bytes, and the GNU way of handling link targets
of more than 100 bytes.
handle: at least \command{compress}, \command{gzip}, \command{bzip2},
\command{xz} and \command{zstd} compression, and some types of
\command{lzma} compression. It does not guard against restoring
absolute file paths, as some \command{tar} implementations do. It
will create the parent directories for directories or files in the
archive if necessary. It handles the \I{USTAR}/POSIX, GNU and
\command{pax} ways of handling file paths of more than 100 bytes, and
the GNU way of handling link targets of more than 100 bytes.

You may see warnings from the internal implementation such
as \preformatted{ unsupported entry type 'x'}
Expand Down
47 changes: 47 additions & 0 deletions src/library/utils/tests/tar.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
### tests of compression in tar() and untar()

## Print warnings as they occur so that they appear next to the step that
## raised them in the test output.
options(warn = 1)
## Directory in which the tarballs are written and later unpacked.
old <- getwd()

## For each compression type: create a tarball of the installed 'utils'
## package, then list and unpack it with the internal untar method.
for(f in c("none", "gzip", "bzip2", "xz", "zstd"))
{
    z <- if (f=="none") "utils.tar" else paste0("utils.tar.", f)
    zz <- file.path(old, z)
    message("making ", z)
    ## tar() is called from the library directory so that the relative
    ## path "utils" refers to the installed package.
    setwd(R.home('library'))
    ## zstd support is optional
    y <- try(tar(zz, "utils", f))
    ## Restore the working directory *before* any 'next': otherwise a
    ## failed tar() would leave the rest of the script running inside
    ## R's library directory.
    setwd(old)
    if(inherits(y, "try-error")) next
    print(file.size(zz))
    print(head(untar(zz, list = TRUE, tar = "internal")))
    untar(zz, tar = "internal")
}

## Now try external untar
for(f in c("none", "gzip", "bzip2", "xz", "zstd"))
{
    z <- if (f=="none") "utils.tar" else paste0("utils.tar.", f)
    ## Only unpack tarballs that exist in the current directory.
    if (!file.exists(z)) next
    message("unpacking ", z)
    ## Wrap in try() so that an error from untar() is reported and skipped
    ## by the check below, rather than aborting the whole script.
    y <- try(untar(z))
    if(inherits(y, "try-error")) next
    print(head(dir("utils"), 5))
}

## and external tar
## Use the command named by environment variable TAR, defaulting to "tar".
TAR <- Sys.getenv("TAR", "tar")
for(f in c("none", "gzip", "bzip2", "xz", "zstd"))
{
    z <- if (f=="none") "utils.tar" else paste0("utils.tar.", f)
    zz <- file.path(old, z)
    message("making ", z)
    ## tar() is called from the library directory so that the relative
    ## path "utils" refers to the installed package.
    setwd(R.home('library'))
    y <- try(tar(zz, "utils", f, tar = TAR))
    ## Restore the working directory *before* any 'next', so a failure
    ## (e.g. no zstd support) cannot leave us inside R's library directory.
    setwd(old)
    if(inherits(y, "try-error")) next
    print(file.size(zz))
}

## Remove the unpacked copy by absolute path: a relative "utils" would name
## the installed package if the working directory were the library directory.
unlink(file.path(old, "utils"), recursive = TRUE)

0 comments on commit 18683f9

Please sign in to comment.