diff --git a/Cargo.toml b/Cargo.toml
index 11c9f18be..575be8b30 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ authors = [
 license = "MIT"
 repository = "https://github.com/zip-rs/zip2.git"
 keywords = ["zip", "archive", "compression"]
+categories = ["compression", "filesystem", "parser-implementations"]
 rust-version = "1.73.0"
 description = """
 Library to support the reading and writing of zip files.
@@ -23,7 +24,9 @@ all-features = true
 rustdoc-args = ["--cfg", "docsrs"]

 [workspace.dependencies]
-time = { version = "0.3.1", default-features = false }
+arbitrary = { version = "1.3.2", features = ["derive"] }
+time = { version = "0.3.36", default-features = false }
+zip = { path = ".", default-features = false }

 [dependencies]
 aes = { version = "0.8", optional = true }
@@ -53,7 +56,7 @@ lzma-rs = { version = "0.3", default-features = false, optional = true }
 crossbeam-utils = "0.8.20"

 [target.'cfg(fuzzing)'.dependencies]
-arbitrary = { version = "1.3.2", features = ["derive"] }
+arbitrary.workspace = true

 [dev-dependencies]
 bencher = "0.1.5"
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
new file mode 100644
index 000000000..e6cd1398b
--- /dev/null
+++ b/cli/Cargo.toml
@@ -0,0 +1,73 @@
+[package]
+name = "zip-cli"
+version = "0.0.1"
+authors = [
+    "Danny McClanahan <dmcC2@hypnicjerk.ai>",
+]
+license = "MIT"
+repository = "https://github.com/zip-rs/zip2.git"
+keywords = ["zip", "archive", "compression", "cli"]
+categories = ["command-line-utilities", "compression", "filesystem", "development-tools::build-utils"]
+rust-version = "1.74.0"
+description = """
+Binary for creation and manipulation of zip files.
+"""
+edition = "2021"
+
+# Prevent this from interfering with workspaces
+[workspace]
+members = ["."]
+
+[lib]
+
+[[bin]]
+name = "zip-cli"
+
+[dependencies]
+glob = { version = "0.3", optional = true }
+regex = { version = "1", optional = true }
+json = { version = "0.12", optional = true }
+
+[dependencies.zip]
+path = ".."
+default-features = false
+
+[features]
+aes-crypto = ["zip/aes-crypto"]
+bzip2 = ["zip/bzip2"]
+chrono = ["zip/chrono"]
+deflate64 = ["zip/deflate64"]
+deflate = ["zip/deflate"]
+deflate-flate2 = ["zip/deflate-flate2"]
+deflate-zlib = ["zip/deflate-zlib"]
+deflate-zlib-ng = ["zip/deflate-zlib-ng"]
+deflate-zopfli = ["zip/deflate-zopfli"]
+lzma = ["zip/lzma"]
+time = ["zip/time"]
+xz = ["zip/xz"]
+zstd = ["zip/zstd"]
+
+glob = ["dep:glob"]
+rx = ["dep:regex"]
+json = ["dep:json"]
+
+default = [
+    "aes-crypto",
+    "bzip2",
+    "deflate64",
+    "deflate",
+    "lzma",
+    "time",
+    "xz",
+    "zstd",
+    "glob",
+    "rx",
+    "json",
+]
+
+
+[profile.release]
+strip = true
+lto = true
+opt-level = 3
+codegen-units = 1
diff --git a/cli/clite/Cargo.toml b/cli/clite/Cargo.toml
new file mode 100644
index 000000000..475b3b483
--- /dev/null
+++ b/cli/clite/Cargo.toml
@@ -0,0 +1,42 @@
+[package]
+name = "zip-clite"
+version = "0.0.1"
+authors = [
+    "Danny McClanahan <dmcC2@hypnicjerk.ai>",
+]
+license = "MIT"
+repository = "https://github.com/zip-rs/zip2.git"
+keywords = ["zip", "archive", "compression", "cli"]
+categories = ["command-line-utilities", "compression", "filesystem", "development-tools::build-utils"]
+rust-version = "1.74.0"
+description = """
+Binary for creation and manipulation of zip files.
+"""
+edition = "2021"
+
+# Prevent this from interfering with workspaces
+[workspace]
+members = ["."]
+
+[[bin]]
+name = "zip-clite"
+
+[dependencies.zip-cli]
+path = ".."
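+# zip-cli is consumed here as a library crate; only the minimal feature set below is enabled.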
+default-features = false
+features = [
+    "deflate-flate2",
+    "deflate-zlib",
+]
+
+[features]
+# These are all pure rust crates with no significant dependency tree.
+rust-deps = ["zip-cli/glob", "zip-cli/rx", "zip-cli/json"]
+
+default = []
+
+[profile.release]
+strip = true
+lto = true
+opt-level = "s"
+codegen-units = 1
diff --git a/cli/clite/src/main.rs b/cli/clite/src/main.rs
new file mode 100644
index 000000000..95fae2ac9
--- /dev/null
+++ b/cli/clite/src/main.rs
@@ -0,0 +1,3 @@
+fn main() {
+    zip_cli::driver::main();
+}
diff --git a/cli/src/args.rs b/cli/src/args.rs
new file mode 100644
index 000000000..1fd5282d0
--- /dev/null
+++ b/cli/src/args.rs
@@ -0,0 +1,329 @@
+use std::{collections::VecDeque, ffi::OsString, fmt, sync::OnceLock};
+
+#[derive(Debug)]
+pub enum ArgParseError {
+    StdoutMessage(String),
+    /* FIXME: give these errors much more structure!! */
+    StderrMessage(String),
+}
+
+#[derive(Debug)]
+pub struct ZipCli {
+    pub verbose: bool,
+    pub command: ZipCommand,
+}
+
+#[derive(Debug)]
+enum SubcommandName {
+    Compress,
+    Info,
+    Extract,
+}
+
+static PARSED_EXE_NAME: OnceLock<String> = OnceLock::new();
+
+impl ZipCli {
+    const VERSION: &'static str = env!("CARGO_PKG_VERSION");
+    const DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION");
+
+    pub const INTERNAL_ERROR_EXIT_CODE: i32 = 3;
+    pub const ARGV_PARSE_FAILED_EXIT_CODE: i32 = 2;
+    pub const NON_FAILURE_EXIT_CODE: i32 = 0;
+
+    pub fn binary_name() -> &'static str {
+        PARSED_EXE_NAME.get().expect("binary name was not set yet")
+    }
+
+    fn generate_version_text() -> String {
+        format!("{} {}\n", Self::binary_name(), Self::VERSION)
+    }
+
+    fn generate_usage_line() -> String {
+        format!("Usage: {} [OPTIONS] <COMMAND>", Self::binary_name())
+    }
+
+    fn generate_full_help_text() -> String {
+        format!(
+            "\
+{}
+
+{}
+
+Commands:
+  {}{}{}
+  {}{}{}
+  {}{}{}
+
+Options:
+  -v, --verbose     Write information logs to stderr
+  -h, --help        Print help
+  -V, --version     Print version
+",
+            Self::DESCRIPTION,
+            Self::generate_usage_line(),
+            compress::Compress::COMMAND_NAME,
+            compress::Compress::COMMAND_TABS,
+            compress::Compress::COMMAND_DESCRIPTION,
+            info::Info::COMMAND_NAME,
+            info::Info::COMMAND_TABS,
+            info::Info::COMMAND_DESCRIPTION,
+            extract::Extract::COMMAND_NAME,
+            extract::Extract::COMMAND_TABS,
+            extract::Extract::COMMAND_DESCRIPTION,
+        )
+    }
+
+    fn generate_brief_help_text(context: &str) -> String {
+        format!(
+            "\
+error: {context}
+
+{}
+
+For more information, try '--help'.
+", + Self::generate_usage_line() + ) + } + + fn parse_up_to_subcommand_name( + argv: &mut VecDeque, + ) -> Result<(bool, SubcommandName), ArgParseError> { + let mut verbose: bool = false; + let mut subcommand_name: Option = None; + while subcommand_name.is_none() { + match argv.pop_front() { + None => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StderrMessage(help_text)); + } + Some(arg) => match arg.as_encoded_bytes() { + b"-v" | b"--verbose" => verbose = true, + b"-V" | b"--version" => { + let version_text = Self::generate_version_text(); + return Err(ArgParseError::StdoutMessage(version_text)); + } + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + b"compress" => subcommand_name = Some(SubcommandName::Compress), + b"info" => subcommand_name = Some(SubcommandName::Info), + b"extract" => subcommand_name = Some(SubcommandName::Extract), + arg_bytes => { + let context = if arg_bytes.starts_with(b"-") { + format!("unrecognized global flag {arg:?}") + } else { + format!("unrecognized subcommand name {arg:?}") + }; + let help_text = Self::generate_brief_help_text(&context); + return Err(ArgParseError::StderrMessage(help_text)); + } + }, + } + } + Ok((verbose, subcommand_name.unwrap())) + } + + pub fn parse_argv(argv: impl IntoIterator) -> Result { + let mut argv: VecDeque = argv.into_iter().collect(); + let exe_name: String = argv + .pop_front() + .expect("exe name not on command line") + .into_string() + .expect("exe name not valid unicode"); + PARSED_EXE_NAME + .set(exe_name) + .expect("exe name already written"); + let (verbose, subcommand_name) = Self::parse_up_to_subcommand_name(&mut argv)?; + let command = match subcommand_name { + SubcommandName::Info => ZipCommand::Info(info::Info::parse_argv(argv)?), + SubcommandName::Extract => ZipCommand::Extract(extract::Extract::parse_argv(argv)?), + SubcommandName::Compress => ZipCommand::Compress(compress::Compress::parse_argv(argv)?), + }; + Ok(Self { verbose, command }) + } +} + +#[derive(Debug)] +pub enum ZipCommand { + Compress(compress::Compress), + Info(info::Info), + Extract(extract::Extract), +} + +pub mod resource { + use super::*; + + use crate::schema::{backends::Backend, transformers::WrapperError}; + + use std::{any, error}; + + pub trait ResourceValue: any::Any {} + + pub trait Resource { + /* const ID: &'static str; */ + type Value: ResourceValue + where + Self: Sized; + type Args + where + Self: Sized; + fn declare(args: Self::Args) -> Self + where + Self: Sized; + } + + pub trait ArgvResource: Resource { + type ArgvParseError + where + Self: Sized; + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result<::Value, Self::ArgvParseError> + where + ::Value: Sized, + Self: Sized; + + /* fn print_help(&self) -> String; */ + + #[cfg(test)] + fn parse_argv_from( + &self, + argv: impl IntoIterator>, + ) -> Result<::Value, Self::ArgvParseError> + where + ::Value: Sized, + Self: Sized, + { + let mut argv: VecDeque = argv.into_iter().map(|s| s.into()).collect(); + self.parse_argv(&mut argv) + } + + #[cfg(test)] + fn parse_argv_from_empty(&self) -> Result<::Value, Self::ArgvParseError> + where + ::Value: Sized, + Self: Sized, + { + self.parse_argv_from(Vec::::new()) + } + } + + pub trait PositionalArgvResource: ArgvResource {} + + pub trait SchemaResource: Resource { + type B: Backend; + type SchemaParseError; + fn parse_schema<'a>( + &self, + v: ::Val<'a>, + ) -> Result<::Value, Self::SchemaParseError> + where + 
+            <Self as Resource>::Value: Sized,
+            Self: Sized;
+
+        fn parse_schema_str<'a>(
+            &self,
+            s: <Self::B as Backend>::Str<'a>,
+        ) -> Result<
+            <Self as Resource>::Value,
+            WrapperError<<Self::B as Backend>::Err<'a>, Self::SchemaParseError>,
+        >
+        where
+            <Self as Resource>::Value: Sized,
+            Self: Sized,
+        {
+            let v = <Self::B as Backend>::parse(s).map_err(WrapperError::In)?;
+            Ok(self.parse_schema(v).map_err(WrapperError::Out)?)
+        }
+    }
+}
+
+pub trait CommandFormat: fmt::Debug {
+    const COMMAND_NAME: &'static str;
+    const COMMAND_TABS: &'static str;
+    const COMMAND_DESCRIPTION: &'static str;
+
+    const USAGE_LINE: &'static str;
+
+    fn generate_usage_line() -> String {
+        format!(
+            "Usage: {} {} {}",
+            ZipCli::binary_name(),
+            Self::COMMAND_NAME,
+            Self::USAGE_LINE,
+        )
+    }
+
+    fn generate_help() -> String;
+
+    fn generate_full_help_text() -> String {
+        format!(
+            "\
+{}
+
+{}
+{}",
+            Self::COMMAND_DESCRIPTION,
+            Self::generate_usage_line(),
+            Self::generate_help(),
+        )
+    }
+
+    fn generate_brief_help_text(context: &str) -> String {
+        format!(
+            "\
+error: {context}
+
+{}
+",
+            Self::generate_usage_line()
+        )
+    }
+
+    fn exit_arg_invalid(context: &str) -> ArgParseError {
+        let message = Self::generate_brief_help_text(context);
+        ArgParseError::StderrMessage(message)
+    }
+
+    fn parse_argv(argv: VecDeque<OsString>) -> Result<Self, ArgParseError>
+    where
+        Self: Sized;
+}
+
+pub trait ComposedCommand: CommandFormat {
+    type ResourceArgs;
+    fn get_resource_args() -> Self::ResourceArgs;
+    fn from_resource_args(
+        args: Self::ResourceArgs,
+        argv: VecDeque<OsString>,
+    ) -> Result<Self, ArgParseError>
+    where
+        Self: Sized;
+
+    fn parse_composed_argv(mut argv: VecDeque<OsString>) -> Result<Self, ArgParseError>
+    where
+        Self: Sized,
+    {
+        if let Some(arg) = argv.pop_front() {
+            match arg.as_encoded_bytes() {
+                b"-h" | b"--help" => {
+                    let help_text = Self::generate_full_help_text();
+                    return Err(ArgParseError::StdoutMessage(help_text));
+                }
+                _ => {
+                    argv.push_front(arg);
+                }
+            }
+        }
+
+        let spec = Self::get_resource_args();
+        Self::from_resource_args(spec, argv)
+    }
+}
+
+pub mod compress;
+pub mod extract;
+pub mod info;
diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs
new file mode 100644
index 000000000..2961ebb9b
--- /dev/null
+++ b/cli/src/args/compress.rs
@@ -0,0 +1,364 @@
+use super::{ArgParseError, CommandFormat, ComposedCommand};
+
+use zip::{write::SimpleFileOptions, CompressionMethod};
+
+use std::{collections::VecDeque, ffi::OsString, num::ParseIntError, path::PathBuf};
+
+pub mod resource;
+use super::resource::{ArgvResource, Resource};
+use resource::{GlobalFlagsResource, ModSeqResource, OutputFlagsResource};
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub enum CompressionMethodArg {
+    Stored,
+    Deflate, /* requires having zip/_deflate-any set to compile */
+    #[cfg(feature = "deflate64")]
+    Deflate64,
+    #[cfg(feature = "bzip2")]
+    Bzip2,
+    #[cfg(feature = "zstd")]
+    Zstd,
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct CompressionLevel(pub i64);
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct UnixPermissions(pub u32);
+
+impl UnixPermissions {
+    pub fn parse(s: &str) -> Result<Self, ParseIntError> {
+        Ok(Self(u32::from_str_radix(s, 8)?))
+    }
+}
+
+#[derive(Debug)]
+pub enum CompressionArg {
+    CompressionMethod(CompressionMethodArg),
+    Level(CompressionLevel),
+    UnixPermissions(UnixPermissions),
+    LargeFile(bool),
+    Name(String),
+    Dir,
+    Symlink,
+    Immediate(OsString),
+    FilePath(PathBuf),
+    RecursiveDirPath(PathBuf),
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum OutputType {
+    Stdout { allow_tty: bool },
+    File { path: PathBuf, append: bool },
+}
+
+impl Default for OutputType {
+    fn default() ->
Self { + Self::Stdout { allow_tty: false } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct GlobalFlags { + pub archive_comment: Option, +} + +impl Default for GlobalFlags { + fn default() -> Self { + Self { + archive_comment: None, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum EntrySpec { + Dir { + name: String, + }, + Immediate { + name: String, + data: OsString, + symlink_flag: bool, + }, + File { + name: Option, + path: PathBuf, + symlink_flag: bool, + }, + RecDir { + name: Option, + path: PathBuf, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ModificationOperation { + CreateEntry { + options: SimpleFileOptions, + spec: EntrySpec, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ModificationSequence { + pub operations: Vec, +} + +impl Default for ModificationSequence { + fn default() -> Self { + Self { + operations: Vec::new(), + } + } +} + +#[derive(Debug)] +pub struct Compress { + pub output: OutputType, + pub global_flags: GlobalFlags, + pub mod_seq: ModificationSequence, +} + +impl Compress { + #[cfg(feature = "deflate64")] + const DEFLATE64_HELP_LINE: &'static str = " - deflate64:\twith deflate64\n"; + #[cfg(not(feature = "deflate64"))] + const DEFLATE64_HELP_LINE: &'static str = ""; + + #[cfg(feature = "bzip2")] + const BZIP2_HELP_LINE: &'static str = " - bzip2:\twith bzip2\n"; + #[cfg(not(feature = "bzip2"))] + const BZIP2_HELP_LINE: &'static str = ""; + + #[cfg(feature = "zstd")] + const ZSTD_HELP_LINE: &'static str = " - zstd:\twith zstd\n"; + #[cfg(not(feature = "zstd"))] + const ZSTD_HELP_LINE: &'static str = ""; +} + +/* TODO: add support for entry comments! */ +/* TODO: add support for merging/transforming other zips!! */ +impl CommandFormat for Compress { + const COMMAND_NAME: &'static str = "compress"; + const COMMAND_TABS: &'static str = "\t"; + const COMMAND_DESCRIPTION: &'static str = + "Generate an archive from data in argument strings or read from the filesystem."; + + const USAGE_LINE: &'static str = + "[-h|--help] [OUTPUT-FLAGS] [GLOBAL-FLAGS] [ATTR|ENTRY-DATA]... [--] [ENTRY-PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +Output flags (OUTPUT-FLAGS): Where and how to write the generated zip archive. + +If not specified, output is written to stdout. + +OUTPUT-FLAGS = [--append] --output-file + = --stdout + + -o, --output-file + Output zip file path to write. + + The output file is truncated if it already exists, unless --append is + provided. + + --append + If an output path is provided with -o, open it as an existing zip + archive and append to it. + + If the output path does not already exist, no error is produced, and + a new zip file is created at the given path. + + --stdout + Allow writing output to stdout even if stdout is a tty. + +Global flags (GLOBAL-FLAGS): These flags describe information set for the entire produced archive. + +GLOBAL-FLAGS = --archive-comment + + --archive-comment + If provided, this will set the archive's comment field to the + specified bytes. This does not need to be valid unicode. + +Attributes (ATTR): Settings for entry metadata. + +Attributes may be "sticky" or "non-sticky". Sticky attributes apply to +everything that comes after them, while non-sticky attributes only apply to the +next entry after them. + +ATTR = STICKY + = NON-STICKY + +Sticky attributes (STICKY): Generic metadata. + +These flags apply to everything that comes after them until reset by another +instance of the same attribute. 
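+For example, '-c stored -f a.txt -f b.txt' writes both entries with the stored
+(uncompressed) method.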
+ +STICKY = --compression-method + = --compression-level + = --mode + = --large-file # [true|false] + + -c, --compression-method + Which compression technique to use. + Defaults to deflate if not specified. + + Possible values: + - stored: uncompressed + - deflate: with deflate (default) +{}{}{} + -l, --compression-level + How much compression to perform, from 0..=24. + + The accepted range of values differs for each technique. + TODO: how much??? + + -m, --mode + Unix permissions to apply to the file, in octal (like chmod). + + --large-file # [true|false] + Whether to enable large file support. + This may take up more space for records, but allows files over 32 bits + in length to be written, up to 64 bit sizes. + File arguments over 32 bits in length (either provided explicitly or + encountered when traversing a recursive directory) will have this flag + set automatically, without affecting the sticky value for + later options. + Therefore, this option likely never has to be set explicitly by + the user. + +Non-sticky attributes (NON-STICKY): Metadata for a single entry. + +These flags only apply to the next entry after them, and may not be repeated. + +NON-STICKY = --name + = --symlink + + -n, --name + The name to apply to the entry. This must be UTF-8 encoded. + + -s, --symlink + Make the next entry into a symlink entry. + + A symlink entry may be immediate with -i, or it may copy the target + from an existing symlink with -f. + +Entry data (ENTRY-DATA): Create an entry in the output zip archive. + +ENTRY-DATA = --dir + = --immediate + = --file + = --recursive-dir + + -d, --dir + Create a directory entry. + A name must be provided beforehand with -n. + + -i, --immediate + Write an entry containing the data in the argument + + This data need not be UTF-8 encoded, but will exit early upon + encountering any null bytes. A name must be provided beforehand with + -n. + + -f, --file + Write an entry with the contents of this file path. + A name may be provided beforehand with -n, otherwise the name will be + inferred from relativizing the given path to the working directory. + Note that sockets are currently not supported and will produce an + error. Providing a path to a directory will produce an error. + + If -s was specified beforehand, the path will be read as a symlink, + which will produce an error if the path does not point to a symbolic + link. If -s was not specified beforehand and a symlink path was + provided, then the symbolic link will be interpreted as if it was + a file with the contents of the symlink target, but with its name + corresponding to the symlink path (unless overridden with -n). + + -r, --recursive-dir + Write all the recursive contents of this directory path. + A name may be provided beforehand with -n, which will be used as the + prefix for all recursive contents of this directory. Otherwise, the + name will be inferred from relativizing the given path to the + working directory. + + -s is not allowed before this argument. If a path to a symbolic link + is provided, it will be treated as if it pointed to a directory with + the recursive contents of the target directory, but with its name + corresponding to the symlink path (unless overridden with -n). + Providing a symlink path which points to a file will produce an error. + +Positional entries (ENTRY-PATH): Paths which are converted into entries. 
+ +Any sticky attributes will continue to apply to entries specified via path, +while any non-sticky attributes not matched to an explicit ENTRY-DATA will produce +an error. + +ENTRY-PATH = + + + Write the file or recursive directory contents, relativizing the path. + + If the given path points to a file, then a single file entry will + be written. + If the given path is a symlink, then a single symlink entry will + be written. + If the given path refers to a directory, then the recursive contents + will be written, reproducing files and symlinks. + Socket paths will produce an error. +"#, + Self::DEFLATE64_HELP_LINE, + Self::BZIP2_HELP_LINE, + Self::ZSTD_HELP_LINE, + ) + } + + fn parse_argv(mut argv: VecDeque) -> Result + where + Self: Sized, + { + ComposedCommand::parse_composed_argv(argv) + } +} + +impl ComposedCommand for Compress { + type ResourceArgs = (OutputFlagsResource, GlobalFlagsResource, ModSeqResource); + fn get_resource_args() -> Self::ResourceArgs { + ( + OutputFlagsResource::declare(()), + GlobalFlagsResource::declare(()), + ModSeqResource::declare(()), + ) + } + fn from_resource_args( + args: Self::ResourceArgs, + mut argv: VecDeque, + ) -> Result { + let (output, global_flags, mod_seq) = args; + let output = output + .parse_argv(&mut argv) + .map_err(|e| Self::exit_arg_invalid(&format!("{e}")))?; + let global_flags = global_flags + .parse_argv(&mut argv) + .map_err(|e| Self::exit_arg_invalid(&format!("{e}")))?; + let mod_seq = mod_seq + .parse_argv(&mut argv) + .map_err(|e| Self::exit_arg_invalid(&format!("{e:?}")))?; + + Ok(Self { + output, + global_flags, + mod_seq, + }) + } +} + +impl crate::driver::ExecuteCommand for Compress { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::compress::execute_compress(err, self) + } +} diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs new file mode 100644 index 000000000..696f78196 --- /dev/null +++ b/cli/src/args/compress/resource.rs @@ -0,0 +1,1068 @@ +use super::*; +use crate::args::resource::*; + +pub struct OutputFlagsResource; + +impl ResourceValue for OutputType {} + +impl Resource for OutputFlagsResource { + /* const ID: &'static str = "OUTPUT-FLAGS"; */ + type Value = OutputType; + type Args = (); + fn declare(args: Self::Args) -> Self { + Self + } +} + +pub struct GlobalFlagsResource; + +impl ResourceValue for GlobalFlags {} + +impl Resource for GlobalFlagsResource { + /* const ID: &'static str = "GLOBAL-FLAGS"; */ + type Value = GlobalFlags; + type Args = (); + fn declare(args: Self::Args) -> Self { + Self + } +} + +pub struct ModSeqResource; + +impl ResourceValue for ModificationSequence {} + +impl Resource for ModSeqResource { + /* const ID: &'static str = "MOD-SEQ"; */ + type Value = ModificationSequence; + type Args = (); + fn declare(args: Self::Args) -> Self { + Self + } +} + +pub mod argv { + use super::*; + + use std::{collections::VecDeque, error, ffi::OsString, fmt, path::PathBuf}; + + #[derive(Debug)] + pub enum OutputTypeError { + ArgWith(&'static str, String), + ArgTwice(&'static str), + NoValFor(&'static str), + ValArgTwice { + arg: &'static str, + prev: String, + new: String, + }, + } + + impl fmt::Display for OutputTypeError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::ArgWith(arg_name, other_entity) => { + write!(f, "{arg_name} is mutually exclusive with {other_entity}") + } + Self::ArgTwice(arg_name) => { + write!(f, "{arg_name} provided twice") + } + Self::NoValFor(arg_name) => { + write!(f, 
"no value provided for {arg_name}") + } + Self::ValArgTwice { arg, prev, new } => { + write!( + f, + "value provided twice for argument {arg}. prev was: {prev}, new was {new}" + ) + } + } + } + } + + impl error::Error for OutputTypeError {} + + impl ArgvResource for OutputFlagsResource { + /* fn print_help(&self) -> String { */ +/* r#" */ +/* Output flags (OUTPUT-FLAGS): Where and how to write the generated zip archive. */ + +/* If not specified, output is written to stdout. */ + +/* OUTPUT-FLAGS = [--append] --output-file */ +/* = --stdout */ + +/* -o, --output-file */ +/* Output zip file path to write. */ + +/* The output file is truncated if it already exists, unless --append is */ +/* provided. */ + +/* --append */ +/* If an output path is provided with -o, open it as an existing zip */ +/* archive and append to it. */ + +/* If the output path does not already exist, no error is produced, and */ +/* a new zip file is created at the given path. */ + +/* --stdout */ +/* Allow writing output to stdout even if stdout is a tty. */ +/* "# */ +/* } */ + + type ArgvParseError = OutputTypeError; + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result { + let mut allow_stdout: bool = false; + let mut append_to_output_path: bool = false; + let mut output_path: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"--stdout" => { + if let Some(output_path) = output_path.take() { + return Err(OutputTypeError::ArgWith( + "--stdout", + format!("output file {output_path:?}"), + )); + } + if append_to_output_path { + return Err(OutputTypeError::ArgWith( + "--stdout", + "--append".to_string(), + )); + } + if allow_stdout { + return Err(OutputTypeError::ArgTwice("--stdout")); + } + allow_stdout = true; + } + b"--append" => { + if append_to_output_path { + return Err(OutputTypeError::ArgTwice("--append")); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith( + "--append", + "--stdout".to_string(), + )); + } + append_to_output_path = true; + } + b"-o" | b"--output-file" => { + let new_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| OutputTypeError::NoValFor("-o/--output-file"))?; + if let Some(prev_path) = output_path.take() { + return Err(OutputTypeError::ValArgTwice { + arg: "-o/--output-file", + prev: format!("{prev_path:?}"), + new: format!("{new_path:?}"), + }); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith( + "--stdout", + "-o/--output-file".to_string(), + )); + } + output_path = Some(new_path); + } + _ => { + argv.push_front(arg); + break; + } + } + } + + Ok(if let Some(output_path) = output_path { + OutputType::File { + path: output_path, + append: append_to_output_path, + } + } else { + OutputType::Stdout { + allow_tty: allow_stdout, + } + }) + } + } + + #[derive(Debug)] + pub enum GlobalFlagsError { + NoValFor(&'static str), + ValArgTwice { + arg: &'static str, + prev: String, + new: String, + }, + } + + impl fmt::Display for GlobalFlagsError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::NoValFor(arg_name) => { + write!(f, "no value provided for {arg_name}") + } + Self::ValArgTwice { arg, prev, new } => { + write!( + f, + "value provided twice for argument {arg}. 
prev was: {prev}, new was {new}" + ) + } + } + } + } + + impl error::Error for GlobalFlagsError {} + + impl ArgvResource for GlobalFlagsResource { + type ArgvParseError = GlobalFlagsError; + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result { + let mut archive_comment: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"--archive-comment" => { + let new_comment = argv + .pop_front() + .ok_or_else(|| GlobalFlagsError::NoValFor("--archive-comment"))?; + if let Some(prev_comment) = archive_comment.take() { + return Err(GlobalFlagsError::ValArgTwice { + arg: "--archive-comment", + prev: format!("{prev_comment:?}"), + new: format!("{new_comment:?}"), + }); + } + archive_comment = Some(new_comment); + } + _ => { + argv.push_front(arg); + break; + } + } + } + + Ok(GlobalFlags { archive_comment }) + } + } + + pub mod compression_args { + use super::*; + use crate::{schema::transformers::WrapperError, CommandError, WrapCommandErr}; + + use zip::{unstable::path_to_string, write::SimpleFileOptions, CompressionMethod}; + + use std::mem; + + #[derive(Debug)] + pub enum ModificationSequenceError { + NoValFor(&'static str), + Unrecognized { + context: &'static str, + value: String, + }, + ValidationFailed { + codec: &'static str, + context: &'static str, + value: String, + }, + } + + impl fmt::Display for ModificationSequenceError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::NoValFor(arg_name) => { + write!(f, "no value provided for {arg_name}") + } + Self::Unrecognized { context, value } => { + write!(f, "unrecognized {context}: {value}") + } + Self::ValidationFailed { + codec, + context, + value, + } => { + write!(f, "{codec} for {context}: {value}") + } + } + } + } + + impl error::Error for ModificationSequenceError {} + + pub struct CompressionArgs { + pub args: Vec, + pub positional_paths: Vec, + } + + impl CompressionArgs { + fn initial_options() -> SimpleFileOptions { + SimpleFileOptions::default() + .compression_method(CompressionMethod::Deflated) + .large_file(false) + } + + fn parse_compression_method( + name: OsString, + ) -> Result { + Ok(match name.as_encoded_bytes() { + b"stored" => CompressionArg::CompressionMethod(CompressionMethodArg::Stored), + b"deflate" => CompressionArg::CompressionMethod(CompressionMethodArg::Deflate), + #[cfg(feature = "deflate64")] + b"deflate64" => { + CompressionArg::CompressionMethod(CompressionMethodArg::Deflate64) + } + #[cfg(feature = "bzip2")] + b"bzip2" => CompressionArg::CompressionMethod(CompressionMethodArg::Bzip2), + #[cfg(feature = "zstd")] + b"zstd" => CompressionArg::CompressionMethod(CompressionMethodArg::Zstd), + _ => { + return Err(ModificationSequenceError::Unrecognized { + context: "compression method", + value: format!("{name:?}"), + }) + } + }) + } + + fn parse_unicode( + context: &'static str, + arg: OsString, + ) -> Result { + arg.into_string() + .map_err(|arg| ModificationSequenceError::ValidationFailed { + codec: "invalid unicode", + context, + value: format!("{arg:?}"), + }) + } + + fn parse_i64( + context: &'static str, + arg: String, + ) -> Result { + arg.parse::() + .map_err(|e| ModificationSequenceError::ValidationFailed { + codec: "failed to parse integer", + context, + value: format!("{e}"), + }) + } + + fn parse_compression_level( + level: OsString, + ) -> Result { + let level = Self::parse_unicode("compression level", level)?; + let level = Self::parse_i64("compression level", level)?; + if (0..=24).contains(&level) { + 
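+                    /* 0..=24 is the widest level range of any supported method; the accepted range differs per technique (see the help text). */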
Ok(CompressionArg::Level(CompressionLevel(level))) + } else { + Err(ModificationSequenceError::ValidationFailed { + codec: "integer was not between 0 and 24", + context: "compression level", + value: format!("{level}"), + }) + } + } + + fn parse_mode(mode: OsString) -> Result { + let mode = Self::parse_unicode("mode", mode)?; + let mode = UnixPermissions::parse(&mode).map_err(|e| { + ModificationSequenceError::ValidationFailed { + codec: "failed to parse octal integer", + context: "compression mode", + value: format!("{e}"), + } + })?; + Ok(CompressionArg::UnixPermissions(mode)) + } + + fn parse_large_file( + large_file: OsString, + ) -> Result { + Ok(match large_file.as_encoded_bytes() { + b"true" => CompressionArg::LargeFile(true), + b"false" => CompressionArg::LargeFile(false), + _ => { + return Err(ModificationSequenceError::Unrecognized { + context: "value for --large-file", + value: format!("{large_file:?}"), + }) + } + }) + } + + pub fn parse_argv( + argv: &mut VecDeque, + ) -> Result { + let mut args: Vec = Vec::new(); + let mut positional_paths: Vec = Vec::new(); + + while let Some(arg) = argv.pop_front() { + let arg = match arg.as_encoded_bytes() { + /* Attributes */ + b"-c" | b"--compression-method" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor( + "-c/--compression-method", + )), + Some(name) => Self::parse_compression_method(name), + }, + b"-l" | b"--compression-level" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor( + "-l/--compression-level", + )), + Some(level) => Self::parse_compression_level(level), + }, + b"-m" | b"--mode" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-m/--mode")), + Some(mode) => Self::parse_mode(mode), + }, + b"--large-file" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("--large-file")), + Some(large_file) => Self::parse_large_file(large_file), + }, + + /* Data */ + b"-n" | b"--name" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-n/--name")), + Some(name) => { + Self::parse_unicode("name", name).map(CompressionArg::Name) + } + }, + b"-s" | b"--symlink" => Ok(CompressionArg::Symlink), + b"-d" | b"--dir" => Ok(CompressionArg::Dir), + b"-i" | b"--immediate" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-i/--immediate")), + Some(data) => Ok(CompressionArg::Immediate(data)), + }, + b"-f" | b"--file" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-f/--file")), + Some(file) => Ok(CompressionArg::FilePath(file.into())), + }, + b"-r" | b"--recursive-dir" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-r/--recursive-dir")), + Some(dir) => Ok(CompressionArg::RecursiveDirPath(dir.into())), + }, + + /* Transition to positional args */ + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + Err(ModificationSequenceError::Unrecognized { + context: "flag", + value: format!("{arg:?}"), + }) + } else { + argv.push_front(arg); + break; + } + } + }?; + args.push(arg); + } + + positional_paths.extend(mem::take(argv).into_iter().map(PathBuf::from)); + + Ok(Self { + args, + positional_paths, + }) + } + + fn interpret_entry_path(path: PathBuf) -> Result { + let file_type = std::fs::symlink_metadata(&path) + .wrap_err_with(|| format!("failed to read metadata from path {path:?}"))? 
+ .file_type(); + Ok(if file_type.is_dir() { + EntrySpec::RecDir { name: None, path } + } else { + EntrySpec::File { + name: None, + path, + symlink_flag: file_type.is_symlink(), + } + }) + } + + pub fn build_mod_seq( + self, + /* mut err: impl Write, */ + ) -> Result { + let Self { + args, + positional_paths, + } = self; + + let mut operations: Vec = Vec::new(); + + let mut options = Self::initial_options(); + + let mut last_name: Option = None; + let mut symlink_flag: bool = false; + + for arg in args.into_iter() { + match arg { + /* attributes: */ + CompressionArg::CompressionMethod(method) => { + let method = match method { + CompressionMethodArg::Stored => CompressionMethod::Stored, + CompressionMethodArg::Deflate => CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + CompressionMethodArg::Deflate64 => CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + CompressionMethodArg::Bzip2 => CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + CompressionMethodArg::Zstd => CompressionMethod::Zstd, + }; + /* writeln!(err, "setting compression method {method:?}").unwrap(); */ + options = options.compression_method(method); + } + CompressionArg::Level(CompressionLevel(level)) => { + /* writeln!(err, "setting compression level {level:?}").unwrap(); */ + options = options.compression_level(Some(level)); + } + CompressionArg::UnixPermissions(UnixPermissions(mode)) => { + /* writeln!(err, "setting file mode {mode:#o}").unwrap(); */ + options = options.unix_permissions(mode); + } + CompressionArg::LargeFile(large_file) => { + /* writeln!(err, "setting large file flag to {large_file:?}").unwrap(); */ + options = options.large_file(large_file); + } + CompressionArg::Name(name) => { + /* writeln!(err, "setting name of next entry to {name:?}").unwrap(); */ + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "got two names before an entry: {last_name} and {name}" + ))); + } + last_name = Some(name); + } + CompressionArg::Symlink => { + /* writeln!(err, "setting symlink flag for next entry").unwrap(); */ + if symlink_flag { + /* TODO: make this a warning? 
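+                         * For now, a repeated -s/--symlink before a single entry is a hard error.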
*/ + return Err(CommandError::InvalidArg( + "symlink flag provided twice before entry".to_string(), + )); + } + symlink_flag = true; + } + + /* new operations: */ + CompressionArg::Dir => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + /* writeln!(err, "writing dir entry").unwrap(); */ + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before dir entry".to_string(), + )); + } + let name = last_name.ok_or_else(|| { + CommandError::InvalidArg( + "no name provided before dir entry".to_string(), + ) + })?; + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::Dir { name }, + }); + } + CompressionArg::Immediate(data) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + let name = last_name.ok_or_else(|| { + CommandError::InvalidArg(format!( + "no name provided for immediate data {data:?}" + )) + })?; + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::Immediate { + name, + data, + symlink_flag, + }, + }); + } + CompressionArg::FilePath(path) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::File { + name: last_name, + path, + symlink_flag, + }, + }); + } + CompressionArg::RecursiveDirPath(path) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before recursive dir entry".to_string(), + )); + } + + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::RecDir { + name: last_name, + path, + }, + }); + } + } + } + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag remaining after all entry flags processed".to_string(), + )); + } + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "name {last_name} remaining after all entry flags processed" + ))); + } + + for p in positional_paths.into_iter() { + operations.push(ModificationOperation::CreateEntry { + options, + spec: Self::interpret_entry_path(p)?, + }); + } + Ok(ModificationSequence { operations }) + } + } + + impl ArgvResource for ModSeqResource { + type ArgvParseError = WrapperError; + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result { + let compression_args = + CompressionArgs::parse_argv(argv).map_err(WrapperError::In)?; + compression_args.build_mod_seq().map_err(WrapperError::Out) + } + } + + impl PositionalArgvResource for ModSeqResource {} + } + use compression_args::{CompressionArgs, ModificationSequenceError}; + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn parse_output_type() { + let output = OutputFlagsResource::declare(()); + + assert_eq!( + OutputType::default(), + output.parse_argv_from_empty().unwrap() + ); + + assert_eq!( + OutputType::Stdout { allow_tty: true }, + output.parse_argv_from(["--stdout"]).unwrap() + ); + + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + output.parse_argv_from(["-o", "asdf"]).unwrap() + ); + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: true + }, + output.parse_argv_from(["--append", "-o", "asdf"]).unwrap() + ); + } + + #[test] + fn parse_global_flags() { + let global_flags = GlobalFlagsResource::declare(()); + + assert_eq!( + GlobalFlags::default(), + 
global_flags.parse_argv_from_empty().unwrap(), + ); + + assert_eq!( + GlobalFlags { + archive_comment: Some("asdf".into()) + }, + global_flags + .parse_argv_from(["--archive-comment", "asdf"]) + .unwrap() + ); + } + + #[test] + fn parse_mod_seq() { + let mod_seq = ModSeqResource::declare(()); + + assert_eq!( + ModificationSequence::default(), + mod_seq.parse_argv_from_empty().unwrap(), + ); + + assert_eq!( + ModificationSequence { + operations: vec![ModificationOperation::CreateEntry { + options: SimpleFileOptions::default(), + spec: EntrySpec::File { + name: None, + path: "file.txt".into(), + symlink_flag: false + }, + }], + }, + mod_seq.parse_argv_from(["-f", "file.txt"]).unwrap(), + ); + } + } +} + +#[cfg(feature = "json")] +pub mod json_resource { + use super::{ + GlobalFlags, GlobalFlagsResource, ModSeqResource, ModificationSequence, + OutputFlagsResource, OutputType, Resource, + }; + use crate::{ + args::resource::SchemaResource, + schema::backends::{json_backend::JsonBackend, Backend}, + }; + + use std::{error, ffi::OsString, fmt, path::PathBuf}; + + use json::{object::Object as JsonObject, JsonValue}; + + #[derive(Debug)] + pub enum JsonSchemaError { + InvalidType { + val: JsonValue, + valid_types: &'static [&'static str], + context: &'static str, + }, + InvalidObjectKeys { + obj: JsonObject, + expected_keys: &'static [&'static str], + context: &'static str, + }, + } + + impl fmt::Display for JsonSchemaError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::InvalidType { + valid_types, + context, + val, + } => { + assert!(!valid_types.is_empty()); + let types_str: String = valid_types.join(", "); + write!( + f, + "{context} expected types [{types_str}], but received: {val}" + ) + } + Self::InvalidObjectKeys { + obj, + expected_keys, + context, + } => { + assert!(!expected_keys.is_empty()); + let keys_str: String = expected_keys.join(", "); + let obj = JsonValue::Object(obj.clone()); + write!( + f, + "{context} expected object keys [{keys_str}], but object was {obj}" + ) + } + } + } + } + + impl error::Error for JsonSchemaError {} + + impl SchemaResource for OutputFlagsResource { + type B = JsonBackend; + type SchemaParseError = JsonSchemaError; + + fn parse_schema<'a>( + &self, + v: ::Val<'a>, + ) -> Result { + match v { + JsonValue::Null => Ok(OutputType::default()), + /* => {"file": {"path": , "append": false}}} */ + JsonValue::Short(path) => Ok(OutputType::File { + path: path.as_str().into(), + append: false, + }), + JsonValue::String(path) => Ok(OutputType::File { + path: path.into(), + append: false, + }), + /* => {"stdout": {"allow_tty": }} */ + JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { allow_tty }), + /* An object--destructure by enum case. 
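+                    * Only the "stdout" and "file" keys are recognized; any other object shape produces an InvalidObjectKeys error.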
*/ + JsonValue::Object(o) => { + if let Some(o) = o.get("stdout") { + match o { + JsonValue::Null => Ok(OutputType::Stdout { allow_tty: false }), + /* {"stdout": } => {"stdout": {"allow_tty": }} */ + JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { + allow_tty: *allow_tty, + }), + /* {"stdout": {"allow_tty": }} => {"stdout": {"allow_tty": }} */ + JsonValue::Object(o) => { + let allow_tty: bool = if let Some(allow_tty) = o.get("allow_tty") { + match allow_tty { + JsonValue::Boolean(allow_tty) => Ok(*allow_tty), + JsonValue::Null => Ok(false), + _ => Err(JsonSchemaError::InvalidType { + val: allow_tty.clone(), + valid_types: &["boolean", "null"], + context: "the 'allow_tty' field in the 'stdout' case of output flags", + }), + } + } else { + Ok(false) + }?; + Ok(OutputType::Stdout { allow_tty }) + } + _ => Err(JsonSchemaError::InvalidType { + val: o.clone(), + valid_types: &["boolean", "object", "null"], + context: "the 'stdout' enum case of output flags", + }), + } + } else if let Some(o) = o.get("file") { + match o { + /* {"file": } => {"file": {"path": , append: false}} */ + JsonValue::Short(path) => Ok(OutputType::File { + path: path.as_str().into(), + append: false, + }), + JsonValue::String(path) => Ok(OutputType::File { + path: path.into(), + append: false, + }), + /* {"file": {"path": , "append": }} => {"file": {"path": , append: }} */ + JsonValue::Object(o) => { + let path: PathBuf = if let Some(path) = o.get("path") { + match path { + JsonValue::Short(path) => Ok(path.as_str().into()), + JsonValue::String(path) => Ok(path.into()), + _ => Err(JsonSchemaError::InvalidType { + val: path.clone(), + valid_types: &["string"], + context: "the 'path' field in the 'file' case of output flags", + }), + } + } else { + /* This *must* be provided, whereas "append" has a default. 
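+                                * (A missing or null "append" defaults to false.)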
*/ + Err(JsonSchemaError::InvalidObjectKeys { + obj: o.clone(), + expected_keys: &["path"], + context: "the 'file' enum case of output flags", + }) + }?; + let append: bool = if let Some(append) = o.get("append") { + match append { + JsonValue::Boolean(append) => Ok(*append), + JsonValue::Null => Ok(false), + _ => Err(JsonSchemaError::InvalidType { + val: append.clone(), + valid_types: &["boolean", "null"], + context: + "the 'append' field in 'file' case of output flags", + }), + } + } else { + Ok(false) + }?; + Ok(OutputType::File { path, append }) + } + _ => Err(JsonSchemaError::InvalidType { + val: o.clone(), + valid_types: &["string", "object"], + context: "the 'file' enum case of output flags", + }), + } + } else { + Err(JsonSchemaError::InvalidObjectKeys { + obj: o, + expected_keys: &["stdout", "file"], + context: + "destructuring into 'file' and 'stdout' enum cases of output flags", + }) + } + } + _ => Err(JsonSchemaError::InvalidType { + val: v, + valid_types: &["string", "boolean", "object", "null"], + context: "top-level value for output flags", + }), + } + } + } + + impl SchemaResource for GlobalFlagsResource { + type B = JsonBackend; + type SchemaParseError = JsonSchemaError; + + fn parse_schema<'a>( + &self, + v: ::Val<'a>, + ) -> Result { + match v { + JsonValue::Object(o) => { + let archive_comment: Option = if let Some(archive_comment) = + o.get("archive-comment") + { + match archive_comment { + JsonValue::Short(archive_comment) => { + Ok(Some(archive_comment.as_str().into())) + } + JsonValue::String(archive_comment) => Ok(Some(archive_comment.into())), + JsonValue::Null => Ok(None), + _ => Err(JsonSchemaError::InvalidType { + val: archive_comment.clone(), + valid_types: &["string", "null"], + context: "the 'archive-comment' field in global flags", + }), + } + } else { + Ok(None) + }?; + Ok(GlobalFlags { archive_comment }) + } + JsonValue::Null => Ok(GlobalFlags::default()), + _ => Err(JsonSchemaError::InvalidType { + val: v.clone(), + valid_types: &["object", "null"], + context: "the top-level global flags object", + }), + } + } + } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn parse_output_type() { + assert_eq!( + OutputType::Stdout { allow_tty: false }, + OutputType::default() + ); + + let output = OutputFlagsResource::declare(()); + + assert_eq!( + OutputType::Stdout { allow_tty: true }, + output.parse_schema_str("true").unwrap(), + ); + assert_eq!( + OutputType::Stdout { allow_tty: false }, + output.parse_schema_str("false").unwrap(), + ); + assert_eq!( + OutputType::default(), + output.parse_schema_str("null").unwrap(), + ); + + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + output.parse_schema_str("\"asdf\"").unwrap(), + ); + + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + output.parse_schema_str("{\"file\": \"asdf\"}").unwrap(), + ); + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: true + }, + output + .parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": true}}") + .unwrap(), + ); + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + output + .parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": false}}") + .unwrap(), + ); + } + + #[test] + fn parse_global_flags() { + assert_eq!( + GlobalFlags { + archive_comment: None + }, + GlobalFlags::default(), + ); + + let global_flags = GlobalFlagsResource::declare(()); + + assert_eq!( + GlobalFlags::default(), + global_flags.parse_schema_str("null").unwrap(), + ); + + assert_eq!( 
+ GlobalFlags { + archive_comment: Some("aaaaasdf".into()), + }, + global_flags + .parse_schema_str("{\"archive-comment\": \"aaaaasdf\"}") + .unwrap(), + ); + assert_eq!( + GlobalFlags { + archive_comment: None, + }, + global_flags + .parse_schema_str("{\"archive-comment\": null}") + .unwrap(), + ); + } + } +} diff --git a/cli/src/args/extract.rs b/cli/src/args/extract.rs new file mode 100644 index 000000000..8eb46e7fd --- /dev/null +++ b/cli/src/args/extract.rs @@ -0,0 +1,1735 @@ +use super::{ArgParseError, CommandFormat}; + +use zip::CompressionMethod; + +use std::{collections::VecDeque, ffi::OsString, mem, path::PathBuf}; + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum ContentTransform { + Extract { name: Option }, +} + +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] +pub enum ComponentSelector { + #[default] + Path, + Basename, + Dirname, + FileExtension, +} + +impl ComponentSelector { + pub fn parse(s: &[u8]) -> Option { + match s { + b"path" => Some(Self::Path), + b"basename" => Some(Self::Basename), + b"dirname" => Some(Self::Dirname), + b"ext" => Some(Self::FileExtension), + _ => None, + } + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] +pub enum PatternSelectorType { + Glob, + Literal, + Regexp, +} + +impl PatternSelectorType { + pub fn parse(s: &[u8]) -> Option { + match s { + b"glob" => Some(Self::Glob), + b"lit" => Some(Self::Literal), + b"rx" => Some(Self::Regexp), + _ => None, + } + } + + const fn help_description(self) -> &'static str { + match self { + Self::Glob => "glob", + Self::Literal => "literal", + Self::Regexp => "regexp", + } + } + + const fn arg_abbreviation(self) -> &'static str { + match self { + Self::Glob => "glob", + Self::Literal => "lit", + Self::Regexp => "rx", + } + } + + fn generate_match_help_text(self) -> String { + format!( + r#"These flags default to interpreting a argument as a {} string to +match against the entire entry name, which can be explicitly requested as +follows: + + --match=path:{} "#, + self.help_description(), + self.arg_abbreviation(), + ) + } + + pub fn generate_match_default_help_text() -> String { + Self::default_for_match().generate_match_help_text() + } +} + +#[derive(Copy, Clone)] +pub enum PatSelContext { + MatchOnly, + MatchAndTransform, +} + +impl PatSelContext { + #[allow(dead_code)] + const fn first_default(self) -> &'static str { + match self { + Self::MatchOnly => "[DEFAULT] ", + Self::MatchAndTransform => "[DEFAULT for matching] ", + } + } + + #[allow(dead_code)] + const fn second_default(self) -> &'static str { + match self { + Self::MatchOnly => "", + Self::MatchAndTransform => "[DEFAULT for replacement] ", + } + } +} + +#[cfg(all(feature = "glob", feature = "rx"))] +impl PatternSelectorType { + pub fn generate_pat_sel_help_section(ctx: PatSelContext) -> String { + format!( + r#"pat-sel = glob {}(interpret as a shell glob) + = lit (interpret as literal string) + = rx {}(interpret as a regular expression) + = + (apply search modifiers from )"#, + ctx.first_default(), + ctx.second_default(), + ) + } +} + +#[cfg(all(feature = "glob", not(feature = "rx")))] +impl PatternSelectorType { + pub fn generate_pat_sel_help_section(ctx: PatSelContext) -> String { + format!( + r#"pat-sel = glob {}(interpret as a shell glob) + = lit {}(interpret as literal string) + = + (apply search modifiers from )"#, + ctx.first_default(), + ctx.second_default(), + ) + } +} + +#[cfg(all(not(feature = "glob"), feature = "rx"))] +impl PatternSelectorType { + pub fn 
generate_pat_sel_help_section(ctx: PatSelContext) -> String { + format!( + r#"pat-sel = lit {}(interpret as literal string) + = rx {}(interpret as a regular expression) + = + (apply search modifiers from )"#, + ctx.first_default(), + ctx.second_default(), + ) + } +} + +#[cfg(not(any(feature = "glob", feature = "rx")))] +impl PatternSelectorType { + pub fn generate_pat_sel_help_section(_ctx: PatSelContext) -> String { + r#"pat-sel = lit [DEFAULT] (interpret as literal string) + = + (apply search modifiers from )"# + .to_string() + } +} + +#[cfg(feature = "glob")] +impl PatternSelectorType { + pub const fn default_for_match() -> Self { + Self::Glob + } + + pub const fn generate_glob_replacement_note(ctx: PatSelContext) -> &'static str { + match ctx { + PatSelContext::MatchOnly => "", + PatSelContext::MatchAndTransform => { + "\n*Note:* glob patterns are not supported for replacement, and attempting to use +them with e.g '--transform:glob' will produce an error.\n" + } + } + } +} + +#[cfg(not(feature = "glob"))] +impl PatternSelectorType { + pub const fn default_for_match() -> Self { + Self::Literal + } + + pub const fn generate_glob_replacement_note(_ctx: PatSelContext) -> &'static str { + "" + } +} + +#[cfg(feature = "rx")] +impl PatternSelectorType { + pub const fn default_for_replacement() -> Self { + Self::Regexp + } +} + +#[cfg(not(feature = "rx"))] +impl PatternSelectorType { + pub const fn default_for_replacement() -> Self { + Self::Literal + } +} + +#[derive(Debug)] +pub enum PatternSelectorModifier { + CaseInsensitive, + MultipleMatches, + PrefixAnchored, + SuffixAnchored, +} + +impl PatternSelectorModifier { + pub fn parse(s: &[u8]) -> Option { + match s { + b"i" => Some(Self::CaseInsensitive), + b"g" => Some(Self::MultipleMatches), + b"p" => Some(Self::PrefixAnchored), + b"s" => Some(Self::SuffixAnchored), + _ => None, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct PatternModifierFlags { + pub case_insensitive: bool, + pub multiple_matches: bool, + pub prefix_anchored: bool, + pub suffix_anchored: bool, +} + +#[derive(Debug)] +pub struct PatternSelector { + pub pat_sel: PatternSelectorType, + pub modifiers: PatternModifierFlags, +} + +impl PatternSelector { + pub fn parse(s: &[u8]) -> Option { + match s.iter().position(|c| *c == b':') { + Some(modifiers_ind) => { + let pat_sel_str = &s[..modifiers_ind]; + let modifiers_str = &s[(modifiers_ind + 1)..]; + + let pat_sel = PatternSelectorType::parse(pat_sel_str)?; + + let mut modifiers = PatternModifierFlags::default(); + let mod_els = modifiers_str + .split(|c| *c == b':') + .map(PatternSelectorModifier::parse) + .collect::>>()?; + for m in mod_els.into_iter() { + match m { + PatternSelectorModifier::CaseInsensitive => { + modifiers.case_insensitive = true; + } + PatternSelectorModifier::MultipleMatches => { + modifiers.multiple_matches = true; + } + PatternSelectorModifier::PrefixAnchored => { + modifiers.prefix_anchored = true; + } + PatternSelectorModifier::SuffixAnchored => { + modifiers.suffix_anchored = true; + } + } + } + Some(Self { pat_sel, modifiers }) + } + None => { + let pat_sel = PatternSelectorType::parse(s)?; + Some(Self { + pat_sel, + modifiers: Default::default(), + }) + } + } + } + + pub fn default_for_context(ctx: PatternContext) -> Self { + match ctx { + PatternContext::Match => Self::default_for_match(), + PatternContext::Replacement => Self::default_for_replacement(), + } + } + + pub fn default_for_match() -> Self { + Self { + pat_sel: 
PatternSelectorType::default_for_match(), + modifiers: PatternModifierFlags::default(), + } + } + + pub fn default_for_replacement() -> Self { + Self { + pat_sel: PatternSelectorType::default_for_replacement(), + modifiers: PatternModifierFlags::default(), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum PatternContext { + Match, + Replacement, +} + +pub fn parse_only_pat_sel(s: &[u8], ctx: PatternContext) -> Option { + match s.iter().position(|c| *c == b':') { + Some(pat_sel_ind) => { + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let pat_sel = PatternSelector::parse(pat_sel_str)?; + Some(pat_sel) + } + None => Some(PatternSelector::default_for_context(ctx)), + } +} + +pub fn parse_comp_and_pat_sel( + s: &[u8], + ctx: PatternContext, +) -> Option<(ComponentSelector, PatternSelector)> { + match ( + s.iter().position(|c| *c == b'='), + s.iter().position(|c| *c == b':'), + ) { + (Some(comp_sel_ind), Some(pat_sel_ind)) => { + if comp_sel_ind >= pat_sel_ind { + return None; + } + let comp_sel_str = &s[(comp_sel_ind + 1)..pat_sel_ind]; + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let comp_sel = ComponentSelector::parse(comp_sel_str)?; + let pat_sel = PatternSelector::parse(pat_sel_str)?; + Some((comp_sel, pat_sel)) + } + (Some(comp_sel_ind), None) => { + let comp_sel_str = &s[(comp_sel_ind + 1)..]; + + let comp_sel = ComponentSelector::parse(comp_sel_str)?; + let pat_sel = PatternSelector::default_for_context(ctx); + Some((comp_sel, pat_sel)) + } + (None, Some(pat_sel_ind)) => { + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let pat_sel = PatternSelector::parse(pat_sel_str)?; + let comp_sel = ComponentSelector::default(); + Some((comp_sel, pat_sel)) + } + (None, None) => { + let comp_sel = ComponentSelector::default(); + let pat_sel = PatternSelector::default_for_context(ctx); + Some((comp_sel, pat_sel)) + } + } +} + +#[derive(Debug)] +pub enum EntryType { + File, + Dir, + Symlink, +} + +impl EntryType { + pub fn parse(s: &[u8]) -> Option { + match s { + b"file" => Some(Self::File), + b"dir" => Some(Self::Dir), + b"symlink" => Some(Self::Symlink), + _ => None, + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum NonSpecificCompressionMethodArg { + Any, + Known, +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum SpecificCompressionMethodArg { + Stored, + Deflated, + #[cfg(feature = "deflate64")] + Deflate64, + #[cfg(feature = "bzip2")] + Bzip2, + #[cfg(feature = "zstd")] + Zstd, + #[cfg(feature = "lzma")] + Lzma, + #[cfg(feature = "xz")] + Xz, +} + +impl SpecificCompressionMethodArg { + pub const KNOWN_COMPRESSION_METHODS: &[CompressionMethod] = &[ + CompressionMethod::Stored, + CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + CompressionMethod::Zstd, + #[cfg(feature = "lzma")] + CompressionMethod::Lzma, + #[cfg(feature = "xz")] + CompressionMethod::Xz, + ]; + + pub fn translate_to_zip(self) -> CompressionMethod { + match self { + Self::Stored => CompressionMethod::Stored, + Self::Deflated => CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + Self::Deflate64 => CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + Self::Bzip2 => CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + Self::Zstd => CompressionMethod::Zstd, + #[cfg(feature = "lzma")] + Self::Lzma => CompressionMethod::Lzma, + #[cfg(feature = "xz")] + Self::Xz => CompressionMethod::Xz, + } + } +} + +#[derive(Debug, PartialEq, 
Eq)] +pub enum CompressionMethodArg { + NonSpecific(NonSpecificCompressionMethodArg), + Specific(SpecificCompressionMethodArg), +} + +impl CompressionMethodArg { + pub fn parse(s: &[u8]) -> Option { + match s { + b"any" => Some(Self::NonSpecific(NonSpecificCompressionMethodArg::Any)), + b"known" => Some(Self::NonSpecific(NonSpecificCompressionMethodArg::Known)), + b"stored" => Some(Self::Specific(SpecificCompressionMethodArg::Stored)), + b"deflated" => Some(Self::Specific(SpecificCompressionMethodArg::Deflated)), + #[cfg(feature = "deflate64")] + b"deflate64" => Some(Self::Specific(SpecificCompressionMethodArg::Deflate64)), + #[cfg(feature = "bzip2")] + b"bzip2" => Some(Self::Specific(SpecificCompressionMethodArg::Bzip2)), + #[cfg(feature = "zstd")] + b"zstd" => Some(Self::Specific(SpecificCompressionMethodArg::Zstd)), + #[cfg(feature = "lzma")] + b"lzma" => Some(Self::Specific(SpecificCompressionMethodArg::Lzma)), + #[cfg(feature = "xz")] + b"xz" => Some(Self::Specific(SpecificCompressionMethodArg::Xz)), + _ => None, + } + } +} + +#[derive(Debug)] +pub enum DepthLimitArg { + Max(u8), + Min(u8), +} + +#[derive(Debug)] +pub enum SizeArg { + Max(u64), + Min(u64), +} + +#[derive(Debug)] +pub struct MatchArg { + pub comp_sel: ComponentSelector, + pub pat_sel: PatternSelector, + pub pattern: String, +} + +#[derive(Debug)] +pub enum TrivialPredicate { + True, + False, +} + +#[derive(Debug)] +pub enum Predicate { + Trivial(TrivialPredicate), + EntryType(EntryType), + CompressionMethod(CompressionMethodArg), + DepthLimit(DepthLimitArg), + Size(SizeArg), + Match(MatchArg), +} + +#[derive(Debug)] +enum ExprOp { + Negation, + And, + Or, +} + +#[derive(Debug)] +enum ExprArg { + PrimitivePredicate(Predicate), + Op(ExprOp), + Subgroup(MatchExpression), +} + +#[derive(Debug, Default)] +struct SingleExprLevel { + expr_args: Vec, +} + +impl SingleExprLevel { + pub fn push_arg(&mut self, arg: ExprArg) { + self.expr_args.push(arg); + } + + fn get_negation(expr_args: &mut VecDeque) -> Result { + let negated_expr: MatchExpression = match expr_args.pop_front().ok_or_else(|| { + Extract::exit_arg_invalid(&format!( + "negation was only expression in list inside match expr (rest: {expr_args:?})" + )) + })? { + ExprArg::Subgroup(match_expr) => { + /* We have a valid match expression, so just negate it without + * wrapping. */ + MatchExpression::Negated(Box::new(match_expr)) + } + ExprArg::PrimitivePredicate(predicate) => { + /* We got a primitive predicate, so just negate it! */ + MatchExpression::Negated(Box::new(MatchExpression::PrimitivePredicate(predicate))) + } + ExprArg::Op(op) => { + /* Negation before any other operator is invalid. */ + return Err(Extract::exit_arg_invalid(&format!( + "negation before operator {op:?} inside match expr is invalid (rest: {expr_args:?})" + ))); + } + }; + Ok(negated_expr) + } + + fn get_non_operator( + expr_args: &mut VecDeque, + ) -> Result { + let next_expr: MatchExpression = match expr_args.pop_front().ok_or_else(|| { + /* We can't fold an empty list. */ + Extract::exit_arg_invalid(&format!( + "empty expression list inside match expr (rest: {expr_args:?})" + )) + })? { + /* This is already an evaluated match expression, so just start with that. */ + ExprArg::Subgroup(match_expr) => match_expr, + ExprArg::PrimitivePredicate(predicate) => { + /* Success! We start with a simple predicate. */ + MatchExpression::PrimitivePredicate(predicate) + } + ExprArg::Op(op) => match op { + /* We started with negation, which means we need to get the next arg to resolve + * it. 
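+                 * E.g. for the argument sequence `! -true`, get_negation() pops the
+                 * predicate parsed from `-true` and wraps it in MatchExpression::Negated.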
*/ + ExprOp::Negation => Self::get_negation(expr_args)?, + /* Starting with a binary operator is invalid. */ + op @ (ExprOp::And | ExprOp::Or) => { + return Err(Extract::exit_arg_invalid(&format!( + "expression list cannot begin with binary operator {op:?} (rest: {expr_args:?})" + ))); + } + }, + }; + Ok(next_expr) + } + + pub fn fold(self) -> Result { + let Self { expr_args } = self; + let mut expr_args: VecDeque<_> = expr_args.into(); + + /* Get a valid match expression to start our fold with. */ + let mut cur_expr: MatchExpression = Self::get_non_operator(&mut expr_args)?; + + /* Now fold the expression rightwards! */ + while let Some(next_arg) = expr_args.pop_front() { + match next_arg { + /* Implicit AND, wrapping the primitive result into a match. */ + ExprArg::PrimitivePredicate(predicate) => { + let next_expr = MatchExpression::PrimitivePredicate(predicate); + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* Implicit AND, without needing to wrap the result. */ + ExprArg::Subgroup(match_expr) => { + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(match_expr), + }; + } + /* Evaluate the operator according to association. */ + ExprArg::Op(op) => match op { + /* Negation applies to the next element, so retrieve it! */ + ExprOp::Negation => { + let next_expr = Self::get_negation(&mut expr_args)?; + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* Explicit AND requires the next element. */ + ExprOp::And => { + let next_expr = Self::get_non_operator(&mut expr_args)?; + cur_expr = MatchExpression::And { + explicit: true, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* OR requires the next element. */ + ExprOp::Or => { + let next_expr = Self::get_non_operator(&mut expr_args)?; + cur_expr = MatchExpression::Or { + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + }, + } + } + + assert!(expr_args.is_empty()); + Ok(cur_expr) + } +} + +#[derive(Debug)] +pub enum MatchExpression { + PrimitivePredicate(Predicate), + Negated(Box), + And { + explicit: bool, + left: Box, + right: Box, + }, + Or { + left: Box, + right: Box, + }, + Grouped(Box), +} + +impl MatchExpression { + pub fn parse_argv( + argv: &mut VecDeque, + ) -> Result { + let mut expr_stack: Vec = Vec::new(); + let mut top_exprs = SingleExprLevel::default(); + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + /* Parse primitive predicates. 
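+                 * These arms each push one ExprArg::PrimitivePredicate onto the
+                 * current level, e.g. `-true`, `--type file`, or `--max-size 1000`.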
*/ + b"-true" => { + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Trivial( + TrivialPredicate::True, + ))); + } + b"-false" => { + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Trivial( + TrivialPredicate::False, + ))); + } + b"-t" | b"--type" => { + let type_arg = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for -t/--type"))?; + let entry_type = + EntryType::parse(type_arg.as_encoded_bytes()).ok_or_else(|| { + C::exit_arg_invalid(&format!("invalid --type argument: {type_arg:?}")) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::EntryType( + entry_type, + ))); + } + b"--compression-method" => { + let method_arg = argv.pop_front().ok_or_else(|| { + C::exit_arg_invalid("no argument provided for --compression-method") + })?; + let method = CompressionMethodArg::parse(method_arg.as_encoded_bytes()) + .ok_or_else(|| { + C::exit_arg_invalid(&format!( + "invalid --compression-method argument: {method_arg:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::CompressionMethod( + method, + ))); + } + b"--max-depth" => { + let max_depth: u8 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --max-depth"))? + .into_string() + .map_err(|depth_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --max-depth: {depth_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --max-depth arg as u8: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::DepthLimit( + DepthLimitArg::Max(max_depth), + ))); + } + b"--min-depth" => { + let min_depth: u8 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --min-depth"))? + .into_string() + .map_err(|depth_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --min-depth: {depth_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --min-depth arg as u8: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::DepthLimit( + DepthLimitArg::Min(min_depth), + ))); + } + b"--max-size" => { + let max_size: u64 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --max-size"))? + .into_string() + .map_err(|size_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --max-size: {size_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --max-size arg as u64: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Size(SizeArg::Max( + max_size, + )))); + } + b"--min-size" => { + let min_size: u64 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --min-size"))? + .into_string() + .map_err(|size_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --min-size: {size_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --min-size arg as u64: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Size(SizeArg::Min( + min_size, + )))); + } + b"-m" => { + let pattern: String = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for -m"))? 
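+                        /* `-m` is shorthand for `--match` with the default component
+                         * selector (the full entry path) and the default pattern
+                         * selector for match context. */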
+ .into_string() + .map_err(|pattern| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for -m: {pattern:?}" + )) + })?; + let comp_sel = ComponentSelector::default(); + let pat_sel = PatternSelector::default_for_context(PatternContext::Match); + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Match(MatchArg { + comp_sel, + pat_sel, + pattern, + }))); + } + arg_bytes if arg_bytes.starts_with(b"--match") => { + let (comp_sel, pat_sel) = parse_comp_and_pat_sel( + arg_bytes, + PatternContext::Match, + ) + .ok_or_else(|| { + C::exit_arg_invalid(&format!("invalid --match argument modifiers: {arg:?}")) + })?; + let pattern: String = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --match"))? + .into_string() + .map_err(|pattern| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --match: {pattern:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Match(MatchArg { + comp_sel, + pat_sel, + pattern, + }))); + } + + /* Parse operators. */ + b"!" | b"-not" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::Negation)); + } + b"&" | b"-and" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::And)); + } + b"|" | b"-or" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::Or)); + } + + /* Process groups with stack logic! */ + b"(" | b"-open" => { + expr_stack.push(mem::take(&mut top_exprs)); + } + b")" | b"-close" => { + /* Get the unevaluated exprs from the previous nesting level. */ + let prev_level = expr_stack.pop().ok_or_else(|| { + C::exit_arg_invalid("too many close parens inside match expr") + })?; + /* Move the previous nesting level into current, and evaluate the current + * nesting level. */ + let group_expr = mem::replace(&mut top_exprs, prev_level).fold()?; + /* Wrap the completed group in a Grouped. */ + let group_expr = MatchExpression::Grouped(Box::new(group_expr)); + /* Push the completed and evaluated group into the current nesting level. */ + top_exprs.push_arg(ExprArg::Subgroup(group_expr)); + } + + /* Conclude the match expr processing. 
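+                 * A trailing `--expr` closes the expression: per the help text, a
+                 * match expression must be bracketed by --expr flags on both sides.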
*/ + b"--expr" => { + break; + } + _ => { + return Err(C::exit_arg_invalid(&format!( + "unrecognized match expression component {arg:?}: all match expressions must start and end with a --expr flag" + ))); + } + } + } + + if !expr_stack.is_empty() { + return Err(C::exit_arg_invalid( + "not enough close parens inside match expr", + )); + } + top_exprs.fold() + } +} + +#[derive(Debug)] +pub enum TrivialTransform { + Identity, +} + +#[derive(Debug)] +pub enum BasicTransform { + StripComponents(u8), + AddPrefix(String), +} + +#[derive(Debug)] +pub struct TransformArg { + pub comp_sel: ComponentSelector, + pub pat_sel: PatternSelector, + pub pattern: String, + pub replacement_spec: String, +} + +#[derive(Debug)] +pub enum ComplexTransform { + Transform(TransformArg), +} + +#[derive(Debug)] +pub enum NameTransform { + Trivial(TrivialTransform), + Basic(BasicTransform), + Complex(ComplexTransform), +} + +#[derive(Debug)] +enum ExtractArg { + Match(MatchExpression), + NameTransform(NameTransform), + ContentTransform(ContentTransform), +} + +#[derive(Debug)] +pub struct EntrySpec { + pub match_expr: Option, + pub name_transforms: Vec, + pub content_transform: ContentTransform, +} + +impl EntrySpec { + fn parse_extract_args( + args: impl IntoIterator, + ) -> Result, ArgParseError> { + let mut match_expr: Option = None; + let mut name_transforms: Vec = Vec::new(); + + let mut ret: Vec = Vec::new(); + + for arg in args.into_iter() { + match arg { + ExtractArg::Match(new_expr) => { + if let Some(prev_expr) = match_expr.take() { + return Err(Extract::exit_arg_invalid(&format!( + "more than one match expr was provided for the same entry: {prev_expr:?} and {new_expr:?}" + ))); + } + match_expr = Some(new_expr); + } + ExtractArg::NameTransform(n_trans) => { + name_transforms.push(n_trans); + } + ExtractArg::ContentTransform(c_trans) => { + let spec = Self { + match_expr: match_expr.take(), + name_transforms: mem::take(&mut name_transforms), + content_transform: c_trans, + }; + ret.push(spec); + } + } + } + if let Some(match_expr) = match_expr { + return Err(Extract::exit_arg_invalid(&format!( + "match expr {match_expr:?} was provided with no corresponding content \ +transform. add -x/--extract to construct a complete entry spec" + ))); + } + if !name_transforms.is_empty() { + return Err(Extract::exit_arg_invalid(&format!( + "name transforms {name_transforms:?} were provided with no corresponding \ +content transform. 
add -x/--extract to construct a complete entry spec" + ))); + } + + Ok(ret) + } +} + +#[derive(Debug)] +pub enum OutputCollation { + ConcatenateStdout, + ConcatenateFile { path: PathBuf, append: bool }, + Filesystem { output_dir: PathBuf, mkdir: bool }, +} + +#[derive(Debug)] +pub struct NamedOutput { + pub name: String, + pub output: OutputCollation, +} + +#[derive(Debug)] +pub struct OutputSpecs { + pub default: Option, + pub named: Vec, +} + +impl Default for OutputSpecs { + fn default() -> Self { + Self { + default: Some(OutputCollation::Filesystem { + output_dir: PathBuf::from("."), + mkdir: false, + }), + named: Vec::new(), + } + } +} + +impl OutputSpecs { + pub fn parse_argv(argv: &mut VecDeque) -> Result { + let mut default: Option = None; + let mut named: Vec = Vec::new(); + let mut cur_name: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Extract::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + b"--name" => { + let name = argv + .pop_front() + .ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --name") + })? + .into_string() + .map_err(|name| { + Extract::exit_arg_invalid(&format!( + "invalid unicode provided for --name: {name:?}" + )) + })?; + if let Some(prev_name) = cur_name.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple names provided for output: {prev_name:?} and {name:?}" + ))); + } + cur_name = Some(name); + } + b"-d" => { + let dir_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| Extract::exit_arg_invalid("no argument provided for -d"))?; + let output = OutputCollation::Filesystem { + output_dir: dir_path, + mkdir: false, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + arg_bytes if arg_bytes.starts_with(b"--output-directory") => { + let mkdir = match arg_bytes { + b"--output-directory" => false, + b"--output-directory:mkdir" => true, + _ => { + return Err(Extract::exit_arg_invalid(&format!( + "invalid suffix provided to --output-directory: {arg:?}" + ))); + } + }; + let dir_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --output-directory") + })?; + let output = OutputCollation::Filesystem { + output_dir: dir_path, + mkdir, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + b"--stdout" => { + let output = OutputCollation::ConcatenateStdout; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + b"-f" => { + let file_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| Extract::exit_arg_invalid("no argument provided for -f"))?; + let output = OutputCollation::ConcatenateFile { + path: file_path, + append: false, + }; + if let Some(name) = cur_name.take() { + 
named.push(NamedOutput { name, output });
+                    } else if let Some(default) = default.take() {
+                        return Err(Extract::exit_arg_invalid(&format!(
+                            "multiple unnamed outputs provided: {default:?} and {output:?}"
+                        )));
+                    } else {
+                        default = Some(output);
+                    }
+                }
+                arg_bytes if arg_bytes.starts_with(b"--output-file") => {
+                    let append = match arg_bytes {
+                        b"--output-file" => false,
+                        b"--output-file:append" => true,
+                        _ => {
+                            return Err(Extract::exit_arg_invalid(&format!(
+                                "invalid suffix provided to --output-file: {arg:?}"
+                            )));
+                        }
+                    };
+                    let file_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| {
+                        Extract::exit_arg_invalid("no argument provided for --output-file")
+                    })?;
+                    let output = OutputCollation::ConcatenateFile {
+                        path: file_path,
+                        append,
+                    };
+                    if let Some(name) = cur_name.take() {
+                        named.push(NamedOutput { name, output });
+                    } else if let Some(default) = default.take() {
+                        return Err(Extract::exit_arg_invalid(&format!(
+                            "multiple unnamed outputs provided: {default:?} and {output:?}"
+                        )));
+                    } else {
+                        default = Some(output);
+                    }
+                }
+                _ => {
+                    argv.push_front(arg);
+                    break;
+                }
+            }
+        }
+        if let Some(name) = cur_name {
+            return Err(Extract::exit_arg_invalid(&format!(
+                "trailing --name argument provided without output spec: {name:?}"
+            )));
+        }
+
+        Ok(if default.is_none() && named.is_empty() {
+            Self::default()
+        } else {
+            Self { default, named }
+        })
+    }
+}
+
+#[derive(Debug)]
+pub struct InputSpec {
+    pub stdin_stream: bool,
+    pub zip_paths: Vec<PathBuf>,
+}
+
+#[derive(Debug)]
+pub struct Extract {
+    pub output_specs: OutputSpecs,
+    pub entry_specs: Vec<EntrySpec>,
+    pub input_spec: InputSpec,
+}
+
+impl Extract {
+    #[cfg(feature = "deflate64")]
+    const DEFLATE64_HELP_LINE: &'static str = "    - deflate64:\twith deflate64\n";
+    #[cfg(not(feature = "deflate64"))]
+    const DEFLATE64_HELP_LINE: &'static str = "";
+
+    #[cfg(feature = "bzip2")]
+    const BZIP2_HELP_LINE: &'static str = "    - bzip2:\twith bzip2\n";
+    #[cfg(not(feature = "bzip2"))]
+    const BZIP2_HELP_LINE: &'static str = "";
+
+    #[cfg(feature = "zstd")]
+    const ZSTD_HELP_LINE: &'static str = "    - zstd:\twith zstd\n";
+    #[cfg(not(feature = "zstd"))]
+    const ZSTD_HELP_LINE: &'static str = "";
+
+    #[cfg(feature = "lzma")]
+    const LZMA_HELP_LINE: &'static str = "    - lzma:\twith lzma\n";
+    #[cfg(not(feature = "lzma"))]
+    const LZMA_HELP_LINE: &'static str = "";
+
+    #[cfg(feature = "xz")]
+    const XZ_HELP_LINE: &'static str = "    - xz:\t\twith xz\n";
+    #[cfg(not(feature = "xz"))]
+    const XZ_HELP_LINE: &'static str = "";
+
+    pub fn generate_match_expr_help_text() -> String {
+        format!(
+            r#"
+## Match expressions (match-expr):
+
+Entry matching logic composes boolean arithmetic expressions ("expr") in terms
+of basic "predicates" which test some component of the zip entry. Expressions
+can be composed as follows, in order of precedence:
+
+expr = ( <expr> )       (grouping to force precedence)
+     = ! <expr>         (negation)
+     = <expr> & <expr>  (short-circuiting conjunction "and")
+     = <expr> <expr>    (implicit &)
+     = <expr> | <expr>  (disjunction "or")
+     = <predicate>      (evaluate <predicate> on entry)
+
+### Operators:
+The operators to compose match expressions must be quoted in shell commands
+(e.g. as \( or '('), so alternatives are provided which do not require
+special quoting:
+
+Grouping operators:
+    (, -open
+    ), -close
+
+Unary operators:
+    !, -not
+
+Binary operators:
+    |, -or
+    &, -and
+
+### Predicates (predicate):
+These arguments are interpreted as basic predicates, returning true or false in
+response to a specific zip entry.
+
+Trivial:
+These results do not depend on the entry data at all:
+
+    -true   Always return true.
+    -false  Always return false.
+
+If a match expression is not provided, it defaults to the behavior of -true.
+
+Basic:
+These results are dependent on the entry data:
+
+    -t, --type [file|dir|symlink]
+        Match entries of the given type.
+        Note that directory entries may have specific mode bits set, or they may just be
+        zero-length entries whose name ends in '/'.
+
+    --compression-method <method-name>
+        Match entries compressed with the given compression technique.
+
+        Possible values:
+    - any: any compression method at all
+    - known: any compression method this binary is able to decompress
+    - stored: uncompressed
+    - deflated: with deflate
+{}{}{}{}{}
+        Using e.g. '--compression-method known' as a match expression filters
+        entries to only those which can be successfully decompressed.
+
+    --max-depth <depth>
+        Match entries with at *most* <depth> components of their
+        containing directory.
+    --min-depth <depth>
+        Match entries with at *least* <depth> components of their
+        containing directory.
+
+    --max-size <size>
+        Match entries of at *most* <size> in *uncompressed* size.
+    --min-size <size>
+        Match entries of at *least* <size> in *uncompressed* size.
+
+        Directory entries are 0 bytes in size, and symlink entries are the
+        size required to store their target.
+
+        TODO: Abbreviations such as 1k, 1M are not currently supported; the
+        precise byte number must be provided, parseable as a u64.
+
+    -m, --match[=<comp-sel>][:<pat-sel>] <pattern>
+        Return true for entries whose name matches <pattern>.
+
+        See section on "Selector syntax" for <comp-sel> and <pat-sel> for how
+        the string argument is interpreted into a string matching
+        predicate against the entry name.
+"#,
+            Self::DEFLATE64_HELP_LINE,
+            Self::BZIP2_HELP_LINE,
+            Self::ZSTD_HELP_LINE,
+            Self::LZMA_HELP_LINE,
+            Self::XZ_HELP_LINE,
+        )
+    }
+
+    pub fn generate_pattern_selector_help_text(ctx: PatSelContext) -> String {
+        format!(
+            r#"
+## Selector syntax:
+
+The string matching operations of {} expose an interface to
+configure various pattern matching techniques on various components of the entry
+name string.
+
+{}
+
+The entire range of search options is described below:
+
+### Component selector (comp-sel):
+comp-sel = path     [DEFAULT] (match full entry)
+         = basename           (match only the final component of entry)
+         = dirname            (match all except final component of entry)
+         = ext                (match only the file extension, if available)
+
+### Pattern selector (pat-sel):
+{}
+{}
+Also note that glob and regex patterns require building this binary with the
+"glob" and "rx" cargo features respectively. Specifying ':glob' or ':rx' without
+the requisite feature support will produce an error. If the requisite feature is
+not enabled, the default falls back to literal matching, which is supported in
+all cases.
+
+#### Pattern modifiers (pat-mod):
+pat-mod = :i  (use case-insensitive matching for the given pattern)
+{}        = :p  (perform left-anchored "prefix" searches)
+          = :s  (perform right-anchored "suffix" searches)
+
+Pattern modifiers from (pat-mod) can be sequenced, e.g. ':i:p'. If ':p' and ':s'
+are provided together, the result is to perform a doubly-anchored match, against
+the entire string. For regexp matching with ':rx', ':p' and ':s' are converted
+to '^' or '$' anchors in the regexp pattern string. If the pattern string
+already contains '^' or '$', no error is produced.
+
+*Note:* not all pattern modifiers apply everywhere. In particular, {}':p' and ':s' are
+incompatible with glob search and will produce an error.
+"#, + match ctx { + PatSelContext::MatchOnly => "--match", + PatSelContext::MatchAndTransform => "--match and --transform", + }, + PatternSelectorType::generate_match_default_help_text(), + PatternSelectorType::generate_pat_sel_help_section(ctx), + PatternSelectorType::generate_glob_replacement_note(ctx), + match ctx { + PatSelContext::MatchOnly => "", + PatSelContext::MatchAndTransform => + " = :g (use multi-match behavior for string replacements)\n", + }, + match ctx { + PatSelContext::MatchOnly => "", + PatSelContext::MatchAndTransform => + "':g' only +applies to string replacement, and using it for a match expression like +'--match:rx:g' will produce an error. Additionally, ", + }, + ) + } + + pub const INPUT_HELP_TEXT: &'static str = r#" +# Input arguments: +Zip file inputs to extract from can be specified by streaming from stdin, or as +at least one path pointing to an existing zip file. Input arguments are always +specified after all output flags and entry specs on the command line. If no +positional argument is provided and --stdin is not present, an error will +be produced. + + --stdin + If this argument is provided, the streaming API will be used to read + entries as they are encountered, instead of filtering them beforehand + as is done with file inputs. This disables some optimizations, but + also avoids waiting for the entire input to buffer to start writing + output, so can be used in a streaming context. + +Positional paths: + ZIP-PATH... + Apply the entry specs to filter and rename entries to extract from all + of the provided zip files. At least one zip path must be provided, and + all provided paths must exist and point to an existing zip file. Pipes + are not supported and will produce an error. + + If --stdin is provided, it will be read in a streaming manner before + reading entries from any positional zip paths. +"#; +} + +impl CommandFormat for Extract { + const COMMAND_NAME: &'static str = "extract"; + const COMMAND_TABS: &'static str = "\t"; + const COMMAND_DESCRIPTION: &'static str = + "Decompress and transform matching entries into a stream or directory."; + + const USAGE_LINE: &'static str = + "[-h|--help] [OUTPUT-SPEC]... [ENTRY-SPEC]... [--stdin] [--] [ZIP-PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +# Output flags: +Where and how to collate the extracted entries. + +## Directory extraction: +Extract entries into relative paths of a named directory according to the +entry's name. + + -d, --output-directory[:mkdir] + Output directory path to write extracted entries into. + Paths for extracted entries will be constructed by interpreting entry + names as relative paths to the provided directory. + + If the provided path is not a directory, an error is produced. If the + provided path does not exist, an error is produced, unless :mkdir is + specified, which attempts to create the specified directory along with + any missing parent directories. + + If not provided, entries will be extracted into the current directory + (as if '-d .' had been provided). + +## Pipe decompression: +Concatenate decompressed entry data into a pipe or file. Entry names are +effectively ignored. This disables some optimizations that are possible when +extracting to the filesystem. + + --stdout + Concatenate all extracted entries and write them in order to stdout + instead of writing anything to the filesystem. + This will write output to stdout even if stdout is a tty. 
+
+    -f, --output-file[:append] <file>
+        Write all entries into the specified file path <file>.
+
+        The output file will be truncated if it already exists, unless :append
+        is provided. If the specified file path could not be created
+        (e.g. because the containing directory does not exist, or because the
+        path exists but does not point to a regular file), an error
+        is produced.
+
+## Output teeing:
+Entries may be *received* by one or more named outputs. Without any output names specified, the
+above flags will produce a single receiver named "default". This is the default receiver used for
+the -x/--extract argument unless otherwise specified. However, multiple named receivers may be
+specified in sequence, separated by the --name flag:
+
+    --name <name>
+        Assign the output receiver created from the following output flags to the name <name>.
+
+Note that the first output in a list need not have a name, as it will be assigned to the name
+"default" if not provided.
+
+'--stdout'  Creates a single default receiver decompressing contents to stdout.
+'-d ./a'    Creates a single default receiver extracting entries into './a'.
+
+'--name one -d ./a'
+    Creates a single named receiver "one" extracting into './a'. -x/--extract
+    must specify the name "one", or an error will be produced.
+'--output-directory:mkdir ./a --name two --stdout'
+    Creates a default receiver extracting into './a', which will be created if
+    it does not exist, and a named receiver "two" concatenating into stdout.
+'--name one -d ./a --name two -f ./b'
+    Creates a named receiver "one" extracting into './a', and a second named receiver "two"
+    concatenating into the file './b'.
+
+# Entry specs:
+
+After output flags are provided, entry specs are processed in order until an
+input argument is reached. Entry specs are modelled after the arguments to
+find(1), although "actions" are separated from "matching" expressions with
+test clauses instead of being fully recursive like find(1).
+
+The full specification of an entry spec is provided below
+(we will use lowercase names to describe this grammar):
+
+    entry-spec = [--expr match-expr --expr] [name-transform]... content-transform
+
+1. (match-expr) matches against entries,
+2. (name-transform) may transform the entry name string,
+3. (content-transform) processes the entry content and writes it
+   to the output.
+
+Note that only the "content transform" is required: each entry spec must
+conclude with exactly one content transform, but the other arguments may
+be omitted and will be set to their default values.
+
+If no entry specs are provided, by default all entries are decompressed and written to the
+output collator without modification. This behavior can be requested explicitly
+with the command line:
+
+    --expr -true --expr --identity --extract
+
+*Note:* if a match-expr is provided, it *must* be surrounded with --expr arguments on both sides!
+This is a necessary constraint of the current command line parsing.
+
+{}
+
+## Name transforms (name-transform):
+
+Name transforms modify the entry name before writing the entry to the
+output. Unlike match expressions, name transforms do not involve any boolean
+logic, and instead are composed linearly, each processing the string produced by
+the prior name transform in the series.
+
+*Note:* name transforms do *not* perform any filtering, so if a string
+replacement operation "fails", the entry name is simply returned unchanged.
+
+Trivial:
+    --identity  Return the entry name string unchanged.
+
+If no name transforms are provided, it defaults to the behavior of --identity.
+
+Basic:
+These transformers do not perform any complex pattern matching, and instead add
+or remove a fixed string from the entry name:
+
+    --strip-components <count>
+        Remove at most <count> directory components from the entry name.
+        If <count> is greater than or equal to the number of components in the
+        entry dirname, then the basename of the entry is returned.
+    --add-prefix <prefix>
+        Prefix the entry name with a directory path <prefix>.
+        A single separator '/' will be added after <prefix> before the rest of
+        the entry name, and any trailing '/' in <prefix> will be trimmed
+        before joining.
+
+Complex:
+These transformers perform complex pattern matching and replacement upon the
+entry name string:
+
+    --transform[=<comp-sel>][:<pat-sel>] <pattern> <replacement-spec>
+        Extract the portion of the entry name corresponding to <comp-sel>,
+        search it against <pattern> corresponding to <pat-sel>, and then
+        replace the result with <replacement-spec>.
+
+        If <pat-sel> == 'rx', then <replacement-spec> may contain references
+        to numbered capture groups specified by <pattern>. Otherwise,
+        <replacement-spec> is interpreted as a literal string.
+
+
+## Content transforms (content-transform):
+
+Content transforms determine how to interpret the content of the zip
+entry itself.
+
+*Note:* when multiple entry specs are provided on the command line, a single
+entry may be matched more than once. In this case, the entry's content will be
+teed to all the specified outputs.
+
+    -x, --extract[=<name>]
+        Decompress the entry's contents (if necessary) before writing it to
+        the named output <name>, or the default output if the receiver name is
+        not specified.
+
+Attempting to extract an entry using an unsupported compression method with
+-x/--extract will produce an error. In this case, --compression-method can be
+used to filter out such entries.
+
+{}
+{}"#,
+            Self::generate_match_expr_help_text(),
+            Self::generate_pattern_selector_help_text(PatSelContext::MatchAndTransform),
+            Self::INPUT_HELP_TEXT,
+        )
+    }
+
+    fn parse_argv(mut argv: VecDeque<OsString>) -> Result<Self, ArgParseError> {
+        let mut args: Vec<ExtractArg> = Vec::new();
+        let mut stdin_flag: bool = false;
+        let mut positional_zips: Vec<PathBuf> = Vec::new();
+
+        let output_specs = OutputSpecs::parse_argv(&mut argv)?;
+
+        while let Some(arg) = argv.pop_front() {
+            match arg.as_encoded_bytes() {
+                b"-h" | b"--help" => {
+                    let help_text = Self::generate_full_help_text();
+                    return Err(ArgParseError::StdoutMessage(help_text));
+                }
+
+                /* Transition to entry specs */
+                /* Try content transforms first, as they are unambiguous sentinel values. */
+                b"-x" | b"--extract" => {
+                    args.push(ExtractArg::ContentTransform(ContentTransform::Extract {
+                        name: None,
+                    }));
+                }
+                arg_bytes if arg_bytes.starts_with(b"--extract=") => {
+                    let name = arg
+                        .into_string()
+                        .map_err(|arg| {
+                            Self::exit_arg_invalid(&format!(
+                                "invalid unicode provided to --extract=<name>: {arg:?}"
+                            ))
+                        })?
+                        .strip_prefix("--extract=")
+                        .unwrap()
+                        .to_string();
+                    args.push(ExtractArg::ContentTransform(ContentTransform::Extract {
+                        name: Some(name),
+                    }));
+                }
+
+                /* Try name transforms next, as they only stack linearly and do not require CFG
+                 * parsing of paired delimiters. */
+                /* FIXME: none of these name transforms have any effect if --stdout is
+                 * provided. Should we error or warn about this? */
+                b"--identity" => {
+                    args.push(ExtractArg::NameTransform(NameTransform::Trivial(
+                        TrivialTransform::Identity,
+                    )));
+                }
+                b"--strip-components" => {
+                    let num: u8 = argv
+                        .pop_front()
+                        .ok_or_else(|| {
+                            Self::exit_arg_invalid("no argument provided for --strip-components")
+                        })?
+                        .into_string()
+                        .map_err(|num| {
+                            Self::exit_arg_invalid(&format!(
+                                "invalid unicode provided for --strip-components: {num:?}"
+                            ))
+                        })?
+                        .parse::<u8>()
+                        .map_err(|e| {
+                            Self::exit_arg_invalid(&format!(
+                                "failed to parse --strip-components arg as u8: {e:?}"
+                            ))
+                        })?;
+                    args.push(ExtractArg::NameTransform(NameTransform::Basic(
+                        BasicTransform::StripComponents(num),
+                    )));
+                }
+                b"--add-prefix" => {
+                    let prefix = argv
+                        .pop_front()
+                        .ok_or_else(|| {
+                            Self::exit_arg_invalid("no argument provided for --add-prefix")
+                        })?
+                        .into_string()
+                        .map_err(|prefix| {
+                            Self::exit_arg_invalid(&format!(
+                                "invalid unicode provided for --add-prefix: {prefix:?}"
+                            ))
+                        })?;
+                    args.push(ExtractArg::NameTransform(NameTransform::Basic(
+                        BasicTransform::AddPrefix(prefix),
+                    )));
+                }
+                arg_bytes if arg_bytes.starts_with(b"--transform") => {
+                    let (comp_sel, pat_sel) =
+                        parse_comp_and_pat_sel(arg_bytes, PatternContext::Replacement).ok_or_else(
+                            || {
+                                Self::exit_arg_invalid(&format!(
+                                    "invalid --transform argument modifiers: {arg:?}"
+                                ))
+                            },
+                        )?;
+                    let pattern = argv
+                        .pop_front()
+                        .ok_or_else(|| {
+                            Self::exit_arg_invalid("no argument provided for --transform <pattern>")
+                        })?
+                        .into_string()
+                        .map_err(|pattern| {
+                            Self::exit_arg_invalid(&format!(
+                                "invalid unicode provided for --transform <pattern>: {pattern:?}"
+                            ))
+                        })?;
+                    let replacement_spec = argv
+                        .pop_front()
+                        .ok_or_else(|| {
+                            Self::exit_arg_invalid(
+                                "no argument provided for --transform <replacement-spec>",
+                            )
+                        })?
+                        .into_string()
+                        .map_err(|replacement_spec| {
+                            Self::exit_arg_invalid(&format!(
+                                "invalid unicode provided for --transform <replacement-spec>: {replacement_spec:?}"
+                            ))
+                        })?;
+                    args.push(ExtractArg::NameTransform(NameTransform::Complex(
+                        ComplexTransform::Transform(TransformArg {
+                            comp_sel,
+                            pat_sel,
+                            pattern,
+                            replacement_spec,
+                        }),
+                    )));
+                }
+
+                /* Try parsing match specs!
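+                 * E.g. `--expr -m some/path --expr --add-prefix prefix -x` pairs one
+                 * match expression and one name transform with a final -x content
+                 * transform, forming a single complete entry spec.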
*/ + b"--expr" => { + let match_expr = MatchExpression::parse_argv::(&mut argv)?; + args.push(ExtractArg::Match(match_expr)); + } + + /* Transition to input args */ + b"--stdin" => { + stdin_flag = true; + } + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized flag {arg:?}" + ))); + } else { + argv.push_front(arg); + break; + } + } + } + } + + positional_zips.extend(argv.into_iter().map(|arg| arg.into())); + if !stdin_flag && positional_zips.is_empty() { + return Err(Self::exit_arg_invalid( + "no zip input files were provided, and --stdin was not provided", + )); + }; + let input_spec = InputSpec { + stdin_stream: stdin_flag, + zip_paths: positional_zips, + }; + + let entry_specs = EntrySpec::parse_extract_args(args)?; + + Ok(Self { + output_specs, + entry_specs, + input_spec, + }) + } +} + +impl crate::driver::ExecuteCommand for Extract { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::extract::execute_extract(err, self) + } +} diff --git a/cli/src/args/info.rs b/cli/src/args/info.rs new file mode 100644 index 000000000..4bfd903c3 --- /dev/null +++ b/cli/src/args/info.rs @@ -0,0 +1,760 @@ +use super::{ + extract::{Extract, InputSpec, MatchExpression, PatSelContext}, + ArgParseError, CommandFormat, +}; + +use std::{collections::VecDeque, ffi::OsString, fmt, path::PathBuf}; + +#[derive(Debug)] +pub struct ModifierParseError(pub String); + +impl fmt::Display for ModifierParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", &self.0) + } +} + +#[derive(Debug)] +pub enum DirectiveParseError { + Modifier(String, ModifierParseError), + Unrecognized(String), +} + +impl fmt::Display for DirectiveParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Modifier(d, e) => { + write!(f, "unrecognized modifier in directive {d:?}: {e}") + } + Self::Unrecognized(d) => { + write!(f, "unrecognized directive: {d:?}") + } + } + } +} + +#[derive(Debug)] +pub enum FormatParseError { + Directive(DirectiveParseError), + Search(String), +} + +impl fmt::Display for FormatParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Directive(e) => { + write!(f, "{e}") + } + Self::Search(e) => { + write!(f, "error in parsing logic: {e}") + } + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ByteSizeFormat { + #[default] + FullDecimal, + HumanAbbreviated, +} + +impl ByteSizeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::FullDecimal), + ":human" => Ok(Self::HumanAbbreviated), + _ => Err(ModifierParseError(format!( + "unrecognized byte size format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum OffsetFormat { + Decimal, + #[default] + Hexadecimal, +} + +impl OffsetFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::Decimal), + ":hex" => Ok(Self::Hexadecimal), + _ => Err(ModifierParseError(format!( + "unrecognized offset format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinaryStringFormat { + #[default] + PrintAsString, + EscapeAscii, + WriteBinaryContents, +} + +impl BinaryStringFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":print" => 
Ok(Self::PrintAsString), + ":escape" => Ok(Self::EscapeAscii), + ":write" => Ok(Self::WriteBinaryContents), + _ => Err(ModifierParseError(format!( + "unrecognized string format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ArchiveOverviewFormatDirective { + ArchiveName, + TotalSize(ByteSizeFormat), + NumEntries, + ArchiveComment(BinaryStringFormat), + FirstEntryStart(OffsetFormat), + CentralDirectoryStart(OffsetFormat), +} + +impl ParseableDirective for ArchiveOverviewFormatDirective { + fn parse_directive(s: &str) -> Result { + match s { + "name" => Ok(Self::ArchiveName), + s if s.starts_with("size") => { + let size_fmt = ByteSizeFormat::parse(&s["size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::TotalSize(size_fmt)) + } + "num" => Ok(Self::NumEntries), + s if s.starts_with("comment") => { + let str_fmt = BinaryStringFormat::parse(&s["comment".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ArchiveComment(str_fmt)) + } + s if s.starts_with("offset") => { + let offset_fmt = OffsetFormat::parse(&s["offset".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::FirstEntryStart(offset_fmt)) + } + s if s.starts_with("cde-offset") => { + let offset_fmt = OffsetFormat::parse(&s["cde-offset".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CentralDirectoryStart(offset_fmt)) + } + _ => Err(DirectiveParseError::Unrecognized(s.to_string())), + } + } +} + +#[derive(Debug)] +pub enum ParseableFormatComponent { + Directive(D), + Escaped(&'static str), + Literal(String), +} + +#[derive(Debug)] +pub struct ParseableFormatSpec { + pub components: Vec>, +} + +pub trait ParseableDirective: Sized { + fn parse_directive(s: &str) -> Result; +} + +impl ParseableFormatSpec +where + D: ParseableDirective, +{ + pub fn parse_format(s: &str) -> Result { + let mut components: Vec> = Vec::new(); + let mut last_source_position: usize = 0; + while let Some(pcnt_pos) = s[last_source_position..] + .find('%') + .map(|p| p + last_source_position) + { + /* Anything in between directives is a literal string. */ + if pcnt_pos > last_source_position { + components.push(ParseableFormatComponent::Literal( + s[last_source_position..pcnt_pos].to_string(), + )); + last_source_position = pcnt_pos; + } + let next_pcnt = s[(pcnt_pos + 1)..] + .find('%') + .map(|p| p + pcnt_pos + 1) + .ok_or_else(|| { + FormatParseError::Search("% directive opened but not closed".to_string()) + })?; + let directive_contents = &s[pcnt_pos..=next_pcnt]; + match directive_contents { + /* An empty directive is a literal percent. */ + "%%" => { + components.push(ParseableFormatComponent::Escaped("%")); + } + /* A single '!' directive is a literal newline. */ + "%!%" => { + components.push(ParseableFormatComponent::Escaped("\n")); + } + "%,%" => { + components.push(ParseableFormatComponent::Escaped("\t")); + } + /* Otherwise, parse the space between percents. 
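+                 * E.g. the directive contents "%size:human%" strip the enclosing
+                 * '%'s and hand "size:human" to D::parse_directive() for modifier
+                 * parsing.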
*/ + d => { + let directive = D::parse_directive(&d[1..(d.len() - 1)]) + .map_err(FormatParseError::Directive)?; + components.push(ParseableFormatComponent::Directive(directive)); + } + } + last_source_position += directive_contents.len(); + } + if s.len() > last_source_position { + components.push(ParseableFormatComponent::Literal( + s[last_source_position..].to_string(), + )); + } + Ok(Self { components }) + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum UnixModeFormat { + #[default] + Octal, + Pretty, +} + +impl UnixModeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":octal" => Ok(Self::Octal), + ":pretty" => Ok(Self::Pretty), + _ => Err(ModifierParseError(format!( + "unrecognized unix mode format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TimestampFormat { + DateOnly, + TimeOnly, + #[default] + DateAndTime, +} + +impl TimestampFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":date" => Ok(Self::DateOnly), + ":time" => Ok(Self::TimeOnly), + ":date-time" => Ok(Self::DateAndTime), + _ => Err(ModifierParseError(format!( + "unrecognized timestamp format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum CompressionMethodFormat { + Abbreviated, + #[default] + Full, +} + +impl CompressionMethodFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":abbrev" => Ok(Self::Abbreviated), + ":full" => Ok(Self::Full), + _ => Err(ModifierParseError(format!( + "unrecognized compression method format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinaryNumericValueFormat { + Decimal, + #[default] + Hexadecimal, +} + +impl BinaryNumericValueFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::Decimal), + ":hex" => Ok(Self::Hexadecimal), + _ => Err(ModifierParseError(format!( + "unrecognized binary numeric value format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum FileTypeFormat { + Abbreviated, + #[default] + Full, +} + +impl FileTypeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":abbrev" => Ok(Self::Abbreviated), + ":full" => Ok(Self::Full), + _ => Err(ModifierParseError(format!( + "unrecognized file type format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum EntryFormatDirective { + Name, + FileType(FileTypeFormat), + Comment(BinaryStringFormat), + LocalHeaderStart(OffsetFormat), + ContentStart(OffsetFormat), + ContentEnd(OffsetFormat), + CentralHeaderStart(OffsetFormat), + CompressedSize(ByteSizeFormat), + UncompressedSize(ByteSizeFormat), + UnixMode(UnixModeFormat), + CompressionMethod(CompressionMethodFormat), + CrcValue(BinaryNumericValueFormat), + Timestamp(TimestampFormat), +} + +impl ParseableDirective for EntryFormatDirective { + fn parse_directive(s: &str) -> Result { + match s { + "name" => Ok(Self::Name), + s if s.starts_with("type") => { + let type_fmt = FileTypeFormat::parse(&s["type".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::FileType(type_fmt)) + } + s if s.starts_with("comment") => { + let str_fmt = BinaryStringFormat::parse(&s["comment".len()..]) + 
.map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::Comment(str_fmt)) + } + s if s.starts_with("header-start") => { + let offset_fmt = OffsetFormat::parse(&s["header-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::LocalHeaderStart(offset_fmt)) + } + s if s.starts_with("content-start") => { + let offset_fmt = OffsetFormat::parse(&s["content-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ContentStart(offset_fmt)) + } + s if s.starts_with("content-end") => { + let offset_fmt = OffsetFormat::parse(&s["content-end".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ContentEnd(offset_fmt)) + } + s if s.starts_with("central-header-start") => { + let offset_fmt = OffsetFormat::parse(&s["central-header-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CentralHeaderStart(offset_fmt)) + } + s if s.starts_with("compressed-size") => { + let size_fmt = ByteSizeFormat::parse(&s["compressed-size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CompressedSize(size_fmt)) + } + s if s.starts_with("uncompressed-size") => { + let size_fmt = ByteSizeFormat::parse(&s["uncompressed-size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::UncompressedSize(size_fmt)) + } + s if s.starts_with("unix-mode") => { + let mode_fmt = UnixModeFormat::parse(&s["unix-mode".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::UnixMode(mode_fmt)) + } + s if s.starts_with("compression-method") => { + let method_fmt = CompressionMethodFormat::parse(&s["compression-method".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CompressionMethod(method_fmt)) + } + s if s.starts_with("crc") => { + let num_fmt = BinaryNumericValueFormat::parse(&s["crc".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CrcValue(num_fmt)) + } + s if s.starts_with("timestamp") => { + let ts_fmt = TimestampFormat::parse(&s["timestamp".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::Timestamp(ts_fmt)) + } + _ => Err(DirectiveParseError::Unrecognized(s.to_string())), + } + } +} + +#[derive(Debug, Default)] +pub enum FormatSpec { + #[default] + Compact, + Extended, + Custom { + overview: ParseableFormatSpec, + entry: ParseableFormatSpec, + }, +} + +impl FormatSpec { + pub fn parse_format_strings( + archive_format: String, + entry_format: String, + ) -> Result { + let overview = + ParseableFormatSpec::::parse_format(&archive_format) + .map_err(|e| { + Info::exit_arg_invalid(&format!( + "failed to parse archive format string {archive_format:?}: {e}" + )) + })?; + let entry = ParseableFormatSpec::::parse_format(&entry_format) + .map_err(|e| { + Info::exit_arg_invalid(&format!( + "failed to parse entry format string {entry_format:?}: {e}" + )) + })?; + Ok(Self::Custom { overview, entry }) + } +} + +#[derive(Debug)] +pub struct Info { + pub format_spec: FormatSpec, + pub match_expr: Option, + pub input_spec: InputSpec, +} + +impl CommandFormat for Info { + const COMMAND_NAME: &'static str = "info"; + const COMMAND_TABS: &'static str = "\t\t"; + const COMMAND_DESCRIPTION: &'static str = + "Print info about archive contents and individual entries."; + + const USAGE_LINE: &'static str = + "[-h|--help] [--extended|--format ] [--expr MATCH-EXPR --expr] [--stdin] 
[--] [ZIP-PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +By default, a compact representation of the metadata within the top-level +archive and individual entries is printed to stdout. This format, along with the +"extended" format from --extended, is not stable for processing by external +tools. For stable output, a custom format string should be provided with +--format. + +*Note:* the archive metadata is printed *after* the metadata for each entry, +because zip files store metadata at the end of the file! + +Note that the contents of individual entries are not accessible with this +command, and should instead be extracted with the '{}' subcommand, which can +write entries to stdout or a given file path as well as extracted into an +output directory. + + --extended + Print a verbose description of all top-level archive and individual + entry fields. + + --format + Print a custom description of the top-level archive and individual + entry metadata. + + Both format specs must be provided, but empty strings are + accepted. Explicit trailing newlines must be specified and will not be + inserted automatically. + + Note again that archive metadata is printed after all entries + are formatted. + +# Format specs: +Format specs are literal strings interspersed with directives, which are +surrounded by *paired* '%' characters. This is different from typical %-encoded +format strings which only use a single '%'. A doubled '%%' produces a literal +'%', while '%name%' encodes a directive "name". The directives for archive and +entry format strings are different, but certain directives are parsed with +modifier strings which are shared across both format types. These modifiers are +discussed in the section on . + +## Escape characters: +%% + Prints a literal percent '%'. + +%!% + Prints a single literal newline '\n'. + +%,% + Prints a single literal tab character '\t'. + +## Archive format directives: +This is printed at the bottom of the output, after all entries are formatted. + +%name% + The name of the file provided as input, or '' for stdin. + +%size% + The size of the entire archive. + +%num% + The number of entries in the archive. + +%comment% + The archive comment, if provided (otherwise an empty string). + +%offset% + The offset of the first entry's local header from the start of the + file. This is where the zip file content starts, and arbitrary data may be + present in the space before this point. + +%cde-offset% + The offset of the central directory record from the start of the file. This + is where entry contents end, and after this point is only zip metadata until + the end of the file. + +## Entry format directives: +This is printed for each entry. Note again that no newlines are inserted +automatically, so an explicit trailing newline must be provided to avoid writing +all the output to a single line. + +%name% + The name of the entry in the archive. This is the relative path that the + entry would be extracted to. + +%type% + The type of the entry (file, directory, or symlink). + +%comment% + The entry comment, if provided (otherwise an empty string). + +%header-start% + The offset of the entry's local header, which comes before any + entry contents. + +%content-start% + The offset of the entry's possibly-compressed content, which comes after the + local header. + +%content-end% + The offset of the end of the entry's possibly-compressed content. The next + entry's local header begins immediately after. 
+
+%central-header-start%
+    The offset of the entry's central directory header, at the end of the
+    zip file.
+
+%compressed-size%
+    The size of the entry's possibly-compressed content as stored in
+    the archive.
+
+%uncompressed-size%
+    The size of the entry's content after decompression, as it would be
+    after extraction.
+
+%unix-mode%
+    The mode bits for the entry, if set. If unset, this is interpreted as
+    a value of 0.
+
+%compression-method%
+    The method used to compress the entry.
+
+%crc%
+    The CRC32 value for the entry.
+
+%timestamp%
+    The timestamp for the entry.
+
+    Note that zip timestamps only have precision down to 2 seconds.
+
+## Modifiers <modifiers>:
+byte-size = ''         [DEFAULT => decimal]
+          = ':decimal' (decimal numeric representation)
+          = ':human'   (human-abbreviated size e.g. 1K, 1M)
+
+offset    = ''         [DEFAULT => hex]
+          = ':decimal' (decimal numeric representation)
+          = ':hex'     (hexadecimal numeric representation)
+
+bin-str   = ''         [DEFAULT => print]
+          = ':print'   (non-unicode chunks are replaced with
+                        the unicode replacement character '�')
+          = ':escape'  (surround with "" and escape each byte as ascii)
+          = ':write'   (write string to output without checking for unicode)
+
+unix-mode = ''         [DEFAULT => octal]
+          = ':octal'   (octal numeric representation)
+          = ':pretty'  (`ls`-like permissions string)
+
+timestamp = ''           [DEFAULT => date-time]
+          = ':date'      (ISO 8601 string representation of date)
+          = ':time'      (HH:MM:SS string representation of time)
+          = ':date-time' (ISO 8601 date then HH:MM time joined by a space)
+
+compression-method
+          = ''         [DEFAULT => full]
+          = ':abbrev'  (abbreviated name of method)
+          = ':full'    (full name of method)
+
+bin-num   = ''         [DEFAULT => hex]
+          = ':decimal' (decimal numeric representation)
+          = ':hex'     (hexadecimal numeric representation)
+
+file-type = ''         [DEFAULT => full]
+          = ':abbrev'  (abbreviated name of file type)
+          = ':full'    (full name of file type)
+
+
+{}
+
+{}
+{}
+"#,
+            Extract::COMMAND_NAME,
+            Extract::generate_match_expr_help_text(),
+            Extract::generate_pattern_selector_help_text(PatSelContext::MatchOnly),
+            Extract::INPUT_HELP_TEXT,
+        )
+    }
+
+    fn parse_argv(mut argv: VecDeque<OsString>) -> Result<Self, ArgParseError> {
+        let mut format_spec: Option<FormatSpec> = None;
+        let mut match_expr: Option<MatchExpression> = None;
+        let mut stdin_flag = false;
+        let mut positional_zips: Vec<PathBuf> = Vec::new();
+
+        while let Some(arg) = argv.pop_front() {
+            match arg.as_encoded_bytes() {
+                b"-h" | b"--help" => {
+                    let help_text = Self::generate_full_help_text();
+                    return Err(ArgParseError::StdoutMessage(help_text));
+                }
+
+                /* Try parsing format specs. */
+                b"--extended" => {
+                    if let Some(prev_spec) = format_spec.take() {
+                        return Err(Self::exit_arg_invalid(&format!(
+                            "format spec already provided before --extended: {prev_spec:?}"
+                        )));
+                    }
+                    format_spec = Some(FormatSpec::Extended);
+                }
+                b"--format" => {
+                    if let Some(prev_spec) = format_spec.take() {
+                        return Err(Self::exit_arg_invalid(&format!(
+                            "format spec already provided before --format: {prev_spec:?}"
+                        )));
+                    }
+                    let archive_format = argv
+                        .pop_front()
+                        .ok_or_else(|| {
+                            Self::exit_arg_invalid("no arg provided to --format")
+                        })?
+                        .into_string()
+                        .map_err(|fmt_arg| {
+                            Self::exit_arg_invalid(&format!(
+                                "invalid unicode provided to --format: {fmt_arg:?}"
+                            ))
+                        })?;
+                    let entry_format = argv
+                        .pop_front()
+                        .ok_or_else(|| {
+                            Self::exit_arg_invalid("no arg provided to --format")
+                        })?
+                        .into_string()
+                        .map_err(|fmt_arg| {
+                            Self::exit_arg_invalid(&format!(
+                                "invalid unicode provided to --format: {fmt_arg:?}"
+                            ))
+                        })?;
+                    format_spec = Some(FormatSpec::parse_format_strings(
+                        archive_format,
+                        entry_format,
+                    )?);
+                }
+
+                /* Try parsing match specs! */
+                b"--expr" => {
+                    let new_expr = MatchExpression::parse_argv::(&mut argv)?;
+                    if let Some(prev_expr) = match_expr.take() {
+                        return Err(Self::exit_arg_invalid(&format!(
+                            "multiple match expressions provided: {prev_expr:?} and {new_expr:?}"
+                        )));
+                    }
+                    match_expr = Some(new_expr);
+                }
+
+                /* Transition to input args */
+                b"--stdin" => {
+                    stdin_flag = true;
+                }
+                b"--" => break,
+                arg_bytes => {
+                    if arg_bytes.starts_with(b"-") {
+                        return Err(Self::exit_arg_invalid(&format!(
+                            "unrecognized flag {arg:?}"
+                        )));
+                    } else {
+                        argv.push_front(arg);
+                        break;
+                    }
+                }
+            }
+        }
+
+        positional_zips.extend(argv.into_iter().map(|arg| arg.into()));
+        if !stdin_flag && positional_zips.is_empty() {
+            return Err(Self::exit_arg_invalid(
+                "no zip input files were provided, and --stdin was not provided",
+            ));
+        };
+        let input_spec = InputSpec {
+            stdin_stream: stdin_flag,
+            zip_paths: positional_zips,
+        };
+
+        let format_spec = format_spec.unwrap_or_default();
+
+        Ok(Self {
+            format_spec,
+            match_expr,
+            input_spec,
+        })
+    }
+}
+
+impl crate::driver::ExecuteCommand for Info {
+    fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> {
+        crate::info::execute_info(err, self)
+    }
+}
diff --git a/cli/src/compress.rs b/cli/src/compress.rs
new file mode 100644
index 000000000..27705dd22
--- /dev/null
+++ b/cli/src/compress.rs
@@ -0,0 +1,400 @@
+use std::{
+    ffi::OsString,
+    fs,
+    io::{self, Cursor, IsTerminal, Seek, Write},
+    mem,
+    path::{Path, PathBuf},
+};
+
+use zip::{
+    unstable::path_to_string,
+    write::{SimpleFileOptions, ZipWriter},
+    CompressionMethod, ZIP64_BYTES_THR,
+};
+
+use crate::{args::compress::*, CommandError, OutputHandle, WrapCommandErr};
+
+impl EntrySpec {
+    pub fn create_entry(
+        self,
+        writer: &mut ZipWriter<impl Write + Seek>,
+        options: SimpleFileOptions,
+        mut err: impl Write,
+    ) -> Result<(), CommandError> {
+        match self {
+            Self::Dir { name } => writer
+                .add_directory(&name, options)
+                .wrap_err_with(|| format!("failed to create dir entry {name}")),
+            Self::Immediate {
+                name,
+                data,
+                symlink_flag,
+            } => {
+                if data.len() > ZIP64_BYTES_THR.try_into().unwrap() {
+                    return Err(CommandError::InvalidArg(format!(
+                        "length of immediate data argument is {}; use a file for inputs over {} bytes",
+                        data.len(),
+                        ZIP64_BYTES_THR
+                    )));
+                };
+                if symlink_flag {
+                    /* This is a symlink entry. */
+                    let target = data.into_string().map_err(|target| {
+                        CommandError::InvalidArg(format!(
+                            "failed to decode immediate symlink target {target:?}"
+                        ))
+                    })?;
+                    writeln!(
+                        err,
+                        "writing immediate symlink entry with name {name:?} and target {target:?}"
+                    )
+                    .unwrap();
+                    /* TODO: .add_symlink() should support OsString targets! */
+                    writer
+                        .add_symlink(&name, &target, options)
+                        .wrap_err_with(|| {
+                            format!("failed to create symlink entry {name}->{target}")
+                        })
+                } else {
+                    /* This is a file entry.
*/ + writeln!( + err, + "writing immediate file entry with name {name:?} and data {data:?}" + ) + .unwrap(); + let data = data.into_encoded_bytes(); + writer + .start_file(&name, options) + .wrap_err_with(|| format!("failed to create file entry {name}"))?; + writer.write_all(data.as_ref()).wrap_err_with(|| { + format!( + "failed writing immediate data of length {} to file entry {name}", + data.len() + ) + }) + } + } + Self::File { + name, + path, + symlink_flag, + } => { + let name = name.unwrap_or_else(|| path_to_string(&path).into()); + if symlink_flag { + /* This is a symlink entry. */ + let target: String = + path_to_string(fs::read_link(&path).wrap_err_with(|| { + format!("failed to read symlink from path {}", path.display()) + })?) + .into(); + /* Similarly to immediate data arguments, we're simply not going to support + * symlinks over this length, which should be impossible anyway. */ + if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "symlink target for {name} is over {ZIP64_BYTES_THR} bytes (was: {})", + target.len() + ))); + } + writeln!(err, "writing symlink entry from path {path:?} with name {name:?} and target {target:?}").unwrap(); + writer + .add_symlink(&name, &target, options) + .wrap_err_with(|| { + format!("failed to create symlink entry for {name}->{target}") + }) + } else { + /* This is a file entry. */ + writeln!( + err, + "writing file entry from path {path:?} with name {name:?}" + ) + .unwrap(); + let mut f = fs::File::open(&path).wrap_err_with(|| { + format!("error opening file for {name} at {}", path.display()) + })?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? 
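+                        /* (Assumed: ZIP64_BYTES_THR is the zip crate's zip64 size
+                         * threshold, the largest value a legacy 32-bit size field
+                         * can hold.) Entries larger than this must be written with
+                         * .large_file(true), as done just below, so the writer
+                         * emits zip64 metadata for them. */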
+ .len(); + writeln!(err, "entry is {input_len} bytes long").unwrap(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file(&name, maybe_large_file_options) + .wrap_err_with(|| format!("error creating file entry for {name}"))?; + io::copy(&mut f, writer) + .wrap_err_with(|| { + format!("error copying content for {name} from file {f:?}") + }) + .map(|_| ()) + } + } + Self::RecDir { name, path } => { + writeln!( + err, + "writing recursive dir entries for path {path:?} with name {name:?}" + ) + .unwrap(); + enter_recursive_dir_entries(&mut err, name, &path, writer, options) + } + } + } +} + +impl ModificationOperation { + pub fn invoke( + self, + writer: &mut ZipWriter, + err: impl Write, + ) -> Result<(), CommandError> { + match self { + Self::CreateEntry { options, spec } => spec.create_entry(writer, options, err), + } + } +} + +impl ModificationSequence { + pub fn invoke( + self, + writer: &mut ZipWriter, + mut err: impl Write, + ) -> Result<(), CommandError> { + let Self { operations } = self; + for op in operations.into_iter() { + op.invoke(writer, &mut err)?; + } + Ok(()) + } +} + +fn enter_recursive_dir_entries( + err: &mut impl Write, + base_rename: Option, + root: &Path, + writer: &mut ZipWriter, + options: SimpleFileOptions, +) -> Result<(), CommandError> { + let base_dirname: String = base_rename + .unwrap_or_else(|| path_to_string(root).into()) + .trim_end_matches('/') + .to_string(); + writeln!( + err, + "writing top-level directory entry for {base_dirname:?}" + ) + .unwrap(); + writer + .add_directory(&base_dirname, options) + .wrap_err_with(|| format!("error adding top-level directory entry {base_dirname}"))?; + + let mut readdir_stack: Vec<(fs::ReadDir, String)> = vec![( + fs::read_dir(root) + .wrap_err_with(|| format!("error reading directory contents for {}", root.display()))?, + base_dirname, + )]; + while let Some((mut readdir, top_component)) = readdir_stack.pop() { + if let Some(dir_entry) = readdir + .next() + .transpose() + .wrap_err("reading next dir entry")? 
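+        /* Sketch of the traversal invariant: readdir_stack holds one
+         * (ReadDir, component) pair per open ancestor directory, so joining
+         * the stacked components with '/' (as below) reconstructs the
+         * archive-relative path of the entry currently being visited. */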
+ { + let mut components: Vec<&str> = readdir_stack.iter().map(|(_, s)| s.as_ref()).collect(); + components.push(&top_component); + + let entry_basename: String = dir_entry.file_name().into_string().map_err(|name| { + CommandError::InvalidArg(format!("failed to decode basename {name:?}")) + })?; + components.push(&entry_basename); + let full_path: String = components.join("/"); + readdir_stack.push((readdir, top_component)); + + let file_type = dir_entry.file_type().wrap_err_with(|| { + format!("failed to read file type for dir entry {dir_entry:?}") + })?; + if file_type.is_symlink() { + let target: String = path_to_string( + fs::read_link(dir_entry.path()) + .wrap_err_with(|| format!("failed to read symlink from {dir_entry:?}"))?, + ) + .into(); + if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "symlink target for {full_path} is over {ZIP64_BYTES_THR} bytes (was: {})", + target.len() + ))); + } + writeln!( + err, + "writing recursive symlink entry with name {full_path:?} and target {target:?}" + ) + .unwrap(); + writer + .add_symlink(&full_path, &target, options) + .wrap_err_with(|| format!("error adding symlink from {full_path}->{target}"))?; + } else if file_type.is_file() { + writeln!(err, "writing recursive file entry with name {full_path:?}").unwrap(); + let mut f = fs::File::open(dir_entry.path()).wrap_err_with(|| { + format!("error opening file for {full_path} from dir entry {dir_entry:?}") + })?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? + .len(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file(&full_path, maybe_large_file_options) + .wrap_err_with(|| format!("error creating file entry for {full_path}"))?; + io::copy(&mut f, writer).wrap_err_with(|| { + format!("error copying content for {full_path} from file {f:?}") + })?; + } else { + assert!(file_type.is_dir()); + writeln!( + err, + "writing recursive directory entry with name {full_path:?}" + ) + .unwrap(); + writer + .add_directory(&full_path, options) + .wrap_err_with(|| format!("failed to create directory entry {full_path}"))?; + writeln!( + err, + "adding subdirectories depth-first for recursive directory entry {entry_basename:?}" + ).unwrap(); + let new_readdir = fs::read_dir(dir_entry.path()).wrap_err_with(|| { + format!("failed to read recursive directory contents from {dir_entry:?}") + })?; + readdir_stack.push((new_readdir, entry_basename)); + } + } + } + Ok(()) +} + +pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), CommandError> { + let Compress { + output, + global_flags, + mod_seq, + } = args; + + let (out, do_append) = match output { + OutputType::File { path, append } => { + if append { + writeln!( + err, + "reading compressed zip from output file path {path:?} for append" + ) + .unwrap(); + match fs::OpenOptions::new() + .read(true) + .write(true) + .create(false) + .open(&path) + { + Ok(f) => { + writeln!(err, "output zip file existed, appending").unwrap(); + (OutputHandle::File(f), true) + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + writeln!( + err, + "output zip file did not exist, creating new file instead of appending" + ) + .unwrap(); + let out = + 
OutputHandle::File(fs::File::create(&path).wrap_err_with(|| {
+                                format!("failed to create new zip output file at {path:?}")
+                            })?);
+                        (out, false)
+                    }
+                    Err(e) => {
+                        return Err(e).wrap_err_with(|| {
+                            format!(
+                                "unexpected error reading zip output file for append at {path:?}"
+                            )
+                        });
+                    }
+                }
+            } else {
+                writeln!(err, "writing compressed zip to output file path {path:?}").unwrap();
+                let out = OutputHandle::File(fs::File::create(&path).wrap_err_with(|| {
+                    format!("failed to create output file at {}", path.display())
+                })?);
+                (out, false)
+            }
+        }
+        OutputType::Stdout { allow_tty } => {
+            writeln!(
+                err,
+                "writing to stdout and buffering compressed zip in memory"
+            )
+            .unwrap();
+            if io::stdout().is_terminal() && !allow_tty {
+                /* TODO: maybe figure out some way to ensure --stdout is still the correct flag */
+                return Err(CommandError::InvalidArg(
+                    "stdout is a tty, but --stdout was not set".to_string(),
+                ));
+            }
+            let out = OutputHandle::InMem(Cursor::new(Vec::new()));
+            (out, false)
+        }
+    };
+    let mut writer = if do_append {
+        ZipWriter::new_append(out)
+            .wrap_err("failed to initialize zip writer from existing zip file for append")?
+    } else {
+        ZipWriter::new(out)
+    };
+
+    let GlobalFlags { archive_comment } = global_flags;
+    if let Some(comment) = archive_comment {
+        writeln!(err, "comment was provided: {comment:?}").unwrap();
+        let comment = comment.into_encoded_bytes();
+        writer.set_raw_comment(comment.into());
+    }
+
+    mod_seq.invoke(&mut writer, &mut err)?;
+
+    let handle = writer
+        .finish()
+        .wrap_err("failed to write zip to output handle")?;
+    match handle {
+        OutputHandle::File(f) => {
+            let archive_len: u64 = f
+                .metadata()
+                .wrap_err_with(|| format!("failed reading metadata from file {f:?}"))?
+                .len();
+            writeln!(err, "file archive {f:?} was {archive_len} bytes").unwrap();
+            mem::drop(f); /* Superfluous explicit drop. */
+        }
+        OutputHandle::InMem(mut cursor) => {
+            let archive_len: u64 = cursor.position();
+            writeln!(err, "in-memory archive was {archive_len} bytes").unwrap();
+            cursor.rewind().wrap_err("failed to rewind cursor")?;
+            let mut stdout = io::stdout().lock();
+            io::copy(&mut cursor, &mut stdout)
+                .wrap_err_with(|| format!("failed to copy {archive_len} byte archive to stdout"))?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/cli/src/extract.rs b/cli/src/extract.rs
new file mode 100644
index 000000000..9ae58fb58
--- /dev/null
+++ b/cli/src/extract.rs
@@ -0,0 +1,195 @@
+use std::{
+    borrow::Cow,
+    cell::RefCell,
+    fs,
+    io::{self, Read, Write},
+    rc::Rc,
+};
+
+use zip::read::{ZipArchive, ZipFile};
+
+use crate::{args::extract::*, CommandError, WrapCommandErr};
+
+pub mod entries;
+pub mod matcher;
+pub mod named_outputs;
+pub mod receiver;
+pub mod transform;
+use entries::{IterateEntries, StreamInput, ZipFileInput};
+use receiver::{CompiledEntrySpec, EntryData, EntryKind, EntryReceiver, ExtractEntry};
+
+fn maybe_process_symlink<'a, 't>(
+    entry: &mut ZipFile<'a>,
+    err: &Rc<RefCell<impl Write>>,
+    symlink_target: &'t mut Vec<u8>,
+) -> Result<Option<&'t [u8]>, CommandError> {
+    let (kind, size) = {
+        /* FIXME: the ZipFile<'a> struct contains a *mutable* reference to the parent archive,
+         * and this actually imposes a mutable reference upon any references to the
+         * immutable ZipFileData contents. This means we cannot have any immutable
+         * references to the ZipFileData contents at the same time as a mutable
+         * reference. What this means here is that we have to create a temporary EntryData
+         * struct and then immediately throw it away in order to be able to read the entry
+         * contents with io::Read.
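+         * The two-phase shape used below is, roughly:
+         *   let (kind, size) = {
+         *       let data = EntryData::from_entry(&entry);
+         *       (data.kind, data.uncompressed_size)
+         *   }; // metadata borrows end here, freeing `entry` for io::Read.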
+         * ZipEntry<'a, R> from
+         * https://github.com/zip-rs/zip2/pull/233 avoids this issue!!! */
+        let data = EntryData::from_entry(&entry);
+        (data.kind, data.uncompressed_size)
+    };
+    if !matches!(kind, EntryKind::Symlink) {
+        return Ok(None);
+    }
+
+    /* We can't read the entry name from EntryData because we can't have any immutable
+     * references to ZipFileData like the name at the same time we use the entry as
+     * a reader! That means our log message here is very unclear! */
+    writeln!(&mut err.borrow_mut(), "reading symlink target").unwrap();
+    /* Re-use the vector allocation, but make sure to avoid re-using the symlink data from
+     * a previous iteration. */
+    symlink_target.clear();
+    entry
+        .read_to_end(symlink_target)
+        .wrap_err("failed to read symlink target from zip archive entry")?;
+    debug_assert_eq!(u64::try_from(symlink_target.len()).unwrap(), size);
+    Ok(Some(symlink_target))
+}
+
+fn process_entry<'a, 'w, 'c, 'it>(
+    mut entry: ZipFile<'a>,
+    err: &Rc<RefCell<impl Write>>,
+    compiled_specs: impl Iterator<Item = &'it CompiledEntrySpec<'w>>,
+    copy_buf: &mut [u8],
+    symlink_target: &mut Vec<u8>,
+    deduped_concat_writers: &mut Vec<&'c Rc<RefCell<dyn Write + 'w>>>,
+    matching_handles: &mut Vec<Box<dyn Write + 'w>>,
+) -> Result<(), CommandError>
+where
+    'w: 'it,
+    'it: 'c,
+{
+    deduped_concat_writers.clear();
+    matching_handles.clear();
+
+    let symlink_target = maybe_process_symlink(&mut entry, err, symlink_target)?;
+    /* We dropped any mutable handles to the entry, so now we can access its metadata again. */
+    let data = EntryData::from_entry(&entry);
+
+    let mut deduped_matching_extracts: Vec<(&'c Rc<dyn EntryReceiver + 'w>, Vec<Cow<'a, str>>)> =
+        Vec::new();
+    for matching_spec in compiled_specs.filter_map(|spec| spec.try_match_and_transform(&data)) {
+        if matching_spec.is_nested_duplicate(deduped_concat_writers, &mut deduped_matching_extracts)
+        {
+            writeln!(&mut err.borrow_mut(), "skipping repeated output").unwrap();
+        }
+    }
+
+    matching_handles.extend(
+        deduped_matching_extracts
+            .into_iter()
+            .flat_map(|(recv, names)| names.into_iter().map(move |n| (recv, n)))
+            .map(|(recv, name)| recv.generate_entry_handle(&data, symlink_target.as_deref(), name))
+            .collect::<Result<Vec<_>, _>>()?
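+            /* collect() short-circuits on the first failed handle creation;
+             * the flatten() below then drops Ok(None) results, i.e. entries
+             * (directories, symlinks) for which the receiver opens no writer. */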
+ .into_iter() + .flatten(), + ); + + let mut read_len: usize; + loop { + read_len = entry.read(copy_buf).wrap_err("read of entry failed")?; + if read_len == 0 { + break; + } + let cur_data: &[u8] = ©_buf[..read_len]; + for concat_writer in deduped_concat_writers.iter() { + concat_writer + .borrow_mut() + .write_all(cur_data) + .wrap_err("failed to write data to concat output")?; + } + for extract_writer in matching_handles.iter_mut() { + extract_writer + .write_all(cur_data) + .wrap_err("failed to write data to extract output")?; + } + } + + Ok(()) +} + +pub fn execute_extract(err: impl Write, extract: Extract) -> Result<(), CommandError> { + let Extract { + output_specs, + entry_specs, + input_spec: InputSpec { + stdin_stream, + zip_paths, + }, + } = extract; + let err = Rc::new(RefCell::new(err)); + + writeln!(&mut err.borrow_mut(), "entry specs: {entry_specs:?}").unwrap(); + let compiled_specs = + named_outputs::process_entry_and_output_specs(err.clone(), entry_specs, output_specs)?; + writeln!(&mut err.borrow_mut(), "compiled specs: {compiled_specs:?}").unwrap(); + + let mut copy_buf: Vec = vec![0u8; 1024 * 16]; + let mut symlink_target: Vec = Vec::new(); + + let mut deduped_concat_writers: Vec<&Rc>> = Vec::new(); + let mut matching_handles: Vec> = Vec::new(); + + if stdin_stream { + writeln!(&mut err.borrow_mut(), "extracting from stdin").unwrap(); + let mut stdin = StreamInput::new(io::stdin().lock()); + + while let Some(entry) = stdin.next_entry()? { + process_entry( + entry, + &err, + compiled_specs.iter(), + &mut copy_buf, + &mut symlink_target, + &mut deduped_concat_writers, + &mut matching_handles, + )?; + } + } + + for p in zip_paths.into_iter() { + writeln!( + &mut err.borrow_mut(), + "extracting from zip input file {p:?}", + ) + .unwrap(); + let zip = fs::File::open(&p) + .wrap_err_with(|| format!("failed to open zip input file path {p:?}")) + .and_then(|f| { + ZipArchive::new(f) + .wrap_err_with(|| format!("failed to create zip archive for file {p:?}")) + })?; + let mut zip_entries = ZipFileInput::new(Box::new(zip)); + + while let Some(entry) = zip_entries.next_entry()? { + process_entry( + entry, + &err, + compiled_specs.iter(), + &mut copy_buf, + &mut symlink_target, + &mut deduped_concat_writers, + &mut matching_handles, + )?; + } + } + + /* Finalize all extract entries. */ + for spec in compiled_specs.into_iter() { + match spec { + CompiledEntrySpec::Concat(_) => (), + CompiledEntrySpec::Extract(ExtractEntry { recv, .. 
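+            /* Only extract receivers defer work to finalize_entries() (e.g. the
+             * unix perms recorded by FilesystemReceiver); concat outputs are
+             * plain writers and need no finalization. */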
}) => { + recv.finalize_entries()?; + } + } + } + + Ok(()) +} diff --git a/cli/src/extract/entries.rs b/cli/src/extract/entries.rs new file mode 100644 index 000000000..bb46fb79b --- /dev/null +++ b/cli/src/extract/entries.rs @@ -0,0 +1,132 @@ +use std::{fs, io, ops}; + +use zip::{ + read::{read_zipfile_from_stream, ZipFile}, + ZipArchive, +}; + +use crate::{CommandError, WrapCommandErr}; + +pub trait IterateEntries { + fn next_entry(&mut self) -> Result, CommandError>; +} + +pub struct ReadChecker { + inner: R, + bytes_read: u64, +} + +impl ReadChecker { + pub const fn current_bytes_read(&self) -> u64 { + self.bytes_read + } +} + +impl ReadChecker +where + R: io::Read, +{ + pub fn exhaust(mut self) -> io::Result<(R, u64)> { + io::copy(&mut self, &mut io::sink())?; + let Self { inner, bytes_read } = self; + Ok((inner, bytes_read)) + } +} + +impl io::Read for ReadChecker +where + R: io::Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let n = self.inner.read(buf)?; + let num_read: u64 = n.try_into().unwrap(); + self.bytes_read += num_read; + Ok(n) + } +} + +pub struct StreamInput { + inner: ReadChecker, + entries_read: usize, +} + +impl StreamInput { + pub fn new(inner: R) -> Self { + Self { + inner: ReadChecker { + inner, + bytes_read: 0, + }, + entries_read: 0, + } + } + + pub fn into_inner(self) -> (ReadChecker, usize) { + let Self { + inner, + entries_read, + } = self; + (inner, entries_read) + } +} + +impl IterateEntries for StreamInput +where + R: io::Read, +{ + fn next_entry(&mut self) -> Result, CommandError> { + if let Some(entry) = read_zipfile_from_stream(&mut self.inner) + .wrap_err("failed to read zip entries from stdin")? + { + self.entries_read += 1; + Ok(Some(entry)) + } else { + Ok(None) + } + } +} + +#[derive(Debug)] +pub struct ZipFileInput { + inner: A, + file_counter: usize, +} + +impl ZipFileInput { + pub fn new(inner: A) -> Self { + Self { + inner, + file_counter: 0, + } + } +} + +impl ZipFileInput +where + A: ops::Deref>, +{ + pub fn remaining(&self) -> usize { + self.inner.len() - self.file_counter + } + + pub fn none_left(&self) -> bool { + self.remaining() == 0 + } +} + +impl IterateEntries for ZipFileInput +where + A: ops::DerefMut>, +{ + fn next_entry(&mut self) -> Result, CommandError> { + if self.none_left() { + return Ok(None); + } + let prev_counter = self.file_counter; + self.file_counter += 1; + self.inner + .by_index(prev_counter) + .map(Some) + .wrap_err_with(|| format!("failed to read entry #{prev_counter} from zip",)) + } +} diff --git a/cli/src/extract/matcher.rs b/cli/src/extract/matcher.rs new file mode 100644 index 000000000..9e3eb463f --- /dev/null +++ b/cli/src/extract/matcher.rs @@ -0,0 +1,528 @@ +use std::{borrow::Cow, fmt}; + +#[cfg(feature = "glob")] +use glob; +#[cfg(feature = "rx")] +use regex; + +use zip::CompressionMethod; + +use super::receiver::{EntryData, EntryKind}; +use super::transform::ComponentSplit; +use crate::{args::extract::*, CommandError}; + +#[inline(always)] +fn process_component_selector<'s>(sel: ComponentSelector, name: &'s str) -> Option<&'s str> { + ComponentSplit::split_by_component_selector(sel, name).map(|split| match split { + ComponentSplit::LeftAnchored { selected_left, .. } => selected_left, + ComponentSplit::RightAnchored { selected_right, .. 
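+        /* Illustrative mapping for name = "a/b/c.txt": Path selects the whole
+         * string, Basename selects "c.txt", Dirname selects "a/b", and
+         * FileExtension selects "txt". */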
} => selected_right, + ComponentSplit::Whole(s) => s, + }) +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SearchAnchoring { + #[default] + Unanchored, + LeftAnchored, + RightAnchored, + DoublyAnchored, +} + +impl SearchAnchoring { + pub const fn from_prefix_suffix_flags(prefix_anchored: bool, suffix_anchored: bool) -> Self { + match (prefix_anchored, suffix_anchored) { + (true, true) => Self::DoublyAnchored, + (true, false) => Self::LeftAnchored, + (false, true) => Self::RightAnchored, + (false, false) => Self::Unanchored, + } + } + + pub fn wrap_regex_pattern<'s>(self, pattern: &'s str) -> Cow<'s, str> { + match self { + Self::Unanchored => Cow::Borrowed(pattern), + Self::LeftAnchored => Cow::Owned(format!("^(?:{pattern})")), + Self::RightAnchored => Cow::Owned(format!("(?:{pattern})$")), + Self::DoublyAnchored => Cow::Owned(format!("^(?:{pattern})$")), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum CaseSensitivity { + #[default] + Sensitive, + Insensitive, +} + +impl CaseSensitivity { + pub const fn from_case_insensitive_flag(case_insensitive: bool) -> Self { + match case_insensitive { + true => Self::Insensitive, + false => Self::Sensitive, + } + } + + pub fn string_equal(self, a: &str, b: &str) -> bool { + match self { + Self::Insensitive => a.eq_ignore_ascii_case(b), + Self::Sensitive => a == b, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MatchModifiers { + pub anchoring: SearchAnchoring, + pub case: CaseSensitivity, +} + +impl MatchModifiers { + pub fn from_flags(flags: PatternModifierFlags) -> Result { + let PatternModifierFlags { + case_insensitive, + multiple_matches, + prefix_anchored, + suffix_anchored, + } = flags; + if multiple_matches { + return Err(CommandError::InvalidArg(format!( + "multimatch modifier :g is unused in match expressions: {flags:?}" + ))); + } + let case = CaseSensitivity::from_case_insensitive_flag(case_insensitive); + let anchoring = SearchAnchoring::from_prefix_suffix_flags(prefix_anchored, suffix_anchored); + Ok(Self { anchoring, case }) + } +} + +trait NameMatcher: fmt::Debug { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized; + fn matches(&self, input: &str) -> bool; +} + +#[derive(Debug)] +struct LiteralMatcher { + lit: String, + case: CaseSensitivity, + anchoring: SearchAnchoring, +} + +impl NameMatcher for LiteralMatcher { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized, + { + let MatchModifiers { case, anchoring } = opts; + Ok(Self { + lit: match case { + CaseSensitivity::Sensitive => pattern, + CaseSensitivity::Insensitive => pattern.to_ascii_uppercase(), + }, + case, + anchoring, + }) + } + + fn matches(&self, input: &str) -> bool { + if input.len() < self.lit.len() { + return false; + } + match self.anchoring { + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Insensitive => input.to_ascii_uppercase().contains(&self.lit), + CaseSensitivity::Sensitive => input.contains(&self.lit), + }, + SearchAnchoring::DoublyAnchored => self.case.string_equal(&self.lit, input), + SearchAnchoring::LeftAnchored => { + let prefix = &input[..self.lit.len()]; + self.case.string_equal(&self.lit, prefix) + } + SearchAnchoring::RightAnchored => { + let suffix = &input[(input.len() - self.lit.len())..]; + self.case.string_equal(&self.lit, suffix) + } + } + } +} + +#[derive(Debug)] +#[cfg(feature = "glob")] +struct GlobMatcher 
{ + pat: glob::Pattern, + glob_opts: glob::MatchOptions, +} + +#[cfg(feature = "glob")] +impl NameMatcher for GlobMatcher { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized, + { + let MatchModifiers { anchoring, case } = opts; + if !matches!(anchoring, SearchAnchoring::Unanchored) { + return Err(CommandError::InvalidArg(format!( + "anchored search with :p or :s is incompatible with glob patterns: {opts:?}" + ))); + } + let glob_opts = glob::MatchOptions { + case_sensitive: match case { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + }, + ..Default::default() + }; + let pat = glob::Pattern::new(&pattern).map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct glob matcher from pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat, glob_opts }) + } + + fn matches(&self, input: &str) -> bool { + self.pat.matches_with(input, self.glob_opts) + } +} + +#[derive(Debug)] +#[cfg(feature = "rx")] +struct RegexMatcher { + pat: regex::Regex, +} + +#[cfg(feature = "rx")] +impl NameMatcher for RegexMatcher { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized, + { + let MatchModifiers { case, anchoring } = opts; + + let pattern = anchoring.wrap_regex_pattern(&pattern); + + let pat = regex::RegexBuilder::new(&pattern) + .case_insensitive(match case { + CaseSensitivity::Sensitive => false, + CaseSensitivity::Insensitive => true, + }) + .build() + .map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct regex matcher from pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat }) + } + + fn matches(&self, input: &str) -> bool { + self.pat.is_match(input) + } +} + +pub trait EntryMatcher: fmt::Debug { + type Arg + where + Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized; + fn matches(&self, entry: &EntryData) -> bool; +} + +#[derive(Debug, Copy, Clone)] +enum TrivialMatcher { + True, + False, +} + +impl EntryMatcher for TrivialMatcher { + type Arg = TrivialPredicate where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + TrivialPredicate::True => Self::True, + TrivialPredicate::False => Self::False, + }) + } + + fn matches(&self, _entry: &EntryData) -> bool { + match self { + Self::True => true, + Self::False => false, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum EntryTypeMatcher { + File, + Dir, + Symlink, +} + +impl EntryMatcher for EntryTypeMatcher { + type Arg = EntryType where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + EntryType::File => Self::File, + EntryType::Dir => Self::Dir, + EntryType::Symlink => Self::Symlink, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match (self, entry.kind) { + (Self::File, EntryKind::File) => true, + (Self::Dir, EntryKind::Dir) => true, + (Self::Symlink, EntryKind::Symlink) => true, + _ => false, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum NonSpecificMethods { + Any, + Known, +} + +impl EntryMatcher for NonSpecificMethods { + type Arg = NonSpecificCompressionMethodArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + NonSpecificCompressionMethodArg::Any => Self::Any, + NonSpecificCompressionMethodArg::Known => Self::Known, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Any => true, + Self::Known => { + SpecificCompressionMethodArg::KNOWN_COMPRESSION_METHODS.contains(&entry.compression) + } + } 
+ } +} + +#[derive(Debug)] +struct SpecificMethods { + specific_method: CompressionMethod, +} + +impl EntryMatcher for SpecificMethods { + type Arg = SpecificCompressionMethodArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { + specific_method: arg.translate_to_zip(), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + self.specific_method == entry.compression + } +} + +#[derive(Debug, Copy, Clone)] +enum DepthLimit { + Max(usize), + Min(usize), +} + +impl EntryMatcher for DepthLimit { + type Arg = DepthLimitArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + DepthLimitArg::Max(max) => Self::Max(max.into()), + DepthLimitArg::Min(min) => Self::Min(min.into()), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + let num_components = entry.name.split('/').count(); + match self { + Self::Max(max) => num_components <= *max, + Self::Min(min) => num_components >= *min, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum Size { + Max(u64), + Min(u64), +} + +impl EntryMatcher for Size { + type Arg = SizeArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + SizeArg::Max(max) => Self::Max(max), + SizeArg::Min(min) => Self::Min(min), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Max(max) => entry.uncompressed_size <= *max, + Self::Min(min) => entry.uncompressed_size >= *min, + } + } +} + +#[derive(Debug)] +struct PatternMatcher { + matcher: Box, + comp_sel: ComponentSelector, +} + +impl EntryMatcher for PatternMatcher { + type Arg = MatchArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + let MatchArg { + comp_sel, + pat_sel: PatternSelector { pat_sel, modifiers }, + pattern, + } = arg; + + let opts = MatchModifiers::from_flags(modifiers)?; + let matcher: Box = match pat_sel { + PatternSelectorType::Glob => { + #[cfg(feature = "glob")] + { + Box::new(GlobMatcher::create(pattern, opts)?) + } + #[cfg(not(feature = "glob"))] + { + return Err(CommandError::InvalidArg(format!( + "glob patterns were requested, but this binary was built without the \"glob\" feature: {pattern:?}" + ))); + } + } + + PatternSelectorType::Literal => Box::new(LiteralMatcher::create(pattern, opts)?), + PatternSelectorType::Regexp => { + #[cfg(feature = "rx")] + { + Box::new(RegexMatcher::create(pattern, opts)?) + } + #[cfg(not(feature = "rx"))] + { + return Err(CommandError::InvalidArg(format!( + "regexp patterns were requested, but this binary was built without the \"rx\" feature: {pattern:?}" + ))); + } + } + }; + + Ok(Self { matcher, comp_sel }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match process_component_selector(self.comp_sel, entry.name) { + None => false, + Some(s) => self.matcher.matches(s), + } + } +} + +#[derive(Debug)] +pub enum CompiledMatcher { + Primitive(Box), + Negated(Box), + And { + left: Box, + right: Box, + }, + Or { + left: Box, + right: Box, + }, +} + +impl CompiledMatcher { + fn create_primitive(arg: Predicate) -> Result { + Ok(Self::Primitive(match arg { + Predicate::Trivial(arg) => Box::new(TrivialMatcher::from_arg(arg)?), + Predicate::EntryType(arg) => Box::new(EntryTypeMatcher::from_arg(arg)?), + Predicate::CompressionMethod(method_arg) => match method_arg { + CompressionMethodArg::NonSpecific(arg) => { + Box::new(NonSpecificMethods::from_arg(arg)?) 
+ } + CompressionMethodArg::Specific(arg) => Box::new(SpecificMethods::from_arg(arg)?), + }, + Predicate::DepthLimit(arg) => Box::new(DepthLimit::from_arg(arg)?), + Predicate::Size(arg) => Box::new(Size::from_arg(arg)?), + Predicate::Match(arg) => Box::new(PatternMatcher::from_arg(arg)?), + })) + } +} + +impl EntryMatcher for CompiledMatcher { + type Arg = MatchExpression where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + MatchExpression::PrimitivePredicate(pred) => Self::create_primitive(pred)?, + MatchExpression::Negated(arg) => Self::Negated(Box::new(Self::from_arg(*arg)?)), + MatchExpression::And { + explicit: _, + left, + right, + } => { + let left = Box::new(Self::from_arg(*left)?); + let right = Box::new(Self::from_arg(*right)?); + Self::And { left, right } + } + MatchExpression::Or { left, right } => { + let left = Box::new(Self::from_arg(*left)?); + let right = Box::new(Self::from_arg(*right)?); + Self::Or { left, right } + } + MatchExpression::Grouped(inner) => Self::from_arg(*inner)?, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Primitive(m) => m.matches(entry), + Self::Negated(m) => !m.matches(entry), + Self::And { left, right } => left.matches(entry) && right.matches(entry), + Self::Or { left, right } => left.matches(entry) || right.matches(entry), + } + } +} diff --git a/cli/src/extract/named_outputs.rs b/cli/src/extract/named_outputs.rs new file mode 100644 index 000000000..535cde155 --- /dev/null +++ b/cli/src/extract/named_outputs.rs @@ -0,0 +1,347 @@ +use std::{ + cell::RefCell, + collections::{HashMap, HashSet}, + fs, + io::{self, Seek, Write}, + path::PathBuf, + rc::Rc, +}; + +use super::matcher::{CompiledMatcher, EntryMatcher}; +use super::receiver::{ + CompiledEntrySpec, ConcatEntry, EntryReceiver, ExtractEntry, FilesystemReceiver, +}; +use super::transform::{CompiledTransformer, NameTransformer}; +use crate::{args::extract::*, CommandError, WrapCommandErr}; + +pub fn process_entry_and_output_specs<'w>( + err: Rc>, + entry_specs: impl IntoIterator, + output_specs: OutputSpecs, +) -> Result>, CommandError> { + let mut entry_specs: Vec = entry_specs + .into_iter() + .map(ParsedEntrySpecArg::from_entry_spec) + .collect::>()?; + if entry_specs.is_empty() { + entry_specs.push(ParsedEntrySpecArg { + matcher: None, + transforms: None, + output_name: OutputName::default_name(), + }); + } + let parsed_outputs = ParsedNamedOutputs::from_output_specs(err, output_specs)?; + parsed_outputs.process_entry_specs_for_outputs(entry_specs) +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct OutputName(pub String); + +impl OutputName { + pub fn default_name() -> Self { + Self("default".to_string()) + } +} + +struct ParsedEntrySpecArg { + pub matcher: Option, + pub transforms: Option, + pub output_name: OutputName, +} + +impl ParsedEntrySpecArg { + pub fn from_entry_spec(spec: EntrySpec) -> Result { + let EntrySpec { + match_expr, + name_transforms, + content_transform, + } = spec; + let matcher = match match_expr { + None => None, + Some(expr) => Some(CompiledMatcher::from_arg(expr)?), + }; + let transforms = if name_transforms.is_empty() { + None + } else { + Some(CompiledTransformer::from_arg(name_transforms)?) 
+ }; + let output_name = match content_transform { + ContentTransform::Extract { name } => name + .map(OutputName) + .unwrap_or_else(OutputName::default_name), + }; + Ok(Self { + matcher, + transforms, + output_name, + }) + } +} + +struct NamedOutputsBuilder<'w, W> { + err: Rc>, + concats: HashMap>>, + extracts: HashMap>, + seen_stdout: bool, + seen_files: HashSet, + seen_dirs: HashSet, + seen_names: HashSet, +} + +impl<'w, W> NamedOutputsBuilder<'w, W> { + pub fn new(err: Rc>) -> Self { + Self { + err, + concats: HashMap::new(), + extracts: HashMap::new(), + seen_stdout: false, + seen_files: HashSet::new(), + seen_dirs: HashSet::new(), + seen_names: HashSet::new(), + } + } + + pub fn into_tables( + self, + ) -> ( + HashMap>>, + HashMap>, + ) { + let Self { + concats, extracts, .. + } = self; + (concats, extracts) + } + + fn add_name( + &mut self, + name: OutputName, + f: impl FnOnce() -> Result, + ) -> Result { + if self.seen_names.contains(&name) { + return Err(CommandError::InvalidArg(format!( + "output name {name:?} provided more than once" + ))); + } + + let ret = f()?; + + assert!(self.seen_names.insert(name)); + + Ok(ret) + } + + fn add_concat( + &mut self, + name: OutputName, + handle: impl Write + 'w, + ) -> Result<(), CommandError> { + /* This should be assured by the check against self.seen_names. */ + assert!(!self.concats.contains_key(&name)); + + let handle = Rc::new(RefCell::new(handle)); + + assert!(self.concats.insert(name, handle).is_none()); + + Ok(()) + } + + pub fn add_stdout(&mut self, name: OutputName) -> Result<(), CommandError> { + if self.seen_stdout { + return Err(CommandError::InvalidArg( + "--stdout output provided for more than one receiver".to_string(), + )); + } + + let handle = self.add_name(name.clone(), || Ok(io::stdout()))?; + self.add_concat(name, handle)?; + + self.seen_stdout = true; + Ok(()) + } + + fn add_seen_file(&mut self, path: PathBuf) -> Result<(), CommandError> { + let canon_path = path + .canonicalize() + .wrap_err_with(|| format!("canonicalizing path {path:?} failed"))?; + + if self.seen_files.contains(&canon_path) { + return Err(CommandError::InvalidArg(format!( + "canonical output file path {canon_path:?} provided more than once" + ))); + } + + assert!(self.seen_files.insert(canon_path)); + + Ok(()) + } + + pub fn add_file( + &mut self, + path: PathBuf, + append: bool, + name: OutputName, + ) -> Result<(), CommandError> { + let handle = self.add_name(name.clone(), || { + let mut f: fs::File = if append { + fs::OpenOptions::new() + .write(true) + .create(true) + .open(&path) + .wrap_err_with(|| format!("failed to open file for append at {path:?}"))? + } else { + fs::File::create(&path) + .wrap_err_with(|| format!("failed to open file with truncation at {path:?}"))? 
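+                /* Either way, the handle is seeked to the end just below, so
+                 * concat writes always append; for the truncating branch that
+                 * seek is a no-op on the fresh, empty file. */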
+ }; + f.seek(io::SeekFrom::End(0)) + .wrap_err_with(|| format!("failed to seek to end of opened file {f:?}"))?; + Ok(f) + })?; + self.add_seen_file(path)?; + self.add_concat(name, handle)?; + Ok(()) + } + + fn add_seen_dir(&mut self, path: PathBuf) -> Result<(), CommandError> { + let canon_path = path + .canonicalize() + .wrap_err_with(|| format!("canonicalizing dir path {path:?} failed"))?; + if self.seen_dirs.contains(&canon_path) { + return Err(CommandError::InvalidArg(format!( + "canonical output dir path {canon_path:?} provided more than once" + ))); + } + + assert!(self.seen_dirs.insert(canon_path)); + + Ok(()) + } + + fn add_extract( + &mut self, + name: OutputName, + handle: impl EntryReceiver + 'w, + ) -> Result<(), CommandError> { + assert!(!self.extracts.contains_key(&name)); + + let handle = Rc::new(handle); + + assert!(self.extracts.insert(name, handle).is_none()); + + Ok(()) + } +} + +impl<'w, W> NamedOutputsBuilder<'w, W> +where + W: Write + 'w, +{ + pub fn add_dir( + &mut self, + output_dir: PathBuf, + mkdir: bool, + name: OutputName, + ) -> Result<(), CommandError> { + let err = self.err.clone(); + let handle = self.add_name(name.clone(), || { + if mkdir { + fs::create_dir_all(&output_dir).wrap_err_with(|| { + format!("failed to create output directory {output_dir:?}") + })?; + }; + Ok(FilesystemReceiver::new(err, output_dir.clone())) + })?; + self.add_seen_dir(output_dir.clone())?; + self.add_extract(name, handle)?; + Ok(()) + } +} + +struct ParsedNamedOutputs<'w> { + concats: HashMap>>, + extracts: HashMap>, +} + +impl<'w> ParsedNamedOutputs<'w> { + pub fn process_entry_specs_for_outputs( + self, + args: impl IntoIterator, + ) -> Result>, CommandError> { + args.into_iter() + .map(|arg| self.lookup_entry_spec_arg(arg)) + .collect() + } + + fn lookup_entry_spec_arg( + &self, + arg: ParsedEntrySpecArg, + ) -> Result, CommandError> { + let ParsedEntrySpecArg { + matcher, + transforms, + output_name, + } = arg; + if let Some(stream) = self.concats.get(&output_name) { + if transforms.is_some() { + return Err(CommandError::InvalidArg(format!( + "entry name transforms do not apply to concat output {output_name:?}" + ))); + } + return Ok(CompiledEntrySpec::Concat(ConcatEntry { + matcher, + stream: stream.clone(), + })); + } + let Some(recv) = self.extracts.get(&output_name) else { + return Err(CommandError::InvalidArg(format!( + "output name {output_name:?} was not found" + ))); + }; + Ok(CompiledEntrySpec::Extract(ExtractEntry { + matcher, + transforms, + recv: recv.clone(), + })) + } + + pub fn from_output_specs( + err: Rc>, + spec: OutputSpecs, + ) -> Result { + let OutputSpecs { default, named } = spec; + + let mut builder = NamedOutputsBuilder::new(err); + + if let Some(default) = default { + let name = OutputName::default_name(); + match default { + OutputCollation::ConcatenateStdout => { + builder.add_stdout(name)?; + } + OutputCollation::ConcatenateFile { path, append } => { + builder.add_file(path, append, name)?; + } + OutputCollation::Filesystem { output_dir, mkdir } => { + builder.add_dir(output_dir, mkdir, name)?; + } + } + } + for NamedOutput { name, output } in named.into_iter() { + let name = OutputName(name); + match output { + OutputCollation::ConcatenateStdout => { + builder.add_stdout(name)?; + } + OutputCollation::ConcatenateFile { path, append } => { + builder.add_file(path, append, name)?; + } + OutputCollation::Filesystem { output_dir, mkdir } => { + builder.add_dir(output_dir, mkdir, name)?; + } + } + } + + let (concats, extracts) = 
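+            /* into_tables() drops the seen_* bookkeeping sets, which exist only
+             * to reject duplicate output names and paths while building. */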
builder.into_tables(); + Ok(Self { concats, extracts }) + } +} diff --git a/cli/src/extract/receiver.rs b/cli/src/extract/receiver.rs new file mode 100644 index 000000000..6495ccd60 --- /dev/null +++ b/cli/src/extract/receiver.rs @@ -0,0 +1,386 @@ +use std::{ + borrow::Cow, + cell::RefCell, + fmt, fs, + io::{self, Write}, + mem, + path::{Path, PathBuf}, + rc::Rc, +}; + +use zip::{ + extra_fields::{ExtendedTimestamp, ExtraField}, + read::ZipFile, + CompressionMethod, DateTime, +}; + +use super::matcher::{CompiledMatcher, EntryMatcher}; +use super::transform::{CompiledTransformer, NameTransformer}; +use crate::{CommandError, WrapCommandErr}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum EntryKind { + File, + Dir, + Symlink, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct EntryData<'a> { + pub name: &'a str, + pub kind: EntryKind, + pub compression: CompressionMethod, + pub unix_mode: Option, + pub comment: &'a str, + pub uncompressed_size: u64, + pub compressed_size: u64, + pub local_header_start: u64, + pub content_start: u64, + pub central_header_start: u64, + pub crc32: u32, + pub last_modified_time: Option, + pub extended_timestamp: Option, +} + +impl<'a> EntryData<'a> { + #[inline(always)] + pub fn from_entry<'b>(entry: &'a ZipFile<'b>) -> Self { + Self { + name: entry.name(), + kind: if entry.is_dir() { + EntryKind::Dir + } else if entry.is_symlink() { + EntryKind::Symlink + } else { + EntryKind::File + }, + compression: entry.compression(), + unix_mode: entry.unix_mode(), + comment: entry.comment(), + uncompressed_size: entry.size(), + compressed_size: entry.compressed_size(), + local_header_start: entry.header_start(), + content_start: entry.data_start(), + central_header_start: entry.central_header_start(), + crc32: entry.crc32(), + last_modified_time: entry.last_modified(), + extended_timestamp: entry + .extra_data_fields() + .find_map(|f| match f { + ExtraField::ExtendedTimestamp(ts) => Some(ts), + }) + .cloned(), + } + } + + #[inline(always)] + pub const fn content_end(&self) -> u64 { + self.content_start + self.compressed_size + } +} + +pub struct ConcatEntry<'w> { + pub matcher: Option, + pub stream: Rc>, +} + +impl<'w> fmt::Debug for ConcatEntry<'w> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ConcatEntry {{ matcher: {:?}, stream: {:p} }}", + &self.matcher, &self.stream + ) + } +} + +impl<'w> ConcatEntry<'w> { + pub fn do_match<'a>(&self, data: &EntryData<'a>) -> Option<&Rc>> { + if self + .matcher + .as_ref() + .map(|m| m.matches(data)) + .unwrap_or(true) + { + Some(&self.stream) + } else { + None + } + } +} + +#[derive(Debug)] +pub struct ExtractEntry<'w> { + pub matcher: Option, + pub transforms: Option, + pub recv: Rc, +} + +impl<'w> ExtractEntry<'w> { + pub fn do_match_and_transform<'a>( + &self, + data: &EntryData<'a>, + ) -> Option<(Cow<'a, str>, &Rc)> { + if self + .matcher + .as_ref() + .map(|m| m.matches(data)) + .unwrap_or(true) + { + let new_name = self + .transforms + .as_ref() + .map(|t| t.transform_name(data.name)) + .unwrap_or_else(|| Cow::Borrowed(data.name)); + Some((new_name, &self.recv)) + } else { + None + } + } +} + +#[derive(Debug)] +pub enum CompiledEntrySpec<'w> { + Concat(ConcatEntry<'w>), + Extract(ExtractEntry<'w>), +} + +impl<'w> CompiledEntrySpec<'w> { + pub fn try_match_and_transform<'a>( + &self, + data: &EntryData<'a>, + ) -> Option> { + match self { + Self::Concat(c) => c.do_match(data).map(MatchingEntrySpec::Concat), + Self::Extract(e) => e + 
.do_match_and_transform(data) + .map(|(n, p)| MatchingEntrySpec::Extract(n, p)), + } + } +} + +pub enum MatchingEntrySpec<'a, 'c, 'w> { + Concat(&'c Rc>), + Extract(Cow<'a, str>, &'c Rc), +} + +impl<'a, 'c, 'w> MatchingEntrySpec<'a, 'c, 'w> { + /* Split output handles for concat, and split generated handles by extract source and + * name. use Rc::ptr_eq() to split, and Cow::<'s, str>::eq() with str AsRef. */ + pub fn is_nested_duplicate( + self, + deduped_concat_writers: &mut Vec<&'c Rc>>, + deduped_matching_extracts: &mut Vec<(&'c Rc, Vec>)>, + ) -> bool { + match self { + MatchingEntrySpec::Concat(concat_writer) => { + if deduped_concat_writers + .iter() + .any(|p| Rc::ptr_eq(p, &concat_writer)) + { + true + } else { + deduped_concat_writers.push(concat_writer); + false + } + } + MatchingEntrySpec::Extract(name, extract_receiver) => { + if let Some((_, names)) = deduped_matching_extracts + .iter_mut() + .find(|(p, _)| Rc::ptr_eq(p, &extract_receiver)) + { + if names.iter().any(|n| n.as_ref() == name.as_ref()) { + true + } else { + names.push(name); + false + } + } else { + deduped_matching_extracts.push((extract_receiver, vec![name])); + false + } + } + } + } +} + +pub trait EntryReceiver: fmt::Debug { + fn generate_entry_handle<'s>( + &self, + data: &EntryData<'s>, + symlink_target: Option<&[u8]>, + name: Cow<'s, str>, + ) -> Result>, CommandError>; + + fn finalize_entries(&self) -> Result<(), CommandError>; +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg(unix)] +struct PermsEntry { + path: PathBuf, + mode: u32, +} + +pub struct FilesystemReceiver { + err: Rc>, + output_dir: PathBuf, + #[cfg(unix)] + perms_to_set: RefCell>, +} + +impl FilesystemReceiver { + pub fn new(err: Rc>, output_dir: PathBuf) -> Self { + Self { + err, + output_dir, + #[cfg(unix)] + perms_to_set: RefCell::new(Vec::new()), + } + } +} + +impl fmt::Debug for FilesystemReceiver { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "FilesystemReceiver {{ output_dir: {:?} }}", + &self.output_dir + ) + } +} + +impl FilesystemReceiver +where + W: Write, +{ + #[cfg(unix)] + fn create_or_overwrite_symlink( + err: &mut impl Write, + target: &[u8], + full_output_path: &Path, + ) -> Result<(), CommandError> { + use std::{ + ffi::OsStr, + os::unix::{ffi::OsStrExt, fs::symlink}, + }; + let target = OsStr::from_bytes(target); + writeln!(err, "entry is symlink to {target:?}, creating").unwrap(); + /* The stdlib symlink function has no functionality like OpenOptions to + * truncate a symlink if it already exists, so we have to do that ourselves + * here. 
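+         * The fallback below is: symlink() -> on AlreadyExists, remove_file()
+         * at the target path -> retry symlink() once; a failure on the retry is
+         * propagated as this entry's error.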
*/ + if let Err(e) = symlink(target, full_output_path) { + let e = match e.kind() { + io::ErrorKind::AlreadyExists => { + writeln!(err, "a file already existed at the symlink target {full_output_path:?}, removing") + .unwrap(); + fs::remove_file(full_output_path).wrap_err_with(|| { + format!("failed to remove file at symlink target {full_output_path:?}") + })?; + writeln!( + err, + "successfully removed file entry, creating symlink again" + ) + .unwrap(); + symlink(target, full_output_path).err() + } + _ => Some(e), + }; + if let Some(e) = e { + return Err(e).wrap_err_with(|| { + format!( + "failed to create symlink at {full_output_path:?} with target {target:?}" + ) + }); + } + } + Ok(()) + } +} + +impl EntryReceiver for FilesystemReceiver +where + W: Write, +{ + fn generate_entry_handle<'s>( + &self, + data: &EntryData<'s>, + symlink_target: Option<&[u8]>, + name: Cow<'s, str>, + ) -> Result>, CommandError> { + let mut err = self.err.borrow_mut(); + let full_output_path = self.output_dir.join(name.as_ref()); + writeln!( + err, + "receiving entry {} with name {name} and writing to path {full_output_path:?}", + data.name + ) + .unwrap(); + + match data.kind { + EntryKind::Dir => { + writeln!(err, "entry is directory, creating").unwrap(); + fs::create_dir_all(&full_output_path).wrap_err_with(|| { + format!("failed to create directory entry at {full_output_path:?}") + })?; + } + EntryKind::Symlink => { + let target = symlink_target.expect("we should have generated this"); + + #[cfg(unix)] + Self::create_or_overwrite_symlink(&mut *err, target, &full_output_path)?; + #[cfg(not(unix))] + todo!("TODO: cannot create symlink for entry {name} on non-unix yet!"); + } + EntryKind::File => { + writeln!(err, "entry is file, creating").unwrap(); + if let Some(containing_dir) = full_output_path.parent() { + fs::create_dir_all(containing_dir).wrap_err_with(|| { + format!("failed to create parent dirs for file at {full_output_path:?}") + })?; + } else { + writeln!(err, "entry had no parent dir (in root dir?)").unwrap(); + } + let outfile = fs::File::create(&full_output_path) + .wrap_err_with(|| format!("failed to create file at {full_output_path:?}"))?; + return Ok(Some(Box::new(outfile))); + } + } + + #[cfg(unix)] + if let Some(mode) = data.unix_mode { + writeln!( + err, + "storing unix mode {mode} for path {full_output_path:?}" + ) + .unwrap(); + self.perms_to_set.borrow_mut().push(PermsEntry { + path: full_output_path, + mode, + }); + } + + Ok(None) + } + + fn finalize_entries(&self) -> Result<(), CommandError> { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let mut perms_to_set = mem::take(&mut *self.perms_to_set.borrow_mut()); + perms_to_set.sort_unstable(); + writeln!( + &mut self.err.borrow_mut(), + "perms to set (these are done in reverse order): {perms_to_set:?}" + ) + .unwrap(); + for PermsEntry { path, mode } in perms_to_set.into_iter().rev() { + let perms = fs::Permissions::from_mode(mode); + fs::set_permissions(&path, perms.clone()) + .wrap_err_with(|| format!("error setting perms {perms:?} for path {path:?}"))?; + } + } + Ok(()) + } +} diff --git a/cli/src/extract/transform.rs b/cli/src/extract/transform.rs new file mode 100644 index 000000000..9494da36d --- /dev/null +++ b/cli/src/extract/transform.rs @@ -0,0 +1,707 @@ +use std::{borrow::Cow, collections::VecDeque, fmt, ops, path::Path, str}; + +#[cfg(feature = "rx")] +use regex; + +use super::matcher::{CaseSensitivity, SearchAnchoring}; +use crate::{args::extract::*, CommandError}; + +pub trait NameTransformer: fmt::Debug 
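+/* Sketch of intended use (hypothetical values): a transformer is compiled once
+ * from the CLI args, then applied to each matched entry name, e.g. a
+ * strip-components(1) transform maps "a/b.txt" to "b.txt"; Cow::Borrowed
+ * results let unchanged names avoid allocation. */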
{ + type Arg + where + Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized; + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str>; +} + +#[derive(Debug, Copy, Clone)] +enum Trivial { + Identity, +} + +impl NameTransformer for Trivial { + type Arg = TrivialTransform where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + TrivialTransform::Identity => Self::Identity, + }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + match self { + Self::Identity => Cow::Borrowed(name), + } + } +} + +#[derive(Debug)] +struct StripComponents { + num_components_to_strip: usize, +} + +impl NameTransformer for StripComponents { + type Arg = u8 where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { + num_components_to_strip: arg.into(), + }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + /* If no directory components, then nothing to strip. */ + if !name.contains('/') { + return Cow::Borrowed(name); + } + /* We allow stripping 0 components, which does nothing. */ + if self.num_components_to_strip == 0 { + return Cow::Borrowed(name); + } + /* Pop off prefix components until only one is left or we have stripped all the + * requested prefix components. */ + let mut remaining_to_strip = self.num_components_to_strip; + let mut separator_indices: VecDeque = + name.match_indices('/').map(|(i, _)| i).collect(); + debug_assert!(separator_indices.len() > 0); + /* Always keep the final separator, as regardless of how many we strip, we want + * to keep the basename in all cases. */ + while separator_indices.len() > 1 && remaining_to_strip > 0 { + let _ = separator_indices.pop_front().unwrap(); + remaining_to_strip -= 1; + } + debug_assert!(separator_indices.len() > 0); + let leftmost_remaining_separator_index: usize = separator_indices.pop_front().unwrap(); + Cow::Borrowed(&name[(leftmost_remaining_separator_index + 1)..]) + } +} + +#[derive(Debug)] +struct AddPrefix { + prefix_to_add: String, +} + +impl NameTransformer for AddPrefix { + type Arg = String where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { prefix_to_add: arg }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + /* We allow an empty prefix, which means to do nothing. 
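+         * Illustrative: prefix_to_add = "out" with name = "a/b.txt" yields
+         * "out/a/b.txt"; the empty-prefix early return below avoids producing
+         * a name with a leading '/'.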
*/ + if self.prefix_to_add.is_empty() { + return Cow::Borrowed(name); + } + Cow::Owned(format!("{}/{}", self.prefix_to_add, name)) + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Multiplicity { + #[default] + Single, + All, +} + +impl Multiplicity { + pub const fn from_multiple_matches_flag(multiple_matches: bool) -> Self { + match multiple_matches { + true => Self::All, + false => Self::Single, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ReplaceModifiers { + pub anchoring: SearchAnchoring, + pub case: CaseSensitivity, + pub multi: Multiplicity, +} + +impl ReplaceModifiers { + pub const fn from_flags(flags: PatternModifierFlags) -> Self { + let PatternModifierFlags { + case_insensitive, + multiple_matches, + prefix_anchored, + suffix_anchored, + } = flags; + let multi = Multiplicity::from_multiple_matches_flag(multiple_matches); + let case = CaseSensitivity::from_case_insensitive_flag(case_insensitive); + let anchoring = SearchAnchoring::from_prefix_suffix_flags(prefix_anchored, suffix_anchored); + Self { + anchoring, + case, + multi, + } + } +} + +trait PatternTransformer: fmt::Debug { + type Replacement + where + Self: Sized; + fn create( + pattern: String, + opts: ReplaceModifiers, + rep: Self::Replacement, + ) -> Result + where + Self: Sized; + + fn replace<'s>(&self, input: &'s str) -> Cow<'s, str>; +} + +#[derive(Debug)] +struct LiteralTransformer { + lit: String, + case: CaseSensitivity, + anchoring: SearchAnchoring, + multi: Multiplicity, + rep: String, +} + +impl LiteralTransformer { + fn format_single_replacement<'s>( + input: &'s str, + lit_len: usize, + rep: &str, + match_index: usize, + ) -> Cow<'s, str> { + /* If the replacement is empty, we have the opportunity to return a borrowed Cow. */ + if rep.is_empty() { + /* Remove the prefix alone! */ + if match_index == 0 { + return Cow::Borrowed(&input[lit_len..]); + } + /* Remove the suffix alone! */ + if match_index == input.len() - lit_len { + return Cow::Borrowed(&input[..match_index]); + } + } + /* Otherwise, we allocate a new string. */ + Cow::Owned(format!( + "{}{}{}", + &input[..match_index], + rep, + &input[(match_index + lit_len)..] + )) + } + + fn replace_single_anchored<'s>( + input: &'s str, + lit: &str, + rep: &str, + range: ops::Range, + case: CaseSensitivity, + ) -> Cow<'s, str> { + let sub = &input[range.clone()]; + if case.string_equal(lit, sub) { + Self::format_single_replacement(input, lit.len(), rep, range.start) + } else { + Cow::Borrowed(input) + } + } + + fn replace_single_exact<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + match input.find(lit) { + None => Cow::Borrowed(input), + Some(i) => Self::format_single_replacement(input, lit.len(), rep, i), + } + } + + fn replace_single_icase<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + /* NB: literal was already changed to uppercase upon construction in Self::create()! 
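+         * Offsets found in the uppercased copy line up with the original input
+         * because to_ascii_uppercase() maps bytes one-to-one; e.g. lit "B"
+         * matches input "abc" at index 1 via "ABC".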
*/
+        match input.to_ascii_uppercase().find(&lit) {
+            None => Cow::Borrowed(input),
+            Some(i) => Self::format_single_replacement(input, lit.len(), rep, i),
+        }
+    }
+
+    fn format_multiple_replacements<'s>(
+        input: &'s str,
+        lit_len: usize,
+        rep: &str,
+        match_indices: Vec<usize>,
+    ) -> Cow<'s, str> {
+        if match_indices.is_empty() {
+            return Cow::Borrowed(input);
+        }
+        if match_indices.len() == 1 {
+            return Self::format_single_replacement(input, lit_len, rep, match_indices[0]);
+        }
+        let expected_len: usize =
+            input.len() - (lit_len * match_indices.len()) + (rep.len() * match_indices.len());
+        let mut ret = String::with_capacity(expected_len);
+        let mut last_source_position: usize = 0;
+        for i in match_indices.into_iter() {
+            ret.push_str(&input[last_source_position..i]);
+            ret.push_str(rep);
+            last_source_position = i + lit_len;
+        }
+        /* Copy over the tail after the final match, or the length assertion below fails. */
+        ret.push_str(&input[last_source_position..]);
+        assert_eq!(ret.len(), expected_len);
+        Cow::Owned(ret)
+    }
+
+    fn replace_multiple_exact<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> {
+        let match_indices: Vec<usize> = input.match_indices(lit).map(|(i, _)| i).collect();
+        Self::format_multiple_replacements(input, lit.len(), rep, match_indices)
+    }
+
+    fn replace_multiple_icase<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> {
+        let match_indices: Vec<usize> = input
+            .to_ascii_uppercase()
+            /* NB: literal was already changed to uppercase upon construction in Self::create()! */
+            .match_indices(&lit)
+            .map(|(i, _)| i)
+            .collect();
+        Self::format_multiple_replacements(input, lit.len(), rep, match_indices)
+    }
+}
+
+impl PatternTransformer for LiteralTransformer {
+    type Replacement = String where Self: Sized;
+    fn create(
+        pattern: String,
+        opts: ReplaceModifiers,
+        rep: Self::Replacement,
+    ) -> Result<Self, CommandError>
+    where
+        Self: Sized,
+    {
+        let ReplaceModifiers {
+            case,
+            anchoring,
+            multi,
+        } = opts;
+
+        if matches!(multi, Multiplicity::All) && !matches!(anchoring, SearchAnchoring::Unanchored) {
+            return Err(CommandError::InvalidArg(format!(
+                "multimatch replacement with :g is not supported with anchoring flags :p or :s for literal transforms: {opts:?} {pattern:?}"
+            )));
+        }
+
+        Ok(Self {
+            lit: match case {
+                CaseSensitivity::Sensitive => pattern,
+                CaseSensitivity::Insensitive => pattern.to_ascii_uppercase(),
+            },
+            case,
+            anchoring,
+            multi,
+            rep,
+        })
+    }
+
+    fn replace<'s>(&self, input: &'s str) -> Cow<'s, str> {
+        /* An empty literal or empty input is allowed; matching just does nothing. */
+        if self.lit.is_empty() || input.is_empty() {
+            return Cow::Borrowed(input);
+        }
+        /* A literal longer than the input can never match.
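+         * (E.g. a hypothetical literal "foobar" has no possible match within the shorter name "foo".)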
*/ + if self.lit.len() > input.len() { + return Cow::Borrowed(input); + } + + match self.multi { + Multiplicity::Single => match self.anchoring { + SearchAnchoring::DoublyAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + 0..input.len(), + self.case, + ), + SearchAnchoring::LeftAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + 0..self.lit.len(), + self.case, + ), + SearchAnchoring::RightAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + (input.len() - self.lit.len())..input.len(), + self.case, + ), + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Sensitive => { + Self::replace_single_exact(input, &self.lit, &self.rep) + } + CaseSensitivity::Insensitive => { + Self::replace_single_icase(input, &self.lit, &self.rep) + } + }, + }, + Multiplicity::All => match self.anchoring { + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Sensitive => { + Self::replace_multiple_exact(input, &self.lit, &self.rep) + } + CaseSensitivity::Insensitive => { + Self::replace_multiple_icase(input, &self.lit, &self.rep) + } + }, + _ => unreachable!("checked during construction"), + }, + } + } +} + +#[derive(Debug)] +#[cfg(feature = "rx")] +struct RegexpTransformer { + pat: regex::Regex, + multi: Multiplicity, + rep: String, +} + +#[cfg(feature = "rx")] +impl PatternTransformer for RegexpTransformer { + type Replacement = String where Self: Sized; + fn create( + pattern: String, + opts: ReplaceModifiers, + rep: Self::Replacement, + ) -> Result + where + Self: Sized, + { + let ReplaceModifiers { + case, + anchoring, + multi, + } = opts; + let pattern = anchoring.wrap_regex_pattern(&pattern); + + let pat = regex::RegexBuilder::new(&pattern) + .case_insensitive(match case { + CaseSensitivity::Insensitive => true, + CaseSensitivity::Sensitive => false, + }) + .build() + .map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct regex replacer from search pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat, multi, rep }) + } + + fn replace<'s>(&self, input: &'s str) -> Cow<'s, str> { + match self.multi { + Multiplicity::Single => self.pat.replace(input, &self.rep), + Multiplicity::All => self.pat.replace_all(input, &self.rep), + } + } +} + +pub enum ComponentSplit<'s> { + LeftAnchored { + selected_left: &'s str, + right: &'s str, + }, + RightAnchored { + left: &'s str, + selected_right: &'s str, + }, + Whole(&'s str), +} + +impl<'s> ComponentSplit<'s> { + #[inline(always)] + pub fn split_by_component_selector(sel: ComponentSelector, name: &'s str) -> Option { + let path = Path::new(name); + match sel { + ComponentSelector::Path => Some(ComponentSplit::Whole(name)), + ComponentSelector::Basename => path + .file_name() + .map(|bname| bname.to_str().unwrap()) + .map(|bname| name.split_at(name.len() - bname.len())) + .map(|(pfx, bname)| ComponentSplit::RightAnchored { + left: pfx, + selected_right: bname, + }), + ComponentSelector::Dirname => path + .parent() + .map(|p| p.to_str().unwrap()) + /* "a".parent() becomes Some(""), which we want to treat as no parent */ + .filter(|s| !s.is_empty()) + .map(|dirname| name.split_at(dirname.len())) + .map(|(dirname, sfx)| ComponentSplit::LeftAnchored { + selected_left: dirname, + right: sfx, + }), + ComponentSelector::FileExtension => path + .extension() + .map(|ext| ext.to_str().unwrap()) + .map(|ext| name.split_at(name.len() - ext.len())) + .map(|(pfx, ext)| ComponentSplit::RightAnchored { + left: pfx, + selected_right: ext, + }), + } + } 
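+
+    /* Illustrative splits for the (hypothetical) name "dir/file.txt":
+     *   Path          => Whole("dir/file.txt")
+     *   Basename      => RightAnchored { left: "dir/", selected_right: "file.txt" }
+     *   Dirname       => LeftAnchored { selected_left: "dir", right: "/file.txt" }
+     *   FileExtension => RightAnchored { left: "dir/file.", selected_right: "txt" }
+     */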
+} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum SubstringAnchoring { + RetainsLeftAnchor, + RetainsRightAnchor, + RetainsBothAnchors, + LosesBothAnchors, +} + +impl SubstringAnchoring { + #[inline(always)] + pub fn analyze<'s, 't>(parent: &'s str, sub: &'t str) -> Self + where + 't: 's, + { + let p = parent.as_bytes().as_ptr_range(); + let s = sub.as_bytes().as_ptr_range(); + assert!(s.start >= p.start); + assert!(s.end <= p.end); + if p.start == s.start { + if p.end == s.end { + debug_assert_eq!(parent, sub); + Self::RetainsBothAnchors + } else { + Self::RetainsLeftAnchor + } + } else { + if p.end == s.end { + Self::RetainsRightAnchor + } else { + Self::LosesBothAnchors + } + } + } + + #[inline(always)] + pub fn split_then_transform_then_reformulate<'s>( + input: &'s str, + split: impl FnOnce(&'s str) -> Option>, + transform: impl FnOnce(&'s str) -> Cow<'s, str>, + ) -> Cow<'s, str> { + let components = match split(input) { + /* If the given name doesn't have the specified component, return it unchanged. */ + None => return Cow::Borrowed(input), + Some(s) => s, + }; + match components { + /* If there was no splitting (the whole path was selected), then we don't need to do + * any work to hook things back up! */ + ComponentSplit::Whole(s) => transform(s), + /* If there was splitting, we need to do more work. */ + ComponentSplit::LeftAnchored { + selected_left, + right, + } => match transform(selected_left) { + /* If we reallocated, then we have to reallocate the whole thing, so reuse the + * returned String. */ + Cow::Owned(mut new_left) => { + new_left.push_str(right); + Cow::Owned(new_left) + } + /* If no reallocation, we now have to figure out whether the result is still + * contiguous. */ + Cow::Borrowed(left_sub) => match Self::analyze(selected_left, left_sub) { + Self::RetainsBothAnchors => Cow::Borrowed(input), + Self::RetainsRightAnchor => { + Cow::Borrowed(Self::join_adjacent_strings(input, left_sub, right)) + } + _ => Cow::Owned(format!("{}{}", left_sub, right)), + }, + }, + ComponentSplit::RightAnchored { + left, + selected_right, + } => match transform(selected_right) { + Cow::Owned(mut new_right) => { + new_right.insert_str(0, left); + Cow::Owned(new_right) + } + Cow::Borrowed(right_sub) => match Self::analyze(selected_right, right_sub) { + Self::RetainsBothAnchors => Cow::Borrowed(input), + Self::RetainsLeftAnchor => { + Cow::Borrowed(Self::join_adjacent_strings(input, left, right_sub)) + } + _ => Cow::Owned(format!("{}{}", left, right_sub)), + }, + }, + } + } + + #[inline(always)] + fn join_adjacent_strings<'s, 't>(parent: &'s str, left: &'t str, right: &'t str) -> &'s str + where + 't: 's, + { + let parent_range = parent.as_bytes().as_ptr_range(); + let left = left.as_bytes().as_ptr_range(); + debug_assert!(left.start >= parent_range.start && left.end <= parent_range.end); + let right = right.as_bytes().as_ptr_range(); + debug_assert!(right.start >= parent_range.start && right.end <= parent_range.end); + debug_assert_eq!(left.end, right.start); + let start_offset = (left.start as usize) - (parent_range.start as usize); + let end_offset = (parent_range.end as usize) - (right.end as usize); + &parent[start_offset..(parent.len() - end_offset)] + } +} + +#[derive(Debug)] +struct ComponentTransformer { + pattern_trans: Box, + comp_sel: ComponentSelector, +} + +impl NameTransformer for ComponentTransformer { + type Arg = TransformArg where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + let TransformArg { + 
comp_sel,
+            pat_sel: PatternSelector { pat_sel, modifiers },
+            pattern,
+            replacement_spec,
+        } = arg;
+
+        let opts = ReplaceModifiers::from_flags(modifiers);
+        let pattern_trans: Box<dyn PatternTransformer> = match pat_sel {
+            PatternSelectorType::Glob => {
+                return Err(CommandError::InvalidArg(format!(
+                    "glob patterns are not supported for name transformations: {pattern:?}"
+                )));
+            }
+            PatternSelectorType::Literal => {
+                Box::new(LiteralTransformer::create(pattern, opts, replacement_spec)?)
+            }
+            PatternSelectorType::Regexp => {
+                #[cfg(feature = "rx")]
+                {
+                    Box::new(RegexpTransformer::create(pattern, opts, replacement_spec)?)
+                }
+                #[cfg(not(feature = "rx"))]
+                {
+                    return Err(CommandError::InvalidArg(format!(
+                        "regexp patterns were requested, but this binary was built without the \"rx\" feature: {pattern:?}"
+                    )));
+                }
+            }
+        };
+
+        Ok(Self {
+            pattern_trans,
+            comp_sel,
+        })
+    }
+
+    fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> {
+        SubstringAnchoring::split_then_transform_then_reformulate(
+            name,
+            move |name| ComponentSplit::split_by_component_selector(self.comp_sel, name),
+            |name| self.pattern_trans.replace(name),
+        )
+    }
+}
+
+#[derive(Debug)]
+pub struct CompiledTransformer {
+    transformers: Vec<Box<dyn NameTransformer>>,
+}
+
+impl CompiledTransformer {
+    fn make_single(trans: NameTransform) -> Result<Box<dyn NameTransformer>, CommandError> {
+        Ok(match trans {
+            NameTransform::Trivial(arg) => Box::new(Trivial::from_arg(arg)?),
+            NameTransform::Basic(basic_trans) => match basic_trans {
+                BasicTransform::StripComponents(arg) => Box::new(StripComponents::from_arg(arg)?),
+                BasicTransform::AddPrefix(arg) => Box::new(AddPrefix::from_arg(arg)?),
+            },
+            NameTransform::Complex(complex_trans) => match complex_trans {
+                ComplexTransform::Transform(arg) => Box::new(ComponentTransformer::from_arg(arg)?),
+            },
+        })
+    }
+}
+
+impl NameTransformer for CompiledTransformer {
+    type Arg = Vec<NameTransform> where Self: Sized;
+    fn from_arg(arg: Self::Arg) -> Result<Self, CommandError>
+    where
+        Self: Sized,
+    {
+        assert!(!arg.is_empty());
+        Ok(Self {
+            transformers: arg
+                .into_iter()
+                .map(Self::make_single)
+                .collect::<Result<_, _>>()?,
+        })
+    }
+
+    /// Transform the name from the zip entry, maintaining a few invariants:
+    /// 1. If the transformations all return substrings (no prefixing, no non-empty
+    ///    replacements, and no empty replacements that leave non-contiguous input chunks),
+    ///    return a slice of the original input, pointing back to the ZipFile's memory
+    ///    location with associated lifetime.
+    /// 2. If some intermediate transformation requires an allocation (e.g. adding a prefix), do
+    ///    not perform intermediate reallocations for subsequent substring-only transformations.
+    ///    - TODO: The returned string may be reallocated from the initial allocation exactly once
+    ///      at the end, if substring-only transformations reduced its length. This is because Cow
+    ///      can only describe a substring of the original input or an entirely new allocated
+    ///      string, as opposed to a more general sort of string view wrapper.
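+    ///
+    /// For example (names hypothetical): stripping one component and then adding the
+    /// prefix "new" maps "a/b/c.txt" to "new/b/c.txt" with a single allocation; the
+    /// intermediate "b/c.txt" is a borrowed substring of the original name.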
+ fn transform_name<'s>(&self, mut original_name: &'s str) -> Cow<'s, str> { + let mut newly_allocated_name: Option = None; + let mut newly_allocated_str: Option<&str> = None; + for transformer in self.transformers.iter() { + match newly_allocated_str { + Some(s) => match transformer.transform_name(s) { + Cow::Borrowed(t) => { + let _ = newly_allocated_str.replace(t); + } + Cow::Owned(t) => { + assert!(newly_allocated_name.replace(t).is_some()); + newly_allocated_str = Some(newly_allocated_name.as_ref().unwrap().as_str()); + } + }, + None => match transformer.transform_name(original_name) { + Cow::Borrowed(t) => { + original_name = t; + } + Cow::Owned(t) => { + assert!(newly_allocated_name.replace(t).is_none()); + newly_allocated_str = Some(newly_allocated_name.as_ref().unwrap().as_str()); + } + }, + } + } + + if newly_allocated_name.is_none() { + /* If we have never allocated anything new, just return the substring of the original + * name! */ + Cow::Borrowed(original_name) + } else { + let subref = newly_allocated_str.unwrap(); + /* If the active substring is the same length as the backing string, assume it's + * unchanged, so we can return the backing string without reallocating. */ + if subref.len() == newly_allocated_name.as_ref().unwrap().len() { + Cow::Owned(newly_allocated_name.unwrap()) + } else { + let reallocated_string = subref.to_string(); + Cow::Owned(reallocated_string) + } + } + } +} diff --git a/cli/src/info.rs b/cli/src/info.rs new file mode 100644 index 000000000..4a206bdce --- /dev/null +++ b/cli/src/info.rs @@ -0,0 +1,167 @@ +use std::{ + fs, + io::{self, Write}, + path::PathBuf, +}; + +use zip::read::ZipArchive; + +use crate::{ + args::{extract::InputSpec, info::*}, + extract::{ + entries::{IterateEntries, StreamInput, ZipFileInput}, + matcher::{CompiledMatcher, EntryMatcher}, + receiver::EntryData, + }, + CommandError, WrapCommandErr, +}; + +mod directives; +mod formats; +use directives::{ + archive::{ + compiled::{CompiledArchiveDirective, CompiledArchiveFormat}, + ArchiveData, + }, + compiled::CompiledFormatSpec, + entry::compiled::{CompiledEntryDirective, CompiledEntryFormat}, +}; + +pub struct ArchiveWithPath { + pub path: PathBuf, + pub len: u64, + pub archive: ZipArchive, +} + +impl ArchiveWithPath { + pub fn open(path: PathBuf) -> Result { + let f = fs::File::open(&path) + .wrap_err_with(|| format!("failed to open zip input file path {:?}", &path))?; + let len = f + .metadata() + .wrap_err("failed to extract file metadata")? + .len(); + let archive = ZipArchive::new(f) + .wrap_err_with(|| format!("failed to create zip archive from file {:?}", &path))?; + Ok(Self { path, len, archive }) + } +} + +fn format_entry_info( + mut err: impl Write, + entry_formatter: &CompiledFormatSpec, + matcher: Option<&CompiledMatcher>, + mut output_stream: impl Write, + source: &mut impl IterateEntries, +) -> Result<(), CommandError> { + if entry_formatter.is_empty() { + writeln!( + &mut err, + "empty entry format, skipping reading from any entries" + ) + .unwrap(); + return Ok(()); + } + + while let Some(entry) = source.next_entry()? 
{ + let data = EntryData::from_entry(&entry); + if matcher.as_ref().is_some_and(|m| !m.matches(&data)) { + writeln!(&mut err, "matcher ignored entry: {:?}", data.name).unwrap(); + continue; + } + entry_formatter.execute_format(data, &mut output_stream)?; + } + Ok(()) +} + +fn format_archive_info( + mut err: impl Write, + archive_formatter: &CompiledFormatSpec, + mut output_stream: impl Write, + zip: ArchiveData, +) -> Result<(), CommandError> { + if archive_formatter.is_empty() { + writeln!(&mut err, "empty archive format, skipping archive overview").unwrap(); + return Ok(()); + } + + archive_formatter.execute_format(zip, &mut output_stream)?; + Ok(()) +} + +pub fn execute_info(mut err: impl Write, args: Info) -> Result<(), CommandError> { + let Info { + format_spec, + match_expr, + input_spec: InputSpec { + stdin_stream, + zip_paths, + }, + } = args; + + let matcher = match match_expr { + None => None, + Some(expr) => Some(CompiledMatcher::from_arg(expr)?), + }; + let (archive_formatter, entry_formatter) = match format_spec { + FormatSpec::Compact => todo!(), + FormatSpec::Extended => todo!(), + FormatSpec::Custom { overview, entry } => ( + CompiledFormatSpec::from_spec::(overview)?, + CompiledFormatSpec::from_spec::(entry)?, + ), + }; + let mut output_stream = io::stdout().lock(); + + if stdin_stream { + let mut stdin = StreamInput::new(io::stdin().lock()); + + format_entry_info( + &mut err, + &entry_formatter, + matcher.as_ref(), + &mut output_stream, + &mut stdin, + )?; + + let (stdin, num_entries) = stdin.into_inner(); + /* NB: The read_zipfile_from_stream() method overruns the size of a single local header into + * the CDE after reading the last input. There are unstable APIs to address this, but for + * now just rely on that internal knowledge. See e.g. zip::read::stream on master or + * zip::unstable::read in https://github.com/zip-rs/zip2/pull/233. 
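+         * (The 30 subtracted below is the fixed size of a zip local file header: the
+         * stream reader overruns by exactly one such header when it probes for the next
+         * entry and finds the central directory instead.)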
*/ + let cde_start = stdin.current_bytes_read() - 30; + let (_stdin, stream_length) = stdin + .exhaust() + .wrap_err("failed to exhaust all of stdin after reading all zip entries")?; + + let data = ArchiveData { + path: None, + stream_length, + num_entries, + comment: None, + first_entry_start: Some(0), + central_directory_start: Some(cde_start), + }; + format_archive_info(&mut err, &archive_formatter, &mut output_stream, data)?; + } + + for p in zip_paths.into_iter() { + let mut zip = ArchiveWithPath::open(p)?; + + { + let mut zip_entry_counter = ZipFileInput::new(&mut zip.archive); + format_entry_info( + &mut err, + &entry_formatter, + matcher.as_ref(), + &mut output_stream, + &mut zip_entry_counter, + )?; + } + + let data = ArchiveData::from_archive_with_path(&zip); + format_archive_info(&mut err, &archive_formatter, &mut output_stream, data)?; + } + + Ok(()) +} diff --git a/cli/src/info/directives.rs b/cli/src/info/directives.rs new file mode 100644 index 000000000..e4e3e5bfd --- /dev/null +++ b/cli/src/info/directives.rs @@ -0,0 +1,703 @@ +use std::{ + fmt, + io::{self, Write}, +}; + +use super::formats::FormatValue; +use crate::{ + args::info::{ParseableDirective, ParseableFormatComponent, ParseableFormatSpec}, + CommandError, WrapCommandErr, +}; + +pub trait Writeable { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error>; +} + +impl Writeable for S +where + S: fmt::Display, +{ + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + write!(out, "{}", self) + } +} + +pub trait FormatDirective { + type Data<'a>; + type FieldType: FormatValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a>; + fn value_formatter(&self) -> Self::FieldType; + + fn format_field<'a>( + &self, + data: Self::Data<'a>, + ) -> Result<::Output<'a>, ::E> + { + self.value_formatter() + .format_value(self.extract_field(data)) + } +} + +/// Wrap a [`FormatDirective`] and write it to a stream. This isn't directly type-eraseable, but it +/// removes one layer of polymorphism to enable us to do that in a subsequent wrapper trait. 
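+///
+/// (`FormatDirective::FieldType` has a lifetime-parameterized output type, so that trait cannot
+/// be boxed as `dyn` directly; once the formatted value has been written to the `dyn Write`
+/// here, only the `Data<'a>` parameterization remains for the per-data wrapper traits in the
+/// `entry` and `archive` submodules to erase.)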
+pub trait DirectiveFormatter { + type Data<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; +} + +impl DirectiveFormatter for FD +where + FD: FormatDirective, + for<'a> <::FieldType as FormatValue>::Output<'a>: Writeable + fmt::Debug, + <::FieldType as FormatValue>::E: fmt::Display, +{ + type Data<'a> = ::Data<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + let output = self + .format_field(data) + .map_err(|e| CommandError::InvalidData(format!("error formatting field: {e}")))?; + output + .write_to(out) + .wrap_err_with(|| format!("failed to write output to stream: {output:?}")) + } +} + +pub mod compiled { + use super::*; + + enum CompiledFormatComponent { + Directive(F), + ContiguousLiteral(String), + } + + impl CompiledFormatComponent + where + F: DirectiveFormatter, + { + pub fn write_component<'a>( + &self, + data: ::Data<'a>, + mut out: impl Write, + ) -> Result<(), CommandError> { + match self { + Self::Directive(d) => d.write_directive(data, &mut out), + Self::ContiguousLiteral(lit) => out + .write_all(lit.as_bytes()) + .wrap_err_with(|| format!("failed to write literal {lit:?} to output")), + } + } + } + + pub trait CompiledFormat { + type Spec: ParseableDirective; + type Fmt: DirectiveFormatter; + + fn from_directive_spec(spec: Self::Spec) -> Result; + } + + pub struct CompiledFormatSpec { + components: Vec>, + } + + impl CompiledFormatSpec { + pub fn is_empty(&self) -> bool { + self.components.is_empty() + } + } + + impl CompiledFormatSpec + where + F: DirectiveFormatter, + { + pub fn from_spec( + spec: ParseableFormatSpec<::Spec>, + ) -> Result + where + CF: CompiledFormat, + { + let ParseableFormatSpec { + components: spec_components, + } = spec; + + let mut components: Vec> = Vec::new(); + for c in spec_components.into_iter() { + match c { + ParseableFormatComponent::Directive(d) => { + let d = CF::from_directive_spec(d)?; + components.push(CompiledFormatComponent::Directive(d)); + } + ParseableFormatComponent::Escaped(s) => match components.last_mut() { + Some(CompiledFormatComponent::ContiguousLiteral(ref mut last_lit)) => { + last_lit.push_str(s); + } + _ => { + components + .push(CompiledFormatComponent::ContiguousLiteral(s.to_string())); + } + }, + ParseableFormatComponent::Literal(new_lit) => match components.last_mut() { + Some(CompiledFormatComponent::ContiguousLiteral(ref mut last_lit)) => { + last_lit.push_str(new_lit.as_str()); + } + _ => { + components.push(CompiledFormatComponent::ContiguousLiteral(new_lit)); + } + }, + } + } + + Ok(Self { components }) + } + + pub fn execute_format<'a>( + &self, + data: ::Data<'a>, + mut out: impl Write, + ) -> Result<(), CommandError> + where + ::Data<'a>: Clone, + { + for c in self.components.iter() { + c.write_component(data.clone(), &mut out)? 
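+                /* Each directive takes its own clone of `data`; the data types used here
+                 * are shallow structs of borrowed references, so cloning is cheap. */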
+ } + Ok(()) + } + } +} + +pub mod entry { + use super::{ + super::formats::{ + BinaryNumericValue, BinaryStringValue, ByteSizeValue, CompressionMethodValue, + FileTypeValue, FormatValue, NameString, OffsetValue, TimestampValue, UnixModeValue, + }, + FormatDirective, + }; + use crate::extract::receiver::EntryData; + + pub struct EntryNameField(pub NameString); + + impl FormatDirective for EntryNameField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = NameString; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.name + } + fn value_formatter(&self) -> NameString { + self.0 + } + } + + pub struct FileTypeField(pub FileTypeValue); + + impl FormatDirective for FileTypeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = FileTypeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.kind + } + fn value_formatter(&self) -> FileTypeValue { + self.0 + } + } + + pub struct EntryCommentField(pub BinaryStringValue); + + impl FormatDirective for EntryCommentField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = BinaryStringValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.comment.as_bytes()) + } + fn value_formatter(&self) -> BinaryStringValue { + self.0 + } + } + + pub struct LocalHeaderStartField(pub OffsetValue); + + impl FormatDirective for LocalHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.local_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct ContentStartField(pub OffsetValue); + + impl FormatDirective for ContentStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct UncompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for UncompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.uncompressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct CompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for CompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.compressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct ContentEndField(pub OffsetValue); + + impl FormatDirective for ContentEndField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_end()) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CentralHeaderStartField(pub OffsetValue); + + impl FormatDirective for CentralHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.central_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CompressionMethodField(pub CompressionMethodValue); + + impl FormatDirective for CompressionMethodField { + type Data<'a> = &'a 
EntryData<'a>; + type FieldType = CompressionMethodValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.compression + } + fn value_formatter(&self) -> CompressionMethodValue { + self.0 + } + } + + pub struct UnixModeField(pub UnixModeValue); + + impl FormatDirective for UnixModeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = UnixModeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.unix_mode + } + fn value_formatter(&self) -> UnixModeValue { + self.0 + } + } + + pub struct Crc32Field(pub BinaryNumericValue); + + impl FormatDirective for Crc32Field { + type Data<'a> = &'a EntryData<'a>; + type FieldType = BinaryNumericValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.crc32 + } + fn value_formatter(&self) -> BinaryNumericValue { + self.0 + } + } + + pub struct TimestampField(pub TimestampValue); + + impl FormatDirective for TimestampField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = TimestampValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.last_modified_time + } + fn value_formatter(&self) -> TimestampValue { + self.0 + } + } + + pub mod compiled { + use super::{ + super::{compiled::CompiledFormat, DirectiveFormatter}, + *, + }; + use crate::{args::info::EntryFormatDirective, CommandError}; + + use std::io::Write; + + /// Used for type erasure by removing the lifetime-bounded associated type. + trait EntryDirectiveFormatter { + fn write_entry_directive<'a>( + &self, + data: &EntryData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; + } + + impl EntryDirectiveFormatter for CF + where + CF: for<'a> DirectiveFormatter = &'a EntryData<'a>>, + { + fn write_entry_directive<'a>( + &self, + data: &EntryData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.write_directive(data, out) + } + } + + /// This re-implements the generic trait using the type-erased boxed vtable. 
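+        /// Each parsed `EntryFormatDirective` compiles to exactly one boxed value of this
+        /// type, so a format spec costs one virtual call per directive per entry.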
+ pub struct CompiledEntryDirective(Box); + + impl DirectiveFormatter for CompiledEntryDirective { + type Data<'a> = EntryData<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.0.write_entry_directive(&data, out) + } + } + + pub struct CompiledEntryFormat; + + impl CompiledFormat for CompiledEntryFormat { + type Spec = EntryFormatDirective; + type Fmt = CompiledEntryDirective; + + fn from_directive_spec( + spec: EntryFormatDirective, + ) -> Result { + Ok(CompiledEntryDirective(match spec { + EntryFormatDirective::Name => Box::new(EntryNameField(NameString)), + EntryFormatDirective::FileType(f) => Box::new(FileTypeField(FileTypeValue(f))), + EntryFormatDirective::CompressedSize(f) => { + Box::new(CompressedSizeField(ByteSizeValue(f))) + } + EntryFormatDirective::UncompressedSize(f) => { + Box::new(UncompressedSizeField(ByteSizeValue(f))) + } + EntryFormatDirective::UnixMode(f) => Box::new(UnixModeField(UnixModeValue(f))), + EntryFormatDirective::CompressionMethod(f) => { + Box::new(CompressionMethodField(CompressionMethodValue(f))) + } + EntryFormatDirective::Comment(f) => { + Box::new(EntryCommentField(BinaryStringValue(f))) + } + EntryFormatDirective::LocalHeaderStart(f) => { + Box::new(LocalHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentStart(f) => { + Box::new(ContentStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentEnd(f) => { + Box::new(ContentEndField(OffsetValue(f))) + } + EntryFormatDirective::CentralHeaderStart(f) => { + Box::new(CentralHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::CrcValue(f) => { + Box::new(Crc32Field(BinaryNumericValue(f))) + } + EntryFormatDirective::Timestamp(f) => { + Box::new(TimestampField(TimestampValue(f))) + } + })) + } + } + } +} + +pub mod archive { + use super::{ + super::{ + formats::{ + BinaryStringValue, ByteSizeValue, DecimalNumberValue, FormatValue, OffsetValue, + PathString, + }, + ArchiveWithPath, + }, + FormatDirective, + }; + + use std::path::Path; + + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub struct ArchiveData<'a> { + pub path: Option<&'a Path>, + pub stream_length: u64, + pub num_entries: usize, + pub comment: Option<&'a [u8]>, + pub first_entry_start: Option, + pub central_directory_start: Option, + } + + impl<'a> ArchiveData<'a> { + pub fn from_archive_with_path(zip: &'a ArchiveWithPath) -> Self { + Self { + path: Some(zip.path.as_path()), + stream_length: zip.len, + num_entries: zip.archive.len(), + comment: Some(zip.archive.comment()), + first_entry_start: Some(zip.archive.offset()), + central_directory_start: Some(zip.archive.central_directory_start()), + } + } + } + + pub struct ArchiveNameField(pub PathString); + + impl FormatDirective for ArchiveNameField { + type Data<'a> = ArchiveData<'a>; + type FieldType = PathString; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.path + } + fn value_formatter(&self) -> PathString { + self.0 + } + } + + pub struct ArchiveSizeField(pub ByteSizeValue); + + impl FormatDirective for ArchiveSizeField { + type Data<'a> = ArchiveData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.stream_length + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct NumEntriesField(pub DecimalNumberValue); + + impl FormatDirective for NumEntriesField { + type Data<'a> = ArchiveData<'a>; + type FieldType = DecimalNumberValue; + fn 
extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.num_entries.try_into().unwrap() + } + fn value_formatter(&self) -> DecimalNumberValue { + self.0 + } + } + + pub struct ArchiveCommentField(pub BinaryStringValue); + + impl FormatDirective for ArchiveCommentField { + type Data<'a> = ArchiveData<'a>; + type FieldType = BinaryStringValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.comment + } + fn value_formatter(&self) -> BinaryStringValue { + self.0 + } + } + + pub struct FirstEntryStartField(pub OffsetValue); + + impl FormatDirective for FirstEntryStartField { + type Data<'a> = ArchiveData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.first_entry_start + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CentralDirectoryStartField(pub OffsetValue); + + impl FormatDirective for CentralDirectoryStartField { + type Data<'a> = ArchiveData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.central_directory_start + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub mod compiled { + use super::{ + super::{compiled::CompiledFormat, DirectiveFormatter}, + *, + }; + use crate::{args::info::ArchiveOverviewFormatDirective, CommandError}; + + use std::io::Write; + + trait ArchiveDirectiveFormatter { + fn write_archive_directive<'a>( + &self, + data: ArchiveData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; + } + + impl ArchiveDirectiveFormatter for CF + where + CF: for<'a> DirectiveFormatter = ArchiveData<'a>>, + { + fn write_archive_directive<'a>( + &self, + data: ArchiveData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.write_directive(data, out) + } + } + + pub struct CompiledArchiveDirective(Box); + + impl DirectiveFormatter for CompiledArchiveDirective { + type Data<'a> = ArchiveData<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.0.write_archive_directive(data, out) + } + } + + pub struct CompiledArchiveFormat; + + impl CompiledFormat for CompiledArchiveFormat { + type Spec = ArchiveOverviewFormatDirective; + type Fmt = CompiledArchiveDirective; + + fn from_directive_spec( + spec: ArchiveOverviewFormatDirective, + ) -> Result { + Ok(CompiledArchiveDirective(match spec { + ArchiveOverviewFormatDirective::ArchiveName => { + Box::new(ArchiveNameField(PathString)) + } + ArchiveOverviewFormatDirective::TotalSize(f) => { + Box::new(ArchiveSizeField(ByteSizeValue(f))) + } + ArchiveOverviewFormatDirective::NumEntries => { + Box::new(NumEntriesField(DecimalNumberValue)) + } + ArchiveOverviewFormatDirective::ArchiveComment(f) => { + Box::new(ArchiveCommentField(BinaryStringValue(f))) + } + ArchiveOverviewFormatDirective::FirstEntryStart(f) => { + Box::new(FirstEntryStartField(OffsetValue(f))) + } + ArchiveOverviewFormatDirective::CentralDirectoryStart(f) => { + Box::new(CentralDirectoryStartField(OffsetValue(f))) + } + })) + } + } + } +} diff --git a/cli/src/info/formats.rs b/cli/src/info/formats.rs new file mode 100644 index 000000000..a320fb122 --- /dev/null +++ b/cli/src/info/formats.rs @@ -0,0 +1,425 @@ +use std::{ + convert::Infallible, + fmt, + io::{self, Write}, + path, +}; + +use zip::{CompressionMethod, DateTime}; + +use super::directives::Writeable; +use crate::{args::info::*, extract::receiver::EntryKind}; + +pub 
trait FormatValue { + type Input<'a>; + type Output<'a>; + type E; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E>; +} + +#[derive(Copy, Clone)] +pub struct NameString; + +impl FormatValue for NameString { + type Input<'a> = &'a str; + type Output<'a> = &'a str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(input) + } +} + +#[derive(Copy, Clone)] +pub struct PathString; + +#[derive(Debug)] +pub enum PathWriter<'a> { + Path(path::Display<'a>), + None, +} + +impl<'a> fmt::Display for PathWriter<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Path(p) => path::Display::fmt(p, f), + Self::None => write!(f, ""), + } + } +} + +impl FormatValue for PathString { + type Input<'a> = Option<&'a path::Path>; + type Output<'a> = PathWriter<'a>; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match input { + Some(p) => PathWriter::Path(p.display()), + None => PathWriter::None, + }) + } +} + +#[derive(Copy, Clone)] +pub struct FileTypeValue(pub FileTypeFormat); + +impl FormatValue for FileTypeValue { + type Input<'a> = EntryKind; + type Output<'a> = &'static str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + FileTypeFormat::Full => match input { + EntryKind::File => "file", + EntryKind::Dir => "directory", + EntryKind::Symlink => "symlink", + }, + FileTypeFormat::Abbreviated => match input { + EntryKind::File => "-", + EntryKind::Dir => "d", + EntryKind::Symlink => "l", + }, + }) + } +} + +#[derive(Copy, Clone)] +pub struct CompressionMethodValue(pub CompressionMethodFormat); + +impl FormatValue for CompressionMethodValue { + type Input<'a> = CompressionMethod; + type Output<'a> = &'static str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + CompressionMethodFormat::Full => match input { + CompressionMethod::Stored => "stored", + CompressionMethod::Deflated => "deflate", + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => "deflate64", + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => "bzip2", + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => "zstd", + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => "lzma", + #[cfg(feature = "xz")] + CompressionMethod::Xz => "xz", + _ => "unknown", + }, + CompressionMethodFormat::Abbreviated => match input { + CompressionMethod::Stored => "stor", + CompressionMethod::Deflated => "defl", + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => "df64", + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => "bz2", + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => "zst", + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => "lz", + #[cfg(feature = "xz")] + CompressionMethod::Xz => "xz", + _ => "?", + }, + }) + } +} + +#[derive(Copy, Clone)] +pub struct UnixModeValue(pub UnixModeFormat); + +impl UnixModeValue { + const S_IRUSR: u32 = 256; + const S_IWUSR: u32 = 128; + const S_IXUSR: u32 = 64; + + const S_IRGRP: u32 = 32; + const S_IWGRP: u32 = 16; + const S_IXGRP: u32 = 8; + + const S_IROTH: u32 = 4; + const S_IWOTH: u32 = 2; + const S_IXOTH: u32 = 1; + + const UNKNOWN_MODE_BITS: [u8; 9] = [b'?'; 9]; + + fn pretty_format_mode_bits(mode: u32) -> [u8; 9] { + let mut ret = [b'-'; 9]; + + if mode & Self::S_IRUSR == Self::S_IRUSR { + ret[0] = b'r'; + } + if mode & Self::S_IWUSR == Self::S_IWUSR { + ret[1] = b'w'; + 
} + if mode & Self::S_IXUSR == Self::S_IXUSR { + ret[2] = b'x'; + } + + if mode & Self::S_IRGRP == Self::S_IRGRP { + ret[3] = b'r'; + } + if mode & Self::S_IWGRP == Self::S_IWGRP { + ret[4] = b'w'; + } + if mode & Self::S_IXGRP == Self::S_IXGRP { + ret[5] = b'x'; + } + + if mode & Self::S_IROTH == Self::S_IROTH { + ret[6] = b'r'; + } + if mode & Self::S_IWOTH == Self::S_IWOTH { + ret[7] = b'w'; + } + if mode & Self::S_IXOTH == Self::S_IXOTH { + ret[8] = b'x'; + } + + ret + } +} + +#[derive(Debug)] +pub enum ModeValueWriter { + Octal(Option), + Pretty([u8; 9]), +} + +impl Writeable for ModeValueWriter { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + match self { + Self::Octal(mode) => match mode { + Some(bits) => write!(out, "{:o}", bits), + None => write!(out, "?"), + }, + Self::Pretty(bits) => out.write_all(bits.as_ref()), + } + } +} + +impl FormatValue for UnixModeValue { + type Input<'a> = Option; + type Output<'a> = ModeValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + UnixModeFormat::Octal => ModeValueWriter::Octal(input), + UnixModeFormat::Pretty => ModeValueWriter::Pretty(match input { + Some(bits) => Self::pretty_format_mode_bits(bits), + None => Self::UNKNOWN_MODE_BITS, + }), + }) + } +} + +#[derive(Copy, Clone)] +pub struct ByteSizeValue(pub ByteSizeFormat); + +#[derive(Debug)] +pub enum ByteSizeWriter { + FullDecimal(u64), +} + +impl fmt::Display for ByteSizeWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::FullDecimal(n) => write!(f, "{}", n), + } + } +} + +impl FormatValue for ByteSizeValue { + type Input<'a> = u64; + type Output<'a> = ByteSizeWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + ByteSizeFormat::FullDecimal => ByteSizeWriter::FullDecimal(input), + ByteSizeFormat::HumanAbbreviated => todo!("human abbreviated byte sizes"), + }) + } +} + +#[derive(Copy, Clone)] +pub struct DecimalNumberValue; + +impl FormatValue for DecimalNumberValue { + type Input<'a> = u64; + type Output<'a> = u64; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(input) + } +} + +#[derive(Copy, Clone)] +pub struct OffsetValue(pub OffsetFormat); + +#[derive(Debug)] +pub enum OffsetWriter { + Unknown, + Decimal(u64), + Hexadecimal(u64), +} + +impl fmt::Display for OffsetWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Unknown => write!(f, "?"), + Self::Decimal(x) => write!(f, "{}", x), + Self::Hexadecimal(x) => write!(f, "{:x}", x), + } + } +} + +impl FormatValue for OffsetValue { + type Input<'a> = Option; + type Output<'a> = OffsetWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = match input { + None => return Ok(OffsetWriter::Unknown), + Some(input) => input, + }; + Ok(match self.0 { + OffsetFormat::Decimal => OffsetWriter::Decimal(input), + OffsetFormat::Hexadecimal => OffsetWriter::Hexadecimal(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct BinaryNumericValue(pub BinaryNumericValueFormat); + +#[derive(Debug)] +pub enum BinaryNumericValueWriter { + Decimal(u32), + Hexadecimal(u32), +} + +impl fmt::Display for BinaryNumericValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Decimal(x) => write!(f, "{}", x), + Self::Hexadecimal(x) => write!(f, "{:x}", x), + } + } +} 
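+
+/* Illustrative outputs (value hypothetical): a CRC-32 of 0xcafe_f00d renders as
+ * "3405705229" under BinaryNumericValueFormat::Decimal and as "cafef00d" under
+ * BinaryNumericValueFormat::Hexadecimal. */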
+ +impl FormatValue for BinaryNumericValue { + type Input<'a> = u32; + type Output<'a> = BinaryNumericValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + BinaryNumericValueFormat::Decimal => BinaryNumericValueWriter::Decimal(input), + BinaryNumericValueFormat::Hexadecimal => BinaryNumericValueWriter::Hexadecimal(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct BinaryStringValue(pub BinaryStringFormat); + +#[derive(Debug)] +pub enum BinaryStringWriter<'a> { + ReplaceNonUnicode(&'a [u8]), + EscapeAscii(&'a [u8]), + WriteExactly(&'a [u8]), +} + +impl<'a> BinaryStringWriter<'a> { + const INVALID_CHUNK_BUFS: [&'static str; 4] = ["", "�", "��", "���"]; +} + +impl<'a> Writeable for BinaryStringWriter<'a> { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + match self { + Self::ReplaceNonUnicode(s) => { + for chunk in s.utf8_chunks() { + write!(out, "{}", chunk.valid())?; + /* The length of invalid bytes is never longer than 3. */ + write!(out, "{}", Self::INVALID_CHUNK_BUFS[chunk.invalid().len()])?; + } + Ok(()) + } + Self::EscapeAscii(s) => { + if s.is_empty() { + return write!(out, "\"\""); + } + write!(out, "\" ")?; + for b in s.iter().copied() { + write!(out, "{} ", b.escape_ascii())?; + } + write!(out, "\"")?; + Ok(()) + } + Self::WriteExactly(s) => out.write_all(s), + } + } +} + +impl FormatValue for BinaryStringValue { + type Input<'a> = Option<&'a [u8]>; + type Output<'a> = BinaryStringWriter<'a>; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = input.unwrap_or(&[]); + Ok(match self.0 { + BinaryStringFormat::PrintAsString => BinaryStringWriter::ReplaceNonUnicode(input), + BinaryStringFormat::EscapeAscii => BinaryStringWriter::EscapeAscii(input), + BinaryStringFormat::WriteBinaryContents => BinaryStringWriter::WriteExactly(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct TimestampValue(pub TimestampFormat); + +#[derive(Debug)] +pub enum TimestampValueWriter { + None, + DateOnly(DateTime), + TimeOnly(DateTime), + DateAndTime(DateTime), +} + +impl fmt::Display for TimestampValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::None => write!(f, "?"), + Self::DateOnly(d) => write!(f, "{}-{}-{}", d.year(), d.month(), d.day()), + Self::TimeOnly(t) => write!(f, "{}:{}:{}", t.hour(), t.minute(), t.second()), + Self::DateAndTime(dt) => { + write!( + f, + "{}-{}-{} {}:{}:{}", + dt.year(), + dt.month(), + dt.day(), + dt.hour(), + dt.minute(), + dt.second() + ) + } + } + } +} + +impl FormatValue for TimestampValue { + type Input<'a> = Option; + type Output<'a> = TimestampValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = match input { + None => return Ok(TimestampValueWriter::None), + Some(input) => input, + }; + Ok(match self.0 { + TimestampFormat::DateOnly => TimestampValueWriter::DateOnly(input), + TimestampFormat::TimeOnly => TimestampValueWriter::TimeOnly(input), + TimestampFormat::DateAndTime => TimestampValueWriter::DateAndTime(input), + }) + } +} diff --git a/cli/src/lib.rs b/cli/src/lib.rs new file mode 100644 index 000000000..10cb0c0b4 --- /dev/null +++ b/cli/src/lib.rs @@ -0,0 +1,178 @@ +//! ??? 
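+//!
+//! (Implementation library backing the `zip-cli` binary; `driver::main()` is the entry
+//! point that `cli/src/main.rs` invokes.)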
+ +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +use std::{fs, io}; + +pub mod args; +pub mod compress; +pub mod extract; +pub mod info; +pub mod print; +pub mod schema; + +pub enum ErrHandle { + Output(W), + NoOutput, +} + +impl io::Write for ErrHandle +where + W: io::Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + Self::Output(w) => w.write(buf), + Self::NoOutput => Ok(buf.len()), + } + } + + fn flush(&mut self) -> io::Result<()> { + match self { + Self::Output(w) => w.flush(), + Self::NoOutput => Ok(()), + } + } +} + +pub enum OutputHandle { + File(fs::File), + InMem(io::Cursor>), +} + +impl io::Read for OutputHandle { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Self::File(f) => f.read(buf), + Self::InMem(c) => c.read(buf), + } + } +} + +impl io::Write for OutputHandle { + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + Self::File(f) => f.write(buf), + Self::InMem(c) => c.write(buf), + } + } + + fn flush(&mut self) -> io::Result<()> { + match self { + Self::File(f) => f.flush(), + Self::InMem(c) => c.flush(), + } + } +} + +impl io::Seek for OutputHandle { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + match self { + Self::File(f) => f.seek(pos), + Self::InMem(c) => c.seek(pos), + } + } +} + +#[derive(Debug)] +pub enum CommandError { + InvalidArg(String), + InvalidData(String), + Io(String, io::Error), + Zip(String, zip::result::ZipError), +} + +pub trait WrapCommandErr: Sized { + fn wrap_err(self, context: &str) -> Result { + self.wrap_err_with(|| context.to_string()) + } + fn wrap_err_with(self, f: impl FnOnce() -> String) -> Result; +} + +impl WrapCommandErr for Result { + fn wrap_err_with(self, f: impl FnOnce() -> String) -> Result { + self.map_err(|e| CommandError::Io(f(), e)) + } +} + +impl WrapCommandErr for Result { + fn wrap_err_with(self, f: impl FnOnce() -> String) -> Result { + self.map_err(|e| CommandError::Zip(f(), e)) + } +} + +pub mod driver { + use std::env; + use std::io::{self, Write}; + use std::process; + + use super::args::{ArgParseError, CommandFormat, ZipCli, ZipCommand}; + use super::{CommandError, ErrHandle}; + + pub trait ExecuteCommand: CommandFormat { + fn execute(self, err: impl Write) -> Result<(), CommandError>; + + fn do_main(self, mut err: impl Write) -> ! 
+ where + Self: Sized, + { + writeln!(&mut err, "{} args: {:?}", Self::COMMAND_NAME, &self).unwrap(); + match self.execute(err) { + Ok(()) => process::exit(ZipCli::NON_FAILURE_EXIT_CODE), + Err(e) => match e { + CommandError::InvalidArg(msg) => { + let msg = Self::generate_brief_help_text(&msg); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + CommandError::InvalidData(msg) => { + let msg = format!("error processing zip data: {msg}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + CommandError::Io(context, e) => { + let msg = format!("i/o error: {context}: {e}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::INTERNAL_ERROR_EXIT_CODE); + } + CommandError::Zip(context, e) => { + let msg = format!("zip error: {context}: {e}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::INTERNAL_ERROR_EXIT_CODE); + } + }, + } + } + } + + pub fn main() { + let ZipCli { verbose, command } = match ZipCli::parse_argv(env::args_os()) { + Ok(cli) => cli, + Err(e) => match e { + ArgParseError::StdoutMessage(msg) => { + io::stdout() + .write_all(msg.as_bytes()) + .expect("couldn't write message to stdout"); + process::exit(ZipCli::NON_FAILURE_EXIT_CODE); + } + ArgParseError::StderrMessage(msg) => { + /* If we can't write anything to stderr, no use aborting, so just exit. */ + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + }, + }; + let err = if verbose { + ErrHandle::Output(io::stderr()) + } else { + ErrHandle::NoOutput + }; + + match command { + ZipCommand::Info(info) => info.do_main(err), + ZipCommand::Extract(extract) => extract.do_main(err), + ZipCommand::Compress(compress) => compress.do_main(err), + /* TODO: ZipCommand::Crawl! 
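+             * (A commented-out sketch of a `Crawl` command lives in cli/src/schema/values.rs.)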
*/ + } + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 000000000..95fae2ac9 --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + zip_cli::driver::main(); +} diff --git a/cli/src/print.rs b/cli/src/print.rs new file mode 100644 index 000000000..09b5263d3 --- /dev/null +++ b/cli/src/print.rs @@ -0,0 +1,66 @@ +pub mod printer { + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] + pub struct PrintOptions { + pub wrap_width: usize, + } + + impl Default for PrintOptions { + fn default() -> Self { + Self { wrap_width: 80 } + } + } + + #[derive(Debug, Clone)] + struct PrintContext { + prefix: String, + value_column: Option, + } + + impl PrintContext { + pub const fn new() -> Self { + Self { + prefix: String::new(), + value_column: None, + } + } + } + + pub struct Printer { + opts: PrintOptions, + ctx: PrintContext, + } + + impl Printer { + pub const fn create(opts: PrintOptions) -> Self { + Self { + opts, + ctx: PrintContext::new(), + } + } + } +} + +pub trait HelpCase { + fn value(&self) -> &str; + fn description(&self) -> String; + fn extended_description_with_caveats_and_defaults(&self) -> Option; +} + +pub trait HelpSection { + fn name(&self) -> &str; + fn id(&self) -> &str; + fn description(&self) -> Option; + fn extended_description_with_caveats_and_defaults(&self) -> Option; + fn cases(&self) -> Vec>; + fn post(&self) -> Option; +} + +pub struct FlagsSection {} + +pub struct StringPattern {} + +pub enum HelpVerbosity { + NameOnly, + NameAndDescription, + CompleteWithCaveats, +} diff --git a/cli/src/schema.rs b/cli/src/schema.rs new file mode 100644 index 000000000..04df1ec2c --- /dev/null +++ b/cli/src/schema.rs @@ -0,0 +1,208 @@ +pub mod backends { + pub trait Backend { + type Str<'a>; + type Val<'a>; + type Err<'a>; + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>>; + /* fn print(v: Self::Val) -> Self::Str; */ + } + + #[cfg(feature = "json")] + pub mod json_backend { + pub struct JsonBackend; + + impl super::Backend for JsonBackend { + type Str<'a> = &'a str; + type Val<'a> = json::JsonValue; + type Err<'a> = json::Error; + + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>> { + json::parse(s) + } + /* fn print(v: json::JsonValue) -> String { */ + /* v.pretty(2) */ + /* } */ + } + } +} + +pub mod values; + +pub mod transformers { + use super::backends::Backend; + + use std::{error, ffi, fmt, io, marker::PhantomData, str}; + + pub trait Transformer { + type A<'a>; + type B<'a>; + type Error<'a>; + fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error<'a>>; + } + + pub struct StrTransformer; + + impl Transformer for StrTransformer { + type A<'a> = &'a ffi::OsStr; + type B<'a> = &'a str; + type Error<'a> = str::Utf8Error; + fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error<'a>> { + s.try_into() + } + } + + pub struct DecoderTransformer(PhantomData<(T, B)>); + + impl DecoderTransformer { + pub const fn new() -> Self { + Self(PhantomData) + } + } + + #[derive(Debug, Clone, PartialEq, Eq)] + pub enum WrapperError { + In(In), + Out(Out), + } + + impl fmt::Display for WrapperError + where + In: fmt::Display, + Out: fmt::Display, + { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::In(e) => e.fmt(f), + Self::Out(e) => e.fmt(f), + } + } + } + + impl error::Error for WrapperError + where + In: error::Error, + Out: error::Error, + { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match self { + Self::In(e) => e.source(), + Self::Out(e) => e.source(), + } + } + } + + impl 
Backend for DecoderTransformer + where + T: Transformer, + for<'a> B: Backend = ::B<'a>>, + { + type Str<'a> = ::A<'a>; + type Val<'a> = ::Val<'a>; + + type Err<'a> = WrapperError<::Error<'a>, ::Err<'a>>; + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>> { + let s: ::B<'a> = + ::convert_input(s).map_err(|e| WrapperError::In(e))?; + ::parse(s).map_err(|e| WrapperError::Out(e)) + } + + /* fn write(v: &::Value, w: impl io::Write) -> io::Result<()> { */ + /* ::write(v, w) */ + /* } */ + } +} + +#[cfg(test)] +mod test { + use super::{backends::Backend, *}; + use std::{ffi, io}; + + struct BoolBackend; + impl Backend for BoolBackend { + type Str<'a> = &'a str; + type Val<'a> = bool; + type Err<'a> = &'a str; + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>> { + match s { + "true" => Ok(true), + "false" => Ok(false), + e => Err(e), + } + } + /* fn write(v: &bool, mut w: impl io::Write) -> io::Result<()> { */ + /* match v { */ + /* true => w.write_all(b"true"), */ + /* false => w.write_all(b"false"), */ + /* } */ + /* } */ + } + + #[test] + fn parse_bool() { + assert!(BoolBackend::parse("true").unwrap()); + assert!(!BoolBackend::parse("false").unwrap()); + assert_eq!(BoolBackend::parse("").err().unwrap(), ""); + assert_eq!(BoolBackend::parse("aaaaasdf").err().unwrap(), "aaaaasdf"); + } + + #[cfg(unix)] + mod unix { + use std::{ffi, os::unix::ffi::OsStrExt}; + + pub fn broken_utf8() -> &'static ffi::OsStr { + // Here, the values 0x66 and 0x6f correspond to 'f' and 'o' + // respectively. The value 0x80 is a lone continuation byte, invalid + // in a UTF-8 sequence. + ffi::OsStr::from_bytes(&[0x66, 0x6f, 0x80, 0x6f]) + } + } + #[cfg(windows)] + mod windows { + use std::{ffi, os::windows::ffi::OsStringExt}; + + pub fn broken_utf8() -> ffi::OsString { + // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' + // respectively. The value 0xD800 is a lone surrogate half, invalid + // in a UTF-16 sequence. 
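+            // (OsString::from_wide accepts the lone surrogate because OsString's internal
+            // WTF-8 representation can encode it.)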
+ ffi::OsString::from_wide(&[0x0066, 0x006f, 0xD800, 0x006f]) + } + } + fn broken_utf8() -> std::ffi::OsString { + #[cfg(unix)] + let broken = unix::broken_utf8().to_os_string(); + #[cfg(windows)] + let broken = windows::broken_utf8(); + + broken + } + + #[test] + fn utf8_parse_failure() { + let broken = broken_utf8(); + assert!(broken.to_str().is_none()); + } + + #[test] + fn str_wrapper() { + use std::str; + use transformers::{DecoderTransformer, StrTransformer, WrapperError}; + type Wrapper = DecoderTransformer; + + assert!(Wrapper::parse(ffi::OsStr::new("true")).unwrap()); + assert!(!Wrapper::parse(ffi::OsStr::new("false")).unwrap()); + assert_eq!( + Wrapper::parse(ffi::OsStr::new("")).err().unwrap(), + WrapperError::Out("") + ); + assert_eq!( + Wrapper::parse(ffi::OsStr::new("aaaaasdf")).err().unwrap(), + WrapperError::Out("aaaaasdf") + ); + + let broken = broken_utf8(); + assert_eq!( + Wrapper::parse(broken.as_ref()).err().unwrap(), + WrapperError::In(str::from_utf8(broken.as_encoded_bytes()).err().unwrap()), + ); + } +} diff --git a/cli/src/schema/cli.rs b/cli/src/schema/cli.rs new file mode 100644 index 000000000..d6241a404 --- /dev/null +++ b/cli/src/schema/cli.rs @@ -0,0 +1,75 @@ +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SectionName(String); + +impl SectionName { + pub fn create(name: impl Into) -> Self { + let name: String = name.into(); + assert!(!name.is_empty()); + assert!(name.chars().all(|c| c.is_ascii_uppercase())); + Self(name) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MetaVarName(String); + +impl MetaVarName { + pub fn create(name: impl Into) -> Self { + let name: String = name.into(); + assert!(!name.is_empty()); + assert!(name.chars().all(|c| c.is_ascii_lowercase() || c == '-')); + Self(name) + } +} + +pub trait MetaVar { + fn choices(&self) -> Option>; +} + +pub enum FormatCaseElement { + FormatRef(MetaVarName), + Literal(String), +} + +pub struct FormatCase { + pub elements: Vec, + pub description: Option, +} + +pub enum MetaVarKind { + /* e.g. */ + NameOnly(String), + Format { cases: Vec }, +} + +pub struct MetaVarDecl { + pub id: MetaVarName, + pub spec: MetaVarKind, +} + +pub struct FlagSuffixCase { + pub prefix_marker: &'static str, + pub format: MetaVarName, +} + +pub struct Flag { + pub short: Option, + pub long: String, + pub suffix_cases: Vec, + pub value: Option, +} + +pub enum FlagCaseElement { + SectionRef(SectionName), + Literal(Flag), + Optional(Box), +} + +pub struct FlagCase { + pub elements: Vec, +} + +pub struct FlagsSectionDecl { + pub id: SectionName, + pub cases: Vec, +} diff --git a/cli/src/schema/values.rs b/cli/src/schema/values.rs new file mode 100644 index 000000000..511500a6b --- /dev/null +++ b/cli/src/schema/values.rs @@ -0,0 +1,156 @@ +use super::backends::Backend; + +/* pub trait SchemaValue: Sized { */ +/* type DeserErr; */ +/* fn serialize(self) -> ::Value; */ +/* fn deserialize(s: ::Value) -> Result; */ +/* } */ + +pub trait NamedList { + fn f(self); +} + +/* pub enum Schema { */ +/* Bool, */ +/* Str, */ +/* Arr, */ +/* Obj, */ +/* Arr(Vec>) */ +/* Str(String), */ +/* Arr(Vec>), */ +/* Obj(Vec<(String, Box)>), */ +/* } */ + +/* pub trait Schema {} */ + +/* pub enum Command { */ +/* /// Write a JSON object to stdout which contains all the file paths under */ +/* /// the top-level `paths`. 
diff --git a/cli/src/schema/values.rs b/cli/src/schema/values.rs
new file mode 100644
index 000000000..511500a6b
--- /dev/null
+++ b/cli/src/schema/values.rs
@@ -0,0 +1,156 @@
+use super::backends::Backend;
+
+/* pub trait SchemaValue<B: Backend>: Sized { */
+/*     type DeserErr; */
+/*     fn serialize(self) -> <B as Backend>::Value; */
+/*     fn deserialize(s: <B as Backend>::Value) -> Result<Self, Self::DeserErr>; */
+/* } */
+
+pub trait NamedList {
+    fn f(self);
+}
+
+/* pub enum Schema { */
+/*     Bool, */
+/*     Str, */
+/*     Arr, */
+/*     Obj, */
+/*     Arr(Vec<Box<Schema>>) */
+/*     Str(String), */
+/*     Arr(Vec<Box<Schema>>), */
+/*     Obj(Vec<(String, Box<Schema>)>), */
+/* } */
+
+/* pub trait Schema {} */
+
+/* pub enum Command { */
+/*     /// Write a JSON object to stdout which contains all the file paths under */
+/*     /// the top-level `paths`. */
+/*     Crawl { */
+/*         #[command(flatten)] */
+/*         crawl: MedusaCrawl, */
+/*     }, */
+/*     /// Consume a JSON object from [`Self::Crawl`] over stdin and write those */
+/*     /// files into a zip file at `output`. */
+/*     Zip { */
+/*         #[command(flatten)] */
+/*         output: Output, */
+/*         #[command(flatten)] */
+/*         zip_options: ZipOutputOptions, */
+/*         #[command(flatten)] */
+/*         modifications: EntryModifications, */
+/*         #[arg(long, value_enum, default_value_t)] */
+/*         parallelism: Parallelism, */
+/*     }, */
+/*     /// Merge the content of several zip files into one. */
+/*     Merge { */
+/*         #[command(flatten)] */
+/*         output: Output, */
+/*         /// ??? */
+/*         #[command(flatten)] */
+/*         mtime_behavior: ModifiedTimeBehavior, */
+/*         #[command(flatten)] */
+/*         merge: MedusaMerge, */
+/*     }, */
+/*     /// Perform a `crawl` and then a `zip` on its output in memory. */
+/*     CrawlZip { */
+/*         #[command(flatten)] */
+/*         crawl: MedusaCrawl, */
+/*         #[command(flatten)] */
+/*         output: Output, */
+/*         #[command(flatten)] */
+/*         zip_options: ZipOutputOptions, */
+/*         #[command(flatten)] */
+/*         modifications: EntryModifications, */
+/*         #[arg(long, value_enum, default_value_t)] */
+/*         parallelism: Parallelism, */
+/*     }, */
+/*     /// Perform a `zip` and then a `merge` without releasing the output file */
+/*     /// handle. */
+/*     ZipMerge { */
+/*         #[command(flatten)] */
+/*         output: Output, */
+/*         #[command(flatten)] */
+/*         zip_options: ZipOutputOptions, */
+/*         #[command(flatten)] */
+/*         modifications: EntryModifications, */
+/*         #[arg(long, value_enum, default_value_t)] */
+/*         parallelism: Parallelism, */
+/*         #[command(flatten)] */
+/*         merge: MedusaMerge, */
+/*     }, */
+/*     /// Perform `crawl`, then a `zip` on its output in memory, then a `merge` */
+/*     /// into the same output file. */
+/*     CrawlZipMerge { */
+/*         #[command(flatten)] */
+/*         crawl: MedusaCrawl, */
+/*         #[command(flatten)] */
+/*         output: Output, */
+/*         #[command(flatten)] */
+/*         zip_options: ZipOutputOptions, */
+/*         #[command(flatten)] */
+/*         modifications: EntryModifications, */
+/*         #[arg(long, value_enum, default_value_t)] */
+/*         parallelism: Parallelism, */
+/*         #[command(flatten)] */
+/*         merge: MedusaMerge, */
+/*     }, */
+/* } */
+
+pub enum HydratedValue<'a> {
+    Bool(bool),
+    Str(&'a str),
+    Arr(Vec<Box<HydratedValue<'a>>>),
+    Obj(Vec<(&'a str, Box<HydratedValue<'a>>)>),
+}
+
+pub trait Hydrate<Value> {
+    fn hydrate(v: HydratedValue) -> Value;
+}
+
+pub trait Schema: Backend {
+    fn print<'a>(v: HydratedValue<'a>) -> <Self as Backend>::Val<'a>;
+}
+
+#[cfg(feature = "json")]
+pub mod json_value {
+    use super::*;
+    use crate::schema::backends::json_backend::JsonBackend;
+
+    /* impl SchemaValue<JsonBackend> for bool { */
+    /*     type DeserErr = String; */
+
+    /*     fn serialize(self) -> json::JsonValue { */
+    /*         json::JsonValue::Boolean(self) */
+    /*     } */
+    /*     fn deserialize(s: json::JsonValue) -> Result<Self, Self::DeserErr> { */
+    /*         match s { */
+    /*             json::JsonValue::Boolean(value) => Ok(value), */
+    /*             s => Err(format!("non-boolean value {s}")), */
+    /*         } */
+    /*     } */
+    /* } */
+
+    /* impl SchemaValue<JsonBackend> for String { */
+    /*     type DeserErr = String; */
+
+    /*     fn serialize(self) -> json::JsonValue { */
+    /*         json::JsonValue::String(self) */
+    /*     } */
+    /*     fn deserialize(s: json::JsonValue) -> Result<Self, Self::DeserErr> { */
+    /*         match s { */
+    /*             json::JsonValue::String(value) => Ok(value), */
+    /*             s => Err(format!("non-string value {s}")), */
+    /*         } */
+    /*     } */
+    /* } */
+}
+
+/* pub enum SchemaValue { */
+/*     Bool(bool), */
+/*     Path(PathBuf), */
+/* } */
+
+/* pub trait SchemaValue {} */
+
+/* impl SchemaValue for bool {} */
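To make the recursive shape of `HydratedValue` concrete, here is a small hand-built value. The `Box`ed element types follow the definitions above; the keys are illustrative, echoing the `paths` object the commented-out `Crawl` command describes.

// Illustrative only: a JSON-like object {"paths": ["a/b.txt", "c.zip"]}.
fn example_value() -> HydratedValue<'static> {
    HydratedValue::Obj(vec![(
        "paths",
        Box::new(HydratedValue::Arr(vec![
            Box::new(HydratedValue::Str("a/b.txt")),
            Box::new(HydratedValue::Str("c.zip")),
        ])),
    )])
}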
diff --git a/src/compression.rs b/src/compression.rs
index 83a7669bd..02c264641 100644
--- a/src/compression.rs
+++ b/src/compression.rs
@@ -10,7 +10,7 @@ use std::{fmt, io};
 ///
 /// When creating ZIP files, you may choose the method to use with
 /// [`crate::write::FileOptions::compression_method`]
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)]
 #[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))]
 #[non_exhaustive]
 pub enum CompressionMethod {
diff --git a/src/extra_fields/extended_timestamp.rs b/src/extra_fields/extended_timestamp.rs
index 1cc0f1de4..0cf794c3c 100644
--- a/src/extra_fields/extended_timestamp.rs
+++ b/src/extra_fields/extended_timestamp.rs
@@ -4,7 +4,7 @@ use std::io::Read;
 
 /// extended timestamp, as described in
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct ExtendedTimestamp {
     mod_time: Option<u32>,
     ac_time: Option<u32>,
     cr_time: Option<u32>,
diff --git a/src/types.rs b/src/types.rs
index de22f6055..3b43c3903 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -1051,7 +1051,7 @@ pub enum AesVendorVersion {
 }
 
 /// AES variant used.
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 #[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))]
 #[repr(u8)]
 pub enum AesMode {
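The point of adding `Hash` to `CompressionMethod` and `AesMode` here is that `FileOptions` in the `src/write.rs` hunks below can then derive `Hash` itself. A sketch of what that unlocks, assuming the crate's existing `SimpleFileOptions` alias for `FileOptions<'static, ()>`:

// Sketch: with FileOptions deriving Hash (next hunk), whole option sets can
// key a HashMap, e.g. to deduplicate per-entry settings.
use std::collections::HashMap;
use zip::write::SimpleFileOptions;

fn count_option_sets(
    all: impl IntoIterator<Item = SimpleFileOptions>,
) -> HashMap<SimpleFileOptions, usize> {
    let mut counts = HashMap::new();
    for opts in all {
        *counts.entry(opts).or_insert(0) += 1;
    }
    counts
}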
diff --git a/src/write.rs b/src/write.rs
index 8d077b595..8cd8d5f1b 100644
--- a/src/write.rs
+++ b/src/write.rs
@@ -15,13 +15,12 @@ use crate::types::{
     ZipRawValues, MIN_VERSION,
 };
 use crate::write::ffi::S_IFLNK;
-#[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))]
-use core::num::NonZeroU64;
 use crc32fast::Hasher;
 use indexmap::IndexMap;
 use std::borrow::ToOwned;
 use std::default::Default;
 use std::fmt::{Debug, Formatter};
+use std::hash;
 use std::io;
 use std::io::prelude::*;
 use std::io::Cursor;
@@ -234,6 +233,20 @@ pub(crate) enum EncryptWith<'k> {
     ZipCrypto(ZipCryptoKeys, PhantomData<&'k ()>),
 }
 
+impl hash::Hash for EncryptWith<'_> {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        match self {
+            Self::Aes { mode, password } => {
+                mode.hash(state);
+                password.hash(state);
+            }
+            Self::ZipCrypto(keys, _ph) => {
+                keys.hash(state);
+            }
+        }
+    }
+}
+
 #[cfg(fuzzing)]
 impl<'a> arbitrary::Arbitrary<'a> for EncryptWith<'a> {
     fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
@@ -253,7 +266,8 @@ impl<'a> arbitrary::Arbitrary<'a> for EncryptWith<'a> {
 }
 
 /// Metadata for a file to be written
-#[derive(Clone, Debug, Copy, Eq, PartialEq)]
+/* TODO: add accessors for this data as well so options can be introspected! */
+#[derive(Clone, Debug, Copy, Eq, PartialEq, Hash)]
 pub struct FileOptions<'k, T: FileOptionExtension> {
     pub(crate) compression_method: CompressionMethod,
     pub(crate) compression_level: Option<i64>,
@@ -780,6 +794,8 @@ impl<W: Write + Seek> ZipWriter<W> {
     }
 }
 
+/* TODO: consider a ZipWriter which works with just a Write bound to support streaming output? This
+ * would require some work, but is possible in the protocol. */
 impl<A: Read + Write + Seek> ZipWriter<A> {
     /// Initializes the archive.
     ///
@@ -1441,6 +1457,7 @@ impl<W: Write + Seek> ZipWriter<W> {
     /// implementations may materialize a symlink as a regular file, possibly with the
     /// content incorrectly set to the symlink target. For maximum portability, consider
    /// storing a regular file instead.
+    /* TODO: support OsStr instead of just str, for non-unicode paths. */
     pub fn add_symlink<N, T, E: FileOptionExtension>(
         &mut self,
         name: N,
@@ -1654,7 +1671,7 @@ impl<W: Write + Seek> GenericZipWriter<W> {
                 let best_non_zopfli = Compression::best().level();
                 if level > best_non_zopfli {
                     let options = Options {
-                        iteration_count: NonZeroU64::try_from(
+                        iteration_count: core::num::NonZeroU64::try_from(
                             (level - best_non_zopfli) as u64,
                         )
                         .unwrap(),
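For the arithmetic in that last hunk: flate2's `Compression::best().level()` is 9, so requested levels above 9 map one-to-one onto Zopfli iteration counts (10 maps to 1, 12 to 3), and `NonZeroU64::try_from` guarantees a nonzero count by construction. A standalone sketch of the same mapping:

use core::num::NonZeroU64;

// Mirrors the hunk above: levels at or below best_non_zopfli use plain
// DEFLATE; anything higher buys (level - best_non_zopfli) Zopfli iterations.
fn zopfli_iterations(level: u32, best_non_zopfli: u32) -> Option<NonZeroU64> {
    if level > best_non_zopfli {
        NonZeroU64::try_from((level - best_non_zopfli) as u64).ok()
    } else {
        None
    }
}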