OrlovEvgeny
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
Lines changed: 6 additions & 1 deletion
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 4 additions & 5 deletions
diff --git a/src/cli.rs b/src/cli.rs
Lines changed: 22 additions & 23 deletions
diff --git a/src/commands/compact.rs b/src/commands/compact.rs
Lines changed: 1 addition & 1 deletion
diff --git a/src/commands/convert.rs b/src/commands/convert.rs
Lines changed: 4 additions & 4 deletions
diff --git a/src/commands/inspect.rs b/src/commands/inspect.rs
Lines changed: 3 additions & 2 deletions
diff --git a/src/commands/sample.rs b/src/commands/sample.rs
Lines changed: 2 additions & 2 deletions
diff --git a/src/commands/slice.rs b/src/commands/slice.rs
Lines changed: 3 additions & 2 deletions
@@ -238,8 +238,13 @@ jobs:
 
       - uses: dtolnay/rust-toolchain@stable
 
+      - name: Set version from tag
+        run: |
+          VERSION="${GITHUB_REF_NAME#v}"
+          sed -i "0,/^version = .*/s//version = \"${VERSION}\"/" Cargo.toml
+
       - name: Publish
-        run: cargo publish
+        run: cargo publish --allow-dirty
         env:
           CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
 
 
@@ -1,13 +1,13 @@
 [package]
 name = "pq-parquet"
-version = "0.1.0"
+version = "1.0.3"
 edition = "2024"
-description = "The jq of Parquet. Inspect, transform, and operate on Parquet files from your terminal."
+description = "The jq of Parquet. Inspect, transform, and operate on Parquet files from your terminal. S3, GCS, Azure support. CLI tool."
 repository = "https://github.com/OrlovEvgeny/pq"
 license = "MIT"
 readme = "README.md"
-keywords = ["parquet", "cli", "arrow", "data", "analytics"]
-categories = ["command-line-utilities"]
+keywords = ["parquet", "cli", "data-engineering", "jq", "arrow"]
+categories = ["command-line-utilities", "database"]
 
 [[bin]]
 name = "pq"
@@ -52,7 +52,6 @@ bytesize = { version = "2", features = ["serde"] }
 num-format = "0.4"
 regex = "1"
 rand = "0.9"
-rayon = "1"
 
 tempfile = "3"
 
 
@@ -2,14 +2,33 @@ use clap::{Parser, Subcommand, ValueEnum};
 
 #[derive(Debug, Parser)]
 #[command(name = "pq", version, about = "The jq of Parquet")]
-#[command(args_conflicts_with_subcommands = true)]
 pub struct Cli {
     #[command(flatten)]
     pub global: GlobalArgs,
 
     #[command(subcommand)]
     pub command: Option<Command>,
 
+    /// Show all columns (no truncation)
+    #[arg(long)]
+    pub all: bool,
+
+    /// Only print schema, skip file metadata
+    #[arg(long)]
+    pub schema_only: bool,
+
+    /// Only print file metadata, skip schema
+    #[arg(long)]
+    pub meta_only: bool,
+
+    /// Show raw Parquet metadata (thrift-level detail)
+    #[arg(long)]
+    pub raw: bool,
+
+    /// Sort columns by: name, type, encoding, size, nulls
+    #[arg(short, long)]
+    pub sort: Option<String>,
+
     /// Files to inspect (when no subcommand is given)
     #[arg(trailing_var_arg = true)]
     pub files: Vec<String>,
@@ -412,22 +431,6 @@ pub struct ConvertArgs {
     #[arg(long)]
     pub schema: Option<String>,
 
-    /// strftime format for timestamp parsing
-    #[arg(long)]
-    pub timestamp_format: Option<String>,
-
-    /// Treat comma as decimal separator
-    #[arg(long)]
-    pub decimal_comma: bool,
-
-    /// Comma-separated null representations
-    #[arg(long)]
-    pub null_values: Option<String>,
-
-    /// CSV has header row
-    #[arg(long)]
-    pub header: Option<bool>,
-
     /// CSV lacks header row
     #[arg(long)]
     pub no_header: bool,
@@ -436,9 +439,9 @@ pub struct ConvertArgs {
     #[arg(long)]
     pub delimiter: Option<char>,
 
-    /// Columns with <= N unique values get dictionary encoding (default: 10000)
+    /// Enable dictionary encoding
     #[arg(long)]
-    pub dictionary_threshold: Option<usize>,
+    pub dictionary: bool,
 }
 
 #[derive(Debug, clap::Args)]
@@ -559,10 +562,6 @@ pub struct SizeArgs {
     #[arg(long)]
     pub top: Option<usize>,
 
-    /// Human-readable sizes (default)
-    #[arg(long)]
-    pub human: bool,
-
     /// Show exact byte counts
     #[arg(long)]
     pub bytes: bool,
 
@@ -119,7 +119,7 @@ pub fn execute(args: &CompactArgs, output: &mut OutputConfig) -> miette::Result<
         for &i in indices {
             let file_size = sources[i].file_size();
 
-            if file_size >= min_file_size && current_group.is_empty() {
+            if file_size >= min_file_size {
                 continue;
             }
 
 
@@ -216,7 +216,7 @@ fn csv_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::Resu
     let mut props_builder =
         parquet::file::properties::WriterProperties::builder().set_compression(compression);
 
-    if args.dictionary_threshold.is_some() {
+    if args.dictionary {
         props_builder = props_builder.set_dictionary_enabled(true);
     }
 
@@ -267,7 +267,7 @@ fn json_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::Res
     let mut props_builder =
         parquet::file::properties::WriterProperties::builder().set_compression(compression);
 
-    if args.dictionary_threshold.is_some() {
+    if args.dictionary {
         props_builder = props_builder.set_dictionary_enabled(true);
     }
 
@@ -367,7 +367,7 @@ fn parquet_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::
     if let Some(rg_size) = args.row_group_size {
         props_builder = props_builder.set_max_row_group_size(rg_size);
     }
-    if args.dictionary_threshold.is_some() {
+    if args.dictionary {
         props_builder = props_builder.set_dictionary_enabled(true);
     }
     let props = props_builder.build();
@@ -402,7 +402,7 @@ fn arrow_ipc_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette
     if let Some(rg_size) = args.row_group_size {
         props_builder = props_builder.set_max_row_group_size(rg_size);
     }
-    if args.dictionary_threshold.is_some() {
+    if args.dictionary {
         props_builder = props_builder.set_dictionary_enabled(true);
     }
     let props = props_builder.build();
 
@@ -430,8 +430,9 @@ fn build_metadata_pairs(
         let kv_str = kv_meta
             .iter()
             .map(|(k, v)| {
-                let truncated = if !verbose && v.len() > 60 {
-                    format!("{}...", &v[..57])
+                let truncated = if !verbose && v.chars().count() > 60 {
+                    let t: String = v.chars().take(57).collect();
+                    format!("{}...", t)
                 } else {
                     v.clone()
                 };
 
@@ -89,8 +89,8 @@ pub fn execute(args: &SampleArgs, output: &mut OutputConfig) -> miette::Result<(
             }
 
             if !batch_indices.is_empty() {
-                let indices_array = arrow::array::UInt32Array::from(
-                    batch_indices.iter().map(|&i| i as u32).collect::<Vec<_>>(),
+                let indices_array = arrow::array::UInt64Array::from(
+                    batch_indices.iter().map(|&i| i as u64).collect::<Vec<_>>(),
                 );
 
                 let columns: Vec<arrow::array::ArrayRef> = batch
 
@@ -632,8 +632,9 @@ fn execute_split_by(args: &SliceArgs, output: &OutputConfig, col_name: &str) ->
             let columns: Vec<arrow::array::ArrayRef> = batch
                 .columns()
                 .iter()
-                .map(|col| arrow::compute::take(col.as_ref(), &indices, None).expect("take failed"))
-                .collect();
+                .map(|col| arrow::compute::take(col.as_ref(), &indices, None))
+                .collect::<Result<Vec<_>, _>>()
+                .map_err(|e| miette::miette!("take error: {}", e))?;
 
             let taken_batch = RecordBatch::try_new(schema.clone(), columns)
                 .map_err(|e| miette::miette!("error creating batch: {}", e))?;
Original file line number	Diff line number	Diff line change
`@@ -119,7 +119,7 @@ pub fn execute(args: &CompactArgs, output: &mut OutputConfig) -> miette::Result<`
`119`	`119`	`for &i in indices {`
`120`	`120`	`let file_size = sources[i].file_size();`
`121`	`121`
`122`		`- if file_size >= min_file_size && current_group.is_empty() {`
	`122`	`+ if file_size >= min_file_size {`
`123`	`123`	`continue;`
`124`	`124`	`}`
`125`	`125`
Original file line number	Diff line number	Diff line change
`@@ -216,7 +216,7 @@ fn csv_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::Resu`
`216`	`216`	`let mut props_builder =`
`217`	`217`	`parquet::file::properties::WriterProperties::builder().set_compression(compression);`
`218`	`218`
`219`		`- if args.dictionary_threshold.is_some() {`
	`219`	`+ if args.dictionary {`
`220`	`220`	`props_builder = props_builder.set_dictionary_enabled(true);`
`221`	`221`	`}`
`222`	`222`
`@@ -267,7 +267,7 @@ fn json_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::Res`
`267`	`267`	`let mut props_builder =`
`268`	`268`	`parquet::file::properties::WriterProperties::builder().set_compression(compression);`
`269`	`269`
`270`		`- if args.dictionary_threshold.is_some() {`
	`270`	`+ if args.dictionary {`
`271`	`271`	`props_builder = props_builder.set_dictionary_enabled(true);`
`272`	`272`	`}`
`273`	`273`
`@@ -367,7 +367,7 @@ fn parquet_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::`
`367`	`367`	`if let Some(rg_size) = args.row_group_size {`
`368`	`368`	`props_builder = props_builder.set_max_row_group_size(rg_size);`
`369`	`369`	`}`
`370`		`- if args.dictionary_threshold.is_some() {`
	`370`	`+ if args.dictionary {`
`371`	`371`	`props_builder = props_builder.set_dictionary_enabled(true);`
`372`	`372`	`}`
`373`	`373`	`let props = props_builder.build();`
`@@ -402,7 +402,7 @@ fn arrow_ipc_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette`
`402`	`402`	`if let Some(rg_size) = args.row_group_size {`
`403`	`403`	`props_builder = props_builder.set_max_row_group_size(rg_size);`
`404`	`404`	`}`
`405`		`- if args.dictionary_threshold.is_some() {`
	`405`	`+ if args.dictionary {`
`406`	`406`	`props_builder = props_builder.set_dictionary_enabled(true);`
`407`	`407`	`}`
`408`	`408`	`let props = props_builder.build();`
Original file line number	Diff line number	Diff line change
`@@ -89,8 +89,8 @@ pub fn execute(args: &SampleArgs, output: &mut OutputConfig) -> miette::Result<(`
`89`	`89`	`}`
`90`	`90`
`91`	`91`	`if !batch_indices.is_empty() {`
`92`		`- let indices_array = arrow::array::UInt32Array::from(`
`93`		`- batch_indices.iter().map(\|&i\| i as u32).collect::<Vec<_>>(),`
	`92`	`+ let indices_array = arrow::array::UInt64Array::from(`
	`93`	`+ batch_indices.iter().map(\|&i\| i as u64).collect::<Vec<_>>(),`
`94`	`94`	`);`
`95`	`95`
`96`	`96`	`let columns: Vec<arrow::array::ArrayRef> = batch`