Skip to content

Commit dd5f6ae

Browse files
committed
fix: clap global flag in inspect
1 parent b37bb24 commit dd5f6ae

File tree

16 files changed

+219
-143
lines changed

16 files changed

+219
-143
lines changed

.github/workflows/release.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,13 @@ jobs:
238238

239239
- uses: dtolnay/rust-toolchain@stable
240240

241+
- name: Set version from tag
242+
run: |
243+
VERSION="${GITHUB_REF_NAME#v}"
244+
sed -i "0,/^version = .*/s//version = \"${VERSION}\"/" Cargo.toml
245+
241246
- name: Publish
242-
run: cargo publish
247+
run: cargo publish --allow-dirty
243248
env:
244249
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
245250

Cargo.toml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
[package]
22
name = "pq-parquet"
3-
version = "0.1.0"
3+
version = "1.0.3"
44
edition = "2024"
5-
description = "The jq of Parquet. Inspect, transform, and operate on Parquet files from your terminal."
5+
description = "The jq of Parquet. Inspect, transform, and operate on Parquet files from your terminal. S3, GCS, Azure support. CLI tool."
66
repository = "https://github.com/OrlovEvgeny/pq"
77
license = "MIT"
88
readme = "README.md"
9-
keywords = ["parquet", "cli", "arrow", "data", "analytics"]
10-
categories = ["command-line-utilities"]
9+
keywords = ["parquet", "cli", "data-engineering", "jq", "arrow"]
10+
categories = ["command-line-utilities", "database"]
1111

1212
[[bin]]
1313
name = "pq"
@@ -52,7 +52,6 @@ bytesize = { version = "2", features = ["serde"] }
5252
num-format = "0.4"
5353
regex = "1"
5454
rand = "0.9"
55-
rayon = "1"
5655

5756
tempfile = "3"
5857

src/cli.rs

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,33 @@ use clap::{Parser, Subcommand, ValueEnum};
22

33
#[derive(Debug, Parser)]
44
#[command(name = "pq", version, about = "The jq of Parquet")]
5-
#[command(args_conflicts_with_subcommands = true)]
65
pub struct Cli {
76
#[command(flatten)]
87
pub global: GlobalArgs,
98

109
#[command(subcommand)]
1110
pub command: Option<Command>,
1211

12+
/// Show all columns (no truncation)
13+
#[arg(long)]
14+
pub all: bool,
15+
16+
/// Only print schema, skip file metadata
17+
#[arg(long)]
18+
pub schema_only: bool,
19+
20+
/// Only print file metadata, skip schema
21+
#[arg(long)]
22+
pub meta_only: bool,
23+
24+
/// Show raw Parquet metadata (thrift-level detail)
25+
#[arg(long)]
26+
pub raw: bool,
27+
28+
/// Sort columns by: name, type, encoding, size, nulls
29+
#[arg(short, long)]
30+
pub sort: Option<String>,
31+
1332
/// Files to inspect (when no subcommand is given)
1433
#[arg(trailing_var_arg = true)]
1534
pub files: Vec<String>,
@@ -412,22 +431,6 @@ pub struct ConvertArgs {
412431
#[arg(long)]
413432
pub schema: Option<String>,
414433

415-
/// strftime format for timestamp parsing
416-
#[arg(long)]
417-
pub timestamp_format: Option<String>,
418-
419-
/// Treat comma as decimal separator
420-
#[arg(long)]
421-
pub decimal_comma: bool,
422-
423-
/// Comma-separated null representations
424-
#[arg(long)]
425-
pub null_values: Option<String>,
426-
427-
/// CSV has header row
428-
#[arg(long)]
429-
pub header: Option<bool>,
430-
431434
/// CSV lacks header row
432435
#[arg(long)]
433436
pub no_header: bool,
@@ -436,9 +439,9 @@ pub struct ConvertArgs {
436439
#[arg(long)]
437440
pub delimiter: Option<char>,
438441

439-
/// Columns with <= N unique values get dictionary encoding (default: 10000)
442+
/// Enable dictionary encoding
440443
#[arg(long)]
441-
pub dictionary_threshold: Option<usize>,
444+
pub dictionary: bool,
442445
}
443446

444447
#[derive(Debug, clap::Args)]
@@ -559,10 +562,6 @@ pub struct SizeArgs {
559562
#[arg(long)]
560563
pub top: Option<usize>,
561564

562-
/// Human-readable sizes (default)
563-
#[arg(long)]
564-
pub human: bool,
565-
566565
/// Show exact byte counts
567566
#[arg(long)]
568567
pub bytes: bool,

src/commands/compact.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ pub fn execute(args: &CompactArgs, output: &mut OutputConfig) -> miette::Result<
119119
for &i in indices {
120120
let file_size = sources[i].file_size();
121121

122-
if file_size >= min_file_size && current_group.is_empty() {
122+
if file_size >= min_file_size {
123123
continue;
124124
}
125125

src/commands/convert.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ fn csv_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::Resu
216216
let mut props_builder =
217217
parquet::file::properties::WriterProperties::builder().set_compression(compression);
218218

219-
if args.dictionary_threshold.is_some() {
219+
if args.dictionary {
220220
props_builder = props_builder.set_dictionary_enabled(true);
221221
}
222222

@@ -267,7 +267,7 @@ fn json_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::Res
267267
let mut props_builder =
268268
parquet::file::properties::WriterProperties::builder().set_compression(compression);
269269

270-
if args.dictionary_threshold.is_some() {
270+
if args.dictionary {
271271
props_builder = props_builder.set_dictionary_enabled(true);
272272
}
273273

@@ -367,7 +367,7 @@ fn parquet_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette::
367367
if let Some(rg_size) = args.row_group_size {
368368
props_builder = props_builder.set_max_row_group_size(rg_size);
369369
}
370-
if args.dictionary_threshold.is_some() {
370+
if args.dictionary {
371371
props_builder = props_builder.set_dictionary_enabled(true);
372372
}
373373
let props = props_builder.build();
@@ -402,7 +402,7 @@ fn arrow_ipc_to_parquet(input: &str, output: &str, args: &ConvertArgs) -> miette
402402
if let Some(rg_size) = args.row_group_size {
403403
props_builder = props_builder.set_max_row_group_size(rg_size);
404404
}
405-
if args.dictionary_threshold.is_some() {
405+
if args.dictionary {
406406
props_builder = props_builder.set_dictionary_enabled(true);
407407
}
408408
let props = props_builder.build();

src/commands/inspect.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,9 @@ fn build_metadata_pairs(
430430
let kv_str = kv_meta
431431
.iter()
432432
.map(|(k, v)| {
433-
let truncated = if !verbose && v.len() > 60 {
434-
format!("{}...", &v[..57])
433+
let truncated = if !verbose && v.chars().count() > 60 {
434+
let t: String = v.chars().take(57).collect();
435+
format!("{}...", t)
435436
} else {
436437
v.clone()
437438
};

src/commands/sample.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ pub fn execute(args: &SampleArgs, output: &mut OutputConfig) -> miette::Result<(
8989
}
9090

9191
if !batch_indices.is_empty() {
92-
let indices_array = arrow::array::UInt32Array::from(
93-
batch_indices.iter().map(|&i| i as u32).collect::<Vec<_>>(),
92+
let indices_array = arrow::array::UInt64Array::from(
93+
batch_indices.iter().map(|&i| i as u64).collect::<Vec<_>>(),
9494
);
9595

9696
let columns: Vec<arrow::array::ArrayRef> = batch

src/commands/slice.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,9 @@ fn execute_split_by(args: &SliceArgs, output: &OutputConfig, col_name: &str) ->
632632
let columns: Vec<arrow::array::ArrayRef> = batch
633633
.columns()
634634
.iter()
635-
.map(|col| arrow::compute::take(col.as_ref(), &indices, None).expect("take failed"))
636-
.collect();
635+
.map(|col| arrow::compute::take(col.as_ref(), &indices, None))
636+
.collect::<Result<Vec<_>, _>>()
637+
.map_err(|e| miette::miette!("take error: {}", e))?;
637638

638639
let taken_batch = RecordBatch::try_new(schema.clone(), columns)
639640
.map_err(|e| miette::miette!("error creating batch: {}", e))?;

0 commit comments

Comments
 (0)