Skip to content

Commit 7ddd3ea

Browse files
committed
fix test and clippy
1 parent 09135a7 commit 7ddd3ea

4 files changed

Lines changed: 50 additions & 38 deletions

File tree

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,6 @@ pub use arrow_reader::{ArrowReader, ArrowReaderBuilder};
7474
pub use arrow_writer::{ArrowWriter, ArrowWriterBuilder};
7575
#[cfg(feature = "async")]
7676
pub use async_arrow_reader::ArrowStreamReader;
77-
pub use predicate::{ComparisonOp, Predicate};
77+
pub use predicate::{ComparisonOp, Predicate, PredicateValue};
7878
pub use row_selection::{RowSelection, RowSelector};
7979
pub use schema::{ArrowSchemaOptions, TimestampPrecision};

src/predicate.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,7 @@ impl Predicate {
179179
}
180180

181181
/// Negate a predicate
182+
#[allow(clippy::should_implement_trait)]
182183
pub fn not(predicate: Predicate) -> Self {
183184
Self::Not(Box::new(predicate))
184185
}

src/row_group_filter.rs

Lines changed: 46 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,7 @@ fn find_column_index(schema: &RootDataType, column_name: &str) -> Result<usize>
125125
.find(|(_, col)| col.name() == column_name)
126126
.map(|(idx, _)| idx)
127127
.context(UnexpectedSnafu {
128-
msg: format!("Column '{}' not found in schema", column_name),
128+
msg: format!("Column '{column_name}' not found in schema"),
129129
})
130130
}
131131

@@ -143,28 +143,30 @@ fn evaluate_comparison(
143143
// Get row group index for this column
144144
let col_index = row_index.column(column_idx).context(UnexpectedSnafu {
145145
msg: format!(
146-
"Row index not found for column '{}' (index {})",
147-
column, column_idx
146+
"Row index not found for column '{column}' (index {column_idx})",
148147
),
149148
})?;
150149

151150
// Evaluate each row group
152-
for row_group_idx in 0..col_index.num_row_groups() {
151+
for (row_group_idx, result_item) in result
152+
.iter_mut()
153+
.enumerate()
154+
.take(col_index.num_row_groups())
155+
{
153156
let entry = col_index.entry(row_group_idx);
154157
let entry = entry.context(UnexpectedSnafu {
155158
msg: format!(
156-
"Row group entry not found for column {}, row group {}",
157-
column_idx, row_group_idx
159+
"Row group entry not found for column {column_idx}, row group {row_group_idx}",
158160
),
159161
})?;
160162

161163
// Get statistics for this row group
162164
if let Some(stats) = &entry.statistics {
163165
let matches = evaluate_comparison_with_stats(stats, op, value)?;
164-
result[row_group_idx] = matches;
166+
*result_item = matches;
165167
} else {
166168
// No statistics available, keep row group (maybe)
167-
result[row_group_idx] = true;
169+
*result_item = true;
168170
}
169171
}
170172

@@ -428,19 +430,22 @@ fn evaluate_is_null(
428430
let column_idx = find_column_index(schema, column)?;
429431
let col_index = row_index.column(column_idx).context(UnexpectedSnafu {
430432
msg: format!(
431-
"Row index not found for column '{}' (index {})",
432-
column, column_idx
433+
"Row index not found for column '{column}' (index {column_idx})",
433434
),
434435
})?;
435436

436-
for row_group_idx in 0..col_index.num_row_groups() {
437+
for (row_group_idx, result_item) in result
438+
.iter_mut()
439+
.enumerate()
440+
.take(col_index.num_row_groups())
441+
{
437442
if let Some(entry) = col_index.entry(row_group_idx) {
438443
if let Some(stats) = &entry.statistics {
439444
// IS NULL: keep if has_null is true
440-
result[row_group_idx] = stats.has_null();
445+
*result_item = stats.has_null();
441446
} else {
442447
// No statistics, keep row group (maybe)
443-
result[row_group_idx] = true;
448+
*result_item = true;
444449
}
445450
}
446451
}
@@ -457,19 +462,22 @@ fn evaluate_is_not_null(
457462
let column_idx = find_column_index(schema, column)?;
458463
let col_index = row_index.column(column_idx).context(UnexpectedSnafu {
459464
msg: format!(
460-
"Row index not found for column '{}' (index {})",
461-
column, column_idx
465+
"Row index not found for column '{column}' (index {column_idx})",
462466
),
463467
})?;
464468

465-
for row_group_idx in 0..col_index.num_row_groups() {
469+
for (row_group_idx, result_item) in result
470+
.iter_mut()
471+
.enumerate()
472+
.take(col_index.num_row_groups())
473+
{
466474
if let Some(entry) = col_index.entry(row_group_idx) {
467475
if let Some(stats) = &entry.statistics {
468476
// IS NOT NULL: keep if number_of_values > 0 (has non-null values)
469-
result[row_group_idx] = stats.number_of_values() > 0;
477+
*result_item = stats.number_of_values() > 0;
470478
} else {
471479
// No statistics, keep row group (maybe)
472-
result[row_group_idx] = true;
480+
*result_item = true;
473481
}
474482
}
475483
}
@@ -479,7 +487,6 @@ fn evaluate_is_not_null(
479487

480488
#[cfg(test)]
481489
mod tests {
482-
use super::*;
483490
use crate::row_index::{RowGroupEntry, RowGroupIndex, StripeRowIndex};
484491
use crate::statistics::ColumnStatistics;
485492
use crate::proto;
@@ -496,28 +503,32 @@ mod tests {
496503
let age_entries = vec![
497504
RowGroupEntry::new(
498505
Some({
499-
let mut proto_stats = proto::ColumnStatistics::default();
500-
proto_stats.number_of_values = Some(5000);
501-
proto_stats.has_null = Some(false);
502-
proto_stats.int_statistics = Some(proto::IntegerStatistics {
503-
minimum: Some(18),
504-
maximum: Some(25),
505-
sum: Some(107500),
506-
});
506+
let proto_stats = proto::ColumnStatistics {
507+
number_of_values: Some(5000),
508+
has_null: Some(false),
509+
int_statistics: Some(proto::IntegerStatistics {
510+
minimum: Some(18),
511+
maximum: Some(25),
512+
sum: Some(107500),
513+
}),
514+
..Default::default()
515+
};
507516
ColumnStatistics::try_from(&proto_stats).unwrap()
508517
}),
509518
vec![],
510519
),
511520
RowGroupEntry::new(
512521
Some({
513-
let mut proto_stats = proto::ColumnStatistics::default();
514-
proto_stats.number_of_values = Some(5000);
515-
proto_stats.has_null = Some(false);
516-
proto_stats.int_statistics = Some(proto::IntegerStatistics {
517-
minimum: Some(26),
518-
maximum: Some(65),
519-
sum: Some(227500),
520-
});
522+
let proto_stats = proto::ColumnStatistics {
523+
number_of_values: Some(5000),
524+
has_null: Some(false),
525+
int_statistics: Some(proto::IntegerStatistics {
526+
minimum: Some(26),
527+
maximum: Some(65),
528+
sum: Some(227500),
529+
}),
530+
..Default::default()
531+
};
521532
ColumnStatistics::try_from(&proto_stats).unwrap()
522533
}),
523534
vec![],

src/row_index.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ impl StripeRowIndex {
152152
if self.rows_per_group == 0 {
153153
return 0;
154154
}
155-
(self.total_rows + self.rows_per_group - 1) / self.rows_per_group
155+
self.total_rows.div_ceil(self.rows_per_group)
156156
}
157157

158158
/// Get statistics for a specific row group and column
@@ -196,7 +196,7 @@ fn parse_row_index(
196196
let statistics = entry
197197
.statistics
198198
.as_ref()
199-
.map(|s| ColumnStatistics::try_from(s))
199+
.map(ColumnStatistics::try_from)
200200
.transpose()?;
201201
Ok(RowGroupEntry::new(statistics, entry.positions.clone()))
202202
})

0 commit comments

Comments
 (0)