@@ -247,12 +247,15 @@ void decode_index_field(
247
247
void handle_truncation (
248
248
Column& dest_column,
249
249
const ColumnTruncation& truncate) {
250
- if (dest_column.num_blocks () == 1 && truncate.start_ && truncate.end_ )
250
+ if (dest_column.num_blocks () == 1 && truncate.start_ && truncate.end_ ) {
251
251
dest_column.truncate_single_block (*truncate.start_ , *truncate.end_ );
252
- else if (truncate.start_ )
253
- dest_column.truncate_first_block (*truncate.start_ );
254
- else if (truncate.end_ )
255
- dest_column.truncate_last_block (*truncate.end_ );
252
+ }
253
+ else {
254
+ if (truncate.start_ )
255
+ dest_column.truncate_first_block (*truncate.start_ );
256
+ if (truncate.end_ )
257
+ dest_column.truncate_last_block (*truncate.end_ );
258
+ }
256
259
}
257
260
258
261
void handle_truncation (
@@ -329,36 +332,40 @@ void decode_or_expand(
329
332
template <typename IndexValueType>
330
333
ColumnTruncation get_truncate_range_from_index (
331
334
const Column& column,
332
- const IndexValueType& start,
333
- const IndexValueType& end,
334
- std::optional<int64_t > start_offset = std::nullopt,
335
- std::optional<int64_t > end_offset = std::nullopt) {
336
- int64_t start_row = column.search_sorted <IndexValueType>(start, false , start_offset, end_offset);
337
- int64_t end_row = column.search_sorted <IndexValueType>(end, true , start_offset, end_offset);
335
+ const IndexValueType& filter_start,
336
+ const IndexValueType& filter_end,
337
+ int64_t start_col_offset,
338
+ int64_t end_col_offset) {
339
+ // search_sorted expects inclusive end_col_offset
340
+ auto inclusive_end_col_offset = end_col_offset - 1 ;
341
+ int64_t start_row = column.search_sorted <IndexValueType>(filter_start, false , start_col_offset, inclusive_end_col_offset);
342
+ int64_t end_row = column.search_sorted <IndexValueType>(filter_end, true , start_col_offset, inclusive_end_col_offset);
343
+
338
344
std::optional<int64_t > truncate_start;
339
345
std::optional<int64_t > truncate_end;
340
- if ((start_offset && start_row != *start_offset) || (!start_offset && start_row > 0 ) )
346
+ if (start_row != start_col_offset )
341
347
truncate_start = start_row;
342
348
343
- if ((end_offset && end_row != *end_offset) || (!end_offset && end_row < column. row_count () - 1 ) )
349
+ if (end_row != end_col_offset )
344
350
truncate_end = end_row;
345
351
346
352
return {truncate_start, truncate_end};
347
353
}
348
354
349
- std::pair<std::optional< int64_t >, std::optional< int64_t >> get_truncate_range_from_rows (
350
- const RowRange& row_range ,
351
- size_t start_offset ,
352
- size_t end_offset ) {
355
+ ColumnTruncation get_truncate_range_from_rows (
356
+ const RowRange& slice_range ,
357
+ size_t row_filter_start ,
358
+ size_t row_filter_end ) {
353
359
std::optional<int64_t > truncate_start;
354
360
std::optional<int64_t > truncate_end;
355
- if (contains (row_range, start_offset))
356
- truncate_start = start_offset;
361
+ // TODO: Explain
362
+ if (contains (slice_range, row_filter_start) && row_filter_start != slice_range.start ())
363
+ truncate_start = row_filter_start;
357
364
358
- if (contains (row_range, end_offset ))
359
- truncate_end = end_offset ;
365
+ if (contains (slice_range, row_filter_end) && row_filter_end != slice_range. start ( ))
366
+ truncate_end = row_filter_end ;
360
367
361
- return std::make_pair ( truncate_start, truncate_end) ;
368
+ return { truncate_start, truncate_end} ;
362
369
}
363
370
364
371
ColumnTruncation get_truncate_range (
@@ -370,32 +377,39 @@ ColumnTruncation get_truncate_range(
370
377
const EncodedFieldImpl& index_field,
371
378
const uint8_t * index_field_offset) {
372
379
ColumnTruncation truncate_rows;
380
+ const auto & slice_row_range = context.slice_and_key ().slice ().row_range ;
381
+ const auto & first_row_offset = frame.offset ();
382
+ auto column_slice_row_range = RowRange (slice_row_range.first - first_row_offset, slice_row_range.second - first_row_offset);
373
383
if (read_options.output_format () == OutputFormat::ARROW) {
374
384
util::variant_match (read_query.row_filter ,
375
- [&truncate_rows, &frame, &context, &index_field, index_field_offset, encoding_version] (const IndexRange& index_range ) {
376
- const auto & time_range = static_cast <const TimestampRange&>(index_range );
385
+ [&truncate_rows, &column_slice_row_range, & frame, &context, &index_field, index_field_offset, encoding_version] (const IndexRange& index_filter ) {
386
+ const auto & time_filter = static_cast <const TimestampRange&>(index_filter );
377
387
const auto & slice_time_range = context.slice_and_key ().key ().time_range ();
378
- if (contains (slice_time_range, time_range.first ) || contains (slice_time_range, time_range.second )) {
388
+ // `get_truncate_range_from_index` is O(log n). The `contains` checks serve to avoid that expensive
389
+ // O(log n) search for blocks in the middle of the range
390
+ if (contains (slice_time_range, time_filter.first ) || contains (slice_time_range, time_filter.second )) {
379
391
if (context.fetch_index ()) {
380
392
const auto & index_column = frame.column (0 );
381
- truncate_rows = get_truncate_range_from_index (index_column, time_range .first , time_range .second );
393
+ truncate_rows = get_truncate_range_from_index (index_column, time_filter .first , time_filter. second , column_slice_row_range. first , column_slice_row_range .second );
382
394
} else {
383
395
const auto & frame_index_desc = frame.descriptor ().fields (0UL );
384
396
Column sink{frame_index_desc.type (), encoding_sizes::field_uncompressed_size (index_field), AllocationType::PRESIZED, Sparsity::PERMITTED};
385
397
std::optional<util::BitMagic> bv;
386
398
(void )decode_field (frame_index_desc.type (), index_field, index_field_offset, sink, bv, encoding_version);
387
- truncate_rows = get_truncate_range_from_index (sink, time_range .first , time_range .second );
399
+ truncate_rows = get_truncate_range_from_index (sink, time_filter .first , time_filter. second , column_slice_row_range. first , column_slice_row_range .second );
388
400
}
389
401
}
390
402
},
391
- [&context] (const RowRange& row_range) {
392
- const auto & slice_row_range = context.slice_and_key ().slice ().row_range ;
393
- get_truncate_range_from_rows (row_range, slice_row_range.start (), slice_row_range.end ());
403
+ [&truncate_rows, &column_slice_row_range, &first_row_offset] (const RowRange& row_filter) {
404
+ // The row_filter is with respect to global offset. Column truncation cares about column indices.
405
+ auto row_filter_start = row_filter.first - first_row_offset;
406
+ auto row_filter_end = row_filter.second - first_row_offset;
407
+ truncate_rows = get_truncate_range_from_rows (column_slice_row_range, row_filter_start, row_filter_end);
394
408
},
395
409
[] (const auto &) {
396
410
// Do nothing
397
411
});
398
- }
412
+ }
399
413
return truncate_rows;
400
414
};
401
415
0 commit comments