|
3 | 3 | // |
4 | 4 | package io.deephaven.parquet.table; |
5 | 5 |
|
| 6 | +import io.deephaven.api.RawString; |
6 | 7 | import io.deephaven.api.filter.Filter; |
7 | 8 | import io.deephaven.base.FileUtils; |
8 | 9 | import io.deephaven.engine.context.ExecutionContext; |
|
18 | 19 | import io.deephaven.engine.table.impl.select.WhereFilter; |
19 | 20 | import io.deephaven.engine.table.impl.util.ColumnHolder; |
20 | 21 | import io.deephaven.engine.table.impl.util.ImmediateJobScheduler; |
| 22 | +import io.deephaven.engine.testutil.filters.ParallelizedRowSetCapturingFilter; |
| 23 | +import io.deephaven.engine.testutil.filters.RowSetCapturingFilter; |
21 | 24 | import io.deephaven.engine.testutil.junit4.EngineCleanup; |
22 | 25 | import io.deephaven.engine.util.TableTools; |
23 | 26 | import io.deephaven.parquet.table.location.ParquetColumnResolverMap; |
|
43 | 46 | import java.time.Instant; |
44 | 47 | import java.util.*; |
45 | 48 | import java.util.concurrent.CompletableFuture; |
| 49 | +import java.util.concurrent.atomic.AtomicLong; |
46 | 50 | import java.util.function.BiFunction; |
47 | 51 |
|
48 | 52 | import static io.deephaven.base.FileUtils.convertToURI; |
@@ -136,6 +140,10 @@ private static void filterAndVerifyResults(Table diskTable, Table memTable, Stri |
136 | 140 | verifyResults(diskTable.where(filters).coalesce(), memTable.where(filters).coalesce()); |
137 | 141 | } |
138 | 142 |
|
| 143 | + private static void filterAndVerifyResults(Table diskTable, Table memTable, Filter filter) { |
| 144 | + verifyResults(diskTable.where(filter).coalesce(), memTable.where(filter).coalesce()); |
| 145 | + } |
| 146 | + |
139 | 147 | private static void filterAndVerifyResults(Table diskTable, Table memTable, WhereFilter filter) { |
140 | 148 | verifyResults(diskTable.where(filter).coalesce(), memTable.where(filter).coalesce()); |
141 | 149 | } |
@@ -412,6 +420,83 @@ public void flatPartitionsNoDataIndexAllNullTest() { |
412 | 420 | filterAndVerifyResultsAllowEmpty(diskTable, memTable, "boolean_col = true"); |
413 | 421 | } |
414 | 422 |
|
| 423 | + // New test with custom function counting invocations |
| 424 | + @Test |
| 425 | + public void partitionedDataSerialFilterTest() { |
| 426 | + final String destPath = Path.of(rootFile.getPath(), "ParquetTest_kvPartitionsSerialTest").toString(); |
| 427 | + final int tableSize = 1_000_000; |
| 428 | + |
| 429 | + final Instant baseTime = parseInstant("2023-01-01T00:00:00 NY"); |
| 430 | + QueryScope.addParam("baseTime", baseTime); |
| 431 | + |
| 432 | + final Table largeTable = TableTools.emptyTable(tableSize).update( |
| 433 | + "symbol = ii % 100", |
| 434 | + "sequential_val = ii"); |
| 435 | + |
| 436 | + final PartitionedTable partitionedTable = largeTable.partitionBy("symbol"); |
| 437 | + ParquetTools.writeKeyValuePartitionedTable(partitionedTable, destPath, EMPTY); |
| 438 | + |
| 439 | + final Table diskTable = ParquetTools.readTable(destPath); |
| 440 | + final Table memTable = diskTable.select(); |
| 441 | + |
| 442 | + assertTableEquals(diskTable, memTable); |
| 443 | + |
| 444 | + final AtomicLong invocationCount = new AtomicLong(); |
| 445 | + QueryScope.addParam("invocationCount", invocationCount); |
| 446 | + |
| 447 | + final Filter partitionFilter = RawString.of("symbol >= 0 && invocationCount.incrementAndGet() >= 0"); |
| 448 | + final Filter serialPartitionFilter = partitionFilter.withSerial(); |
| 449 | + |
| 450 | + final Filter nonPartitionFilter = |
| 451 | + RawString.of("sequential_val >= 0 && invocationCount.incrementAndGet() >= 0"); |
| 452 | + final Filter serialNonPartitionFilter = nonPartitionFilter.withSerial(); |
| 453 | + |
| 454 | + Table result; |
| 455 | + |
| 456 | + // Test non-serial partition filter |
| 457 | + assertEquals(0L, invocationCount.get()); |
| 458 | + result = diskTable.where(partitionFilter).coalesce(); |
| 459 | + assertEquals(100L, invocationCount.get()); // one per partition |
| 460 | + // Verify the table contents are equivalent |
| 461 | + assertTableEquals(result, diskTable.coalesce().where(partitionFilter)); |
| 462 | + |
| 463 | + // Test serial partition filter |
| 464 | + invocationCount.set(0); |
| 465 | + assertEquals(0L, invocationCount.get()); |
| 466 | + result = diskTable.where(serialPartitionFilter).coalesce(); |
| 467 | + assertEquals(1_000_000L, invocationCount.get()); // one per row |
| 468 | + // Verify the table contents are equivalent |
| 469 | + assertTableEquals(result, diskTable.coalesce().where(serialPartitionFilter)); |
| 470 | + |
| 471 | + // Test non-serial non-partition filter |
| 472 | + invocationCount.set(0); |
| 473 | + assertEquals(0L, invocationCount.get()); |
| 474 | + result = diskTable.where(nonPartitionFilter).coalesce(); |
| 475 | + assertEquals(1_000_000L, invocationCount.get()); // one per row |
| 476 | + // Verify the table contents are equivalent |
| 477 | + assertTableEquals(result, diskTable.coalesce().where(nonPartitionFilter)); |
| 478 | + |
| 479 | + // Test serial non-partition filter |
| 480 | + invocationCount.set(0); |
| 481 | + assertEquals(0L, invocationCount.get()); |
| 482 | + result = diskTable.where(serialNonPartitionFilter).coalesce(); |
| 483 | + assertEquals(1_000_000L, invocationCount.get()); // one per row |
| 484 | + // Verify the table contents are equivalent |
| 485 | + assertTableEquals(result, diskTable.coalesce().where(serialNonPartitionFilter)); |
| 486 | + |
| 487 | + // Test stateless partition filter |
| 488 | + final RowSetCapturingFilter statelessPartitionFilter = |
| 489 | + new ParallelizedRowSetCapturingFilter(RawString.of("symbol >= 0")); |
| 490 | + result = diskTable.where(statelessPartitionFilter).coalesce(); |
| 491 | + assertEquals(100, statelessPartitionFilter.numRowsProcessed()); // one per partition |
| 492 | + |
| 493 | + // Test stateless non-partition filter |
| 494 | + final RowSetCapturingFilter statelessNonPartitionFilter = |
| 495 | + new ParallelizedRowSetCapturingFilter(RawString.of("sequential_val >= 0")); |
| 496 | + result = diskTable.where(statelessNonPartitionFilter).coalesce(); |
| 497 | + assertEquals(1_000_000, statelessNonPartitionFilter.numRowsProcessed()); // one per row |
| 498 | + } |
| 499 | + |
415 | 500 | @Test |
416 | 501 | public void partitionedNoDataIndexTest() { |
417 | 502 | final String destPath = Path.of(rootFile.getPath(), "ParquetTest_kvPartitionsTest").toString(); |
@@ -442,6 +527,18 @@ public void partitionedNoDataIndexTest() { |
442 | 527 | filterAndVerifyResults(diskTable, memTable, "symbol < `s100`"); |
443 | 528 | filterAndVerifyResults(diskTable, memTable, "symbol = `s500`"); |
444 | 529 |
|
| 530 | + // Conditional on partition column |
| 531 | + filterAndVerifyResults(diskTable, memTable, "symbol = `s` + `500`"); |
| 532 | + // Serial conditional on partition column |
| 533 | + filterAndVerifyResults(diskTable, memTable, |
| 534 | + Filter.serial(Filter.and(Filter.from("symbol = `s` + `500`")))); |
| 535 | + |
| 536 | + // Conditional on non-partition column |
| 537 | + filterAndVerifyResults(diskTable, memTable, "sequential_val >= 50 + 1"); |
| 538 | + // Serial conditional on non-partition column |
| 539 | + filterAndVerifyResults(diskTable, memTable, |
| 540 | + Filter.serial(Filter.and(Filter.from("sequential_val >= 50 + 1")))); |
| 541 | + |
445 | 542 | // Timestamp range and match filters |
446 | 543 | filterAndVerifyResults(diskTable, memTable, "Timestamp < '2023-01-02T00:00:00 NY'"); |
447 | 544 | filterAndVerifyResults(diskTable, memTable, "Timestamp = '2023-01-02T00:00:00 NY'"); |
|
0 commit comments