|
19 | 19 |
|
20 | 20 | package org.apache.comet.exec |
21 | 21 |
|
22 | | -import org.apache.hadoop.fs.Path |
23 | 22 | import org.scalactic.source.Position |
24 | 23 | import org.scalatest.Tag |
25 | 24 |
|
@@ -294,8 +293,8 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper |
294 | 293 | test("native reader - read a STRUCT subfield - field from second") { |
295 | 294 | testSingleLineQuery( |
296 | 295 | """ |
297 | | - |select 1 a, named_struct('a', 1, 'b', 'n') c0 |
298 | | - |""".stripMargin, |
| 296 | + |select 1 a, named_struct('a', 1, 'b', 'n') c0 |
| 297 | + |""".stripMargin, |
299 | 298 | "select c0.b from tbl") |
300 | 299 | } |
301 | 300 |
|
@@ -599,61 +598,39 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper |
599 | 598 | test("native reader - support ARRAY literal nested ARRAY fields") { |
600 | 599 | testSingleLineQuery( |
601 | 600 | """ |
602 | | - |select 1 a |
603 | | - |""".stripMargin, |
| 601 | + |select 1 a |
| 602 | + |""".stripMargin, |
604 | 603 | "select array(array(1, 2, null), array(), array(10), null, array(null)) from tbl") |
605 | 604 | } |
606 | 605 |
|
| 606 | + // Regression test found during DataFusion 53 upgrade (PR #3629). |
| 607 | + // Spark's SchemaPruningSuite tests (e.g. "select a single complex field array |
| 608 | + // and in clause", "select explode of nested field of array of struct") were |
| 609 | + // failing because wrap_all_type_mismatches in Comet's schema adapter looked up |
| 610 | + // the logical field by column index instead of by name. Filter expressions |
| 611 | + // built against the pruned required_schema had "friends" at index 0, but the |
| 612 | + // full logical_file_schema had "id: Int32" at index 0, so the index lookup hit the wrong field. |
607 | 613 | test("native reader - nested schema pruning with array of struct and filter") { |
608 | | - // Regression test found during DataFusion 53 upgrade (PR #3629). |
609 | | - // Spark's SchemaPruningSuite tests (e.g. "select a single complex field array |
610 | | - // and in clause", "select explode of nested field of array of struct", |
611 | | - // "SPARK-34638: nested column prune on generator output") were failing with: |
612 | | - // native panic: called `Result::unwrap()` on an `Err` value: |
613 | | - // Internal("Unexpected data type in GetArrayStructFields: Int32") |
614 | | - // The root cause was wrap_all_type_mismatches in Comet's schema adapter |
615 | | - // looking up the logical field by column index instead of by name. When |
616 | | - // filter expressions are created against the pruned required_schema (where |
617 | | - // "friends" is at index 0), the fallback would index into the full |
618 | | - // logical_file_schema and get "id: Int32" instead of "friends: List<...>". |
619 | | - withTempDir { dir => |
620 | | - val path = new Path(dir.toURI.toString, "test").toUri.toString |
621 | | - |
622 | | - // Create a table with multiple columns so that nested schema pruning |
623 | | - // can prune away unneeded columns. The friends column is an array of |
624 | | - // structs with first/middle/last, but the query only needs first and middle. |
625 | | - withSQLConf(CometConf.COMET_ENABLED.key -> "false") { |
626 | | - spark.sql( |
627 | | - """ |
628 | | - |select |
629 | | - | 0 as id, |
630 | | - | named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name, |
631 | | - | '123 Main Street' as address, |
632 | | - | 1 as pets, |
633 | | - | array( |
634 | | - | named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith') |
635 | | - | ) as friends |
636 | | - |union all |
637 | | - |select |
638 | | - | 1 as id, |
639 | | - | named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name, |
640 | | - | '321 Wall Street' as address, |
641 | | - | 3 as pets, |
642 | | - | array( |
643 | | - | named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones') |
644 | | - | ) as friends |
645 | | - |""".stripMargin).repartition(1).write.parquet(path) |
646 | | - } |
647 | | - |
648 | | - val schema = spark.read.parquet(path).schema |
649 | | - |
650 | | - readParquetFile(path, Some(schema)) { df => |
651 | | - df.createOrReplaceTempView("tbl") |
652 | | - } |
653 | | - |
654 | | - val query = "select friends.middle from tbl where friends.first[0] = 'Susan'" |
655 | | - val df = sql(query) |
656 | | - checkSparkAnswer(df) |
657 | | - } |
| 614 | + testSingleLineQuery( |
| 615 | + """ |
| 616 | + |select |
| 617 | + | 0 as id, |
| 618 | + | named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name, |
| 619 | + | '123 Main Street' as address, |
| 620 | + | 1 as pets, |
| 621 | + | array( |
| 622 | + | named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith') |
| 623 | + | ) as friends |
| 624 | + |union all |
| 625 | + |select |
| 626 | + | 1 as id, |
| 627 | + | named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name, |
| 628 | + | '321 Wall Street' as address, |
| 629 | + | 3 as pets, |
| 630 | + | array( |
| 631 | + | named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones') |
| 632 | + | ) as friends |
| 633 | + |""".stripMargin, |
| 634 | + "select friends.middle from tbl where friends.first[0] = 'Susan'") |
658 | 635 | } |
659 | 636 | } |
0 commit comments