Skip to content

Commit 51ebbc3

Browse files
committed
make test consistent with others in the file
1 parent 2be791c commit 51ebbc3

1 file changed

Lines changed: 32 additions & 55 deletions

File tree

spark/src/test/scala/org/apache/comet/exec/CometNativeReaderSuite.scala

Lines changed: 32 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
package org.apache.comet.exec
2121

22-
import org.apache.hadoop.fs.Path
2322
import org.scalactic.source.Position
2423
import org.scalatest.Tag
2524

@@ -294,8 +293,8 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper
294293
test("native reader - read a STRUCT subfield - field from second") {
295294
testSingleLineQuery(
296295
"""
297-
|select 1 a, named_struct('a', 1, 'b', 'n') c0
298-
|""".stripMargin,
296+
|select 1 a, named_struct('a', 1, 'b', 'n') c0
297+
|""".stripMargin,
299298
"select c0.b from tbl")
300299
}
301300

@@ -599,61 +598,39 @@ class CometNativeReaderSuite extends CometTestBase with AdaptiveSparkPlanHelper
599598
test("native reader - support ARRAY literal nested ARRAY fields") {
600599
testSingleLineQuery(
601600
"""
602-
|select 1 a
603-
|""".stripMargin,
601+
|select 1 a
602+
|""".stripMargin,
604603
"select array(array(1, 2, null), array(), array(10), null, array(null)) from tbl")
605604
}
606605

606+
// Regression test found during DataFusion 53 upgrade (PR #3629).
607+
// Spark's SchemaPruningSuite tests (e.g. "select a single complex field array
608+
// and in clause", "select explode of nested field of array of struct") were
609+
// failing because wrap_all_type_mismatches in Comet's schema adapter looked up
610+
// the logical field by column index instead of by name. Filter expressions
611+
// built against the pruned required_schema had "friends" at index 0, but the
612+
// full logical_file_schema had "id: Int32" at index 0.
607613
test("native reader - nested schema pruning with array of struct and filter") {
608-
// Regression test found during DataFusion 53 upgrade (PR #3629).
609-
// Spark's SchemaPruningSuite tests (e.g. "select a single complex field array
610-
// and in clause", "select explode of nested field of array of struct",
611-
// "SPARK-34638: nested column prune on generator output") were failing with:
612-
// native panic: called `Result::unwrap()` on an `Err` value:
613-
// Internal("Unexpected data type in GetArrayStructFields: Int32")
614-
// The root cause was wrap_all_type_mismatches in Comet's schema adapter
615-
// looking up the logical field by column index instead of by name. When
616-
// filter expressions are created against the pruned required_schema (where
617-
// "friends" is at index 0), the fallback would index into the full
618-
// logical_file_schema and get "id: Int32" instead of "friends: List<...>".
619-
withTempDir { dir =>
620-
val path = new Path(dir.toURI.toString, "test").toUri.toString
621-
622-
// Create a table with multiple columns so that nested schema pruning
623-
// can prune away unneeded columns. The friends column is an array of
624-
// structs with first/middle/last, but the query only needs first and middle.
625-
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
626-
spark.sql(
627-
"""
628-
|select
629-
| 0 as id,
630-
| named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name,
631-
| '123 Main Street' as address,
632-
| 1 as pets,
633-
| array(
634-
| named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith')
635-
| ) as friends
636-
|union all
637-
|select
638-
| 1 as id,
639-
| named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name,
640-
| '321 Wall Street' as address,
641-
| 3 as pets,
642-
| array(
643-
| named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones')
644-
| ) as friends
645-
|""".stripMargin).repartition(1).write.parquet(path)
646-
}
647-
648-
val schema = spark.read.parquet(path).schema
649-
650-
readParquetFile(path, Some(schema)) { df =>
651-
df.createOrReplaceTempView("tbl")
652-
}
653-
654-
val query = "select friends.middle from tbl where friends.first[0] = 'Susan'"
655-
val df = sql(query)
656-
checkSparkAnswer(df)
657-
}
614+
testSingleLineQuery(
615+
"""
616+
|select
617+
| 0 as id,
618+
| named_struct('first', 'Jane', 'middle', 'X.', 'last', 'Doe') as name,
619+
| '123 Main Street' as address,
620+
| 1 as pets,
621+
| array(
622+
| named_struct('first', 'Susan', 'middle', 'Z.', 'last', 'Smith')
623+
| ) as friends
624+
|union all
625+
|select
626+
| 1 as id,
627+
| named_struct('first', 'John', 'middle', 'Y.', 'last', 'Doe') as name,
628+
| '321 Wall Street' as address,
629+
| 3 as pets,
630+
| array(
631+
| named_struct('first', 'Alice', 'middle', 'A.', 'last', 'Jones')
632+
| ) as friends
633+
|""".stripMargin,
634+
"select friends.middle from tbl where friends.first[0] = 'Susan'")
658635
}
659636
}

0 commit comments

Comments
 (0)