Skip to content

Commit c21e25b

Browse files
authored
Test/withtable extra properties (#539)
* test: support extra TBLPROPERTIES in withTable helper

  Adds an `extraProperties: Map[String, String]` parameter to the `withTable` test helper across all four Spark module versions (3.3, 3.4, 3.5, 4.0). Each pair is rendered as a quoted `'key' = 'value'` entry in the generated CREATE TABLE TBLPROPERTIES block, alongside the existing `engine` and `order_by` properties. This unblocks tests that need ClickHouse `settings.*` overrides (e.g. `settings.allow_nullable_key`) or connector-specific options (e.g. `clickhouse.column.data.variant_types`) that the helper previously couldn't express, forcing those tests to open-code their own CREATE/DROP cycle. The TBLPROPERTIES block is now assembled from a Seq joined with `,\n `, which also incidentally cleans up the leading blank line emitted when `sortKeys` was empty.

* test: migrate manual try/finally tests in ClickHouseWriterTestBase to withTable

  Refactors all tests in `ClickHouseWriterTestBase` that previously open-coded their CREATE DATABASE / CREATE TABLE / DROP TABLE / DROP DATABASE cycle inside a try/finally to use the shared `withTable` test helper instead.

  - spark-3.3, spark-3.4, spark-3.5: migrates the `write StructType - full round-trip with Spark-created table` test.
  - spark-4.0: additionally migrates the StructType round-trip test plus every VariantType write test in the file (simple/nested/arrays/NULLs/round-trip, Array/Map/Struct/Array-of-Struct of VariantType, and the five `variant_types` option variants), all of which need the new `extraProperties` parameter on `withTable` to express `settings.allow_nullable_key` and the connector-specific `clickhouse.column.data.variant_types` table property. For the three tests that require ClickHouse's experimental JSON type as a Variant subtype, the prerequisite `SET allow_experimental_json_type = 1` is hoisted to immediately before the `withTable(...)` call (it's a Spark session conf, so it remains in effect for the helper's CREATE TABLE).
The PROBE debug block in `write Map of String to VariantType` is preserved as-is (only `$db`/`$tbl` rewritten to `$actualDb`/`$actualTbl`). No tests are added or removed; this is a pure migration to the helper.
1 parent 4f81e20 commit c21e25b

8 files changed

Lines changed: 193 additions & 394 deletions

File tree

spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseWriterTestBase.scala

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -974,11 +974,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
974974
}
975975

976976
test("write StructType - full round-trip with Spark-created table") {
977-
// This test verifies the complete cycle: Spark creates table, writes data, reads it back
978-
val db = "test_db"
979-
val tbl = "test_struct_roundtrip"
980-
981-
// Create schema with various struct configurations
982977
val schema = StructType(
983978
StructField("id", LongType, false) ::
984979
StructField(
@@ -1001,24 +996,7 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
1001996
) :: Nil
1002997
)
1003998

1004-
try {
1005-
// Create database via Spark
1006-
spark.sql(s"CREATE DATABASE IF NOT EXISTS $db")
1007-
1008-
// Create table via Spark with explicit schema
1009-
spark.sql(
1010-
s"""CREATE TABLE IF NOT EXISTS $db.$tbl (
1011-
| id BIGINT NOT NULL,
1012-
| user_info STRUCT<name: STRING, age: INT, active: BOOLEAN> NOT NULL,
1013-
| metadata STRUCT<created: DATE, score: DOUBLE, tags: ARRAY<STRING>> NOT NULL
1014-
|) USING clickhouse
1015-
|TBLPROPERTIES (
1016-
| engine = 'MergeTree()',
1017-
| order_by = 'id'
1018-
|)
1019-
|""".stripMargin
1020-
)
1021-
// Write data via Spark
999+
withTable("test_db", "test_struct_roundtrip", schema) { (actualDb, actualTbl) =>
10221000
val writeData = Seq(
10231001
Row(
10241002
1L,
@@ -1042,22 +1020,19 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10421020
schema
10431021
)
10441022

1045-
writeDF.writeTo(s"$db.$tbl").append()
1023+
writeDF.writeTo(s"$actualDb.$actualTbl").append()
10461024

1047-
// Read data back via Spark
1048-
val readDF = spark.table(s"$db.$tbl").sort("id")
1025+
val readDF = spark.table(s"$actualDb.$actualTbl").sort("id")
10491026
val result = readDF.collect()
10501027

1051-
// Verify schema structure (note: array containsNull may change to true in ClickHouse)
1028+
// Note: array containsNull may change to true in ClickHouse
10521029
assert(readDF.schema.fields.length === schema.fields.length)
10531030
assert(readDF.schema.fields(0).name === "id")
10541031
assert(readDF.schema.fields(1).name === "user_info")
10551032
assert(readDF.schema.fields(2).name === "metadata")
10561033

1057-
// Verify data integrity
10581034
assert(result.length === 3)
10591035

1060-
// Verify first row
10611036
val row0 = result(0)
10621037
assert(row0.getLong(0) === 1L)
10631038
val userInfo0 = row0.getStruct(1)
@@ -1069,7 +1044,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10691044
assert(metadata0.getDouble(1) === 95.5)
10701045
assert(metadata0.getSeq[String](2) === Seq("premium", "verified"))
10711046

1072-
// Verify second row
10731047
val row1 = result(1)
10741048
assert(row1.getLong(0) === 2L)
10751049
val userInfo1 = row1.getStruct(1)
@@ -1081,7 +1055,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10811055
assert(metadata1.getDouble(1) === 87.3)
10821056
assert(metadata1.getSeq[String](2) === Seq("basic"))
10831057

1084-
// Verify third row
10851058
val row2 = result(2)
10861059
assert(row2.getLong(0) === 3L)
10871060
val userInfo2 = row2.getStruct(1)
@@ -1092,11 +1065,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10921065
assert(metadata2.getDate(0) === date("2024-03-10"))
10931066
assert(metadata2.getDouble(1) === 92.1)
10941067
assert(metadata2.getSeq[String](2) === Seq("premium", "admin", "verified"))
1095-
1096-
} finally {
1097-
// Clean up: drop table and database
1098-
spark.sql(s"DROP TABLE IF EXISTS $db.$tbl")
1099-
spark.sql(s"DROP DATABASE IF EXISTS $db")
11001068
}
11011069
}
11021070

spark-3.3/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/SparkClickHouseSingleTest.scala

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,22 +105,30 @@ trait SparkClickHouseSingleTest extends SparkTest with ClickHouseProvider
105105
schema: StructType,
106106
engine: String = "MergeTree()",
107107
sortKeys: Seq[String] = "id" :: Nil,
108-
partKeys: Seq[String] = Seq.empty
108+
partKeys: Seq[String] = Seq.empty,
109+
extraProperties: Map[String, String] = Map.empty
109110
)(f: (String, String) => Unit): Unit = {
110111
val actualDb = if (useSuiteLevelDatabase) testDatabaseName else db
111112
try {
112113
if (!useSuiteLevelDatabase) {
113114
runClickHouseSQL(s"CREATE DATABASE IF NOT EXISTS `$actualDb`")
114115
}
115116

117+
val tblProperties: String = {
118+
val props =
119+
sortKeys.headOption.map(_ => s"order_by = '${sortKeys.mkString(", ")}'").toSeq ++
120+
Seq(s"engine = '$engine'") ++
121+
extraProperties.toSeq.map { case (k, v) => s"'$k' = '$v'" }
122+
props.mkString(",\n ")
123+
}
124+
116125
spark.sql(
117126
s"""CREATE TABLE `$actualDb`.`$tbl` (
118127
| ${schema.fields.map(_.toDDL).mkString(",\n ")}
119128
|) USING ClickHouse
120129
|${if (partKeys.isEmpty) "" else partKeys.mkString("PARTITIONED BY(", ", ", ")")}
121130
|TBLPROPERTIES (
122-
| ${if (sortKeys.isEmpty) "" else sortKeys.mkString("order_by = '", ", ", "',")}
123-
| engine = '$engine'
131+
| $tblProperties
124132
|)
125133
|""".stripMargin
126134
)

spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseWriterTestBase.scala

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -974,11 +974,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
974974
}
975975

976976
test("write StructType - full round-trip with Spark-created table") {
977-
// This test verifies the complete cycle: Spark creates table, writes data, reads it back
978-
val db = "test_db"
979-
val tbl = "test_struct_roundtrip"
980-
981-
// Create schema with various struct configurations
982977
val schema = StructType(
983978
StructField("id", LongType, false) ::
984979
StructField(
@@ -1001,24 +996,7 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
1001996
) :: Nil
1002997
)
1003998

1004-
try {
1005-
// Create database via Spark
1006-
spark.sql(s"CREATE DATABASE IF NOT EXISTS $db")
1007-
1008-
// Create table via Spark with explicit schema
1009-
spark.sql(
1010-
s"""CREATE TABLE IF NOT EXISTS $db.$tbl (
1011-
| id BIGINT NOT NULL,
1012-
| user_info STRUCT<name: STRING, age: INT, active: BOOLEAN> NOT NULL,
1013-
| metadata STRUCT<created: DATE, score: DOUBLE, tags: ARRAY<STRING>> NOT NULL
1014-
|) USING clickhouse
1015-
|TBLPROPERTIES (
1016-
| engine = 'MergeTree()',
1017-
| order_by = 'id'
1018-
|)
1019-
|""".stripMargin
1020-
)
1021-
// Write data via Spark
999+
withTable("test_db", "test_struct_roundtrip", schema) { (actualDb, actualTbl) =>
10221000
val writeData = Seq(
10231001
Row(
10241002
1L,
@@ -1042,22 +1020,19 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10421020
schema
10431021
)
10441022

1045-
writeDF.writeTo(s"$db.$tbl").append()
1023+
writeDF.writeTo(s"$actualDb.$actualTbl").append()
10461024

1047-
// Read data back via Spark
1048-
val readDF = spark.table(s"$db.$tbl").sort("id")
1025+
val readDF = spark.table(s"$actualDb.$actualTbl").sort("id")
10491026
val result = readDF.collect()
10501027

1051-
// Verify schema structure (note: array containsNull may change to true in ClickHouse)
1028+
// Note: array containsNull may change to true in ClickHouse
10521029
assert(readDF.schema.fields.length === schema.fields.length)
10531030
assert(readDF.schema.fields(0).name === "id")
10541031
assert(readDF.schema.fields(1).name === "user_info")
10551032
assert(readDF.schema.fields(2).name === "metadata")
10561033

1057-
// Verify data integrity
10581034
assert(result.length === 3)
10591035

1060-
// Verify first row
10611036
val row0 = result(0)
10621037
assert(row0.getLong(0) === 1L)
10631038
val userInfo0 = row0.getStruct(1)
@@ -1069,7 +1044,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10691044
assert(metadata0.getDouble(1) === 95.5)
10701045
assert(metadata0.getSeq[String](2) === Seq("premium", "verified"))
10711046

1072-
// Verify second row
10731047
val row1 = result(1)
10741048
assert(row1.getLong(0) === 2L)
10751049
val userInfo1 = row1.getStruct(1)
@@ -1081,7 +1055,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10811055
assert(metadata1.getDouble(1) === 87.3)
10821056
assert(metadata1.getSeq[String](2) === Seq("basic"))
10831057

1084-
// Verify third row
10851058
val row2 = result(2)
10861059
assert(row2.getLong(0) === 3L)
10871060
val userInfo2 = row2.getStruct(1)
@@ -1092,11 +1065,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10921065
assert(metadata2.getDate(0) === date("2024-03-10"))
10931066
assert(metadata2.getDouble(1) === 92.1)
10941067
assert(metadata2.getSeq[String](2) === Seq("premium", "admin", "verified"))
1095-
1096-
} finally {
1097-
// Clean up: drop table and database
1098-
spark.sql(s"DROP TABLE IF EXISTS $db.$tbl")
1099-
spark.sql(s"DROP DATABASE IF EXISTS $db")
11001068
}
11011069
}
11021070
}

spark-3.4/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/SparkClickHouseSingleTest.scala

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,22 +104,30 @@ trait SparkClickHouseSingleTest extends SparkTest with ClickHouseProvider with B
104104
schema: StructType,
105105
engine: String = "MergeTree()",
106106
sortKeys: Seq[String] = "id" :: Nil,
107-
partKeys: Seq[String] = Seq.empty
107+
partKeys: Seq[String] = Seq.empty,
108+
extraProperties: Map[String, String] = Map.empty
108109
)(f: (String, String) => Unit): Unit = {
109110
val actualDb = if (useSuiteLevelDatabase) testDatabaseName else db
110111
try {
111112
if (!useSuiteLevelDatabase) {
112113
runClickHouseSQL(s"CREATE DATABASE IF NOT EXISTS `$actualDb`")
113114
}
114115

116+
val tblProperties: String = {
117+
val props =
118+
sortKeys.headOption.map(_ => s"order_by = '${sortKeys.mkString(", ")}'").toSeq ++
119+
Seq(s"engine = '$engine'") ++
120+
extraProperties.toSeq.map { case (k, v) => s"'$k' = '$v'" }
121+
props.mkString(",\n ")
122+
}
123+
115124
spark.sql(
116125
s"""CREATE TABLE `$actualDb`.`$tbl` (
117126
| ${schema.fields.map(_.toDDL).mkString(",\n ")}
118127
|) USING ClickHouse
119128
|${if (partKeys.isEmpty) "" else partKeys.mkString("PARTITIONED BY(", ", ", ")")}
120129
|TBLPROPERTIES (
121-
| ${if (sortKeys.isEmpty) "" else sortKeys.mkString("order_by = '", ", ", "',")}
122-
| engine = '$engine'
130+
| $tblProperties
123131
|)
124132
|""".stripMargin
125133
)

spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/ClickHouseWriterTestBase.scala

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -974,11 +974,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
974974
}
975975

976976
test("write StructType - full round-trip with Spark-created table") {
977-
// This test verifies the complete cycle: Spark creates table, writes data, reads it back
978-
val db = "test_db"
979-
val tbl = "test_struct_roundtrip"
980-
981-
// Create schema with various struct configurations
982977
val schema = StructType(
983978
StructField("id", LongType, false) ::
984979
StructField(
@@ -1001,24 +996,7 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
1001996
) :: Nil
1002997
)
1003998

1004-
try {
1005-
// Create database via Spark
1006-
spark.sql(s"CREATE DATABASE IF NOT EXISTS $db")
1007-
1008-
// Create table via Spark with explicit schema
1009-
spark.sql(
1010-
s"""CREATE TABLE IF NOT EXISTS $db.$tbl (
1011-
| id BIGINT NOT NULL,
1012-
| user_info STRUCT<name: STRING, age: INT, active: BOOLEAN> NOT NULL,
1013-
| metadata STRUCT<created: DATE, score: DOUBLE, tags: ARRAY<STRING>> NOT NULL
1014-
|) USING clickhouse
1015-
|TBLPROPERTIES (
1016-
| engine = 'MergeTree()',
1017-
| order_by = 'id'
1018-
|)
1019-
|""".stripMargin
1020-
)
1021-
// Write data via Spark
999+
withTable("test_db", "test_struct_roundtrip", schema) { (actualDb, actualTbl) =>
10221000
val writeData = Seq(
10231001
Row(
10241002
1L,
@@ -1042,22 +1020,19 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10421020
schema
10431021
)
10441022

1045-
writeDF.writeTo(s"$db.$tbl").append()
1023+
writeDF.writeTo(s"$actualDb.$actualTbl").append()
10461024

1047-
// Read data back via Spark
1048-
val readDF = spark.table(s"$db.$tbl").sort("id")
1025+
val readDF = spark.table(s"$actualDb.$actualTbl").sort("id")
10491026
val result = readDF.collect()
10501027

1051-
// Verify schema structure (note: array containsNull may change to true in ClickHouse)
1028+
// Note: array containsNull may change to true in ClickHouse
10521029
assert(readDF.schema.fields.length === schema.fields.length)
10531030
assert(readDF.schema.fields(0).name === "id")
10541031
assert(readDF.schema.fields(1).name === "user_info")
10551032
assert(readDF.schema.fields(2).name === "metadata")
10561033

1057-
// Verify data integrity
10581034
assert(result.length === 3)
10591035

1060-
// Verify first row
10611036
val row0 = result(0)
10621037
assert(row0.getLong(0) === 1L)
10631038
val userInfo0 = row0.getStruct(1)
@@ -1069,7 +1044,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10691044
assert(metadata0.getDouble(1) === 95.5)
10701045
assert(metadata0.getSeq[String](2) === Seq("premium", "verified"))
10711046

1072-
// Verify second row
10731047
val row1 = result(1)
10741048
assert(row1.getLong(0) === 2L)
10751049
val userInfo1 = row1.getStruct(1)
@@ -1081,7 +1055,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10811055
assert(metadata1.getDouble(1) === 87.3)
10821056
assert(metadata1.getSeq[String](2) === Seq("basic"))
10831057

1084-
// Verify third row
10851058
val row2 = result(2)
10861059
assert(row2.getLong(0) === 3L)
10871060
val userInfo2 = row2.getStruct(1)
@@ -1092,11 +1065,6 @@ trait ClickHouseWriterTestBase extends SparkClickHouseSingleTest {
10921065
assert(metadata2.getDate(0) === date("2024-03-10"))
10931066
assert(metadata2.getDouble(1) === 92.1)
10941067
assert(metadata2.getSeq[String](2) === Seq("premium", "admin", "verified"))
1095-
1096-
} finally {
1097-
// Clean up: drop table and database
1098-
spark.sql(s"DROP TABLE IF EXISTS $db.$tbl")
1099-
spark.sql(s"DROP DATABASE IF EXISTS $db")
11001068
}
11011069
}
11021070
}

spark-3.5/clickhouse-spark-it/src/test/scala/org/apache/spark/sql/clickhouse/single/SparkClickHouseSingleTest.scala

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,22 +105,30 @@ trait SparkClickHouseSingleTest extends SparkTest with ClickHouseProvider
105105
schema: StructType,
106106
engine: String = "MergeTree()",
107107
sortKeys: Seq[String] = "id" :: Nil,
108-
partKeys: Seq[String] = Seq.empty
108+
partKeys: Seq[String] = Seq.empty,
109+
extraProperties: Map[String, String] = Map.empty
109110
)(f: (String, String) => Unit): Unit = {
110111
val actualDb = if (useSuiteLevelDatabase) testDatabaseName else db
111112
try {
112113
if (!useSuiteLevelDatabase) {
113114
runClickHouseSQL(s"CREATE DATABASE IF NOT EXISTS `$actualDb`")
114115
}
115116

117+
val tblProperties: String = {
118+
val props =
119+
sortKeys.headOption.map(_ => s"order_by = '${sortKeys.mkString(", ")}'").toSeq ++
120+
Seq(s"engine = '$engine'") ++
121+
extraProperties.toSeq.map { case (k, v) => s"'$k' = '$v'" }
122+
props.mkString(",\n ")
123+
}
124+
116125
spark.sql(
117126
s"""CREATE TABLE `$actualDb`.`$tbl` (
118127
| ${schema.fields.map(_.toDDL).mkString(",\n ")}
119128
|) USING ClickHouse
120129
|${if (partKeys.isEmpty) "" else partKeys.mkString("PARTITIONED BY(", ", ", ")")}
121130
|TBLPROPERTIES (
122-
| ${if (sortKeys.isEmpty) "" else sortKeys.mkString("order_by = '", ", ", "',")}
123-
| engine = '$engine'
131+
| $tblProperties
124132
|)
125133
|""".stripMargin
126134
)

0 commit comments

Comments
 (0)