Skip to content

Commit 0ed5832

Browse files
authored
chore(audit): audit string expressions across Spark 3.4.3, 3.5.8, 4.0.1 (#4461)
1 parent 8fa83e7 commit 0ed5832

10 files changed

Lines changed: 285 additions & 24 deletions

File tree

docs/source/contributor-guide/expression-audits/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ map_funcs <map_funcs>
4040
math_funcs <math_funcs>
4141
misc_funcs <misc_funcs>
4242
predicate_funcs <predicate_funcs>
43+
string_funcs <string_funcs>
4344
struct_funcs <struct_funcs>
4445
url_funcs <url_funcs>
4546
window_funcs <window_funcs>

docs/source/contributor-guide/expression-audits/string_funcs.md

Lines changed: 253 additions & 0 deletions
Large diffs are not rendered by default.

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,6 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
227227
classOf[StringSplit] -> CometStringSplit,
228228
classOf[StringTranslate] -> CometScalarFunction("translate"),
229229
classOf[StringTrim] -> CometScalarFunction("trim"),
230-
classOf[StringTrimBoth] -> CometScalarFunction("btrim"),
231230
classOf[StringTrimLeft] -> CometScalarFunction("ltrim"),
232231
classOf[StringTrimRight] -> CometScalarFunction("rtrim"),
233232
classOf[Left] -> CometLeft,

spark/src/main/scala/org/apache/comet/serde/strings.scala

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -349,18 +349,22 @@ object CometRLike extends CometExpressionSerde[RLike] {
349349
}
350350
}
351351

352+
private object PadReasons {
353+
val literalStrReason = "Scalar values are not supported for the `str` argument."
354+
val nonLiteralPadReason = "Only scalar values are supported for the `pad` argument."
355+
}
356+
352357
object CometStringRPad extends CometExpressionSerde[StringRPad] {
353358

354-
override def getUnsupportedReasons(): Seq[String] = Seq(
355-
"Scalar values are not supported for the `str` argument." +
356-
" Only scalar values are supported for the `pad` argument.")
359+
override def getUnsupportedReasons(): Seq[String] =
360+
Seq(PadReasons.literalStrReason, PadReasons.nonLiteralPadReason)
357361

358362
override def getSupportLevel(expr: StringRPad): SupportLevel = {
359363
if (expr.str.isInstanceOf[Literal]) {
360-
return Unsupported(Some("Scalar values are not supported for the str argument"))
364+
return Unsupported(Some(PadReasons.literalStrReason))
361365
}
362366
if (!expr.pad.isInstanceOf[Literal]) {
363-
return Unsupported(Some("Only scalar values are supported for the pad argument"))
367+
return Unsupported(Some(PadReasons.nonLiteralPadReason))
364368
}
365369
Compatible()
366370
}
@@ -380,16 +384,15 @@ object CometStringRPad extends CometExpressionSerde[StringRPad] {
380384

381385
object CometStringLPad extends CometExpressionSerde[StringLPad] {
382386

383-
override def getUnsupportedReasons(): Seq[String] = Seq(
384-
"Scalar values are not supported for the `str` argument." +
385-
" Only scalar values are supported for the `pad` argument.")
387+
override def getUnsupportedReasons(): Seq[String] =
388+
Seq(PadReasons.literalStrReason, PadReasons.nonLiteralPadReason)
386389

387390
override def getSupportLevel(expr: StringLPad): SupportLevel = {
388391
if (expr.str.isInstanceOf[Literal]) {
389-
return Unsupported(Some("Scalar values are not supported for the str argument"))
392+
return Unsupported(Some(PadReasons.literalStrReason))
390393
}
391394
if (!expr.pad.isInstanceOf[Literal]) {
392-
return Unsupported(Some("Only scalar values are supported for the pad argument"))
395+
return Unsupported(Some(PadReasons.nonLiteralPadReason))
393396
}
394397
Compatible()
395398
}

spark/src/test/resources/sql-tests/expressions/string/lower_enabled.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
-- specific language governing permissions and limitations
1616
-- under the License.
1717

18-
-- Test lower() with case conversion enabled (happy path)
19-
-- Config: spark.comet.caseConversion.enabled=true
18+
-- Test lower() with the standard allowIncompatible opt-in (happy path)
19+
-- Config: spark.comet.expression.Lower.allowIncompatible=true
2020

2121
statement
2222
CREATE TABLE test_lower_enabled(s string) USING parquet

spark/src/test/resources/sql-tests/expressions/string/string_lpad.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ CREATE TABLE test_lpad(s string, len int, pad string) USING parquet
2121
statement
2222
INSERT INTO test_lpad VALUES ('hi', 5, 'x'), ('hello', 3, 'x'), ('hi', 5, 'xy'), ('', 3, 'a'), (NULL, 5, 'x'), ('hi', 0, 'x'), ('hi', -1, 'x')
2323

24-
query expect_fallback(Only scalar values are supported for the pad argument)
24+
query expect_fallback(Only scalar values are supported for the `pad` argument)
2525
SELECT lpad(s, len, pad) FROM test_lpad
2626

2727
query
@@ -32,5 +32,5 @@ query
3232
SELECT lpad(s, 5, 'x') FROM test_lpad
3333

3434
-- literal + literal + literal
35-
query expect_fallback(Scalar values are not supported for the str argument)
35+
query expect_fallback(Scalar values are not supported for the `str` argument)
3636
SELECT lpad('hi', 5, 'x'), lpad('hello', 3, 'x'), lpad('', 3, 'a'), lpad(NULL, 5, 'x')

spark/src/test/resources/sql-tests/expressions/string/string_rpad.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ CREATE TABLE test_rpad(s string, len int, pad string) USING parquet
2121
statement
2222
INSERT INTO test_rpad VALUES ('hi', 5, 'x'), ('hello', 3, 'x'), ('hi', 5, 'xy'), ('', 3, 'a'), (NULL, 5, 'x'), ('hi', 0, 'x'), ('hi', -1, 'x')
2323

24-
query expect_fallback(Only scalar values are supported for the pad argument)
24+
query expect_fallback(Only scalar values are supported for the `pad` argument)
2525
SELECT rpad(s, len, pad) FROM test_rpad
2626

2727
query
@@ -32,5 +32,5 @@ query
3232
SELECT rpad(s, 5, 'x') FROM test_rpad
3333

3434
-- literal + literal + literal
35-
query expect_fallback(Scalar values are not supported for the str argument)
35+
query expect_fallback(Scalar values are not supported for the `str` argument)
3636
SELECT rpad('hi', 5, 'x'), rpad('hello', 3, 'x'), rpad('', 3, 'a'), rpad(NULL, 5, 'x')

spark/src/test/resources/sql-tests/expressions/string/upper_enabled.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
-- specific language governing permissions and limitations
1616
-- under the License.
1717

18-
-- Test upper() with case conversion enabled (happy path)
19-
-- Config: spark.comet.caseConversion.enabled=true
18+
-- Test upper() with the standard allowIncompatible opt-in (happy path)
19+
-- Config: spark.comet.expression.Upper.allowIncompatible=true
2020

2121
statement
2222
CREATE TABLE test_upper_enabled(s string) USING parquet

spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,11 @@ class CometStringExpressionSuite extends CometTestBase {
8989
} else if (isLiteralStr) {
9090
checkSparkAnswerAndFallbackReason(
9191
sql,
92-
"Scalar values are not supported for the str argument")
92+
"Scalar values are not supported for the `str` argument")
9393
} else if (!isLiteralPad) {
9494
checkSparkAnswerAndFallbackReason(
9595
sql,
96-
"Only scalar values are supported for the pad argument")
96+
"Only scalar values are supported for the `pad` argument")
9797
} else {
9898
checkSparkAnswerAndOperator(sql)
9999
}
@@ -261,7 +261,9 @@ class CometStringExpressionSuite extends CometTestBase {
261261
}
262262

263263
test("Upper and Lower") {
264-
withSQLConf(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") {
264+
withSQLConf(
265+
CometConf.getExprAllowIncompatConfigKey("Upper") -> "true",
266+
CometConf.getExprAllowIncompatConfigKey("Lower") -> "true") {
265267
val table = "names"
266268
withTable(table) {
267269
sql(s"create table $table(id int, name varchar(20)) using parquet")
@@ -339,7 +341,7 @@ class CometStringExpressionSuite extends CometTestBase {
339341
}
340342

341343
test("trim") {
342-
withSQLConf(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") {
344+
withSQLConf(CometConf.getExprAllowIncompatConfigKey("Upper") -> "true") {
343345
val table = "test"
344346
withTable(table) {
345347
sql(s"create table $table(col varchar(20)) using parquet")

spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,10 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
8686
dir,
8787
spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 10) AS c1 FROM $tbl"))
8888

89-
val extraConfigs = Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true")
89+
val extraConfigs = Map(
90+
CometConf.getExprAllowIncompatConfigKey("Upper") -> "true",
91+
CometConf.getExprAllowIncompatConfigKey("Lower") -> "true",
92+
CometConf.getExprAllowIncompatConfigKey("InitCap") -> "true")
9093

9194
stringExpressions.foreach { config =>
9295
val allConfigs = extraConfigs ++ config.extraCometConfigs

0 commit comments

Comments
 (0)