Skip to content

Commit d1ea99d

Browse files
authored
feat: opt array_intersect, array_except, array_join into codegen dispatch (#4636)
1 parent 400106f commit d1ea99d

5 files changed

Lines changed: 79 additions & 8 deletions

File tree

docs/source/user-guide/latest/expressions.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,10 @@ The tables below list every Spark built-in expression with its current status.
138138
| `array_compact` || |
139139
| `array_contains` || NaN/signed-zero handling may differ ([details](compatibility/floating-point.md)) |
140140
| `array_distinct` || NaN/signed-zero handling may differ ([details](compatibility/floating-point.md)) |
141-
| `array_except` || Incompatible; falls back by default ([details](compatibility/expressions/array.md)) |
141+
| `array_except` || Routes through the JVM codegen dispatcher by default; the incompatible native path is opt-in via `allowIncompatible` ([details](compatibility/expressions/array.md)) |
142142
| `array_insert` || |
143-
| `array_intersect` || Incompatible; falls back by default ([details](compatibility/expressions/array.md)) |
144-
| `array_join` || Incompatible; falls back by default ([details](compatibility/expressions/array.md)) |
143+
| `array_intersect` || Routes through the JVM codegen dispatcher by default; the incompatible native path is opt-in via `allowIncompatible` ([details](compatibility/expressions/array.md)) |
144+
| `array_join` || Routes through the JVM codegen dispatcher by default; the incompatible native path is opt-in via `allowIncompatible` ([details](compatibility/expressions/array.md)) |
145145
| `array_max` || NaN ordering may differ ([details](compatibility/floating-point.md)) |
146146
| `array_min` || NaN ordering may differ ([details](compatibility/floating-point.md)) |
147147
| `array_position` || Binary/struct/map/null elements fall back |

spark/src/main/scala/org/apache/comet/serde/arrays.scala

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,10 @@ object CometSortArray extends CometExpressionSerde[SortArray] {
187187
}
188188
}
189189

190-
object CometArrayIntersect extends CometExpressionSerde[ArrayIntersect] with CometTypeShim {
190+
object CometArrayIntersect
191+
extends CometExpressionSerde[ArrayIntersect]
192+
with CometTypeShim
193+
with CodegenDispatchFallback {
191194

192195
private val incompatReason: String =
193196
"Result array element order may differ from Spark when the right array is longer " +
@@ -328,7 +331,10 @@ object CometArrayCompact extends CometExpressionSerde[Expression] {
328331
}
329332
}
330333

331-
object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExprShim {
334+
object CometArrayExcept
335+
extends CometExpressionSerde[ArrayExcept]
336+
with CometExprShim
337+
with CodegenDispatchFallback {
332338

333339
private val incompatReason = "Null handling and ordering may differ from Spark"
334340

@@ -372,7 +378,7 @@ object CometArrayExcept extends CometExpressionSerde[ArrayExcept] with CometExpr
372378
}
373379
}
374380

375-
object CometArrayJoin extends CometExpressionSerde[ArrayJoin] {
381+
object CometArrayJoin extends CometExpressionSerde[ArrayJoin] with CodegenDispatchFallback {
376382

377383
private val incompatReason = "Null handling may differ from Spark"
378384

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ArrayExcept mixes in CodegenDispatchFallback, so with allowIncompatible unset its Incompatible
19+
-- null-handling/ordering case routes through the JVM codegen dispatcher and matches Spark exactly,
20+
-- including the literal/literal case the native path could not handle.
21+
22+
statement
23+
CREATE TABLE test_ae_dispatch(a array<int>, b array<int>) USING parquet
24+
25+
statement
26+
INSERT INTO test_ae_dispatch VALUES (array(1, 2, 3), array(2, 3, 4)), (array(1, 2), array()), (array(), array(1)), (NULL, array(1)), (array(1, NULL), array(NULL))
27+
28+
query
29+
SELECT array_except(a, b) FROM test_ae_dispatch
30+
31+
query
32+
SELECT array_except(array(1, 2, 3), array(2, 3, 4)), array_except(array(1, 2), array()), array_except(array(), array(1)), array_except(cast(NULL as array<int>), array(1))
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ArrayIntersect mixes in CodegenDispatchFallback, so with allowIncompatible unset its
19+
-- Incompatible element-order case routes through the JVM codegen dispatcher and matches Spark
20+
-- exactly, including the right-longer-than-left case the native path orders differently (no
21+
-- sort_array workaround needed here).
22+
23+
statement
24+
CREATE TABLE test_ai_dispatch(a array<int>, b array<int>) USING parquet
25+
26+
statement
27+
INSERT INTO test_ai_dispatch VALUES (array(2, 1), array(3, 1, 2)), (array(3, 1), array(1, 2, 3, 4)), (array(1, NULL), array(NULL, 2)), (NULL, array(1))
28+
29+
query
30+
SELECT array_intersect(a, b) FROM test_ai_dispatch
31+
32+
query
33+
SELECT array_intersect(array(2, 1), array(3, 1, 2)), array_intersect(array(3, 1), array(1, 2, 3, 4))

spark/src/test/resources/sql-tests/expressions/array/array_join.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ CREATE TABLE test_array_join(arr array<string>) USING parquet
2121
statement
2222
INSERT INTO test_array_join VALUES (array('a', 'b', 'c')), (array('hello', 'world')), (array()), (NULL), (array('a', NULL, 'c'))
2323

24-
query spark_answer_only
24+
query
2525
SELECT array_join(arr, ',') FROM test_array_join
2626

27-
query spark_answer_only
27+
query
2828
SELECT array_join(arr, ',', 'NULL') FROM test_array_join

0 commit comments

Comments
 (0)