Skip to content

Commit 0b421b0

Browse files
[SPARK-56561][DOCS] Document order preservation for array_distinct, array_intersect, array_union, array_except
1 parent c241d5a commit 0b421b0

3 files changed

Lines changed: 28 additions & 10 deletions

File tree

python/pyspark/sql/functions/builtin.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19651,6 +19651,8 @@ def array_remove(col: "ColumnOrName", element: Any) -> Column:
1965119651
def array_distinct(col: "ColumnOrName") -> Column:
1965219652
"""
1965319653
Array function: removes duplicate values from the array.
19654+
The order of elements in the result is the same as the order of their first occurrence
19655+
in the input.
1965419656

1965519657
.. versionadded:: 2.4.0
1965619658

@@ -19830,7 +19832,7 @@ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: An
1983019832
def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
1983119833
"""
1983219834
Array function: returns a new array containing the intersection of elements in col1 and col2,
19833-
without duplicates.
19835+
without duplicates. The result preserves the order of elements from the first array.
1983419836

1983519837
.. versionadded:: 2.4.0
1983619838

@@ -19923,7 +19925,8 @@ def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
1992319925
def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
1992419926
"""
1992519927
Array function: returns a new array containing the union of elements in col1 and col2,
19926-
without duplicates.
19928+
without duplicates. The result preserves the order of elements from the first array,
19929+
followed by elements from the second array that are not in the first.
1992719930

1992819931
.. versionadded:: 2.4.0
1992919932

@@ -20016,7 +20019,7 @@ def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
2001620019
def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
2001720020
"""
2001820021
Array function: returns a new array containing the elements present in col1 but not in col2,
20019-
without duplicates.
20022+
without duplicates. The result preserves the order of elements from the first array.
2002020023

2002120024
.. versionadded:: 2.4.0
2002220025

sql/api/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8845,15 +8845,16 @@ object functions {
88458845
Column.fn("array_prepend", column, lit(element))
88468846

88478847
/**
8848-
* Removes duplicate values from the array.
8848+
* Removes duplicate values from the array. The order of elements in the result is the same as
8849+
* the order of their first occurrence in the input.
88498850
* @group array_funcs
88508851
* @since 2.4.0
88518852
*/
88528853
def array_distinct(e: Column): Column = Column.fn("array_distinct", e)
88538854

88548855
/**
88558856
* Returns an array of the elements in the intersection of the given two arrays, without
8856-
* duplicates.
8857+
* duplicates. The result preserves the order of elements from the first array.
88578858
*
88588859
* @group array_funcs
88598860
* @since 2.4.0
@@ -8872,6 +8873,8 @@ object functions {
88728873

88738874
/**
88748875
* Returns an array of the elements in the union of the given two arrays, without duplicates.
8876+
* The result preserves the order of elements from the first array, followed by elements from
8877+
* the second array that are not in the first.
88758878
*
88768879
* @group array_funcs
88778880
* @since 2.4.0
@@ -8881,7 +8884,7 @@ object functions {
88818884

88828885
/**
88838886
* Returns an array of the elements in the first array but not in the second array, without
8884-
* duplicates. The order of elements in the result is not determined
8887+
* duplicates. The result preserves the order of elements from the first array.
88858888
*
88868889
* @group array_funcs
88878890
* @since 2.4.0

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4200,9 +4200,13 @@ trait ArraySetLike {
42004200

42014201
/**
42024202
* Removes duplicate values from the array.
4203+
* The order of elements in the result is the same as the order of their first occurrence
4204+
* in the input.
42034205
*/
42044206
@ExpressionDescription(
4205-
usage = "_FUNC_(array) - Removes duplicate values from the array.",
4207+
usage = """_FUNC_(array) - Removes duplicate values from the array.
4208+
The order of elements in the result is the same as the order of their first occurrence
4209+
in the input.""",
42064210
examples = """
42074211
Examples:
42084212
> SELECT _FUNC_(array(1, 2, 3, null, 3));
@@ -4391,12 +4395,16 @@ trait ArrayBinaryLike
43914395
}
43924396

43934397
/**
4394-
* Returns an array of the elements in the union of x and y, without duplicates
4398+
* Returns an array of the elements in the union of x and y, without duplicates.
4399+
* The result preserves the order of elements from the first array, followed by elements
4400+
* from the second array that are not in the first.
43954401
*/
43964402
@ExpressionDescription(
43974403
usage = """
43984404
_FUNC_(array1, array2) - Returns an array of the elements in the union of array1 and array2,
43994405
without duplicates.
4406+
The result preserves the order of elements from the first array, followed by elements
4407+
from the second array that are not in the first.
44004408
""",
44014409
examples = """
44024410
Examples:
@@ -4568,12 +4576,14 @@ case class ArrayUnion(left: Expression, right: Expression) extends ArrayBinaryLi
45684576
}
45694577

45704578
/**
4571-
* Returns an array of the elements in the intersect of x and y, without duplicates
4579+
* Returns an array of the elements in the intersection of x and y, without duplicates.
4580+
* The result preserves the order of elements from the first array.
45724581
*/
45734582
@ExpressionDescription(
45744583
usage = """
45754584
_FUNC_(array1, array2) - Returns an array of the elements in the intersection of array1 and
45764585
array2, without duplicates.
4586+
The result preserves the order of elements from the first array.
45774587
""",
45784588
examples = """
45794589
Examples:
@@ -4800,12 +4810,14 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina
48004810
}
48014811

48024812
/**
4803-
* Returns an array of the elements in the intersect of x and y, without duplicates
4813+
* Returns an array of the elements in array1 but not in array2, without duplicates.
4814+
* The result preserves the order of elements from the first array.
48044815
*/
48054816
@ExpressionDescription(
48064817
usage = """
48074818
_FUNC_(array1, array2) - Returns an array of the elements in array1 but not in array2,
48084819
without duplicates.
4820+
The result preserves the order of elements from the first array.
48094821
""",
48104822
examples = """
48114823
Examples:

0 commit comments

Comments
 (0)