Skip to content

Commit cdc9e95

Browse files
authored
BUG: DataFrame.explode doesn't work for pyarrow.large_list type (#61105)
1 parent 40a8180 commit cdc9e95

File tree

3 files changed

+9 -4 lines changed

3 files changed

+9 -4 lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -782,6 +782,7 @@ Reshaping
782782
^^^^^^^^^
783783
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
784784
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
785+
- Bug in :meth:`DataFrame.explode` producing an incorrect result for the :class:`pyarrow.large_list` type (:issue:`61091`)
785786
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
786787
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
787788
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)

pandas/core/arrays/arrow/array.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -1938,7 +1938,10 @@ def _explode(self):
19381938
"""
19391939
# child class explode method supports only list types; return
19401940
# default implementation for non list types.
1941-
if not pa.types.is_list(self.dtype.pyarrow_dtype):
1941+
if not (
1942+
pa.types.is_list(self.dtype.pyarrow_dtype)
1943+
or pa.types.is_large_list(self.dtype.pyarrow_dtype)
1944+
):
19421945
return super()._explode()
19431946
values = self
19441947
counts = pa.compute.list_value_length(values._pa_array)

pandas/tests/series/methods/test_explode.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,9 @@ def test_explode_scalars_can_ignore_index():
145145

146146

147147
@pytest.mark.parametrize("ignore_index", [True, False])
148-
def test_explode_pyarrow_list_type(ignore_index):
149-
# GH 53602
148+
@pytest.mark.parametrize("list_type", ["list_", "large_list"])
149+
def test_explode_pyarrow_list_type(ignore_index, list_type):
150+
# GH 53602, 61091
150151
pa = pytest.importorskip("pyarrow")
151152

152153
data = [
@@ -156,7 +157,7 @@ def test_explode_pyarrow_list_type(ignore_index):
156157
[2, 3],
157158
None,
158159
]
159-
ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
160+
ser = pd.Series(data, dtype=pd.ArrowDtype(getattr(pa, list_type)(pa.int64())))
160161
result = ser.explode(ignore_index=ignore_index)
161162
expected = pd.Series(
162163
data=[None, None, 1, None, 2, 3, None],

0 commit comments

Comments
 (0)