BUG: DataFrame.explode doesn't work for pyarrow.large_list type (#61105)

snitish · web-flow · commit cdc9e952f139 · 2025-03-11T15:20:33.000-07:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -782,6 +782,7 @@ Reshaping
 ^^^^^^^^^
 - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
 - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
+- Bug in :meth:`DataFrame.explode` producing an incorrect result for the :func:`pyarrow.large_list` type (:issue:`61091`)
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
 - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -1938,7 +1938,10 @@ def _explode(self):
         """
         # child class explode method supports only list types; return
         # default implementation for non list types.
-        if not pa.types.is_list(self.dtype.pyarrow_dtype):
+        if not (
+            pa.types.is_list(self.dtype.pyarrow_dtype)
+            or pa.types.is_large_list(self.dtype.pyarrow_dtype)
+        ):
             return super()._explode()
         values = self
         counts = pa.compute.list_value_length(values._pa_array)
diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py
@@ -145,8 +145,9 @@ def test_explode_scalars_can_ignore_index():
 
 
 @pytest.mark.parametrize("ignore_index", [True, False])
-def test_explode_pyarrow_list_type(ignore_index):
-    # GH 53602
+@pytest.mark.parametrize("list_type", ["list_", "large_list"])
+def test_explode_pyarrow_list_type(ignore_index, list_type):
+    # GH 53602, 61091
     pa = pytest.importorskip("pyarrow")
 
     data = [
@@ -156,7 +157,7 @@ def test_explode_pyarrow_list_type(ignore_index):
         [2, 3],
         None,
     ]
-    ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
+    ser = pd.Series(data, dtype=pd.ArrowDtype(getattr(pa, list_type)(pa.int64())))
     result = ser.explode(ignore_index=ignore_index)
     expected = pd.Series(
         data=[None, None, 1, None, 2, 3, None],