Skip to content

Commit 882fa9c

Browse files
yuanx749, veljanin, mroeschke
authored
BUG: Fix inconsistency of converting empty categorical with dtype_backend='pyarrow' (#61131)
* Handle the case where converting an empty categorical with dtype_backend='pyarrow' results in an error. Since converting a non-empty categorical returns a categorical of the 'numpy_nullable' dtype_backend, we now ensure that an empty categorical is returned as well, instead of raising an error.
* Additional revisions
* Remove the change for input_array...
* Revert the newline in Series.convert_dtypes and clarify the respective docs in whatsnew
* Revise testing to resolve CI errors (v2)
* Finish the PR
* Update doc/source/whatsnew/v3.0.0.rst (Co-authored-by: Matthew Roeschke <[email protected]>)
* Parameterize test
* Move condition
---------
Co-authored-by: veljanin <[email protected]>
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 6b6393d commit 882fa9c

File tree

4 files changed

+27
-0
lines changed

4 files changed

+27
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ Bug fixes
636636
Categorical
637637
^^^^^^^^^^^
638638
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
639+
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where an empty :class:`Series` with :class:`CategoricalDtype` raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)
639640
-
640641

641642
Datetimelike

pandas/core/dtypes/cast.py

+1
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,7 @@ def convert_dtypes(
11271127
or (
11281128
inferred_dtype.kind not in "iufcb"
11291129
and not isinstance(inferred_dtype, StringDtype)
1130+
and not isinstance(inferred_dtype, CategoricalDtype)
11301131
)
11311132
):
11321133
if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance(

pandas/tests/frame/methods/test_convert_dtypes.py

+15
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
import pandas.util._test_decorators as td
7+
68
import pandas as pd
79
import pandas._testing as tm
810

@@ -35,6 +37,19 @@ def test_convert_empty(self):
3537
empty_df = pd.DataFrame()
3638
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
3739

40+
@td.skip_if_no("pyarrow")
41+
def test_convert_empty_categorical_to_pyarrow(self):
42+
# GH#59934
43+
df = pd.DataFrame(
44+
{
45+
"A": pd.Categorical([None] * 5),
46+
"B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
47+
}
48+
)
49+
converted = df.convert_dtypes(dtype_backend="pyarrow")
50+
expected = df
51+
tm.assert_frame_equal(converted, expected)
52+
3853
def test_convert_dtypes_retain_column_names(self):
3954
# GH#41435
4055
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

pandas/tests/series/methods/test_convert_dtypes.py

+10
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from pandas._libs import lib
7+
import pandas.util._test_decorators as td
78

89
import pandas as pd
910
import pandas._testing as tm
@@ -298,6 +299,15 @@ def test_convert_dtypes_pyarrow_null(self):
298299
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
299300
tm.assert_series_equal(result, expected)
300301

302+
@td.skip_if_no("pyarrow")
303+
@pytest.mark.parametrize("categories", [None, ["S1", "S2"]])
304+
def test_convert_empty_categorical_to_pyarrow(self, categories):
305+
# GH#59934
306+
ser = pd.Series(pd.Categorical([None] * 5, categories=categories))
307+
converted = ser.convert_dtypes(dtype_backend="pyarrow")
308+
expected = ser
309+
tm.assert_series_equal(converted, expected)
310+
301311
def test_convert_dtype_pyarrow_timezone_preserve(self):
302312
# GH 60237
303313
pytest.importorskip("pyarrow")

0 commit comments

Comments
 (0)