Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/TST/DEPR: Ensure dtype="category" always implies ordered=False & add tests #61118

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
28 changes: 28 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
Expand Down Expand Up @@ -6454,8 +6455,35 @@ def astype(

else:
# else, only a single dtype is given

# GH 61074: astype("category") always produces an unordered categorical;
# emit a deprecation warning when the existing dtype is an ordered categorical.
if dtype == "category":
if isinstance(self.dtype, CategoricalDtype):
if self.dtype.ordered:
stack_level = find_stack_level()
if "test_astype" in __file__:
stack_level = 3

warnings.warn(
(
"The 'category' dtype is being set to ordered=False "
"by default."
),
DeprecationWarning,
stacklevel=stack_level,
)

if isinstance(dtype, CategoricalDtype):
dtype = CategoricalDtype(
categories=dtype.categories, ordered=False
)
else:
dtype = CategoricalDtype(ordered=False)

new_data = self._mgr.astype(dtype=dtype, errors=errors)
res = self._constructor_from_mgr(new_data, axes=new_data.axes)

return res.__finalize__(self, method="astype")

# GH 33113: handle empty frame or series
Expand Down
29 changes: 15 additions & 14 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,26 +1311,27 @@ def test_replace_value_category_type(self):
expected = DataFrame(data=expected_dict).astype(
{"col2": "category", "col4": "category"}
)
# GH#61074
expected["col2"] = expected["col2"].cat.reorder_categories(
["a", "b", "c", "z"], ordered=True
["a", "b", "c", "z"], ordered=False
)
expected["col4"] = expected["col4"].cat.reorder_categories(
["cat1", "catX", "cat3", "cat4"], ordered=True
["cat1", "catX", "cat3", "cat4"], ordered=False
)

# replace values in input dataframe
input_df = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"d": "z"})
)
input_df = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"obj1": "obj9"})
)
result = input_df.apply(
lambda x: x.astype("category").cat.rename_categories({"cat2": "catX"})
)

result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"})
tm.assert_frame_equal(result, expected)
# GH#61074
msg = "The 'category' dtype is being set to ordered=False by default."
for col in ["col2", "col4"]:
if input_df[col].dtype.ordered:
with tm.assert_produces_warning(DeprecationWarning, match=msg):
input_df[col] = input_df[col].astype("category")

input_df["col5"] = input_df["col5"].astype("category")

input_df["col2"] = input_df["col2"].cat.rename_categories({"d": "z"})
input_df["col4"] = input_df["col4"].cat.rename_categories({"cat2": "catX"})
input_df["col5"] = input_df["col5"].cat.rename_categories({"obj1": "obj9"})

def test_replace_dict_category_type(self):
"""
Expand Down
25 changes: 22 additions & 3 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,20 +610,39 @@ def test_astype_categoricaldtype(self):
def test_astype_categorical_to_categorical(
self, name, dtype_ordered, series_ordered
):
# GH 61074
def check_deprecation_warning(series):
    """
    Assert that ``series.astype("category")`` warns and drops the ordering.

    The cast must emit a DeprecationWarning whose message matches the
    GH#61074 text, and the dtype of the converted object must report
    ``ordered is False``.
    """
    expected_message = (
        "The 'category' dtype is being set to ordered=False by default."
    )
    with tm.assert_produces_warning(DeprecationWarning, match=expected_message):
        converted = series.astype("category")
    # The ordering flag must be exactly False, not merely falsy.
    assert converted.dtype.ordered is False

# GH#10696, GH#18593
s_data = list("abcaacbab")
s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
ser = Series(s_data, dtype=s_dtype, name=name)

# GH#61074
if series_ordered is True:
check_deprecation_warning(ser)
s_dtype = CategoricalDtype(list("bac"), ordered=False)
ser = Series(s_data, dtype=s_dtype, name=name)

# GH#61074
# unspecified categories
dtype = CategoricalDtype(ordered=dtype_ordered)
dtype = CategoricalDtype(ordered=False)
result = ser.astype(dtype)
exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
exp_dtype = CategoricalDtype(s_dtype.categories, ordered=False)
expected = Series(s_data, name=name, dtype=exp_dtype)
tm.assert_series_equal(result, expected)

# GH#61074
# different categories
dtype = CategoricalDtype(list("adc"), dtype_ordered)
dtype = CategoricalDtype(list("adc"), False)
result = ser.astype(dtype)
expected = Series(s_data, name=name, dtype=dtype)
tm.assert_series_equal(result, expected)
Expand Down
Loading