Skip to content

Commit ac165e8

Browse files
committed
Fix dropna=False bug in Series.mode for nullable dtypes
1 parent 8943c97 commit ac165e8

File tree

8 files changed

+37
-15
lines changed

8 files changed

+37
-15
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,7 @@ Other
838838
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
839839
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
840840
- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
841+
- Bug in :meth:`Series.mode` raising an exception when called with ``dropna=False`` on a Series with a nullable dtype that contains no null values. (:issue:`58926`)
841842
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
842843
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
843844
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`)

pandas/_libs/hashtable_func_helper.pxi.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):
430430

431431
if na_counter > 0:
432432
res_mask = np.zeros(j+1, dtype=np.bool_)
433-
res_mask[j] = True
433+
res_mask[j] = (na_counter == max_count)
434434
return modes[:j + 1], res_mask
435435

436436

pandas/core/algorithms.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ def duplicated(
987987

988988
def mode(
989989
values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None
990-
) -> ArrayLike:
990+
) -> tuple[np.ndarray, npt.NDArray[np.bool_]] | ExtensionArray:
991991
"""
992992
Returns the mode(s) of an array.
993993
@@ -1000,7 +1000,7 @@ def mode(
10001000
10011001
Returns
10021002
-------
1003-
np.ndarray or ExtensionArray
1003+
Union[Tuple[np.ndarray, npt.NDArray[np.bool_]], ExtensionArray]
10041004
"""
10051005
values = _ensure_arraylike(values, func_name="mode")
10061006
original = values
@@ -1014,8 +1014,10 @@ def mode(
10141014
values = _ensure_data(values)
10151015

10161016
npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
1017-
if res_mask is not None:
1018-
return npresult, res_mask # type: ignore[return-value]
1017+
if res_mask is None:
1018+
res_mask = np.zeros(npresult.shape, dtype=np.bool_)
1019+
else:
1020+
return npresult, res_mask
10191021

10201022
try:
10211023
npresult = safe_sort(npresult)
@@ -1026,7 +1028,7 @@ def mode(
10261028
)
10271029

10281030
result = _reconstruct_data(npresult, original.dtype, original)
1029-
return result
1031+
return result, res_mask
10301032

10311033

10321034
def rank(

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2477,7 +2477,7 @@ def _mode(self, dropna: bool = True) -> Categorical:
24772477
if dropna:
24782478
mask = self.isna()
24792479

2480-
res_codes = algorithms.mode(codes, mask=mask)
2480+
res_codes, _ = algorithms.mode(codes, mask=mask)
24812481
res_codes = cast(np.ndarray, res_codes)
24822482
assert res_codes.dtype == codes.dtype
24832483
res = self._from_backing_data(res_codes)

pandas/core/arrays/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1635,7 +1635,7 @@ def _mode(self, dropna: bool = True):
16351635
if dropna:
16361636
mask = self.isna()
16371637

1638-
i8modes = algorithms.mode(self.view("i8"), mask=mask)
1638+
i8modes, _ = algorithms.mode(self.view("i8"), mask=mask)
16391639
npmodes = i8modes.view(self._ndarray.dtype)
16401640
npmodes = cast(np.ndarray, npmodes)
16411641
return self._from_backing_data(npmodes)

pandas/core/arrays/masked.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1099,12 +1099,8 @@ def value_counts(self, dropna: bool = True) -> Series:
10991099
return Series(arr, index=index, name="count", copy=False)
11001100

11011101
def _mode(self, dropna: bool = True) -> Self:
1102-
if dropna:
1103-
result = mode(self._data, dropna=dropna, mask=self._mask)
1104-
res_mask = np.zeros(result.shape, dtype=np.bool_)
1105-
else:
1106-
result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
1107-
result = type(self)(result, res_mask) # type: ignore[arg-type]
1102+
result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
1103+
result = type(self)(result, res_mask)
11081104
return result[result.argsort()]
11091105

11101106
@doc(ExtensionArray.equals)

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2071,7 +2071,7 @@ def mode(self, dropna: bool = True) -> Series:
20712071
# TODO: Add option for bins like value_counts()
20722072
values = self._values
20732073
if isinstance(values, np.ndarray):
2074-
res_values = algorithms.mode(values, dropna=dropna)
2074+
res_values, _ = algorithms.mode(values, dropna=dropna)
20752075
else:
20762076
res_values = values._mode(dropna=dropna)
20772077

pandas/tests/series/test_reductions.py

+23
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,29 @@ def test_mode_nullable_dtype(any_numeric_ea_dtype):
5151
tm.assert_series_equal(result, expected)
5252

5353

54+
def test_mode_nullable_dtype_edge_case(any_numeric_ea_dtype):
55+
# GH#58926
56+
ser = Series([1, 2, 3, 1], dtype=any_numeric_ea_dtype)
57+
result = ser.mode(dropna=False)
58+
expected = Series([1], dtype=any_numeric_ea_dtype)
59+
tm.assert_series_equal(result, expected)
60+
61+
ser2 = Series([1, 1, 2, 3, pd.NA], dtype=any_numeric_ea_dtype)
62+
result = ser2.mode(dropna=False)
63+
expected = Series([1], dtype=any_numeric_ea_dtype)
64+
tm.assert_series_equal(result, expected)
65+
66+
ser3 = Series([1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
67+
result = ser3.mode(dropna=False)
68+
expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
69+
tm.assert_series_equal(result, expected)
70+
71+
ser4 = Series([1, 1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
72+
result = ser4.mode(dropna=False)
73+
expected = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
74+
tm.assert_series_equal(result, expected)
75+
76+
5477
def test_mode_infer_string():
5578
# GH#56183
5679
pytest.importorskip("pyarrow")

0 commit comments

Comments
 (0)