Skip to content

Commit 9769544

Browse files
committed
Merge remote-tracking branch 'upstream/main' into convert-dtype-pyarrow
2 parents 90d6251 + 6b6393d commit 9769544

17 files changed

+211
-49
lines changed

.github/workflows/wheels.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ jobs:
153153
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
154154

155155
- name: Build wheels
156-
uses: pypa/cibuildwheel@v2.23.0
156+
uses: pypa/cibuildwheel@v2.23.1
157157
with:
158158
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
159159
env:

ci/code_checks.sh

-4
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
75-
-i "pandas.Timedelta.max PR02" \
76-
-i "pandas.Timedelta.min PR02" \
77-
-i "pandas.Timedelta.resolution PR02" \
7875
-i "pandas.Timestamp.max PR02" \
7976
-i "pandas.Timestamp.min PR02" \
8077
-i "pandas.Timestamp.resolution PR02" \
81-
-i "pandas.Timestamp.tzinfo GL08" \
8278
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
8379
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
8480
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \

doc/source/whatsnew/v3.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,7 @@ Indexing
704704
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
705705
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
706706
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
707+
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in the case of a consolidated block (:issue:`60980`, :issue:`60273`)
707708

708709
Missing
709710
^^^^^^^
@@ -839,6 +840,7 @@ Other
839840
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
840841
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
841842
- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
843+
- Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`)
842844
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
843845
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
844846
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`)

pandas/_libs/hashtable_func_helper.pxi.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):
430430

431431
if na_counter > 0:
432432
res_mask = np.zeros(j+1, dtype=np.bool_)
433-
res_mask[j] = True
433+
res_mask[j] = (na_counter == max_count)
434434
return modes[:j + 1], res_mask
435435

436436

pandas/_libs/tslibs/timedeltas.pyx

+77-6
Original file line numberDiff line numberDiff line change
@@ -998,8 +998,9 @@ class MinMaxReso:
998998
and Timedelta class. On an instance, these depend on the object's _reso.
999999
On the class, we default to the values we would get with nanosecond _reso.
10001000
"""
1001-
def __init__(self, name):
1001+
def __init__(self, name, docstring):
10021002
self._name = name
1003+
self.__doc__ = docstring
10031004

10041005
def __get__(self, obj, type=None):
10051006
if self._name == "min":
@@ -1012,9 +1013,13 @@ class MinMaxReso:
10121013

10131014
if obj is None:
10141015
# i.e. this is on the class, default to nanos
1015-
return Timedelta(val)
1016+
result = Timedelta(val)
10161017
else:
1017-
return Timedelta._from_value_and_reso(val, obj._creso)
1018+
result = Timedelta._from_value_and_reso(val, obj._creso)
1019+
1020+
result.__doc__ = self.__doc__
1021+
1022+
return result
10181023

10191024
def __set__(self, obj, value):
10201025
raise AttributeError(f"{self._name} is not settable.")
@@ -1033,9 +1038,75 @@ cdef class _Timedelta(timedelta):
10331038

10341039
# higher than np.ndarray and np.matrix
10351040
__array_priority__ = 100
1036-
min = MinMaxReso("min")
1037-
max = MinMaxReso("max")
1038-
resolution = MinMaxReso("resolution")
1041+
1042+
_docstring_min = """
1043+
Returns the minimum bound possible for Timedelta.
1044+
1045+
This property provides access to the smallest possible value that
1046+
can be represented by a Timedelta object.
1047+
1048+
Returns
1049+
-------
1050+
Timedelta
1051+
1052+
See Also
1053+
--------
1054+
Timedelta.max: Returns the maximum bound possible for Timedelta.
1055+
Timedelta.resolution: Returns the smallest possible difference between
1056+
non-equal Timedelta objects.
1057+
1058+
Examples
1059+
--------
1060+
>>> pd.Timedelta.min
1061+
-106752 days +00:12:43.145224193
1062+
"""
1063+
1064+
_docstring_max = """
1065+
Returns the maximum bound possible for Timedelta.
1066+
1067+
This property provides access to the largest possible value that
1068+
can be represented by a Timedelta object.
1069+
1070+
Returns
1071+
-------
1072+
Timedelta
1073+
1074+
See Also
1075+
--------
1076+
Timedelta.min: Returns the minimum bound possible for Timedelta.
1077+
Timedelta.resolution: Returns the smallest possible difference between
1078+
non-equal Timedelta objects.
1079+
1080+
Examples
1081+
--------
1082+
>>> pd.Timedelta.max
1083+
106751 days 23:47:16.854775807
1084+
"""
1085+
1086+
_docstring_reso = """
1087+
Returns the smallest possible difference between non-equal Timedelta objects.
1088+
1089+
The resolution value is determined by the underlying representation of time
1090+
units and is equivalent to Timedelta(nanoseconds=1).
1091+
1092+
Returns
1093+
-------
1094+
Timedelta
1095+
1096+
See Also
1097+
--------
1098+
Timedelta.max: Returns the maximum bound possible for Timedelta.
1099+
Timedelta.min: Returns the minimum bound possible for Timedelta.
1100+
1101+
Examples
1102+
--------
1103+
>>> pd.Timedelta.resolution
1104+
0 days 00:00:00.000000001
1105+
"""
1106+
1107+
min = MinMaxReso("min", _docstring_min)
1108+
max = MinMaxReso("max", _docstring_max)
1109+
resolution = MinMaxReso("resolution", _docstring_reso)
10391110

10401111
@property
10411112
def value(self):

pandas/_libs/tslibs/timestamps.pyx

+28
Original file line numberDiff line numberDiff line change
@@ -2208,6 +2208,34 @@ class Timestamp(_Timestamp):
22082208
"""
22092209
return super().tzname()
22102210
2211+
@property
2212+
def tzinfo(self):
2213+
"""
2214+
Returns the timezone info of the Timestamp.
2215+
2216+
This property returns a `datetime.tzinfo` object if the Timestamp
2217+
is timezone-aware. If the Timestamp has no timezone, it returns `None`.
2218+
If the Timestamp is in UTC or a fixed-offset timezone,
2219+
it returns `datetime.timezone`. If the Timestamp uses an
2220+
IANA timezone (e.g., "America/New_York"), it returns `zoneinfo.ZoneInfo`.
2221+
2222+
See Also
2223+
--------
2224+
Timestamp.tz : Alias for `tzinfo`, may return a `zoneinfo.ZoneInfo` object.
2225+
Timestamp.tz_convert : Convert timezone-aware Timestamp to another time zone.
2226+
Timestamp.tz_localize : Localize the Timestamp to a specific timezone.
2227+
2228+
Examples
2229+
--------
2230+
>>> ts = pd.Timestamp("2023-01-01 12:00:00", tz="UTC")
2231+
>>> ts.tzinfo
2232+
datetime.timezone.utc
2233+
2234+
>>> ts_naive = pd.Timestamp("2023-01-01 12:00:00")
2235+
>>> ts_naive.tzinfo
2236+
"""
2237+
return super().tzinfo
2238+
22112239
def utcoffset(self):
22122240
"""
22132241
Return utc offset.

pandas/core/algorithms.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ def duplicated(
987987

988988
def mode(
989989
values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None
990-
) -> ArrayLike:
990+
) -> tuple[np.ndarray, npt.NDArray[np.bool_]] | ExtensionArray:
991991
"""
992992
Returns the mode(s) of an array.
993993
@@ -1000,7 +1000,7 @@ def mode(
10001000
10011001
Returns
10021002
-------
1003-
np.ndarray or ExtensionArray
1003+
Union[Tuple[np.ndarray, npt.NDArray[np.bool_]], ExtensionArray]
10041004
"""
10051005
values = _ensure_arraylike(values, func_name="mode")
10061006
original = values
@@ -1014,8 +1014,10 @@ def mode(
10141014
values = _ensure_data(values)
10151015

10161016
npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
1017-
if res_mask is not None:
1018-
return npresult, res_mask # type: ignore[return-value]
1017+
if res_mask is None:
1018+
res_mask = np.zeros(npresult.shape, dtype=np.bool_)
1019+
else:
1020+
return npresult, res_mask
10191021

10201022
try:
10211023
npresult = safe_sort(npresult)
@@ -1026,7 +1028,7 @@ def mode(
10261028
)
10271029

10281030
result = _reconstruct_data(npresult, original.dtype, original)
1029-
return result
1031+
return result, res_mask
10301032

10311033

10321034
def rank(

pandas/core/arrays/base.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -2511,8 +2511,9 @@ def _mode(self, dropna: bool = True) -> Self:
25112511
Sorted, if possible.
25122512
"""
25132513
# error: Incompatible return value type (got "Union[ExtensionArray,
2514-
# ndarray[Any, Any]]", expected "Self")
2515-
return mode(self, dropna=dropna) # type: ignore[return-value]
2514+
# Tuple[np.ndarray, npt.NDArray[np.bool_]]", expected "Self")
2515+
result, _ = mode(self, dropna=dropna)
2516+
return result # type: ignore[return-value]
25162517

25172518
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
25182519
if any(

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2477,7 +2477,7 @@ def _mode(self, dropna: bool = True) -> Categorical:
24772477
if dropna:
24782478
mask = self.isna()
24792479

2480-
res_codes = algorithms.mode(codes, mask=mask)
2480+
res_codes, _ = algorithms.mode(codes, mask=mask)
24812481
res_codes = cast(np.ndarray, res_codes)
24822482
assert res_codes.dtype == codes.dtype
24832483
res = self._from_backing_data(res_codes)

pandas/core/arrays/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1635,7 +1635,7 @@ def _mode(self, dropna: bool = True):
16351635
if dropna:
16361636
mask = self.isna()
16371637

1638-
i8modes = algorithms.mode(self.view("i8"), mask=mask)
1638+
i8modes, _ = algorithms.mode(self.view("i8"), mask=mask)
16391639
npmodes = i8modes.view(self._ndarray.dtype)
16401640
npmodes = cast(np.ndarray, npmodes)
16411641
return self._from_backing_data(npmodes)

pandas/core/arrays/masked.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1099,12 +1099,8 @@ def value_counts(self, dropna: bool = True) -> Series:
10991099
return Series(arr, index=index, name="count", copy=False)
11001100

11011101
def _mode(self, dropna: bool = True) -> Self:
1102-
if dropna:
1103-
result = mode(self._data, dropna=dropna, mask=self._mask)
1104-
res_mask = np.zeros(result.shape, dtype=np.bool_)
1105-
else:
1106-
result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
1107-
result = type(self)(result, res_mask) # type: ignore[arg-type]
1102+
result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
1103+
result = type(self)(result, res_mask)
11081104
return result[result.argsort()]
11091105

11101106
@doc(ExtensionArray.equals)

pandas/core/internals/blocks.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -155,12 +155,9 @@ class Block(PandasObject, libinternals.Block):
155155
def _validate_ndim(self) -> bool:
156156
"""
157157
We validate dimension for blocks that can hold 2D values, which for now
158-
means numpy dtypes or DatetimeTZDtype.
158+
means numpy dtypes or EA dtypes like DatetimeTZDtype and PeriodDtype.
159159
"""
160-
dtype = self.dtype
161-
return not isinstance(dtype, ExtensionDtype) or isinstance(
162-
dtype, DatetimeTZDtype
163-
)
160+
return not is_1d_only_ea_dtype(self.dtype)
164161

165162
@final
166163
@cache_readonly

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2071,7 +2071,7 @@ def mode(self, dropna: bool = True) -> Series:
20712071
# TODO: Add option for bins like value_counts()
20722072
values = self._values
20732073
if isinstance(values, np.ndarray):
2074-
res_values = algorithms.mode(values, dropna=dropna)
2074+
res_values, _ = algorithms.mode(values, dropna=dropna)
20752075
else:
20762076
res_values = values._mode(dropna=dropna)
20772077

pandas/tests/indexing/test_indexing.py

+21
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,27 @@ def test_partial_boolean_frame_indexing(self):
763763
)
764764
tm.assert_frame_equal(result, expected)
765765

766+
def test_period_column_slicing(self):
767+
# GH#60273 The transpose operation creates a single 5x1 block of PeriodDtype
768+
# Make sure it is reindexed correctly
769+
df = DataFrame(
770+
pd.period_range("2021-01-01", periods=5, freq="D"),
771+
columns=["A"],
772+
).T
773+
result = df[[0, 1, 2]]
774+
expected = DataFrame(
775+
[
776+
[
777+
pd.Period("2021-01-01", freq="D"),
778+
pd.Period("2021-01-02", freq="D"),
779+
pd.Period("2021-01-03", freq="D"),
780+
]
781+
],
782+
index=["A"],
783+
columns=[0, 1, 2],
784+
)
785+
tm.assert_frame_equal(result, expected)
786+
766787
def test_no_reference_cycle(self):
767788
df = DataFrame({"a": [0, 1], "b": [2, 3]})
768789
for name in ("loc", "iloc", "at", "iat"):

pandas/tests/internals/test_internals.py

+10
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,16 @@ def test_period_can_hold_element(self, element):
13201320
with pytest.raises(TypeError, match="Invalid value"):
13211321
self.check_series_setitem(elem, pi, False)
13221322

1323+
def test_period_reindex_axis(self):
1324+
# GH#60273 Test reindexing of block with PeriodDtype
1325+
pi = period_range("2020", periods=5, freq="Y")
1326+
blk = new_block(pi._data.reshape(5, 1), BlockPlacement(slice(5)), ndim=2)
1327+
mgr = BlockManager(blocks=(blk,), axes=[Index(np.arange(5)), Index(["a"])])
1328+
reindexed = mgr.reindex_axis(Index([0, 2, 4]), axis=0)
1329+
result = DataFrame._from_mgr(reindexed, axes=reindexed.axes)
1330+
expected = DataFrame([[pi[0], pi[2], pi[4]]], columns=[0, 2, 4], index=["a"])
1331+
tm.assert_frame_equal(result, expected)
1332+
13231333
def check_can_hold_element(self, obj, elem, inplace: bool):
13241334
blk = obj._mgr.blocks[0]
13251335
if inplace:

pandas/tests/series/test_reductions.py

+23
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,29 @@ def test_mode_nullable_dtype(any_numeric_ea_dtype):
5151
tm.assert_series_equal(result, expected)
5252

5353

54+
def test_mode_nullable_dtype_edge_case(any_numeric_ea_dtype):
55+
# GH#58926
56+
ser = Series([1, 2, 3, 1], dtype=any_numeric_ea_dtype)
57+
result = ser.mode(dropna=False)
58+
expected = Series([1], dtype=any_numeric_ea_dtype)
59+
tm.assert_series_equal(result, expected)
60+
61+
ser2 = Series([1, 1, 2, 3, pd.NA], dtype=any_numeric_ea_dtype)
62+
result = ser2.mode(dropna=False)
63+
expected = Series([1], dtype=any_numeric_ea_dtype)
64+
tm.assert_series_equal(result, expected)
65+
66+
ser3 = Series([1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
67+
result = ser3.mode(dropna=False)
68+
expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
69+
tm.assert_series_equal(result, expected)
70+
71+
ser4 = Series([1, 1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
72+
result = ser4.mode(dropna=False)
73+
expected = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
74+
tm.assert_series_equal(result, expected)
75+
76+
5477
def test_mode_infer_string():
5578
# GH#56183
5679
pytest.importorskip("pyarrow")

0 commit comments

Comments
 (0)