Skip to content

Commit 182cfff

Browse files
authored
Merge branch 'main' into refactor-io-sql-execute
2 parents 87156b1 + c430c61 commit 182cfff

File tree

10 files changed

+77
-25
lines changed

10 files changed

+77
-25
lines changed

ci/code_checks.sh

-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8282
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
8383
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
8484
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \
85-
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
8685
-i "pandas.tseries.offsets.BDay PR02,SA01" \
8786
-i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \
8887
-i "pandas.tseries.offsets.BQuarterBegin.n GL08" \

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,7 @@ Groupby/resample/rolling
758758
Reshaping
759759
^^^^^^^^^
760760
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
761+
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
761762
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
762763
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
763764
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)

pandas/_libs/lib.pyx

+18-18
Original file line numberDiff line numberDiff line change
@@ -1882,7 +1882,7 @@ cdef class BoolValidator(Validator):
18821882

18831883
cpdef bint is_bool_array(ndarray values, bint skipna=False):
18841884
cdef:
1885-
BoolValidator validator = BoolValidator(len(values),
1885+
BoolValidator validator = BoolValidator(values.size,
18861886
values.dtype,
18871887
skipna=skipna)
18881888
return validator.validate(values)
@@ -1900,7 +1900,7 @@ cdef class IntegerValidator(Validator):
19001900
# Note: only python-exposed for tests
19011901
cpdef bint is_integer_array(ndarray values, bint skipna=True):
19021902
cdef:
1903-
IntegerValidator validator = IntegerValidator(len(values),
1903+
IntegerValidator validator = IntegerValidator(values.size,
19041904
values.dtype,
19051905
skipna=skipna)
19061906
return validator.validate(values)
@@ -1915,7 +1915,7 @@ cdef class IntegerNaValidator(Validator):
19151915

19161916
cdef bint is_integer_na_array(ndarray values, bint skipna=True):
19171917
cdef:
1918-
IntegerNaValidator validator = IntegerNaValidator(len(values),
1918+
IntegerNaValidator validator = IntegerNaValidator(values.size,
19191919
values.dtype, skipna=skipna)
19201920
return validator.validate(values)
19211921

@@ -1931,7 +1931,7 @@ cdef class IntegerFloatValidator(Validator):
19311931

19321932
cdef bint is_integer_float_array(ndarray values, bint skipna=True):
19331933
cdef:
1934-
IntegerFloatValidator validator = IntegerFloatValidator(len(values),
1934+
IntegerFloatValidator validator = IntegerFloatValidator(values.size,
19351935
values.dtype,
19361936
skipna=skipna)
19371937
return validator.validate(values)
@@ -1949,7 +1949,7 @@ cdef class FloatValidator(Validator):
19491949
# Note: only python-exposed for tests
19501950
cpdef bint is_float_array(ndarray values):
19511951
cdef:
1952-
FloatValidator validator = FloatValidator(len(values), values.dtype)
1952+
FloatValidator validator = FloatValidator(values.size, values.dtype)
19531953
return validator.validate(values)
19541954

19551955

@@ -1967,7 +1967,7 @@ cdef class ComplexValidator(Validator):
19671967

19681968
cdef bint is_complex_array(ndarray values):
19691969
cdef:
1970-
ComplexValidator validator = ComplexValidator(len(values), values.dtype)
1970+
ComplexValidator validator = ComplexValidator(values.size, values.dtype)
19711971
return validator.validate(values)
19721972

19731973

@@ -1980,7 +1980,7 @@ cdef class DecimalValidator(Validator):
19801980
cdef bint is_decimal_array(ndarray values, bint skipna=False):
19811981
cdef:
19821982
DecimalValidator validator = DecimalValidator(
1983-
len(values), values.dtype, skipna=skipna
1983+
values.size, values.dtype, skipna=skipna
19841984
)
19851985
return validator.validate(values)
19861986

@@ -1996,7 +1996,7 @@ cdef class StringValidator(Validator):
19961996

19971997
cpdef bint is_string_array(ndarray values, bint skipna=False):
19981998
cdef:
1999-
StringValidator validator = StringValidator(len(values),
1999+
StringValidator validator = StringValidator(values.size,
20002000
values.dtype,
20012001
skipna=skipna)
20022002
return validator.validate(values)
@@ -2013,7 +2013,7 @@ cdef class BytesValidator(Validator):
20132013

20142014
cdef bint is_bytes_array(ndarray values, bint skipna=False):
20152015
cdef:
2016-
BytesValidator validator = BytesValidator(len(values), values.dtype,
2016+
BytesValidator validator = BytesValidator(values.size, values.dtype,
20172017
skipna=skipna)
20182018
return validator.validate(values)
20192019

@@ -2064,7 +2064,7 @@ cdef class DatetimeValidator(TemporalValidator):
20642064

20652065
cpdef bint is_datetime_array(ndarray values, bint skipna=True):
20662066
cdef:
2067-
DatetimeValidator validator = DatetimeValidator(len(values),
2067+
DatetimeValidator validator = DatetimeValidator(values.size,
20682068
skipna=skipna)
20692069
return validator.validate(values)
20702070

@@ -2078,7 +2078,7 @@ cdef class Datetime64Validator(DatetimeValidator):
20782078
# Note: only python-exposed for tests
20792079
cpdef bint is_datetime64_array(ndarray values, bint skipna=True):
20802080
cdef:
2081-
Datetime64Validator validator = Datetime64Validator(len(values),
2081+
Datetime64Validator validator = Datetime64Validator(values.size,
20822082
skipna=skipna)
20832083
return validator.validate(values)
20842084

@@ -2093,7 +2093,7 @@ cdef class AnyDatetimeValidator(DatetimeValidator):
20932093

20942094
cdef bint is_datetime_or_datetime64_array(ndarray values, bint skipna=True):
20952095
cdef:
2096-
AnyDatetimeValidator validator = AnyDatetimeValidator(len(values),
2096+
AnyDatetimeValidator validator = AnyDatetimeValidator(values.size,
20972097
skipna=skipna)
20982098
return validator.validate(values)
20992099

@@ -2105,7 +2105,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool:
21052105
Doesn't check values are datetime-like types.
21062106
"""
21072107
cdef:
2108-
Py_ssize_t i = 0, j, n = len(values)
2108+
Py_ssize_t i = 0, j, n = values.size
21092109
object base_val, base_tz, val, tz
21102110

21112111
if n == 0:
@@ -2153,7 +2153,7 @@ cpdef bint is_timedelta_or_timedelta64_array(ndarray values, bint skipna=True):
21532153
Infer with timedeltas and/or nat/none.
21542154
"""
21552155
cdef:
2156-
AnyTimedeltaValidator validator = AnyTimedeltaValidator(len(values),
2156+
AnyTimedeltaValidator validator = AnyTimedeltaValidator(values.size,
21572157
skipna=skipna)
21582158
return validator.validate(values)
21592159

@@ -2167,7 +2167,7 @@ cdef class DateValidator(Validator):
21672167
# Note: only python-exposed for tests
21682168
cpdef bint is_date_array(ndarray values, bint skipna=False):
21692169
cdef:
2170-
DateValidator validator = DateValidator(len(values), skipna=skipna)
2170+
DateValidator validator = DateValidator(values.size, skipna=skipna)
21712171
return validator.validate(values)
21722172

21732173

@@ -2180,7 +2180,7 @@ cdef class TimeValidator(Validator):
21802180
# Note: only python-exposed for tests
21812181
cpdef bint is_time_array(ndarray values, bint skipna=False):
21822182
cdef:
2183-
TimeValidator validator = TimeValidator(len(values), skipna=skipna)
2183+
TimeValidator validator = TimeValidator(values.size, skipna=skipna)
21842184
return validator.validate(values)
21852185

21862186

@@ -2231,14 +2231,14 @@ cpdef bint is_interval_array(ndarray values):
22312231
Is this an ndarray of Interval (or np.nan) with a single dtype?
22322232
"""
22332233
cdef:
2234-
Py_ssize_t i, n = len(values)
2234+
Py_ssize_t i, n = values.size
22352235
str closed = None
22362236
bint numeric = False
22372237
bint dt64 = False
22382238
bint td64 = False
22392239
object val
22402240

2241-
if len(values) == 0:
2241+
if n == 0:
22422242
return False
22432243

22442244
for i in range(n):

pandas/core/frame.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -8671,6 +8671,7 @@ def combine(
86718671
2 NaN 3.0 1.0
86728672
"""
86738673
other_idxlen = len(other.index) # save for compare
8674+
other_columns = other.columns
86748675

86758676
this, other = self.align(other)
86768677
new_index = this.index
@@ -8681,8 +8682,8 @@ def combine(
86818682
if self.empty and len(other) == other_idxlen:
86828683
return other.copy()
86838684

8684-
# sorts if possible; otherwise align above ensures that these are set-equal
8685-
new_columns = this.columns.union(other.columns)
8685+
# preserve column order
8686+
new_columns = self.columns.union(other_columns, sort=False)
86868687
do_fill = fill_value is not None
86878688
result = {}
86888689
for col in new_columns:

pandas/core/resample.py

+10
Original file line numberDiff line numberDiff line change
@@ -378,10 +378,20 @@ def transform(self, arg, *args, **kwargs):
378378
----------
379379
arg : function
380380
To apply to each group. Should return a Series with the same index.
381+
*args, **kwargs
382+
Additional arguments and keywords.
381383
382384
Returns
383385
-------
384386
Series
387+
A Series with the transformed values, maintaining the same index as
388+
the original object.
389+
390+
See Also
391+
--------
392+
core.resample.Resampler.apply : Apply a function along each group.
393+
core.resample.Resampler.aggregate : Aggregate using one or more operations
394+
over the specified axis.
385395
386396
Examples
387397
--------

pandas/tests/arrays/string_/test_string.py

+6
Original file line numberDiff line numberDiff line change
@@ -758,3 +758,9 @@ def test_tolist(dtype):
758758
result = arr.tolist()
759759
expected = vals
760760
tm.assert_equal(result, expected)
761+
762+
763+
def test_string_array_view_type_error():
764+
arr = pd.array(["a", "b", "c"], dtype="string")
765+
with pytest.raises(TypeError, match="Cannot change data-type for string array."):
766+
arr.view("i8")

pandas/tests/dtypes/test_inference.py

+25
Original file line numberDiff line numberDiff line change
@@ -1582,6 +1582,31 @@ def test_is_string_array(self):
15821582
)
15831583
assert not lib.is_string_array(np.array([1, 2]))
15841584

1585+
@pytest.mark.parametrize(
1586+
"func",
1587+
[
1588+
"is_bool_array",
1589+
"is_date_array",
1590+
"is_datetime_array",
1591+
"is_datetime64_array",
1592+
"is_float_array",
1593+
"is_integer_array",
1594+
"is_interval_array",
1595+
"is_string_array",
1596+
"is_time_array",
1597+
"is_timedelta_or_timedelta64_array",
1598+
],
1599+
)
1600+
def test_is_dtype_array_empty_obj(self, func):
1601+
# https://github.com/pandas-dev/pandas/pull/60796
1602+
func = getattr(lib, func)
1603+
1604+
arr = np.empty((2, 0), dtype=object)
1605+
assert not func(arr)
1606+
1607+
arr = np.empty((0, 2), dtype=object)
1608+
assert not func(arr)
1609+
15851610
def test_to_object_array_tuples(self):
15861611
r = (5, 6)
15871612
values = [r]

pandas/tests/frame/methods/test_combine_first.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def test_combine_first_with_asymmetric_other(self, val):
380380
df2 = DataFrame({"isBool": [True]})
381381

382382
res = df1.combine_first(df2)
383-
exp = DataFrame({"isBool": [True], "isNum": [val]})
383+
exp = DataFrame({"isNum": [val], "isBool": [True]})
384384

385385
tm.assert_frame_equal(res, exp)
386386

@@ -555,3 +555,13 @@ def test_combine_first_empty_columns():
555555
result = left.combine_first(right)
556556
expected = DataFrame(columns=["a", "b", "c"])
557557
tm.assert_frame_equal(result, expected)
558+
559+
560+
def test_combine_first_preserve_column_order():
561+
# GH#60427
562+
df1 = DataFrame({"B": [1, 2, 3], "A": [4, None, 6]})
563+
df2 = DataFrame({"A": [5]}, index=[1])
564+
565+
result = df1.combine_first(df2)
566+
expected = DataFrame({"B": [1, 2, 3], "A": [4.0, 5.0, 6.0]})
567+
tm.assert_frame_equal(result, expected)

pandas/tests/io/test_feather.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,8 @@ def test_rw_use_threads(self):
143143
def test_path_pathlib(self):
144144
df = pd.DataFrame(
145145
1.1 * np.arange(120).reshape((30, 4)),
146-
columns=pd.Index(list("ABCD"), dtype=object),
147-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
146+
columns=pd.Index(list("ABCD")),
147+
index=pd.Index([f"i-{i}" for i in range(30)]),
148148
).reset_index()
149149
result = tm.round_trip_pathlib(df.to_feather, read_feather)
150150
tm.assert_frame_equal(df, result)

pandas/tests/io/test_http_headers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def stata_responder(df):
8686
return bio.getvalue()
8787

8888

89-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
9089
@pytest.mark.parametrize(
9190
"responder, read_method",
9291
[
@@ -107,6 +106,7 @@ def stata_responder(df):
107106
marks=[
108107
td.skip_if_no("fastparquet"),
109108
td.skip_if_no("fsspec"),
109+
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string"),
110110
],
111111
),
112112
(pickle_respnder, pd.read_pickle),

0 commit comments

Comments
 (0)