Skip to content

Commit 8973c55

Browse files
authored
BUG: is_*_array returns true on empty object dtype (#60796)
1 parent c0c778b commit 8973c55

File tree

3 files changed, with 45 additions and 20 deletions.

pandas/_libs/lib.pyx

+18 -18
Original file line number | Diff line number | Diff line change
@@ -1882,7 +1882,7 @@ cdef class BoolValidator(Validator):
18821882

18831883
cpdef bint is_bool_array(ndarray values, bint skipna=False):
18841884
cdef:
1885-
BoolValidator validator = BoolValidator(len(values),
1885+
BoolValidator validator = BoolValidator(values.size,
18861886
values.dtype,
18871887
skipna=skipna)
18881888
return validator.validate(values)
@@ -1900,7 +1900,7 @@ cdef class IntegerValidator(Validator):
19001900
# Note: only python-exposed for tests
19011901
cpdef bint is_integer_array(ndarray values, bint skipna=True):
19021902
cdef:
1903-
IntegerValidator validator = IntegerValidator(len(values),
1903+
IntegerValidator validator = IntegerValidator(values.size,
19041904
values.dtype,
19051905
skipna=skipna)
19061906
return validator.validate(values)
@@ -1915,7 +1915,7 @@ cdef class IntegerNaValidator(Validator):
19151915

19161916
cdef bint is_integer_na_array(ndarray values, bint skipna=True):
19171917
cdef:
1918-
IntegerNaValidator validator = IntegerNaValidator(len(values),
1918+
IntegerNaValidator validator = IntegerNaValidator(values.size,
19191919
values.dtype, skipna=skipna)
19201920
return validator.validate(values)
19211921

@@ -1931,7 +1931,7 @@ cdef class IntegerFloatValidator(Validator):
19311931

19321932
cdef bint is_integer_float_array(ndarray values, bint skipna=True):
19331933
cdef:
1934-
IntegerFloatValidator validator = IntegerFloatValidator(len(values),
1934+
IntegerFloatValidator validator = IntegerFloatValidator(values.size,
19351935
values.dtype,
19361936
skipna=skipna)
19371937
return validator.validate(values)
@@ -1949,7 +1949,7 @@ cdef class FloatValidator(Validator):
19491949
# Note: only python-exposed for tests
19501950
cpdef bint is_float_array(ndarray values):
19511951
cdef:
1952-
FloatValidator validator = FloatValidator(len(values), values.dtype)
1952+
FloatValidator validator = FloatValidator(values.size, values.dtype)
19531953
return validator.validate(values)
19541954

19551955

@@ -1967,7 +1967,7 @@ cdef class ComplexValidator(Validator):
19671967

19681968
cdef bint is_complex_array(ndarray values):
19691969
cdef:
1970-
ComplexValidator validator = ComplexValidator(len(values), values.dtype)
1970+
ComplexValidator validator = ComplexValidator(values.size, values.dtype)
19711971
return validator.validate(values)
19721972

19731973

@@ -1980,7 +1980,7 @@ cdef class DecimalValidator(Validator):
19801980
cdef bint is_decimal_array(ndarray values, bint skipna=False):
19811981
cdef:
19821982
DecimalValidator validator = DecimalValidator(
1983-
len(values), values.dtype, skipna=skipna
1983+
values.size, values.dtype, skipna=skipna
19841984
)
19851985
return validator.validate(values)
19861986

@@ -1996,7 +1996,7 @@ cdef class StringValidator(Validator):
19961996

19971997
cpdef bint is_string_array(ndarray values, bint skipna=False):
19981998
cdef:
1999-
StringValidator validator = StringValidator(len(values),
1999+
StringValidator validator = StringValidator(values.size,
20002000
values.dtype,
20012001
skipna=skipna)
20022002
return validator.validate(values)
@@ -2013,7 +2013,7 @@ cdef class BytesValidator(Validator):
20132013

20142014
cdef bint is_bytes_array(ndarray values, bint skipna=False):
20152015
cdef:
2016-
BytesValidator validator = BytesValidator(len(values), values.dtype,
2016+
BytesValidator validator = BytesValidator(values.size, values.dtype,
20172017
skipna=skipna)
20182018
return validator.validate(values)
20192019

@@ -2064,7 +2064,7 @@ cdef class DatetimeValidator(TemporalValidator):
20642064

20652065
cpdef bint is_datetime_array(ndarray values, bint skipna=True):
20662066
cdef:
2067-
DatetimeValidator validator = DatetimeValidator(len(values),
2067+
DatetimeValidator validator = DatetimeValidator(values.size,
20682068
skipna=skipna)
20692069
return validator.validate(values)
20702070

@@ -2078,7 +2078,7 @@ cdef class Datetime64Validator(DatetimeValidator):
20782078
# Note: only python-exposed for tests
20792079
cpdef bint is_datetime64_array(ndarray values, bint skipna=True):
20802080
cdef:
2081-
Datetime64Validator validator = Datetime64Validator(len(values),
2081+
Datetime64Validator validator = Datetime64Validator(values.size,
20822082
skipna=skipna)
20832083
return validator.validate(values)
20842084

@@ -2093,7 +2093,7 @@ cdef class AnyDatetimeValidator(DatetimeValidator):
20932093

20942094
cdef bint is_datetime_or_datetime64_array(ndarray values, bint skipna=True):
20952095
cdef:
2096-
AnyDatetimeValidator validator = AnyDatetimeValidator(len(values),
2096+
AnyDatetimeValidator validator = AnyDatetimeValidator(values.size,
20972097
skipna=skipna)
20982098
return validator.validate(values)
20992099

@@ -2105,7 +2105,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool:
21052105
Doesn't check values are datetime-like types.
21062106
"""
21072107
cdef:
2108-
Py_ssize_t i = 0, j, n = len(values)
2108+
Py_ssize_t i = 0, j, n = values.size
21092109
object base_val, base_tz, val, tz
21102110

21112111
if n == 0:
@@ -2153,7 +2153,7 @@ cpdef bint is_timedelta_or_timedelta64_array(ndarray values, bint skipna=True):
21532153
Infer with timedeltas and/or nat/none.
21542154
"""
21552155
cdef:
2156-
AnyTimedeltaValidator validator = AnyTimedeltaValidator(len(values),
2156+
AnyTimedeltaValidator validator = AnyTimedeltaValidator(values.size,
21572157
skipna=skipna)
21582158
return validator.validate(values)
21592159

@@ -2167,7 +2167,7 @@ cdef class DateValidator(Validator):
21672167
# Note: only python-exposed for tests
21682168
cpdef bint is_date_array(ndarray values, bint skipna=False):
21692169
cdef:
2170-
DateValidator validator = DateValidator(len(values), skipna=skipna)
2170+
DateValidator validator = DateValidator(values.size, skipna=skipna)
21712171
return validator.validate(values)
21722172

21732173

@@ -2180,7 +2180,7 @@ cdef class TimeValidator(Validator):
21802180
# Note: only python-exposed for tests
21812181
cpdef bint is_time_array(ndarray values, bint skipna=False):
21822182
cdef:
2183-
TimeValidator validator = TimeValidator(len(values), skipna=skipna)
2183+
TimeValidator validator = TimeValidator(values.size, skipna=skipna)
21842184
return validator.validate(values)
21852185

21862186

@@ -2231,14 +2231,14 @@ cpdef bint is_interval_array(ndarray values):
22312231
Is this an ndarray of Interval (or np.nan) with a single dtype?
22322232
"""
22332233
cdef:
2234-
Py_ssize_t i, n = len(values)
2234+
Py_ssize_t i, n = values.size
22352235
str closed = None
22362236
bint numeric = False
22372237
bint dt64 = False
22382238
bint td64 = False
22392239
object val
22402240

2241-
if len(values) == 0:
2241+
if n == 0:
22422242
return False
22432243

22442244
for i in range(n):

pandas/tests/dtypes/test_inference.py

+25
Original file line number | Diff line number | Diff line change
@@ -1582,6 +1582,31 @@ def test_is_string_array(self):
15821582
)
15831583
assert not lib.is_string_array(np.array([1, 2]))
15841584

1585+
@pytest.mark.parametrize(
1586+
"func",
1587+
[
1588+
"is_bool_array",
1589+
"is_date_array",
1590+
"is_datetime_array",
1591+
"is_datetime64_array",
1592+
"is_float_array",
1593+
"is_integer_array",
1594+
"is_interval_array",
1595+
"is_string_array",
1596+
"is_time_array",
1597+
"is_timedelta_or_timedelta64_array",
1598+
],
1599+
)
1600+
def test_is_dtype_array_empty_obj(self, func):
1601+
# https://github.com/pandas-dev/pandas/pull/60796
1602+
func = getattr(lib, func)
1603+
1604+
arr = np.empty((2, 0), dtype=object)
1605+
assert not func(arr)
1606+
1607+
arr = np.empty((0, 2), dtype=object)
1608+
assert not func(arr)
1609+
15851610
def test_to_object_array_tuples(self):
15861611
r = (5, 6)
15871612
values = [r]

pandas/tests/io/test_feather.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -143,8 +143,8 @@ def test_rw_use_threads(self):
143143
def test_path_pathlib(self):
144144
df = pd.DataFrame(
145145
1.1 * np.arange(120).reshape((30, 4)),
146-
columns=pd.Index(list("ABCD"), dtype=object),
147-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
146+
columns=pd.Index(list("ABCD")),
147+
index=pd.Index([f"i-{i}" for i in range(30)]),
148148
).reset_index()
149149
result = tm.round_trip_pathlib(df.to_feather, read_feather)
150150
tm.assert_frame_equal(df, result)

0 commit comments

Comments
 (0)