Skip to content

Commit 4511251

Browse files
authored
TST(string dtype): Resolve xfails in pytables (#60795)
1 parent e557039 commit 4511251

13 files changed

+142
-149
lines changed

pandas/io/pytables.py

+3
Original file line numberDiff line numberDiff line change
@@ -5118,6 +5118,9 @@ def _maybe_convert_for_string_atom(
51185118
errors,
51195119
columns: list[str],
51205120
):
5121+
if isinstance(bvalues.dtype, StringDtype):
5122+
# "ndarray[Any, Any]" has no attribute "to_numpy"
5123+
bvalues = bvalues.to_numpy() # type: ignore[union-attr]
51215124
if bvalues.dtype != object:
51225125
return bvalues
51235126

pandas/tests/io/pytables/test_append.py

+30-26
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,7 @@
2525
ensure_clean_store,
2626
)
2727

28-
pytestmark = [
29-
pytest.mark.single_cpu,
30-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
31-
]
28+
pytestmark = [pytest.mark.single_cpu]
3229

3330
tables = pytest.importorskip("tables")
3431

@@ -40,7 +37,7 @@ def test_append(setup_path):
4037
# tables.NaturalNameWarning):
4138
df = DataFrame(
4239
np.random.default_rng(2).standard_normal((20, 4)),
43-
columns=Index(list("ABCD"), dtype=object),
40+
columns=Index(list("ABCD")),
4441
index=date_range("2000-01-01", periods=20, freq="B"),
4542
)
4643
_maybe_remove(store, "df1")
@@ -203,7 +200,7 @@ def test_append_some_nans(setup_path):
203200
tm.assert_frame_equal(store["df3"], df3, check_index_type=True)
204201

205202

206-
def test_append_all_nans(setup_path):
203+
def test_append_all_nans(setup_path, using_infer_string):
207204
with ensure_clean_store(setup_path) as store:
208205
df = DataFrame(
209206
{
@@ -255,7 +252,13 @@ def test_append_all_nans(setup_path):
255252
_maybe_remove(store, "df")
256253
store.append("df", df[:10], dropna=True)
257254
store.append("df", df[10:], dropna=True)
258-
tm.assert_frame_equal(store["df"], df, check_index_type=True)
255+
result = store["df"]
256+
expected = df
257+
if using_infer_string:
258+
# TODO: Test is incorrect when not using_infer_string.
259+
# Should take the last 4 rows unconditionally.
260+
expected = expected[-4:]
261+
tm.assert_frame_equal(result, expected, check_index_type=True)
259262

260263
_maybe_remove(store, "df2")
261264
store.append("df2", df[:10], dropna=False)
@@ -294,7 +297,7 @@ def test_append_frame_column_oriented(setup_path, request):
294297
# column oriented
295298
df = DataFrame(
296299
np.random.default_rng(2).standard_normal((10, 4)),
297-
columns=Index(list("ABCD"), dtype=object),
300+
columns=Index(list("ABCD")),
298301
index=date_range("2000-01-01", periods=10, freq="B"),
299302
)
300303
df.index = df.index._with_freq(None) # freq doesn't round-trip
@@ -426,7 +429,7 @@ def check_col(key, name, size):
426429
{
427430
"A": [0.0, 1.0, 2.0, 3.0, 4.0],
428431
"B": [0.0, 1.0, 0.0, 1.0, 0.0],
429-
"C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"], dtype=object),
432+
"C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"]),
430433
"D": date_range("20130101", periods=5),
431434
}
432435
).set_index("C")
@@ -453,7 +456,7 @@ def check_col(key, name, size):
453456
_maybe_remove(store, "df")
454457
df = DataFrame(
455458
np.random.default_rng(2).standard_normal((10, 4)),
456-
columns=Index(list("ABCD"), dtype=object),
459+
columns=Index(list("ABCD")),
457460
index=date_range("2000-01-01", periods=10, freq="B"),
458461
)
459462
df["string"] = "foo"
@@ -513,11 +516,12 @@ def test_append_with_empty_string(setup_path):
513516
tm.assert_frame_equal(store.select("df"), df)
514517

515518

519+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
516520
def test_append_with_data_columns(setup_path):
517521
with ensure_clean_store(setup_path) as store:
518522
df = DataFrame(
519523
np.random.default_rng(2).standard_normal((10, 4)),
520-
columns=Index(list("ABCD"), dtype=object),
524+
columns=Index(list("ABCD")),
521525
index=date_range("2000-01-01", periods=10, freq="B"),
522526
)
523527
df.iloc[0, df.columns.get_loc("B")] = 1.0
@@ -693,8 +697,8 @@ def test_append_misc(setup_path):
693697
with ensure_clean_store(setup_path) as store:
694698
df = DataFrame(
695699
1.1 * np.arange(120).reshape((30, 4)),
696-
columns=Index(list("ABCD"), dtype=object),
697-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
700+
columns=Index(list("ABCD")),
701+
index=Index([f"i-{i}" for i in range(30)]),
698702
)
699703
store.append("df", df, chunksize=1)
700704
result = store.select("df")
@@ -710,8 +714,8 @@ def test_append_misc_chunksize(setup_path, chunksize):
710714
# more chunksize in append tests
711715
df = DataFrame(
712716
1.1 * np.arange(120).reshape((30, 4)),
713-
columns=Index(list("ABCD"), dtype=object),
714-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
717+
columns=Index(list("ABCD")),
718+
index=Index([f"i-{i}" for i in range(30)]),
715719
)
716720
df["string"] = "foo"
717721
df["float322"] = 1.0
@@ -747,15 +751,15 @@ def test_append_misc_empty_frame(setup_path):
747751
tm.assert_frame_equal(store.select("df2"), df)
748752

749753

750-
def test_append_raise(setup_path):
754+
def test_append_raise(setup_path, using_infer_string):
751755
with ensure_clean_store(setup_path) as store:
752756
# test append with invalid input to get good error messages
753757

754758
# list in column
755759
df = DataFrame(
756760
1.1 * np.arange(120).reshape((30, 4)),
757-
columns=Index(list("ABCD"), dtype=object),
758-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
761+
columns=Index(list("ABCD")),
762+
index=Index([f"i-{i}" for i in range(30)]),
759763
)
760764
df["invalid"] = [["a"]] * len(df)
761765
assert df.dtypes["invalid"] == np.object_
@@ -775,8 +779,8 @@ def test_append_raise(setup_path):
775779
# datetime with embedded nans as object
776780
df = DataFrame(
777781
1.1 * np.arange(120).reshape((30, 4)),
778-
columns=Index(list("ABCD"), dtype=object),
779-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
782+
columns=Index(list("ABCD")),
783+
index=Index([f"i-{i}" for i in range(30)]),
780784
)
781785
s = Series(datetime.datetime(2001, 1, 2), index=df.index)
782786
s = s.astype(object)
@@ -803,8 +807,8 @@ def test_append_raise(setup_path):
803807
# appending an incompatible table
804808
df = DataFrame(
805809
1.1 * np.arange(120).reshape((30, 4)),
806-
columns=Index(list("ABCD"), dtype=object),
807-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
810+
columns=Index(list("ABCD")),
811+
index=Index([f"i-{i}" for i in range(30)]),
808812
)
809813
store.append("df", df)
810814

@@ -881,7 +885,7 @@ def test_append_with_timedelta(setup_path):
881885
def test_append_to_multiple(setup_path):
882886
df1 = DataFrame(
883887
np.random.default_rng(2).standard_normal((10, 4)),
884-
columns=Index(list("ABCD"), dtype=object),
888+
columns=Index(list("ABCD")),
885889
index=date_range("2000-01-01", periods=10, freq="B"),
886890
)
887891
df2 = df1.copy().rename(columns="{}_2".format)
@@ -918,12 +922,12 @@ def test_append_to_multiple(setup_path):
918922
def test_append_to_multiple_dropna(setup_path):
919923
df1 = DataFrame(
920924
np.random.default_rng(2).standard_normal((10, 4)),
921-
columns=Index(list("ABCD"), dtype=object),
925+
columns=Index(list("ABCD")),
922926
index=date_range("2000-01-01", periods=10, freq="B"),
923927
)
924928
df2 = DataFrame(
925929
np.random.default_rng(2).standard_normal((10, 4)),
926-
columns=Index(list("ABCD"), dtype=object),
930+
columns=Index(list("ABCD")),
927931
index=date_range("2000-01-01", periods=10, freq="B"),
928932
).rename(columns="{}_2".format)
929933
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
@@ -943,7 +947,7 @@ def test_append_to_multiple_dropna(setup_path):
943947
def test_append_to_multiple_dropna_false(setup_path):
944948
df1 = DataFrame(
945949
np.random.default_rng(2).standard_normal((10, 4)),
946-
columns=Index(list("ABCD"), dtype=object),
950+
columns=Index(list("ABCD")),
947951
index=date_range("2000-01-01", periods=10, freq="B"),
948952
)
949953
df2 = df1.copy().rename(columns="{}_2".format)

pandas/tests/io/pytables/test_categorical.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,7 @@
1616
ensure_clean_store,
1717
)
1818

19-
pytestmark = [
20-
pytest.mark.single_cpu,
21-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
22-
]
19+
pytestmark = [pytest.mark.single_cpu]
2320

2421

2522
def test_categorical(setup_path):
@@ -143,6 +140,7 @@ def test_categorical(setup_path):
143140
store.select("df3/meta/s/meta")
144141

145142

143+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
146144
def test_categorical_conversion(tmp_path, setup_path):
147145
# GH13322
148146
# Check that read_hdf with categorical columns doesn't return rows if

pandas/tests/io/pytables/test_complex.py

-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -13,10 +11,6 @@
1311

1412
from pandas.io.pytables import read_hdf
1513

16-
pytestmark = pytest.mark.xfail(
17-
using_string_dtype(), reason="TODO(infer_string)", strict=False
18-
)
19-
2014

2115
def test_complex_fixed(tmp_path, setup_path):
2216
df = DataFrame(

pandas/tests/io/pytables/test_errors.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas import (
119
CategoricalIndex,
1210
DataFrame,
@@ -24,10 +22,7 @@
2422
_maybe_adjust_name,
2523
)
2624

27-
pytestmark = [
28-
pytest.mark.single_cpu,
29-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
30-
]
25+
pytestmark = [pytest.mark.single_cpu]
3126

3227

3328
def test_pass_spec_to_storer(setup_path):
@@ -93,9 +88,14 @@ def test_unimplemented_dtypes_table_columns(setup_path):
9388

9489
with ensure_clean_store(setup_path) as store:
9590
# this fails because we have a date in the object block......
96-
msg = re.escape(
97-
"""Cannot serialize the column [datetime1]
98-
because its data contents are not [string] but [date] object dtype"""
91+
msg = "|".join(
92+
[
93+
re.escape(
94+
"Cannot serialize the column [datetime1]\nbecause its data "
95+
"contents are not [string] but [date] object dtype"
96+
),
97+
re.escape("[date] is not implemented as a table column"),
98+
]
9999
)
100100
with pytest.raises(TypeError, match=msg):
101101
store.append("df_unimplemented", df)

pandas/tests/io/pytables/test_file_handling.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.compat import (
97
PY311,
108
is_ci_environment,
@@ -35,9 +33,7 @@
3533
from pandas.io import pytables
3634
from pandas.io.pytables import Term
3735

38-
pytestmark = [
39-
pytest.mark.single_cpu,
40-
]
36+
pytestmark = [pytest.mark.single_cpu]
4137

4238

4339
@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
@@ -329,7 +325,6 @@ def test_complibs(tmp_path, lvl, lib, request):
329325
assert node.filters.complib == lib
330326

331327

332-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
333328
@pytest.mark.skipif(
334329
not is_platform_little_endian(), reason="reason platform is not little endian"
335330
)
@@ -347,7 +342,6 @@ def test_encoding(setup_path):
347342
tm.assert_frame_equal(result, expected)
348343

349344

350-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
351345
@pytest.mark.parametrize(
352346
"val",
353347
[
@@ -362,7 +356,7 @@ def test_encoding(setup_path):
362356
[b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
363357
],
364358
)
365-
@pytest.mark.parametrize("dtype", ["category", object])
359+
@pytest.mark.parametrize("dtype", ["category", None])
366360
def test_latin_encoding(tmp_path, setup_path, dtype, val):
367361
enc = "latin-1"
368362
nan_rep = ""

pandas/tests/io/pytables/test_keys.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
DataFrame,
86
HDFStore,
@@ -15,10 +13,7 @@
1513
tables,
1614
)
1715

18-
pytestmark = [
19-
pytest.mark.single_cpu,
20-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
21-
]
16+
pytestmark = [pytest.mark.single_cpu]
2217

2318

2419
def test_keys(setup_path):

pandas/tests/io/pytables/test_put.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@
2222
)
2323
from pandas.util import _test_decorators as td
2424

25-
pytestmark = [
26-
pytest.mark.single_cpu,
27-
]
25+
pytestmark = [pytest.mark.single_cpu]
2826

2927

3028
def test_format_type(tmp_path, setup_path):

pandas/tests/io/pytables/test_read.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,7 @@
2626

2727
from pandas.io.pytables import TableIterator
2828

29-
pytestmark = [
30-
pytest.mark.single_cpu,
31-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
32-
]
29+
pytestmark = [pytest.mark.single_cpu]
3330

3431

3532
def test_read_missing_key_close_store(tmp_path, setup_path):
@@ -75,10 +72,11 @@ def test_read_missing_key_opened_store(tmp_path, setup_path):
7572
read_hdf(store, "k1")
7673

7774

75+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
7876
def test_read_column(setup_path):
7977
df = DataFrame(
8078
np.random.default_rng(2).standard_normal((10, 4)),
81-
columns=Index(list("ABCD"), dtype=object),
79+
columns=Index(list("ABCD")),
8280
index=date_range("2000-01-01", periods=10, freq="B"),
8381
)
8482

@@ -175,7 +173,7 @@ def test_pytables_native2_read(datapath):
175173
assert isinstance(d1, DataFrame)
176174

177175

178-
def test_read_hdf_open_store(tmp_path, setup_path):
176+
def test_read_hdf_open_store(tmp_path, setup_path, using_infer_string):
179177
# GH10330
180178
# No check for non-string path_or_buf, and no test of open store
181179
df = DataFrame(
@@ -187,6 +185,12 @@ def test_read_hdf_open_store(tmp_path, setup_path):
187185
df = df.set_index(keys="E", append=True)
188186

189187
path = tmp_path / setup_path
188+
if using_infer_string:
189+
# TODO(infer_string) make this work for string dtype
190+
msg = "Saving a MultiIndex with an extension dtype is not supported."
191+
with pytest.raises(NotImplementedError, match=msg):
192+
df.to_hdf(path, key="df", mode="w")
193+
return
190194
df.to_hdf(path, key="df", mode="w")
191195
direct = read_hdf(path, "df")
192196
with HDFStore(path, mode="r") as store:

0 commit comments

Comments
 (0)