Skip to content

Commit b8624cb

Browse files
Backport PR #60943: BUG(string dtype): Resolve pytables xfail when reading with condition (#60967)
* ENH: Improved error message and raise new error for small-string NaN edge case in HDFStore.append (#60829)
  * Add clearer error messages for datatype mismatch in HDFStore.append. Raise ValueError when nan_rep too large for pytable column. Add and modify applicable test code.
  * Fix missed tests and correct mistake in error message.
  * Remove excess comments. Reverse error type change to avoid API changes. Move nan_rep tests into separate function.
  (cherry picked from commit 57340ec)
* TST(string dtype): Resolve xfails in pytables (#60795)
  (cherry picked from commit 4511251)
* BUG(string dtype): Resolve pytables xfail when reading with condition (#60943)
  (cherry picked from commit 0ec5f26)

---------

Co-authored-by: Jake Thomas Trevallion <[email protected]>
1 parent 3143f44 commit b8624cb

File tree

5 files changed

+16
-15
lines changed

5 files changed

+16
-15
lines changed

pandas/io/pytables.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -4093,6 +4093,8 @@ def _create_axes(
40934093
ordered = data_converted.ordered
40944094
meta = "category"
40954095
metadata = np.asarray(data_converted.categories).ravel()
4096+
elif isinstance(blk.dtype, StringDtype):
4097+
meta = str(blk.dtype)
40964098

40974099
data, dtype_name = _get_data_and_dtype_name(data_converted)
40984100

@@ -4360,7 +4362,9 @@ def read_column(
43604362
encoding=self.encoding,
43614363
errors=self.errors,
43624364
)
4363-
return Series(_set_tz(col_values[1], a.tz), name=column, copy=False)
4365+
cvs = _set_tz(col_values[1], a.tz)
4366+
dtype = getattr(self.table.attrs, f"{column}_meta", None)
4367+
return Series(cvs, name=column, copy=False, dtype=dtype)
43644368

43654369
raise KeyError(f"column [{column}] not found in the table")
43664370

@@ -4708,8 +4712,18 @@ def read(
47084712
df = DataFrame._from_arrays([values], columns=cols_, index=index_)
47094713
if not (using_string_dtype() and values.dtype.kind == "O"):
47104714
assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
4715+
4716+
# If str / string dtype is stored in meta, use that.
4717+
converted = False
4718+
for column in cols_:
4719+
dtype = getattr(self.table.attrs, f"{column}_meta", None)
4720+
if dtype in ["str", "string"]:
4721+
df[column] = df[column].astype(dtype)
4722+
converted = True
4723+
# Otherwise try inference.
47114724
if (
4712-
using_string_dtype()
4725+
not converted
4726+
and using_string_dtype()
47134727
and isinstance(values, np.ndarray)
47144728
and is_string_array(
47154729
values,

pandas/tests/io/pytables/test_append.py

-3
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas._libs.tslibs import Timestamp
119
import pandas.util._test_decorators as td
1210

@@ -507,7 +505,6 @@ def test_append_with_empty_string(setup_path):
507505
tm.assert_frame_equal(store.select("df"), df)
508506

509507

510-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
511508
def test_append_with_data_columns(setup_path):
512509
with ensure_clean_store(setup_path) as store:
513510
df = DataFrame(

pandas/tests/io/pytables/test_categorical.py

-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
Categorical,
86
DataFrame,
@@ -140,7 +138,6 @@ def test_categorical(setup_path):
140138
store.select("df3/meta/s/meta")
141139

142140

143-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
144141
def test_categorical_conversion(tmp_path, setup_path):
145142
# GH13322
146143
# Check that read_hdf with categorical columns doesn't return rows if

pandas/tests/io/pytables/test_read.py

-3
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas._libs.tslibs import Timestamp
119
from pandas.compat import is_platform_windows
1210

@@ -74,7 +72,6 @@ def test_read_missing_key_opened_store(tmp_path, setup_path):
7472
read_hdf(store, "k1")
7573

7674

77-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
7875
def test_read_column(setup_path):
7976
df = DataFrame(
8077
np.random.default_rng(2).standard_normal((10, 4)),

pandas/tests/io/pytables/test_select.py

-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas._libs.tslibs import Timestamp
75

86
import pandas as pd
@@ -651,7 +649,6 @@ def test_frame_select(setup_path):
651649
# store.select('frame', [crit1, crit2])
652650

653651

654-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
655652
def test_frame_select_complex(setup_path):
656653
# select via complex criteria
657654

@@ -965,7 +962,6 @@ def test_query_long_float_literal(setup_path):
965962
tm.assert_frame_equal(expected, result)
966963

967964

968-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
969965
def test_query_compare_column_type(setup_path):
970966
# GH 15492
971967
df = DataFrame(

0 commit comments

Comments
 (0)