Skip to content

Commit 6dd0216

Browse files
committed
Add clearer error messages for datatype mismatches in HDFStore.append. Raise ValueError when nan_rep is too large for the existing PyTables column. Add and update the applicable tests.
1 parent e84a7f7 commit 6dd0216

File tree

2 files changed

+21
-9
lines changed

2 files changed

+21
-9
lines changed

pandas/io/pytables.py

+11
Original file line number | Diff line number | Diff line change
@@ -3524,6 +3524,14 @@ def validate(self, other) -> None:
35243524
# Value of type "Optional[Any]" is not indexable [index]
35253525
oax = ov[i] # type: ignore[index]
35263526
if sax != oax:
3527+
## Raise clearer error if mismatching type on values_axes
3528+
if c == "values_axes" and sax.kind != oax.kind:
3529+
raise TypeError(
3530+
f"Cannot serialize the column [{oax.values[0]}] "
3531+
f"because its data contents are not [{oax.kind}] "
3532+
f"but [{sax.kind}] object dtype"
3533+
)
3534+
# Fallback if other source of difference
35273535
raise ValueError(
35283536
f"invalid combination of [{c}] on appending data "
35293537
f"[{sax}] vs current table [{oax}]"
@@ -5136,6 +5144,9 @@ def _maybe_convert_for_string_atom(
51365144
data = bvalues.copy()
51375145
data[mask] = nan_rep
51385146

5147+
if existing_col and mask.any() and len(nan_rep) > existing_col.itemsize:
5148+
raise ValueError("NaN representation is too large for existing column size")
5149+
51395150
# see if we have a valid string type
51405151
inferred_type = lib.infer_dtype(data, skipna=False)
51415152
if inferred_type != "string":

pandas/tests/io/pytables/test_append.py

+10 -9
Original file line number | Diff line number | Diff line change
@@ -421,6 +421,14 @@ def check_col(key, name, size):
421421
with pytest.raises(ValueError, match=msg):
422422
store.append("df_new", df_new)
423423

424+
# bigger NaN representation on next append
425+
df_new = DataFrame([[124, "a"], [346, "b"]])
426+
store.append("df_new2", df_new)
427+
df_new = DataFrame([[124, None], [346, "b"]])
428+
msg = "NaN representation is too large for existing column size"
429+
with pytest.raises(ValueError, match=msg):
430+
store.append("df_new2", df_new)
431+
424432
# min_itemsize on Series index (GH 11412)
425433
df = DataFrame(
426434
{
@@ -822,15 +830,8 @@ def test_append_raise(setup_path):
822830
df["foo"] = Timestamp("20130101")
823831
store.append("df", df)
824832
df["foo"] = "bar"
825-
msg = re.escape(
826-
"invalid combination of [values_axes] on appending data "
827-
"[name->values_block_1,cname->values_block_1,"
828-
"dtype->bytes24,kind->string,shape->(1, 30)] "
829-
"vs current table "
830-
"[name->values_block_1,cname->values_block_1,"
831-
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
832-
)
833-
with pytest.raises(ValueError, match=msg):
833+
msg = re.escape("Cannot serialize the column [foo] but [string] object dtype")
834+
with pytest.raises(TypeError, match=msg):
834835
store.append("df", df)
835836

836837

0 commit comments

Comments
 (0)