Skip to content

Commit 136b091

Browse files
keep columns Index as string dtype even if metadata says object
1 parent: 84b8234 · commit: 136b091

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

python/pyarrow/pandas_compat.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -1159,8 +1159,10 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
11591159
elif pandas_dtype == "decimal":
11601160
level = _pandas_api.pd.Index([decimal.Decimal(i) for i in level])
11611161
elif (
1162-
level.dtype == "str" and "mixed" in pandas_dtype and numpy_dtype == "object"
1162+
level.dtype == "str" and numpy_dtype == "object"
1163+
and ("mixed" in pandas_dtype or pandas_dtype in ["unicode", "string"])
11631164
):
1165+
# in this case don't convert to object dtype, but keep using the str dtype
11641166
new_levels.append(level)
11651167
continue
11661168
elif level.dtype != dtype:

python/pyarrow/tests/test_pandas.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -4553,7 +4553,7 @@ def test_metadata_compat_range_index_pre_0_12():
45534553
e1 = pd.DataFrame(
45544554
{'a': a_values},
45554555
index=pd.RangeIndex(0, 8, step=2, name='qux'),
4556-
columns=pd.Index(['a'], dtype=object)
4556+
columns=pd.Index(['a'])
45574557
)
45584558
t1 = pa.Table.from_arrays([a_arrow, rng_index_arrow],
45594559
names=['a', 'qux'])
@@ -4584,7 +4584,7 @@ def test_metadata_compat_range_index_pre_0_12():
45844584
e2 = pd.DataFrame(
45854585
{'qux': a_values},
45864586
index=pd.RangeIndex(0, 8, step=2, name='qux'),
4587-
columns=pd.Index(['qux'], dtype=object)
4587+
columns=pd.Index(['qux'])
45884588
)
45894589
t2 = pa.Table.from_arrays([a_arrow, rng_index_arrow],
45904590
names=['qux', gen_name_0])
@@ -4615,7 +4615,7 @@ def test_metadata_compat_range_index_pre_0_12():
46154615
e3 = pd.DataFrame(
46164616
{'a': a_values},
46174617
index=pd.RangeIndex(0, 8, step=2, name=None),
4618-
columns=pd.Index(['a'], dtype=object)
4618+
columns=pd.Index(['a'])
46194619
)
46204620
t3 = pa.Table.from_arrays([a_arrow, rng_index_arrow],
46214621
names=['a', gen_name_0])
@@ -4646,7 +4646,7 @@ def test_metadata_compat_range_index_pre_0_12():
46464646
e4 = pd.DataFrame(
46474647
{'a': a_values},
46484648
index=[pd.RangeIndex(0, 8, step=2, name='qux'), b_values],
4649-
columns=pd.Index(['a'], dtype=object)
4649+
columns=pd.Index(['a'])
46504650
)
46514651
t4 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow],
46524652
names=['a', 'qux', gen_name_1])
@@ -4682,7 +4682,7 @@ def test_metadata_compat_range_index_pre_0_12():
46824682
e5 = pd.DataFrame(
46834683
{'a': a_values},
46844684
index=[pd.RangeIndex(0, 8, step=2, name=None), b_values],
4685-
columns=pd.Index(['a'], dtype=object)
4685+
columns=pd.Index(['a'])
46864686
)
46874687
t5 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow],
46884688
names=['a', gen_name_0, gen_name_1])

0 commit comments

Comments
 (0)