Commit 251cd97

[Python] Clean-up name / field_name handling in pandas compat
1 parent: 9907f37
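
What the change does, in brief: _get_columns_to_convert now builds a separate
column_field_names list containing the stringified column labels that become the
Arrow field names, so get_column_metadata can simply require field_name to be a
string instead of special-casing None. The sketch below illustrates the new
contract; it is a minimal standalone approximation (get_column_metadata_sketch is
an illustrative name, and the pandas_type / numpy_type resolution done by the real
function is elided), not the pyarrow implementation itself.

def get_column_metadata_sketch(name, field_name, pandas_type, numpy_type,
                               extra_metadata=None):
    # After this commit, callers pass an already-stringified field_name
    # (str(name)); the function no longer maps None to the literal 'None'.
    assert isinstance(field_name, str), str(type(field_name))
    return {
        'name': name,               # column label as passed by the caller
        'field_name': field_name,   # Arrow field name, always a string
        'pandas_type': pandas_type,
        'numpy_type': numpy_type,
        'metadata': extra_metadata,
    }

# A column labeled None still ends up with field_name 'None', but via str(None)
# at the call site rather than a special case inside the function.
print(get_column_metadata_sketch(None, str(None), 'int64', 'int64'))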

1 file changed (+19, -19 lines)

python/pyarrow/pandas_compat.py

@@ -181,19 +181,18 @@ def get_column_metadata(column, name, arrow_type, field_name):
             )
         )
 
-    assert field_name is None or isinstance(field_name, str), \
-        str(type(field_name))
+    assert isinstance(field_name, str), str(type(field_name))
     return {
         'name': name,
-        'field_name': 'None' if field_name is None else field_name,
+        'field_name': field_name,
         'pandas_type': logical_type,
         'numpy_type': string_dtype,
         'metadata': extra_metadata,
     }
 
 
-def construct_metadata(columns_to_convert, df, column_names, index_levels,
-                       index_descriptors, preserve_index, types):
+def construct_metadata(columns_to_convert, df, column_names, column_field_names,
+                       index_levels, index_descriptors, preserve_index, types):
     """Returns a dictionary containing enough metadata to reconstruct a pandas
     DataFrame as an Arrow Table, including index columns.
 
@@ -219,11 +218,11 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels,
     index_types = types[ntypes - num_serialized_index_levels:]
 
     column_metadata = []
-    for col, sanitized_name, arrow_type in zip(columns_to_convert,
-                                               column_names, df_types):
-        metadata = get_column_metadata(col, name=sanitized_name,
+    for col, name, field_name, arrow_type in zip(columns_to_convert, column_names,
+                                                 column_field_names, df_types):
+        metadata = get_column_metadata(col, name=name,
                                        arrow_type=arrow_type,
-                                       field_name=sanitized_name)
+                                       field_name=field_name)
         column_metadata.append(metadata)
 
     index_column_metadata = []
@@ -368,6 +367,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
         return _get_columns_to_convert_given_schema(df, schema, preserve_index)
 
     column_names = []
+    column_field_names = []
 
     index_levels = (
         _get_index_level_values(df.index) if preserve_index is not False
@@ -388,6 +388,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
         columns_to_convert.append(col)
         convert_fields.append(None)
         column_names.append(name)
+        column_field_names.append(str(name))
 
     index_descriptors = []
     index_column_names = []
@@ -403,7 +404,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
         index_column_names.append(name)
         index_descriptors.append(descr)
 
-    all_names = column_names + index_column_names
+    all_names = column_field_names + index_column_names
 
     # all_names : all of the columns in the resulting table including the data
     # columns and serialized index columns
@@ -416,8 +417,8 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
     # to be converted to Arrow format
     # columns_fields : specified column to use for coercion / casting
     # during serialization, if a Schema was provided
-    return (all_names, column_names, index_column_names, index_descriptors,
-            index_levels, columns_to_convert, convert_fields)
+    return (all_names, column_names, column_field_names, index_column_names,
+            index_descriptors, index_levels, columns_to_convert, convert_fields)
 
 
 def _get_columns_to_convert_given_schema(df, schema, preserve_index):
@@ -462,8 +463,6 @@ def _get_columns_to_convert_given_schema(df, schema, preserve_index):
                     "specified schema".format(name))
            is_index = True
 
-        name = _column_name_to_strings(name)
-
         if _pandas_api.is_sparse(col):
             raise TypeError(
                 "Sparse pandas data (column {}) not supported.".format(name))
@@ -480,8 +479,8 @@ def _get_columns_to_convert_given_schema(df, schema, preserve_index):
 
     all_names = column_names + index_column_names
 
-    return (all_names, column_names, index_column_names, index_descriptors,
-            index_levels, columns_to_convert, convert_fields)
+    return (all_names, column_names, column_names, index_column_names,
+            index_descriptors, index_levels, columns_to_convert, convert_fields)
 
 
 def _get_index_level(df, name):
@@ -539,6 +538,7 @@ def _resolve_columns_of_interest(df, schema, columns):
 def dataframe_to_types(df, preserve_index, columns=None):
     (all_names,
      column_names,
+     column_field_names,
      _,
      index_descriptors,
      index_columns,
@@ -563,7 +563,7 @@ def dataframe_to_types(df, preserve_index, columns=None):
         types.append(type_)
 
     metadata = construct_metadata(
-        columns_to_convert, df, column_names, index_columns,
+        columns_to_convert, df, column_names, column_field_names, index_columns,
         index_descriptors, preserve_index, types
     )
 
@@ -574,6 +574,7 @@ def dataframe_to_arrays(df, schema, preserve_index, nthreads=1, columns=None,
                         safe=True):
     (all_names,
      column_names,
+     column_field_names,
      index_column_names,
      index_descriptors,
      index_columns,
@@ -642,12 +643,11 @@ def _can_definitely_zero_copy(arr):
     if schema is None:
         fields = []
         for name, type_ in zip(all_names, types):
-            name = name if name is not None else 'None'
             fields.append(pa.field(name, type_))
         schema = pa.schema(fields)
 
     pandas_metadata = construct_metadata(
-        columns_to_convert, df, column_names, index_columns,
+        columns_to_convert, df, column_names, column_field_names, index_columns,
         index_descriptors, preserve_index, types
     )
     metadata = deepcopy(schema.metadata) if schema.metadata else dict()
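
For a usage-level view of the effect, here is a small sketch assuming a pyarrow
build that includes this change; the commented output is an expectation, not
captured output. Column labels that are not strings yield string Arrow field
names (via str(label)), and the decoded 'pandas' schema metadata carries a
'name' and a 'field_name' entry per column.

import pandas as pd
import pyarrow as pa

# DataFrame with non-string (integer) column labels
df = pd.DataFrame({0: [1, 2], 1: [3.0, 4.0]})
table = pa.Table.from_pandas(df, preserve_index=False)

print(table.schema.names)  # expected: ['0', '1']

# One metadata entry per data column, each with 'name' and 'field_name' keys
for col in table.schema.pandas_metadata['columns']:
    print(col['name'], col['field_name'])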
