@@ -181,19 +181,18 @@ def get_column_metadata(column, name, arrow_type, field_name):
181
181
)
182
182
)
183
183
184
- assert field_name is None or isinstance (field_name , str ), \
185
- str (type (field_name ))
184
+ assert isinstance (field_name , str ), str (type (field_name ))
186
185
return {
187
186
'name' : name ,
188
- 'field_name' : 'None' if field_name is None else field_name ,
187
+ 'field_name' : field_name ,
189
188
'pandas_type' : logical_type ,
190
189
'numpy_type' : string_dtype ,
191
190
'metadata' : extra_metadata ,
192
191
}
193
192
194
193
195
- def construct_metadata (columns_to_convert , df , column_names , index_levels ,
196
- index_descriptors , preserve_index , types ):
194
+ def construct_metadata (columns_to_convert , df , column_names , column_field_names ,
195
+ index_levels , index_descriptors , preserve_index , types ):
197
196
"""Returns a dictionary containing enough metadata to reconstruct a pandas
198
197
DataFrame as an Arrow Table, including index columns.
199
198
@@ -219,11 +218,11 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels,
219
218
index_types = types [ntypes - num_serialized_index_levels :]
220
219
221
220
column_metadata = []
222
- for col , sanitized_name , arrow_type in zip (columns_to_convert ,
223
- column_names , df_types ):
224
- metadata = get_column_metadata (col , name = sanitized_name ,
221
+ for col , name , field_name , arrow_type in zip (columns_to_convert , column_names ,
222
+ column_field_names , df_types ):
223
+ metadata = get_column_metadata (col , name = name ,
225
224
arrow_type = arrow_type ,
226
- field_name = sanitized_name )
225
+ field_name = field_name )
227
226
column_metadata .append (metadata )
228
227
229
228
index_column_metadata = []
@@ -368,6 +367,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
368
367
return _get_columns_to_convert_given_schema (df , schema , preserve_index )
369
368
370
369
column_names = []
370
+ column_field_names = []
371
371
372
372
index_levels = (
373
373
_get_index_level_values (df .index ) if preserve_index is not False
@@ -388,6 +388,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
388
388
columns_to_convert .append (col )
389
389
convert_fields .append (None )
390
390
column_names .append (name )
391
+ column_field_names .append (str (name ))
391
392
392
393
index_descriptors = []
393
394
index_column_names = []
@@ -403,7 +404,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
403
404
index_column_names .append (name )
404
405
index_descriptors .append (descr )
405
406
406
- all_names = column_names + index_column_names
407
+ all_names = column_field_names + index_column_names
407
408
408
409
# all_names : all of the columns in the resulting table including the data
409
410
# columns and serialized index columns
@@ -416,8 +417,8 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
416
417
# to be converted to Arrow format
417
418
# columns_fields : specified column to use for coercion / casting
418
419
# during serialization, if a Schema was provided
419
- return (all_names , column_names , index_column_names , index_descriptors ,
420
- index_levels , columns_to_convert , convert_fields )
420
+ return (all_names , column_names , column_field_names , index_column_names ,
421
+ index_descriptors , index_levels , columns_to_convert , convert_fields )
421
422
422
423
423
424
def _get_columns_to_convert_given_schema (df , schema , preserve_index ):
@@ -462,8 +463,6 @@ def _get_columns_to_convert_given_schema(df, schema, preserve_index):
462
463
"specified schema" .format (name ))
463
464
is_index = True
464
465
465
- name = _column_name_to_strings (name )
466
-
467
466
if _pandas_api .is_sparse (col ):
468
467
raise TypeError (
469
468
"Sparse pandas data (column {}) not supported." .format (name ))
@@ -480,8 +479,8 @@ def _get_columns_to_convert_given_schema(df, schema, preserve_index):
480
479
481
480
all_names = column_names + index_column_names
482
481
483
- return (all_names , column_names , index_column_names , index_descriptors ,
484
- index_levels , columns_to_convert , convert_fields )
482
+ return (all_names , column_names , column_names , index_column_names ,
483
+ index_descriptors , index_levels , columns_to_convert , convert_fields )
485
484
486
485
487
486
def _get_index_level (df , name ):
@@ -539,6 +538,7 @@ def _resolve_columns_of_interest(df, schema, columns):
539
538
def dataframe_to_types (df , preserve_index , columns = None ):
540
539
(all_names ,
541
540
column_names ,
541
+ column_field_names ,
542
542
_ ,
543
543
index_descriptors ,
544
544
index_columns ,
@@ -563,7 +563,7 @@ def dataframe_to_types(df, preserve_index, columns=None):
563
563
types .append (type_ )
564
564
565
565
metadata = construct_metadata (
566
- columns_to_convert , df , column_names , index_columns ,
566
+ columns_to_convert , df , column_names , column_field_names , index_columns ,
567
567
index_descriptors , preserve_index , types
568
568
)
569
569
@@ -574,6 +574,7 @@ def dataframe_to_arrays(df, schema, preserve_index, nthreads=1, columns=None,
574
574
safe = True ):
575
575
(all_names ,
576
576
column_names ,
577
+ column_field_names ,
577
578
index_column_names ,
578
579
index_descriptors ,
579
580
index_columns ,
@@ -642,12 +643,11 @@ def _can_definitely_zero_copy(arr):
642
643
if schema is None :
643
644
fields = []
644
645
for name , type_ in zip (all_names , types ):
645
- name = name if name is not None else 'None'
646
646
fields .append (pa .field (name , type_ ))
647
647
schema = pa .schema (fields )
648
648
649
649
pandas_metadata = construct_metadata (
650
- columns_to_convert , df , column_names , index_columns ,
650
+ columns_to_convert , df , column_names , column_field_names , index_columns ,
651
651
index_descriptors , preserve_index , types
652
652
)
653
653
metadata = deepcopy (schema .metadata ) if schema .metadata else dict ()
0 commit comments