@@ -119,6 +119,13 @@ def df_upsert(src, dest):
119119 dest_sliced .sort_index (axis = 1 , inplace = True )
120120 src_sliced .sort_index (axis = 1 , inplace = True )
121121
122+ # Align src_sliced's row/column labels to dest_sliced. The two
123+ # were built with independent .isin() masks, so their labels may
124+ # still differ after the sort above; pandas >=1.x refuses to
125+ # compare DataFrames whose labels are not identical.
126+ src_sliced = src_sliced .reindex (index = dest_sliced .index ,
127+ columns = dest_sliced .columns )
128+
122129 # Obtain a mask of the conflicts in the current segment
123130 # as compared with all previously loaded data. That is:
124131 # NaN NaN = False
@@ -189,24 +196,30 @@ def convert_origin(df):
189196 # `for` loop loops through both `x` and `y`.
190197
191198 if offset in cur_worm .columns .get_level_values (0 ):
192- # Consider offset as 0 if not available in a certain frame
193- ox_column = cur_worm .loc [:, (offset )].fillna (0 ).astype ('float64' )
199+ # Consider offset as 0 if not available in a certain frame.
200+ # Coerce to numeric: the parser can leave the offset column
201+ # with object dtype (mixed str/int entries) when offsets
202+ # are present in some segments but not others.
203+ ox_column = cur_worm .loc [:, (offset )].apply (
204+ pd .to_numeric , errors = 'coerce' ).fillna (0 )
194205
195206 # Shift our 'x' values by offset
196- all_x_columns = cur_worm .loc [:, (coord )].fillna (0 ).astype ('float64' )
197- ox_affine_change = (np .array (ox_column ) *
207+ all_x_columns = cur_worm .loc [:, (coord )].apply (
208+ pd .to_numeric , errors = 'coerce' )
209+ ox_affine_change = (np .array (ox_column , dtype = float ) *
198210 np .ones (all_x_columns .shape ))
199211 all_x_columns += ox_affine_change
200212
201213 if centroid in cur_worm .columns .get_level_values (0 ):
202- cx_column = cur_worm .loc [:, (centroid )]
214+ cx_column = cur_worm .loc [:, (centroid )].apply (
215+ pd .to_numeric , errors = 'coerce' )
203216 # Shift the centroid by the offset
204217 cx_column += ox_column
205218
206219 # Now make the centroid our new offset, since the rule
207220 # is that if the offset exists, the centroid is not
208221 # the offset, but we want it to be.
209- cx_affine_change = (np .array (cx_column ) *
222+ cx_affine_change = (np .array (cx_column , dtype = float ) *
210223 np .ones (all_x_columns .shape ))
211224 all_x_columns -= cx_affine_change
212225
@@ -227,7 +240,8 @@ def convert_origin(df):
227240 # This is so DataFrames with and without offsets
228241 # will show as comparing identically.
229242 for offset_key in offset_keys :
230- df .drop (offset_key , axis = 1 , level = 'key' , inplace = True , errors = 'ignore' )
243+ df .drop (offset_key , axis = 1 , level = 'key' , inplace = True ,
244+ errors = 'ignore' )
231245
232246 # Because of a known issue in Pandas
233247 # (https://github.com/pydata/pandas/issues/2770), the dropped columns
@@ -405,7 +419,7 @@ def _obtain_time_series_data_frame(time_series_data):
405419 cur_df = pd .DataFrame (cur_data , columns = cur_columns )
406420
407421 cur_df .index = cur_timeframes
408- cur_df .index .name = 't'
422+ cur_df .index .names = [ 't' ]
409423
410424 # We want the index (time) to be in order.
411425 cur_df .sort_index (axis = 0 , inplace = True )
@@ -469,7 +483,7 @@ def _obtain_time_series_data_frame(time_series_data):
469483 with warnings .catch_warnings ():
470484 warnings .filterwarnings (action = "ignore" , category = FutureWarning )
471485 df_odict [worm_id ] = \
472- df_odict [worm_id ].convert_dtypes ( convert_floating = True )
486+ df_odict [worm_id ].infer_objects ( )
473487
474488 # If 'head' or 'ventral' is NaN, we must specify '?' since
475489 # otherwise, when saving this object, to specify "no value" we would
@@ -481,21 +495,27 @@ def _obtain_time_series_data_frame(time_series_data):
481495
482496 # We must replace NaN with None, otherwise the JSON encoder will
483497 # save 'NaN' as the string and this will get rejected by our schema
484- # on any subsequent loads
485- # Note we can't use .fillna(None) due to this issue:
486- # https://github.com/pydata/pandas/issues/1972
498+ # on any subsequent loads.
499+ # Pandas 3.0 infers 'str' dtype for these columns, and assigning
500+ # NaN on a str-dtype column coerces to the string 'nan'. Force
501+ # object dtype and map both real NaN and stringified 'nan' back
502+ # to None so downstream JSON serialization writes null.
487503 df_keys = set (df_odict [worm_id ].columns .get_level_values ('key' ))
488504 for k in ['head' , 'ventral' ]:
489505 if k in df_keys :
490- cur_slice = df_odict [worm_id ].loc [:, idx [:, k , :]]
491- df_odict [worm_id ].loc [:, idx [:, k , :]] = \
492- cur_slice .fillna (value = np .nan )
493-
494- # Make sure aspect_size is a float, since only floats are nullable:
506+ df = df_odict [worm_id ]
507+ for col in [c for c in df .columns if c [1 ] == k ]:
508+ s = df [col ].astype (object )
509+ df [col ] = s .where (s .notna () & (s != 'nan' ), None )
510+
511+ # Make sure aspect_size is a float, since only floats are nullable.
512+ # Replace the column whole rather than assigning via .loc[]; pandas
513+ # 2.x preserves the parent column's existing (object/str) dtype on
514+ # .loc[] assignment and raises TypeError on non-string values.
495515 if 'aspect_size' in df_keys :
496- df_odict [worm_id ]. loc [:, idx [:, 'aspect_size' , :]] = \
497- df_odict [ worm_id ]. loc [:, idx [:, 'aspect_size' , :]] \
498- .astype (float )
516+ df = df_odict [worm_id ]
517+ for col in [ c for c in df . columns if c [ 1 ] == 'aspect_size' ]:
518+ df [ col ] = df [ col ] .astype (float )
499519
500520 return sort_odict (df_odict )
501521
0 commit comments