@@ -119,6 +119,13 @@ def df_upsert(src, dest):
119119 dest_sliced .sort_index (inplace = True )
120120 src_sliced .sort_index (inplace = True )
121121
122+ # Align src_sliced's row/column labels to dest_sliced. The two
123+ # were built with independent .isin() masks so column order may
124+ # differ; pandas >=1.x refuses to compare DataFrames whose
125+ # labels are not identical.
126+ src_sliced = src_sliced .reindex (index = dest_sliced .index ,
127+ columns = dest_sliced .columns )
128+
122129 # Obtain a mask of the conflicts in the current segment
123130 # as compared with all previously loaded data. That is:
124131 # NaN NaN = False
@@ -189,24 +196,30 @@ def convert_origin(df):
189196 # `for` loop loops through both `x` and `y`.
190197
191198 if offset in cur_worm .columns .get_level_values (0 ):
192- # Consider offset as 0 if not available in a certain frame
193- ox_column = cur_worm .loc [:, (offset )].fillna (0 )
199+ # Consider offset as 0 if not available in a certain frame.
200+ # Coerce to numeric: the parser can leave the offset column
201+ # with object dtype (mixed str/int entries) when offsets
202+ # are present in some segments but not others.
203+ ox_column = cur_worm .loc [:, (offset )].apply (
204+ pd .to_numeric , errors = 'coerce' ).fillna (0 )
194205
195206 # Shift our 'x' values by offset
196- all_x_columns = cur_worm .loc [:, (coord )]
197- ox_affine_change = (np .array (ox_column ) *
207+ all_x_columns = cur_worm .loc [:, (coord )].apply (
208+ pd .to_numeric , errors = 'coerce' )
209+ ox_affine_change = (np .array (ox_column , dtype = float ) *
198210 np .ones (all_x_columns .shape ))
199211 all_x_columns += ox_affine_change
200212
201213 if centroid in cur_worm .columns .get_level_values (0 ):
202- cx_column = cur_worm .loc [:, (centroid )]
214+ cx_column = cur_worm .loc [:, (centroid )].apply (
215+ pd .to_numeric , errors = 'coerce' )
203216 # Shift the centroid by the offset
204217 cx_column += ox_column
205218
206219 # Now make the centroid our new offset, since the rule
207220 # is that if the offset exists, the centroid is not
208221 # the offset, but we want it to be.
209- cx_affine_change = (np .array (cx_column ) *
222+ cx_affine_change = (np .array (cx_column , dtype = float ) *
210223 np .ones (all_x_columns .shape ))
211224 all_x_columns -= cx_affine_change
212225
@@ -224,7 +237,8 @@ def convert_origin(df):
224237 # This is so DataFrames with and without offsets
225238 # will show as comparing identically.
226239 for offset_key in offset_keys :
227- df .drop (offset_key , axis = 1 , level = 'key' , inplace = True )
240+ df .drop (offset_key , axis = 1 , level = 'key' , inplace = True ,
241+ errors = 'ignore' )
228242
229243 # Because of a known issue in Pandas
230244 # (https://github.com/pydata/pandas/issues/2770), the dropped columns
@@ -389,7 +403,7 @@ def _obtain_time_series_data_frame(time_series_data):
389403 for i in range (len (cur_timeframes )):
390404 data_segment [k ][i ] = (
391405 data_segment [k ][i ] +
392- [np .NaN ] * (max_aspect_size - len (data_segment [k ][i ])))
406+ [np .nan ] * (max_aspect_size - len (data_segment [k ][i ])))
393407
394408 num_timeframes = len (cur_timeframes )
395409
@@ -402,7 +416,7 @@ def _obtain_time_series_data_frame(time_series_data):
402416 cur_df = pd .DataFrame (cur_data , columns = cur_columns )
403417
404418 cur_df .index = cur_timeframes
405- cur_df .index .names = 't'
419+ cur_df .index .names = [ 't' ]
406420
407421 # We want the index (time) to be in order.
408422 cur_df .sort_index (axis = 0 , inplace = True )
@@ -466,7 +480,7 @@ def _obtain_time_series_data_frame(time_series_data):
466480 with warnings .catch_warnings ():
467481 warnings .filterwarnings (action = "ignore" , category = FutureWarning )
468482 df_odict [worm_id ] = \
469- df_odict [worm_id ].convert_objects ( convert_numeric = True )
483+ df_odict [worm_id ].infer_objects ( )
470484
471485 # If 'head' or 'ventral' is NaN, we must specify '?' since
472486 # otherwise, when saving this object, to specify "no value" we would
@@ -478,21 +492,27 @@ def _obtain_time_series_data_frame(time_series_data):
478492
479493 # We must replace NaN with None, otherwise the JSON encoder will
480494 # save 'NaN' as the string and this will get rejected by our schema
481- # on any subsequent loads
482- # Note we can't use .fillna(None) due to this issue:
483- # https://github.com/pydata/pandas/issues/1972
495+ # on any subsequent loads.
496+ # Pandas 3.0 infers 'str' dtype for these columns, and assigning
497+ # NaN on a str-dtype column coerces to the string 'nan'. Force
498+ # object dtype and map both real NaN and stringified 'nan' back
499+ # to None so downstream JSON serialization writes null.
484500 df_keys = set (df_odict [worm_id ].columns .get_level_values ('key' ))
485501 for k in ['head' , 'ventral' ]:
486502 if k in df_keys :
487- cur_slice = df_odict [worm_id ].loc [:, idx [:, k , :]]
488- df_odict [worm_id ].loc [:, idx [:, k , :]] = \
489- cur_slice .fillna (value = np .nan )
490-
491- # Make sure aspect_size is a float, since only floats are nullable:
503+ df = df_odict [worm_id ]
504+ for col in [c for c in df .columns if c [1 ] == k ]:
505+ s = df [col ].astype (object )
506+ df [col ] = s .where (s .notna () & (s != 'nan' ), None )
507+
508+ # Make sure aspect_size is a float, since only floats are nullable.
509+ # Replace the column whole rather than assigning via .loc[]; pandas
510+ # 2.x preserves the parent column's existing (object/str) dtype on
511+ # .loc[] assignment and raises TypeError on non-string values.
492512 if 'aspect_size' in df_keys :
493- df_odict [worm_id ]. loc [:, idx [:, 'aspect_size' , :]] = \
494- df_odict [ worm_id ]. loc [:, idx [:, 'aspect_size' , :]] \
495- .astype (float )
513+ df = df_odict [worm_id ]
514+ for col in [ c for c in df . columns if c [ 1 ] == 'aspect_size' ]:
515+ df [ col ] = df [ col ] .astype (float )
496516
497517 return sort_odict (df_odict )
498518
0 commit comments