@@ -1299,6 +1299,8 @@ def set_dataframe_by_url(
12991299 start_col = self ._column_to_letter (len (manifest_df .columns ) - num_out_of_schema_columns ) # find start of out of schema columns
13001300 end_col = self ._column_to_letter (len (manifest_df .columns ) + 1 ) # find end of out of schema columns
13011301 wb .set_data_validation (start = start_col , end = end_col , condition_type = None )
1302+
1303+
13021304 # set permissions so that anyone with the link can edit
13031305 sh .share ("" , role = "writer" , type = "anyone" )
13041306
@@ -1463,7 +1465,7 @@ def _handle_output_format_logic(self, output_format: str = None, output_path: st
14631465 return output_file_path
14641466
14651467 # Return google sheet if sheet_url flag is raised.
1466- elif sheet_url :
1468+ elif sheet_url :
14671469 manifest_sh = self .set_dataframe_by_url (manifest_url = empty_manifest_url , manifest_df = dataframe , out_of_schema_columns = out_of_schema_columns )
14681470 return manifest_sh .url
14691471
@@ -1521,7 +1523,6 @@ def get_manifest(
15211523 if manifest_record :
15221524 # TODO: Update or remove the warning in self.__init__() if
15231525 # you change the behavior here based on self.use_annotations
1524-
15251526 # Update df with existing manifest. Agnostic to output format
15261527 updated_df , out_of_schema_columns = self ._update_dataframe_with_existing_df (empty_manifest_url = empty_manifest_url , existing_df = manifest_record [1 ])
15271528
@@ -1555,7 +1556,6 @@ def get_manifest(
15551556
15561557 # Update `additional_metadata` and generate manifest
15571558 manifest_url , manifest_df = self .get_manifest_with_annotations (annotations )
1558-
15591559 # Update df with existing manifest. Agnostic to output format
15601560 updated_df , out_of_schema_columns = self ._update_dataframe_with_existing_df (empty_manifest_url = empty_manifest_url , existing_df = manifest_df )
15611561
@@ -1564,10 +1564,35 @@ def get_manifest(
15641564 output_path = output_path ,
15651565 sheet_url = sheet_url ,
15661566 empty_manifest_url = empty_manifest_url ,
1567- dataframe = manifest_df ,
1567+ dataframe = updated_df ,
1568+ out_of_schema_columns = out_of_schema_columns ,
15681569 )
15691570 return result
15701571
1572+ def _get_end_columns (self , current_schema_headers , existing_manifest_headers , out_of_schema_columns ):
1573+ """
1574+ Gather columns to be added to the end of the manifest, and ensure entityId is at the end.
1575+ Args:
1576+ current_schema_headers: list, columns in the current manifest schema
1577+ existing_manifest_headers: list, columns in the existing manifest
1578+ out_of_schema_columns: set, columns that are in the existing manifest, but not the current schema
1579+ Returns:
1580+ end_columns: list of columns to be added to the end of the manifest.
1581+ """
1582+ # Identify columns to add to the end of the manifest
1583+ end_columns = list (out_of_schema_columns )
1584+
1585+ # Make sure want Ids are placed at end of manifest, in given order.
1586+ for id_name in ['Uuid' , 'Id' , 'entityId' ]:
1587+ if id_name in end_columns :
1588+ end_columns .remove (id_name )
1589+ end_columns .append (id_name )
1590+
1591+ # Add entity_id to the end columns if it should be there but isn't
1592+ if 'entityId' in (current_schema_headers or existing_manfiest_headers ) and 'entityId' not in end_columns :
1593+ end_columns .append ('entityId' )
1594+ return end_columns
1595+
15711596 def _update_dataframe_with_existing_df (self , empty_manifest_url : str , existing_df : pd .DataFrame ) -> pd .DataFrame :
15721597 """
15731598 Handle scenario when existing manifest does not match new manifest template due to changes in the data model:
@@ -1585,13 +1610,13 @@ def _update_dataframe_with_existing_df(self, empty_manifest_url: str, existing_d
15851610
15861611 # Get headers for the current schema and existing manifest df.
15871612 current_schema_headers = list (self .get_dataframe_by_url (empty_manifest_url ).columns )
1588- existing_manfiest_headers = list (existing_df .columns )
1613+ existing_manifest_headers = list (existing_df .columns )
15891614
15901615 # Find columns that exist in the current schema, but are not in the manifest being downloaded.
1591- new_columns = self ._get_missing_columns (current_schema_headers , existing_manfiest_headers )
1616+ new_columns = self ._get_missing_columns (current_schema_headers , existing_manifest_headers )
15921617
15931618 # Find columns that exist in the manifest being downloaded, but not in the current schema.
1594- out_of_schema_columns = self ._get_missing_columns (existing_manfiest_headers , current_schema_headers )
1619+ out_of_schema_columns = self ._get_missing_columns (existing_manifest_headers , current_schema_headers )
15951620
15961621 # clean empty columns if any are present (there should be none)
15971622 # TODO: Remove this line once we start preventing empty column names
@@ -1607,12 +1632,17 @@ def _update_dataframe_with_existing_df(self, empty_manifest_url: str, existing_d
16071632 ** dict (zip (new_columns , len (new_columns ) * ["" ]))
16081633 )
16091634
1635+ end_columns = self ._get_end_columns (current_schema_headers = current_schema_headers ,
1636+ existing_manifest_headers = existing_manifest_headers ,
1637+ out_of_schema_columns = out_of_schema_columns )
1638+
16101639 # sort columns in the updated manifest:
16111640 # match latest schema order
16121641 # move obsolete columns at the end
16131642 updated_df = updated_df [self .sort_manifest_fields (updated_df .columns )]
1614- updated_df = updated_df [[c for c in updated_df if c not in out_of_schema_columns ] + list (out_of_schema_columns )]
16151643
1644+ # move obsolete columns at the end with entityId at the very end
1645+ updated_df = updated_df [[c for c in updated_df if c not in end_columns ] + list (end_columns )]
16161646 return updated_df , out_of_schema_columns
16171647
16181648 def _format_new_excel_column (self , worksheet , new_column_index : int , col : str ):
0 commit comments