@@ -439,7 +439,7 @@ def _gather_dependency_requirements(self, json_schema, required_metadata_fields)
439439
440440 return required_metadata_fields
441441
442- def _add_root_to_component (self , required_metadata_fields ):
442+ def _add_root_to_component (self , required_metadata_fields : Dict [ str , List ] ):
443443 """If 'Component' is in the column set, add root node as a
444444 metadata component entry in the first row of that column.
445445 Args:
@@ -462,7 +462,6 @@ def _add_root_to_component(self, required_metadata_fields):
462462 )
463463 else :
464464 self .additional_metadata ["Component" ] = [self .root ]
465-
466465 return
467466
468467 def _get_additional_metadata (self , required_metadata_fields : dict ) -> dict :
@@ -886,7 +885,7 @@ def _request_note_valid_values(self, i, req, validation_rules, valid_values):
886885 else :
887886 return
888887
889- def _request_notes_comments (self , i , req , json_schema ):
888+ def _set_required_columns_color (self , i , req , json_schema ):
890889 """Update background colors so that columns that are required are highlighted
891890 Args:
892891 i (int): column index
@@ -1126,10 +1125,10 @@ def _create_requests_body(
11261125 if get_row_formatting :
11271126 requests_body ["requests" ].append (get_row_formatting )
11281127
1129- # Add notes to headers to provide descriptions of the attribute
1130- header_notes = self ._request_notes_comments (i , req , json_schema )
1131- if header_notes :
1132- requests_body ["requests" ].append (header_notes )
1128+ # set color of required columns to blue
1129+ required_columns_color = self ._set_required_columns_color (i , req , json_schema )
1130+ if required_columns_color :
1131+ requests_body ["requests" ].append (required_columns_color )
11331132 # Add note on how to use multi-select, when appropriate
11341133 note_vv = self ._request_note_valid_values (
11351134 i , req , validation_rules , valid_values
@@ -1365,19 +1364,16 @@ def map_annotation_names_to_display_names(
13651364 return annotations .rename (columns = label_map )
13661365
13671366 def get_manifest_with_annotations (
1368- self , annotations : pd .DataFrame , sheet_url : bool = None , strict : Optional [bool ]= None ,
1367+ self , annotations : pd .DataFrame , strict : Optional [bool ]= None
13691368 ) -> Tuple [ps .Spreadsheet , pd .DataFrame ]:
13701369 """Generate manifest, optionally with annotations (if requested).
1371-
13721370 Args:
13731371 annotations (pd.DataFrame): Annotations table (can be empty).
13741372 strict (Optional[bool]): Strictness with which to apply validation rules to Google Sheets. True blocks incorrect entries; False raises a warning.
1375- sheet_url (Will be deprecated): a boolean ; determine if a pandas dataframe or a google sheet url gets return
13761373 Returns:
13771374 Tuple[ps.Spreadsheet, pd.DataFrame]: Both the Google Sheet
13781375 URL and the corresponding data frame is returned.
13791376 """
1380-
13811377 # Map annotation labels to display names to match manifest columns
13821378 annotations = self .map_annotation_names_to_display_names (annotations )
13831379
@@ -1391,19 +1387,19 @@ def get_manifest_with_annotations(
13911387 self .additional_metadata = annotations_dict
13921388
13931389 # Generate empty manifest using `additional_metadata`
1394- manifest_url = self .get_empty_manifest (sheet_url = sheet_url , strict = strict )
1390+ # With annotations added, regenerate empty manifest
1391+ manifest_url = self .get_empty_manifest (sheet_url = True , strict = strict )
13951392 manifest_df = self .get_dataframe_by_url (manifest_url = manifest_url )
13961393
13971394 # Annotations clashing with manifest attributes are skipped
13981395 # during empty manifest generation. For more info, search
13991396 # for `additional_metadata` in `self.get_empty_manifest`.
14001397 # Hence, the shared columns need to be updated separately.
1401- if self .is_file_based and self .use_annotations :
1402- # This approach assumes that `update_df` returns
1403- # a data frame whose columns are in the same order
1404- manifest_df = update_df (manifest_df , annotations )
1405- manifest_sh = self .set_dataframe_by_url (manifest_url , manifest_df )
1406- manifest_url = manifest_sh .url
1398+ # This approach assumes that `update_df` returns
1399+ # a data frame whose columns are in the same order
1400+ manifest_df = update_df (manifest_df , annotations )
1401+ manifest_sh = self .set_dataframe_by_url (manifest_url , manifest_df )
1402+ manifest_url = manifest_sh .url
14071403
14081404 return manifest_url , manifest_df
14091405
@@ -1527,7 +1523,7 @@ def get_manifest(
15271523 manifest_record = store .updateDatasetManifestFiles (self .sg , datasetId = dataset_id , store = False )
15281524
15291525 # get URL of an empty manifest file created based on schema component
1530- empty_manifest_url = self .get_empty_manifest (strict = strict , sheet_url = sheet_url )
1526+ empty_manifest_url = self .get_empty_manifest (strict = strict , sheet_url = True )
15311527
15321528 # Populate empty template with existing manifest
15331529 if manifest_record :
@@ -1547,25 +1543,24 @@ def get_manifest(
15471543 return result
15481544
15491545 # Generate empty template and optionally fill in with annotations
1546+ # If there is no existing manifest and `use_annotations` is set to True,
1547+ # pull annotations (in practice, annotations should be empty when there is no existing manifest).
15501548 else :
15511549 # Using getDatasetAnnotations() to retrieve file names and subset
15521550 # entities to files and folders (ignoring tables/views)
1553-
15541551 annotations = pd .DataFrame ()
1555- if self .is_file_based :
1556- annotations = store .getDatasetAnnotations (dataset_id )
1557-
1558- # if there are no files with annotations just generate an empty manifest
1559- if annotations .empty :
1560- manifest_url = self .get_empty_manifest (strict = strict )
1561- manifest_df = self .get_dataframe_by_url (manifest_url )
1552+ if self .use_annotations :
1553+ if self .is_file_based :
1554+ annotations = store .getDatasetAnnotations (dataset_id )
1555+ # Update `additional_metadata` and generate manifest
1556+ manifest_url , manifest_df = self .get_manifest_with_annotations (annotations ,strict = strict )
15621557 else :
1563- # Subset columns if no interested in user-defined annotations and there are files present
1564- if self .is_file_based and not self . use_annotations :
1565- annotations = annotations [[ "Filename" , "eTag" , " entityId" ]]
1566-
1567- # Update `additional_metadata` and generate manifest
1568- manifest_url , manifest_df = self . get_manifest_with_annotations ( annotations , sheet_url = sheet_url , strict = strict )
1558+ empty_manifest_df = self . get_dataframe_by_url ( empty_manifest_url )
1559+ if self .is_file_based :
1560+ # For a file-based manifest, make sure that the entityId and Filename columns still get filled even when use_annotations is set to False
1561+ manifest_df = store . add_entity_id_and_filename ( dataset_id , empty_manifest_df )
1562+ else :
1563+ manifest_df = empty_manifest_df
15691564
15701565 # Update df with existing manifest. Agnostic to output format
15711566 updated_df , out_of_schema_columns = self ._update_dataframe_with_existing_df (empty_manifest_url = empty_manifest_url , existing_df = manifest_df )
0 commit comments