99from pathlib import Path
1010import pygsheets as ps
1111from tempfile import NamedTemporaryFile
12- from typing import Dict , List , Tuple , Union , Optional
12+ from typing import Dict , List , Optional , Tuple , Union
1313
1414from schematic .schemas .generator import SchemaGenerator
1515from schematic .utils .google_api_utils import (
@@ -288,7 +288,7 @@ def _get_column_data_validation_values(
288288 spreadsheet_id ,
289289 valid_values ,
290290 column_id ,
291- strict ,
291+ strict : Optional [ bool ] ,
292292 validation_type = "ONE_OF_LIST" ,
293293 custom_ui = True ,
294294 input_message = "Choose one from dropdown" ,
@@ -706,7 +706,7 @@ def _request_regex_vr(self, gs_formula, i:int, text_color={"red": 1}):
706706 return requests_vr
707707
708708 def _request_regex_match_vr_formatting (self , validation_rules : List [str ], i : int ,
709- spreadsheet_id : str , requests_body : dict , strict : bool = None ,
709+ spreadsheet_id : str , requests_body : dict , strict : Optional [ bool ] ,
710710 ):
711711 """
712712 Purpose:
@@ -1079,7 +1079,8 @@ def _create_requests_body(
10791079 ordered_metadata_fields ,
10801080 json_schema ,
10811081 spreadsheet_id ,
1082- strict = None ,
1082+ sheet_url ,
1083+ strict : Optional [bool ],
10831084 ):
10841085 """Create and store all formatting changes for the google sheet to
10851086 execute at once.
@@ -1094,6 +1095,8 @@ def _create_requests_body(
10941095 representing the data model, including: '$schema', '$id', 'title',
10951096 'type', 'properties', 'required'
10961097 spreadsheet_id: str, of the id for the google sheet
1098+ sheet_url (Will be deprecated): a boolean ; determine if a pandas dataframe or a google sheet url gets returned
1099+ strict (Optional Bool): strictness with which to apply validation rules to google sheets. True, blocks incorrect entries, False, raises a warning
10971100 Return:
10981101 requests_body(dict):
10991102 containing all the update requests to add to the gs
@@ -1102,10 +1105,11 @@ def _create_requests_body(
11021105 requests_body = {}
11031106 requests_body ["requests" ] = []
11041107 for i , req in enumerate (ordered_metadata_fields [0 ]):
1105- # Gather validation rules and valid values for attribute
1108+ # Gather validation rules and valid values for attribute.
11061109 validation_rules = self .sg .get_node_validation_rules (req )
1107-
1108- if validation_rules :
1110+
1111+ # Add regex match validation rule to Google Sheets.
1112+ if validation_rules and sheet_url :
11091113 requests_body = self ._request_regex_match_vr_formatting (
11101114 validation_rules , i , spreadsheet_id , requests_body , strict
11111115 )
@@ -1163,7 +1167,7 @@ def _create_requests_body(
11631167 requests_body ["requests" ].append (borders_formatting )
11641168 return requests_body
11651169
1166- def _create_empty_gs (self , required_metadata_fields , json_schema , spreadsheet_id , strict = None ):
1170+ def _create_empty_gs (self , required_metadata_fields , json_schema , spreadsheet_id , sheet_url , strict : Optional [ bool ] ):
11671171 """Generate requests to add columns and format the google sheet.
11681172 Args:
11691173 required_metadata_fields(dict):
@@ -1174,6 +1178,8 @@ def _create_empty_gs(self, required_metadata_fields, json_schema, spreadsheet_id
11741178 representing the data model, including: '$schema', '$id', 'title',
11751179 'type', 'properties', 'required'
11761180 spreadsheet_id: str, of the id for the google sheet
1181+ sheet_url (Will be deprecated): a boolean ; determine if a pandas dataframe or a google sheet url gets returned
1182+ strict (Optional Bool): strictness with which to apply validation rules to google sheets. True, blocks incorrect entries, False, raises a warning
11771183 Returns:
11781184 manifest_url (str): url of the google sheet manifest.
11791185 """
@@ -1193,6 +1199,7 @@ def _create_empty_gs(self, required_metadata_fields, json_schema, spreadsheet_id
11931199 ordered_metadata_fields ,
11941200 json_schema ,
11951201 spreadsheet_id ,
1202+ sheet_url ,
11961203 strict ,
11971204 )
11981205
@@ -1236,12 +1243,14 @@ def _gather_all_fields(self, fields, json_schema):
12361243 )
12371244 return required_metadata_fields
12381245
1239- def get_empty_manifest (self , strict : Optional [bool ], json_schema_filepath : str = None ):
1246+ def get_empty_manifest (self , strict : Optional [bool ], json_schema_filepath : str = None , sheet_url : Optional [ bool ] = None ):
12401247 """Create an empty manifest using specifications from the
12411248 json schema.
12421249 Args:
12431250 strict (bool): strictness with which to apply validation rules to google sheets. If true, blocks incorrect entries; if false, raises a warning
12441251 json_schema_filepath (str): path to json schema file
1252+ sheet_url (Will be deprecated): a boolean ; determine if a pandas dataframe or a google sheet url gets returned
1253+ strict (Optional Bool): strictness with which to apply validation rules to google sheets. True, blocks incorrect entries, False, raises a warning
12451254 Returns:
12461255 manifest_url (str): url of the google sheet manifest.
12471256 TODO:
@@ -1255,7 +1264,7 @@ def get_empty_manifest(self, strict: Optional[bool], json_schema_filepath: str=N
12551264 )
12561265
12571266 manifest_url = self ._create_empty_gs (
1258- required_metadata_fields , json_schema , spreadsheet_id , strict
1267+ required_metadata_fields , json_schema , spreadsheet_id , sheet_url = sheet_url , strict = strict ,
12591268 )
12601269 return manifest_url
12611270
@@ -1356,13 +1365,14 @@ def map_annotation_names_to_display_names(
13561365 return annotations .rename (columns = label_map )
13571366
13581367 def get_manifest_with_annotations (
1359- self , annotations : pd .DataFrame , strict : bool = None ,
1368+ self , annotations : pd .DataFrame , sheet_url : bool = None , strict : Optional [ bool ] = None ,
13601369 ) -> Tuple [ps .Spreadsheet , pd .DataFrame ]:
13611370 """Generate manifest, optionally with annotations (if requested).
13621371
13631372 Args:
13641373 annotations (pd.DataFrame): Annotations table (can be empty).
1365-
1374+ strict (Optional Bool): strictness with which to apply validation rules to google sheets. True, blocks incorrect entries, False, raises a warning
1375+ sheet_url (Will be deprecated): a boolean ; determine if a pandas dataframe or a google sheet url gets returned
13661376 Returns:
13671377 Tuple[ps.Spreadsheet, pd.DataFrame]: Both the Google Sheet
13681378 URL and the corresponding data frame is returned.
@@ -1381,8 +1391,8 @@ def get_manifest_with_annotations(
13811391 self .additional_metadata = annotations_dict
13821392
13831393 # Generate empty manifest using `additional_metadata`
1384- manifest_url = self .get_empty_manifest (strict = strict )
1385- manifest_df = self .get_dataframe_by_url (manifest_url )
1394+ manifest_url = self .get_empty_manifest (sheet_url = sheet_url , strict = strict )
1395+ manifest_df = self .get_dataframe_by_url (manifest_url = manifest_url )
13861396
13871397 # Annotations clashing with manifest attributes are skipped
13881398 # during empty manifest generation. For more info, search
@@ -1474,14 +1484,14 @@ def _handle_output_format_logic(self, output_format: str = None, output_path: st
14741484 return dataframe
14751485
14761486 def get_manifest (
1477- self , dataset_id : str = None , sheet_url : bool = None , json_schema : str = None , output_format : str = None , output_path : str = None , access_token : str = None , strict : bool = None ,
1487+ self , dataset_id : str = None , sheet_url : bool = None , json_schema : str = None , output_format : str = None , output_path : str = None , access_token : str = None , strict : Optional [ bool ] = None ,
14781488 ) -> Union [str , pd .DataFrame ]:
14791489 """Gets manifest for a given dataset on Synapse.
14801490 TODO: move this function to class MetadatModel (after MetadataModel is refactored)
14811491
14821492 Args:
14831493 dataset_id: Synapse ID of the "dataset" entity on Synapse (for a given center/project).
1484- sheet_url: Determines if googlesheet URL or pandas dataframe should be returned.
1494+ sheet_url (Will be deprecated): a boolean ; determine if a pandas dataframe or a google sheet url gets returned
14851495 output_format: Determines if Google sheet URL, pandas dataframe, or Excel spreadsheet gets returned.
14861496 output_path: Determines the output path of the exported manifest
14871497 access_token: Token in .synapseConfig. Since we could not pre-load access_token as an environment variable on AWS, we have to add this variable.
@@ -1492,7 +1502,7 @@ def get_manifest(
14921502
14931503 # Handle case when no dataset ID is provided
14941504 if not dataset_id :
1495- manifest_url = self .get_empty_manifest (json_schema_filepath = json_schema , strict = strict )
1505+ manifest_url = self .get_empty_manifest (json_schema_filepath = json_schema , strict = strict , sheet_url = sheet_url )
14961506
14971507 # if output_form parameter is set to "excel", return an excel spreadsheet
14981508 if output_format == "excel" :
@@ -1517,7 +1527,7 @@ def get_manifest(
15171527 manifest_record = store .updateDatasetManifestFiles (self .sg , datasetId = dataset_id , store = False )
15181528
15191529 # get URL of an empty manifest file created based on schema component
1520- empty_manifest_url = self .get_empty_manifest (strict = strict )
1530+ empty_manifest_url = self .get_empty_manifest (strict = strict , sheet_url = sheet_url )
15211531
15221532 # Populate empty template with existing manifest
15231533 if manifest_record :
@@ -1555,7 +1565,8 @@ def get_manifest(
15551565 annotations = annotations [["Filename" , "eTag" , "entityId" ]]
15561566
15571567 # Update `additional_metadata` and generate manifest
1558- manifest_url , manifest_df = self .get_manifest_with_annotations (annotations )
1568+ manifest_url , manifest_df = self .get_manifest_with_annotations (annotations , sheet_url = sheet_url , strict = strict )
1569+
15591570 # Update df with existing manifest. Agnostic to output format
15601571 updated_df , out_of_schema_columns = self ._update_dataframe_with_existing_df (empty_manifest_url = empty_manifest_url , existing_df = manifest_df )
15611572
@@ -1609,8 +1620,8 @@ def _update_dataframe_with_existing_df(self, empty_manifest_url: str, existing_d
16091620 """
16101621
16111622 # Get headers for the current schema and existing manifest df.
1612- current_schema_headers = list (self .get_dataframe_by_url (empty_manifest_url ).columns )
1613- existing_manifest_headers = list (existing_df .columns )
1623+ current_schema_headers = list (self .get_dataframe_by_url (manifest_url = empty_manifest_url ).columns )
1624+ existing_manifest_headers = list (existing_df .columns )
16141625
16151626 # Find columns that exist in the current schema, but are not in the manifest being downloaded.
16161627 new_columns = self ._get_missing_columns (current_schema_headers , existing_manifest_headers )
0 commit comments