Skip to content

Commit 5a6e595

Browse files
authored
Merge pull request #1267 from Sage-Bionetworks/develop-compliant-annotations-FDS-481
Make all entity annotations comply with synapse
2 parents b79283e + 3006d1a commit 5a6e595

File tree

1 file changed

+8
-19
lines changed

1 file changed

+8
-19
lines changed

schematic/store/synapse.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,7 +1059,7 @@ def upload_manifest_file(self, manifest, metadataManifestPath, datasetId, restri
10591059
return manifest_synapse_file_id
10601060

10611061
@missing_entity_handler
1062-
def format_row_annotations(self, se, sg, row, entityId, useSchemaLabel, hideBlanks):
1062+
def format_row_annotations(self, se, sg, row, entityId, hideBlanks):
10631063
# prepare metadata for Synapse storage: resolve each display name into a name that Synapse annotations support (e.g. no spaces or parentheses)
10641064
# note: the removal of special characters applies only to annotation keys; we are not altering the manifest
10651065
# this could create a divergence between manifest column and annotations. this should be ok for most use cases.
@@ -1069,10 +1069,7 @@ def format_row_annotations(self, se, sg, row, entityId, useSchemaLabel, hideBlan
10691069

10701070
for k, v in row.to_dict().items():
10711071

1072-
if useSchemaLabel:
1073-
keySyn = se.get_class_label_from_display_name(str(k)).translate({ord(x): '' for x in blacklist_chars})
1074-
else:
1075-
keySyn = str(k)
1072+
keySyn = se.get_class_label_from_display_name(str(k)).translate({ord(x): '' for x in blacklist_chars})
10761073

10771074
# Skip `Filename` and `ETag` columns when setting annotations
10781075
if keySyn in ["Filename", "ETag", "eTag"]:
@@ -1304,20 +1301,19 @@ def _generate_table_name(self, manifest):
13041301
table_name = 'synapse_storage_manifest_table'
13051302
return table_name, component_name
13061303

1307-
def _add_annotations(self, se, schemaGenerator, row, entityId, useSchemaLabel, hideBlanks):
1304+
def _add_annotations(self, se, schemaGenerator, row, entityId, hideBlanks):
13081305
"""Helper function to format and add annotations to entities in Synapse.
13091306
Args:
13101307
se: schemaExplorer object,
13111308
schemaGenerator: schemaGenerator Object.
13121309
row: current row of manifest being processed
13131310
entityId (str): synapseId of entity to add annotations to
1314-
useSchemaLabel (bool): Flag to use schema label instead of display name
13151311
hideBlanks: Boolean flag. When True, annotation keys with blank values are not uploaded; when False, those keys are uploaded with empty-string values.
13161312
Returns:
13171313
Annotations are added to entities in Synapse, no return.
13181314
"""
13191315
# Format annotations for Synapse
1320-
annos = self.format_row_annotations(se, schemaGenerator, row, entityId, useSchemaLabel, hideBlanks)
1316+
annos = self.format_row_annotations(se, schemaGenerator, row, entityId, hideBlanks)
13211317

13221318
if annos:
13231319
# Store annotations for an entity folder
@@ -1350,7 +1346,6 @@ def add_entities(
13501346
manifest,
13511347
manifest_record_type,
13521348
datasetId,
1353-
useSchemaLabel,
13541349
hideBlanks,
13551350
manifest_synapse_table_id=''
13561351
):
@@ -1361,7 +1356,6 @@ def add_entities(
13611356
manifest (pd.DataFrame): loaded df containing user supplied data.
13621357
manifest_record_type: valid values are 'entity', 'table' or 'both'. Specifies whether to create entity ids and folders for each row in a manifest, a Synapse table to house the entire manifest or do both.
13631358
datasetId (str): synapse ID of folder containing the dataset
1364-
useSchemaLabel (bool): Default is True - use the schema label. If False, uses the display label from the schema. Attribute display names in the schema must not only include characters that are not accepted by Synapse. Annotation names may only contain: letters, numbers, '_' and '.'.
13651359
hideBlanks (bool): Default is False. When True, annotation keys with blank values are not uploaded; when False, those keys are uploaded with empty-string values.
13661360
manifest_synapse_table_id (str): Default is an empty string ''.
13671361
Returns:
@@ -1395,7 +1389,7 @@ def add_entities(
13951389

13961390
# Adding annotations to connected files.
13971391
if entityId:
1398-
self._add_annotations(se, schemaGenerator, row, entityId, useSchemaLabel, hideBlanks)
1392+
self._add_annotations(se, schemaGenerator, row, entityId, hideBlanks)
13991393
return manifest
14001394

14011395
def upload_manifest_as_table(
@@ -1424,7 +1418,6 @@ def upload_manifest_as_table(
14241418
component_name (str): Name of the component manifest that is currently being uploaded.
14251419
restrict (bool): Flag for censored data.
14261420
manifest_record_type (str): valid values are 'entity', 'table' or 'both'. Specifies whether to create entity ids and folders for each row in a manifest, a Synapse table to house the entire manifest or do both.
1427-
useSchemaLabel(bool): Default is True - use the schema label. If False, uses the display label from the schema. Attribute display names in the schema must not only include characters that are not accepted by Synapse. Annotation names may only contain: letters, numbers, '_' and '.'.
14281421
hideBlanks (bool): Default is False. When True, annotation keys with blank values are not uploaded; when False, those keys are uploaded with empty-string values.
14291422
table_manipulation (str): Specify how the manifest table should be stored on Synapse when one with the same name already exists. Options are 'replace' and 'upsert'.
14301423
Return:
@@ -1440,7 +1433,7 @@ def upload_manifest_as_table(
14401433
useSchemaLabel,
14411434
table_manipulation)
14421435

1443-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, useSchemaLabel, hideBlanks, manifest_synapse_table_id)
1436+
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
14441437
# Load manifest to synapse as a CSV File
14451438
manifest_synapse_file_id = self.upload_manifest_file(manifest, metadataManifestPath, datasetId, restrict, component_name = component_name)
14461439

@@ -1473,7 +1466,6 @@ def upload_manifest_as_csv(
14731466
datasetId,
14741467
restrict,
14751468
manifest_record_type,
1476-
useSchemaLabel,
14771469
hideBlanks,
14781470
component_name,
14791471
with_entities = False,):
@@ -1486,15 +1478,14 @@ def upload_manifest_as_csv(
14861478
datasetId (str): synapse ID of folder containing the dataset
14871479
restrict (bool): Flag for censored data.
14881480
manifest_record_type: valid values are 'entity', 'table' or 'both'. Specifies whether to create entity ids and folders for each row in a manifest, a Synapse table to house the entire manifest or do both.
1489-
useSchemaLabel (bool): Default is True - use the schema label. If False, uses the display label from the schema. Attribute display names in the schema must not only include characters that are not accepted by Synapse. Annotation names may only contain: letters, numbers, '_' and '.'.
14901481
hideBlanks (bool): Default is False. When True, annotation keys with blank values are not uploaded; when False, those keys are uploaded with empty-string values.
14911482
table_manipulation (str): Specify how the manifest table should be stored on Synapse when one with the same name already exists. Options are 'replace' and 'upsert'.
14921483
with_entities (bool): Default is False - Flag to indicate whether to create entityIds and add annotations.
14931484
Return:
14941485
manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse.
14951486
"""
14961487
if with_entities:
1497-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, useSchemaLabel, hideBlanks)
1488+
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks)
14981489

14991490
# Load manifest to synapse as a CSV File
15001491
manifest_synapse_file_id = self.upload_manifest_file(manifest,
@@ -1549,7 +1540,7 @@ def upload_manifest_combo(
15491540
useSchemaLabel=useSchemaLabel,
15501541
table_manipulation=table_manipulation,)
15511542

1552-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, useSchemaLabel, hideBlanks, manifest_synapse_table_id)
1543+
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
15531544

15541545
# Load manifest to synapse as a CSV File
15551546
manifest_synapse_file_id = self.upload_manifest_file(manifest, metadataManifestPath, datasetId, restrict, component_name)
@@ -1623,7 +1614,6 @@ def associateMetadataWithFiles(
16231614
metadataManifestPath,
16241615
datasetId=datasetId,
16251616
restrict=restrict_manifest,
1626-
useSchemaLabel=useSchemaLabel,
16271617
hideBlanks=hideBlanks,
16281618
manifest_record_type=manifest_record_type,
16291619
component_name = component_name,
@@ -1652,7 +1642,6 @@ def associateMetadataWithFiles(
16521642
metadataManifestPath,
16531643
datasetId=datasetId,
16541644
restrict=restrict_manifest,
1655-
useSchemaLabel=useSchemaLabel,
16561645
hideBlanks=hideBlanks,
16571646
manifest_record_type=manifest_record_type,
16581647
component_name = component_name,

0 commit comments

Comments
 (0)