Skip to content

Commit 11cbbe8

Browse files
committed
make all entity annotations comply with synapse
1 parent ba1e879 commit 11cbbe8

File tree

1 file changed

+8
-19
lines changed

1 file changed

+8
-19
lines changed

schematic/store/synapse.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,7 +1060,7 @@ def upload_manifest_file(self, manifest, metadataManifestPath, datasetId, restri
10601060
return manifest_synapse_file_id
10611061

10621062
@missing_entity_handler
1063-
def format_row_annotations(self, se, sg, row, entityId, useSchemaLabel, hideBlanks):
1063+
def format_row_annotations(self, se, sg, row, entityId, hideBlanks):
10641064
# prepare metadata for Synapse storage (resolve display name into a name that Synapse annotations support, e.g. no spaces or parentheses)
10651065
# note: the removal of special characters will apply only to annotation keys; we are not altering the manifest
10661066
# this could create a divergence between manifest column and annotations. this should be ok for most use cases.
@@ -1070,10 +1070,7 @@ def format_row_annotations(self, se, sg, row, entityId, useSchemaLabel, hideBlan
10701070

10711071
for k, v in row.to_dict().items():
10721072

1073-
if useSchemaLabel:
1074-
keySyn = se.get_class_label_from_display_name(str(k)).translate({ord(x): '' for x in blacklist_chars})
1075-
else:
1076-
keySyn = str(k)
1073+
keySyn = se.get_class_label_from_display_name(str(k)).translate({ord(x): '' for x in blacklist_chars})
10771074

10781075
# Skip `Filename` and `ETag` columns when setting annotations
10791076
if keySyn in ["Filename", "ETag", "eTag"]:
@@ -1305,20 +1302,19 @@ def _generate_table_name(self, manifest):
13051302
table_name = 'synapse_storage_manifest_table'
13061303
return table_name, component_name
13071304

1308-
def _add_annotations(self, se, schemaGenerator, row, entityId, useSchemaLabel, hideBlanks):
1305+
def _add_annotations(self, se, schemaGenerator, row, entityId, hideBlanks):
13091306
"""Helper function to format and add annotations to entities in Synapse.
13101307
Args:
13111308
se: schemaExplorer object,
13121309
schemaGenerator: schemaGenerator Object.
13131310
row: current row of manifest being processed
13141311
entityId (str): synapseId of entity to add annotations to
1315-
useSchemaLabel (bool): Flag to use schema label instead of display name
13161312
hideBlanks: Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false.
13171313
Returns:
13181314
Annotations are added to entities in Synapse, no return.
13191315
"""
13201316
# Format annotations for Synapse
1321-
annos = self.format_row_annotations(se, schemaGenerator, row, entityId, useSchemaLabel, hideBlanks)
1317+
annos = self.format_row_annotations(se, schemaGenerator, row, entityId, hideBlanks)
13221318

13231319
if annos:
13241320
# Store annotations for an entity folder
@@ -1351,7 +1347,6 @@ def add_entities(
13511347
manifest,
13521348
manifest_record_type,
13531349
datasetId,
1354-
useSchemaLabel,
13551350
hideBlanks,
13561351
manifest_synapse_table_id=''
13571352
):
@@ -1362,7 +1357,6 @@ def add_entities(
13621357
manifest (pd.DataFrame): loaded df containing user supplied data.
13631358
manifest_record_type: valid values are 'entity', 'table' or 'both'. Specifies whether to create entity ids and folders for each row in a manifest, a Synapse table to house the entire manifest or do both.
13641359
datasetId (str): synapse ID of folder containing the dataset
1365-
useSchemaLabel (bool): Default is True - use the schema label. If False, uses the display label from the schema. Attribute display names in the schema must not only include characters that are not accepted by Synapse. Annotation names may only contain: letters, numbers, '_' and '.'.
13661360
hideBlanks (bool): Default is false -Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false.
13671361
manifest_synapse_table_id (str): Default is an empty string ''.
13681362
Returns:
@@ -1396,7 +1390,7 @@ def add_entities(
13961390

13971391
# Adding annotations to connected files.
13981392
if entityId:
1399-
self._add_annotations(se, schemaGenerator, row, entityId, useSchemaLabel, hideBlanks)
1393+
self._add_annotations(se, schemaGenerator, row, entityId, hideBlanks)
14001394
return manifest
14011395

14021396
def upload_manifest_as_table(
@@ -1425,7 +1419,6 @@ def upload_manifest_as_table(
14251419
component_name (str): Name of the component manifest that is currently being uploaded.
14261420
restrict (bool): Flag for censored data.
14271421
manifest_record_type (str): valid values are 'entity', 'table' or 'both'. Specifies whether to create entity ids and folders for each row in a manifest, a Synapse table to house the entire manifest or do both.
1428-
useSchemaLabel(bool): Default is True - use the schema label. If False, uses the display label from the schema. Attribute display names in the schema must not only include characters that are not accepted by Synapse. Annotation names may only contain: letters, numbers, '_' and '.'.
14291422
hideBlanks (bool): Default is False -Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false.
14301423
table_manipulation (str): Specify how the manifest table should be stored on Synapse when one with the same name already exists. Options are 'replace' and 'upsert'.
14311424
Return:
@@ -1441,7 +1434,7 @@ def upload_manifest_as_table(
14411434
useSchemaLabel,
14421435
table_manipulation)
14431436

1444-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, useSchemaLabel, hideBlanks, manifest_synapse_table_id)
1437+
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
14451438
# Load manifest to synapse as a CSV File
14461439
manifest_synapse_file_id = self.upload_manifest_file(manifest, metadataManifestPath, datasetId, restrict, component_name = component_name)
14471440

@@ -1474,7 +1467,6 @@ def upload_manifest_as_csv(
14741467
datasetId,
14751468
restrict,
14761469
manifest_record_type,
1477-
useSchemaLabel,
14781470
hideBlanks,
14791471
component_name,
14801472
with_entities = False,):
@@ -1487,15 +1479,14 @@ def upload_manifest_as_csv(
14871479
datasetId (str): synapse ID of folder containing the dataset
14881480
restrict (bool): Flag for censored data.
14891481
manifest_record_type: valid values are 'entity', 'table' or 'both'. Specifies whether to create entity ids and folders for each row in a manifest, a Synapse table to house the entire manifest or do both.
1490-
useSchemaLabel (bool): Default is True - use the schema label. If False, uses the display label from the schema. Attribute display names in the schema must not only include characters that are not accepted by Synapse. Annotation names may only contain: letters, numbers, '_' and '.'.
14911482
hideBlanks (bool): Default is False -Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false.
14921483
table_manipulation (str): Specify how the manifest table should be stored on Synapse when one with the same name already exists. Options are 'replace' and 'upsert'.
14931484
with_entities (bool): Default is False - Flag to indicate whether to create entityIds and add annotations.
14941485
Return:
14951486
manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse.
14961487
"""
14971488
if with_entities:
1498-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, useSchemaLabel, hideBlanks)
1489+
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks)
14991490

15001491
# Load manifest to synapse as a CSV File
15011492
manifest_synapse_file_id = self.upload_manifest_file(manifest,
@@ -1550,7 +1541,7 @@ def upload_manifest_combo(
15501541
useSchemaLabel=useSchemaLabel,
15511542
table_manipulation=table_manipulation,)
15521543

1553-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, useSchemaLabel, hideBlanks, manifest_synapse_table_id)
1544+
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
15541545

15551546
# Load manifest to synapse as a CSV File
15561547
manifest_synapse_file_id = self.upload_manifest_file(manifest, metadataManifestPath, datasetId, restrict, component_name)
@@ -1624,7 +1615,6 @@ def associateMetadataWithFiles(
16241615
metadataManifestPath,
16251616
datasetId=datasetId,
16261617
restrict=restrict_manifest,
1627-
useSchemaLabel=useSchemaLabel,
16281618
hideBlanks=hideBlanks,
16291619
manifest_record_type=manifest_record_type,
16301620
component_name = component_name,
@@ -1653,7 +1643,6 @@ def associateMetadataWithFiles(
16531643
metadataManifestPath,
16541644
datasetId=datasetId,
16551645
restrict=restrict_manifest,
1656-
useSchemaLabel=useSchemaLabel,
16571646
hideBlanks=hideBlanks,
16581647
manifest_record_type=manifest_record_type,
16591648
component_name = component_name,

0 commit comments

Comments
 (0)