Skip to content

Commit d876195

Browse files
authored
Duplicate columns are ignored when adding default View columns and annotation columns to EntityView(SYNPY-489) (#503)
* reafactored EntityView repeated column logic * flitering of repeated column names added for both annotations and default view columns
1 parent a3caf49 commit d876195

File tree

3 files changed

+73
-52
lines changed

3 files changed

+73
-52
lines changed

synapseclient/table.py

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,7 @@ def _before_synapse_store(self, syn):
551551
self.properties.columnIds.extend(column.id for column in syn.createColumns(self.columns_to_store))
552552
self.columns_to_store = []
553553

554+
554555
class Schema(SchemaBase):
555556
"""
556557
A Schema is an :py:class:`synapseclient.entity.Entity` that defines a set of columns in a table.
@@ -645,61 +646,62 @@ def add_scope(self, entities):
645646
self.scopeIds.append(id_of(entities))
646647

647648
def _before_synapse_store(self, syn):
648-
if self.addAnnotationColumns:
649-
self._add_annotations_as_columns(syn)
650-
self.addAnnotationColumns = False
651-
652649
#get the default EntityView columns from Synapse and add them to the columns list
650+
additional_columns = []
653651
if self.addDefaultViewColumns:
654-
self.addColumns(syn._get_default_entity_view_columns(self.type))
655-
self.addDefaultViewColumns = False
652+
additional_columns.extend(syn._get_default_entity_view_columns(self.type))
653+
654+
#get default annotations
655+
if self.addAnnotationColumns:
656+
anno_columns = [x for x in syn._get_annotation_entity_view_columns(self.scopeIds, self.type) if
657+
x['name'] not in self.ignoredAnnotationColumnNames]
658+
additional_columns.extend(anno_columns)
659+
660+
self.addColumns(self._filter_duplicate_columns(syn, additional_columns))
661+
662+
#set these boolean flags to false so they are not repeated.
663+
self.addDefaultViewColumns = False
664+
self.addAnnotationColumns = False
656665

657666
super(EntityViewSchema, self)._before_synapse_store(syn)
658667

659668

660-
def _add_annotations_as_columns(self, syn):
661-
column_type_to_annotation_names = {
662-
'STRING': set(),
663-
'INTEGER': set(),
664-
'DOUBLE': set(),
665-
'DATE': set()
666-
}
667-
all_existing_column_names = set() # set of all existing columns names regardless of type
669+
def _filter_duplicate_columns(self, syn, columns_to_add):
670+
"""
671+
If a column to be added has the same name and same type as an existing column, it will be considered a duplicate and not added.
672+
:param syn: a :py:class:`synapseclient.client.Synapse` object that is logged in
673+
:param columns_to_add: iterable collection of type :py:class:`synapseclient.table.Column` objects
674+
:return: a filtered list of columns to add
675+
"""
676+
677+
# no point in making HTTP calls to retrieve existing Columns if we not adding any new columns
678+
if not columns_to_add:
679+
return columns_to_add
680+
681+
# set up Column name/type tracking
682+
column_type_to_annotation_names = {} # map of str -> set(str), where str is the column type as a string and set is a set of column name strings
668683

669684
# add to existing columns the columns that user has added but not yet created in synapse
670-
column_generator = itertools.chain(syn.getColumns(self.columnIds), self.columns_to_store) if self.columns_to_store else syn.getColumns(self.columnIds)
685+
column_generator = itertools.chain(syn.getColumns(self.columnIds),
686+
self.columns_to_store) if self.columns_to_store else syn.getColumns(self.columnIds)
671687

672688
for column in column_generator:
673689
column_name = column['name']
674690
column_type = column['columnType']
675691

676-
all_existing_column_names.add(column_name)
677-
#add to type specific set
678-
if column_type in column_type_to_annotation_names:
679-
column_type_to_annotation_names[column_type].add(column_name)
680-
681-
682-
#get annotations from each of the scopes and create columns
683-
columns_to_add = [] #temporarily store all columns so that none are added if any errors occur
684-
anno_columns = syn._get_annotation_entity_view_columns(self.scopeIds, self.type)
685-
for column in anno_columns:
686-
anno_col_name = column['name']
687-
anno_col_type = column['columnType']
688-
typed_col_name_set = column_type_to_annotation_names[anno_col_type]
689-
if (anno_col_name not in self.ignoredAnnotationColumnNames
690-
and anno_col_name not in typed_col_name_set):
691-
692-
if anno_col_name in all_existing_column_names:
693-
raise ValueError("The annotation column name [%s] has multiple types in your scopes or in your defined columns.\n"
694-
"Please do one of the following:\n"
695-
" Turn off the automatic conversion of annotations to column names: entityView.addAnnotationColumns = False\n"
696-
" Modify your annotations/columns named [%s] to all be of the same type.\n"
697-
" Add the annotation name to the set of ignored annotation names via entityView.ignoredAnnotations.add(%s).\n" % (anno_col_name, anno_col_name, anno_col_name))
698-
all_existing_column_names.add(anno_col_name)
699-
typed_col_name_set.add(anno_col_name)
700-
columns_to_add.append(column)
701-
self.addColumns(columns_to_add)
692+
column_type_to_annotation_names.setdefault(column_type,set()).add(column_name)
693+
694+
695+
valid_columns = []
696+
for column in columns_to_add:
697+
new_col_name = column['name']
698+
new_col_type = column['columnType']
702699

700+
typed_col_name_set = column_type_to_annotation_names.setdefault(new_col_type, set())
701+
if new_col_name not in typed_col_name_set:
702+
typed_col_name_set.add(new_col_name)
703+
valid_columns.append(column)
704+
return valid_columns
703705

704706

705707
## add Schema to the map of synapse entity types to their Python representations

tests/integration/test_tables.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def test_entity_view_add_annotation_columns():
159159
prev_columns = list(entity_view.columnIds)
160160
# sometimes annotation columns are not immediately updated so we wait for it to update in a loop
161161
while len(entity_view.columnIds) != len(prev_columns) + 1:
162+
print(len(prev_columns), entity_view.columnIds)
162163
entity_view.addAnnotationColumns = True
163164
entity_view = syn.store(entity_view)
164165

tests/unit/unit_test_tables.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from synapseclient.exceptions import SynapseError
3030
from synapseclient.entity import split_entity_namespaces
3131
from synapseclient.table import Column, Schema, CsvFileTable, TableQueryResult, cast_values, \
32-
as_table_columns, Table, RowSet, SelectColumn, EntityViewSchema, RowSetTable, Row, PartialRow, PartialRowset
32+
as_table_columns, Table, RowSet, SelectColumn, EntityViewSchema, RowSetTable, Row, PartialRow, PartialRowset, SchemaBase
3333
from mock import patch
3434
from collections import OrderedDict
3535

@@ -610,40 +610,58 @@ def test_EntityViewSchema__ignore_column_names_set_info_preserved():
610610

611611

612612

613-
def test_EntityViewSchema__ignore_column_names():
613+
def test_EntityViewSchema__ignore_annotation_column_names():
614614
syn = synapseclient.client.Synapse(debug=True, skip_checks=True)
615615

616616
scopeIds = ['123']
617-
entity_view = EntityViewSchema("someName", scopes = scopeIds ,parent="syn123", ignoredAnnotationColumnNames={'long1'})
617+
entity_view = EntityViewSchema("someName", scopes = scopeIds ,parent="syn123", ignoredAnnotationColumnNames={'long1'}, addDefaultViewColumns=False, addAnnotationColumns=True)
618618

619619
mocked_annotation_result1 = [Column(name='long1', columnType='INTEGER'), Column(name='long2', columnType ='INTEGER')]
620620

621621
with patch.object(syn, '_get_annotation_entity_view_columns', return_value=mocked_annotation_result1) as mocked_get_annotations,\
622-
patch.object(syn, 'getColumns') as mocked_get_columns:
622+
patch.object(syn, 'getColumns') as mocked_get_columns,\
623+
patch.object(SchemaBase, "_before_synapse_store"):
623624

624-
entity_view._add_annotations_as_columns(syn)
625+
entity_view._before_synapse_store(syn)
625626

626627
mocked_get_columns.assert_called_once_with([])
627628
mocked_get_annotations.assert_called_once_with(scopeIds, 'file')
628629

629630
assert_equals([Column(name='long2', columnType='INTEGER')], entity_view.columns_to_store)
630631

631632

632-
def test_EntityViewSchema__repeated_columnName():
633+
def test_EntityViewSchema__repeated_columnName_different_type():
633634
syn = synapseclient.client.Synapse(debug=True, skip_checks=True)
634635

635636
scopeIds = ['123']
636637
entity_view = EntityViewSchema("someName", scopes = scopeIds ,parent="syn123")
637638

638-
mocked_annotation_result1 = [Column(name='annoName', columnType='INTEGER'), Column(name='annoName', columnType='DOUBLE')]
639+
columns = [Column(name='annoName', columnType='INTEGER'),
640+
Column(name='annoName', columnType='DOUBLE')]
639641

640-
with patch.object(syn, '_get_annotation_entity_view_columns', return_value=mocked_annotation_result1) as mocked_get_annotations,\
641-
patch.object(syn, 'getColumns') as mocked_get_columns:
642+
with patch.object(syn, 'getColumns') as mocked_get_columns:
642643

643-
assert_raises(ValueError, entity_view._add_annotations_as_columns, syn)
644+
filtered_results = entity_view._filter_duplicate_columns(syn, columns)
644645

645646
mocked_get_columns.assert_called_once_with([])
646-
mocked_get_annotations.assert_called_once_with(scopeIds, 'file')
647+
assert_equals(2, len(filtered_results))
648+
assert_equals(columns, filtered_results)
649+
650+
651+
def test_EntityViewSchema__repeated_columnName_same_type():
652+
syn = synapseclient.client.Synapse(debug=True, skip_checks=True)
653+
654+
entity_view = EntityViewSchema("someName", parent="syn123")
655+
656+
columns = [Column(name='annoName', columnType='INTEGER'),
657+
Column(name='annoName', columnType='INTEGER')]
658+
659+
with patch.object(syn, 'getColumns') as mocked_get_columns:
660+
filtered_results = entity_view._filter_duplicate_columns(syn, columns)
661+
662+
mocked_get_columns.assert_called_once_with([])
663+
assert_equals(1, len(filtered_results))
664+
assert_equals(Column(name='annoName', columnType='INTEGER'), filtered_results[0])
647665

648666

649667
def test_rowset_asDataFrame__with_ROW_ETAG_column():

0 commit comments

Comments
 (0)