1414from schematic .store .synapse import SynapseStorage , DatasetFileView , ManifestDownload
1515from schematic .schemas .generator import SchemaGenerator
1616from synapseclient .core .exceptions import SynapseHTTPError
17+ from synapseclient .entity import File
1718from schematic .configuration .configuration import Configuration
1819
1920logging .basicConfig (level = logging .DEBUG )
@@ -113,9 +114,37 @@ def test_getFileAnnotations(self, synapse_store):
113114
114115 assert expected_dict == actual_dict
115116
116- def test_annotation_submission (self , synapse_store , helpers , config : Configuration ):
117- manifest_path = "mock_manifests/annotations_test_manifest.csv"
118-
117+ @pytest .mark .parametrize ('only_new_files' ,[True , False ])
118+ def test_get_file_entityIds (self , helpers , synapse_store , only_new_files ):
119+ manifest_path = "mock_manifests/test_BulkRNAseq.csv"
120+ dataset_files = synapse_store .getFilesInStorageDataset ('syn39241199' )
121+
122+ if only_new_files :
123+ # Prepare manifest if getting Ids for new files only
124+ manifest = helpers .get_data_frame (manifest_path )
125+ entityIds = pd .DataFrame ({'entityId' : ['syn39242580' , 'syn51900502' ]})
126+ manifest = manifest .join (entityIds )
127+
128+ # get entityIds for new files
129+ files_and_Ids = synapse_store ._get_file_entityIds (dataset_files = dataset_files , only_new_files = only_new_files , manifest = manifest )
130+
131+ # Assert that there are no new files
132+ for value in files_and_Ids .values ():
133+ assert value == []
134+
135+ else :
136+ # get entityIds for all files
137+ files_and_Ids = synapse_store ._get_file_entityIds (dataset_files = dataset_files , only_new_files = only_new_files )
138+
139+ # assert that the correct number of files were found
140+ assert len (files_and_Ids ['entityId' ]) == 2
141+
142+ @pytest .mark .parametrize ('manifest_path, test_annotations, datasetId, manifest_record_type' ,
143+ [ ("mock_manifests/annotations_test_manifest.csv" , {'CheckInt' : '7' , 'CheckList' : 'valid, list, values' }, 'syn34295552' , 'file_and_entities' ),
144+ ("mock_manifests/test_BulkRNAseq.csv" , {'FileFormat' : 'BAM' , 'GenomeBuild' : 'GRCh38' }, 'syn39241199' , 'table_and_file' )],
145+ ids = ['non file-based' ,
146+ 'file-based' ])
147+ def test_annotation_submission (self , synapse_store , helpers , manifest_path , test_annotations , datasetId , manifest_record_type , config : Configuration ):
119148 # Upload dataset annotations
120149 sg = SchemaGenerator (config .model_location )
121150
@@ -129,8 +158,8 @@ def test_annotation_submission(self, synapse_store, helpers, config: Configurati
129158 manifest_id = synapse_store .associateMetadataWithFiles (
130159 schemaGenerator = sg ,
131160 metadataManifestPath = helpers .get_data_path (manifest_path ),
132- datasetId = 'syn34295552' ,
133- manifest_record_type = 'file_and_entities' ,
161+ datasetId = datasetId ,
162+ manifest_record_type = manifest_record_type ,
134163 useSchemaLabel = True ,
135164 hideBlanks = True ,
136165 restrict_manifest = False ,
@@ -139,17 +168,19 @@ def test_annotation_submission(self, synapse_store, helpers, config: Configurati
139168 pass
140169
141170 # Retrieve annotations
142- entity_id , entity_id_spare = helpers .get_data_frame (manifest_path )["entityId" ][0 : 2 ]
171+ entity_id = helpers .get_data_frame (manifest_path )["entityId" ][0 ]
143172 annotations = synapse_store .getFileAnnotations (entity_id )
144173
145174 # Check annotations of interest
146- assert annotations ['CheckInt' ] == '7'
147- assert annotations ['CheckList' ] == 'valid, list, values'
148- assert 'CheckRecommended' not in annotations .keys ()
149-
150-
151-
175+ for key in test_annotations .keys ():
176+ assert key in annotations .keys ()
177+ assert annotations [key ] == test_annotations [key ]
152178
179+ if manifest_path .endswith ('annotations_test_manifest.csv' ):
180+ assert 'CheckRecommended' not in annotations .keys ()
181+ elif manifest_path .endswith ('test_BulkRNAseq.csv' ):
182+ entity = synapse_store .syn .get (entity_id )
183+ assert type (entity ) == File
153184
154185 @pytest .mark .parametrize ("force_batch" , [True , False ], ids = ["batch" , "non_batch" ])
155186 def test_getDatasetAnnotations (self , dataset_id , synapse_store , force_batch ):
@@ -466,7 +497,6 @@ def test_upsertTable(self, helpers, synapse_store, config:Configuration, project
466497 # delete table
467498 synapse_store .syn .delete (tableId )
468499
469-
470500class TestDownloadManifest :
471501 @pytest .mark .parametrize ("datasetFileView" , [{"id" : ["syn51203973" , "syn51203943" ], "name" : ["synapse_storage_manifest.csv" , "synapse_storage_manifest_censored.csv" ]}, {"id" : ["syn51203973" ], "name" : ["synapse_storage_manifest.csv" ]}, {"id" : ["syn51203943" ], "name" : ["synapse_storage_manifest_censored.csv" ]}])
472502 def test_get_manifest_id (self , synapse_store , datasetFileView ):
0 commit comments