Skip to content

Commit 5bedac0

Browse files
authored
Merge pull request #1292 from Sage-Bionetworks/develop
v23.9.2
2 parents 12d7707 + 135cfb2 commit 5bedac0

File tree

7 files changed

+103
-13
lines changed

7 files changed

+103
-13
lines changed

pyproject.toml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,5 +148,13 @@ markers = [
148148
"""\
149149
rule_benchmark: marks tests covering \
150150
validation rule benchmarking
151-
"""
151+
""",
152+
"""\
153+
synapse_credentials_needed: marks api tests that \
154+
require synapse credentials to run
155+
""",
156+
"""\
157+
empty_token: marks api tests that \
158+
send empty credentials in the request
159+
"""
152160
]

schematic/manifest/generator.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,6 +1554,16 @@ def get_manifest(
15541554
annotations = store.getDatasetAnnotations(dataset_id)
15551555
# Update `additional_metadata` and generate manifest
15561556
manifest_url, manifest_df = self.get_manifest_with_annotations(annotations,strict=strict)
1557+
1558+
# If the annotations are empty,
1559+
# ie if there are no annotations to pull or annotations were unable to be pulled because the metadata is not file based,
1560+
# then create manifest from an empty manifest
1561+
if annotations.empty:
1562+
empty_manifest_df = self.get_dataframe_by_url(empty_manifest_url)
1563+
manifest_df = empty_manifest_df
1564+
1565+
logger.warning(f"Annotations were not able to be gathered for the given parameters. This manifest will be generated from an empty manifest.")
1566+
15571567
else:
15581568
empty_manifest_df = self.get_dataframe_by_url(empty_manifest_url)
15591569
if self.is_file_based:

schematic/schemas/explorer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def get_adjacent_nodes_by_relationship(
184184
return list(nodes)
185185

186186
def is_class_in_schema(self, class_label):
187-
if self.schema_nx.nodes[class_label]:
187+
if class_label in self.schema_nx.nodes():
188188
return True
189189
else:
190190
return False

schematic/store/synapse.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,7 +1412,7 @@ def _create_entity_id(self, idx, row, manifest, datasetId):
14121412
manifest.loc[idx, "entityId"] = entityId
14131413
return manifest, entityId
14141414

1415-
def add_entities(
1415+
def add_annotations_to_entities_files(
14161416
self,
14171417
se,
14181418
schemaGenerator,
@@ -1457,12 +1457,13 @@ def add_entities(
14571457
manifest.loc[idx, "entityId"] = manifest_synapse_table_id
14581458
entityId = ''
14591459
else:
1460-
# get the entity id corresponding to this row
1460+
# get the file id of the file to annotate, collected in above step.
14611461
entityId = row["entityId"]
14621462

14631463
# Adding annotations to connected files.
14641464
if entityId:
14651465
self._add_annotations(se, schemaGenerator, row, entityId, hideBlanks)
1466+
logger.info(f"Added annotations to entity: {entityId}")
14661467
return manifest
14671468

14681469
def upload_manifest_as_table(
@@ -1506,7 +1507,7 @@ def upload_manifest_as_table(
15061507
useSchemaLabel,
15071508
table_manipulation)
15081509

1509-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
1510+
manifest = self.add_annotations_to_entities_files(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
15101511
# Load manifest to synapse as a CSV File
15111512
manifest_synapse_file_id = self.upload_manifest_file(manifest, metadataManifestPath, datasetId, restrict, component_name = component_name)
15121513

@@ -1540,8 +1541,7 @@ def upload_manifest_as_csv(
15401541
restrict,
15411542
manifest_record_type,
15421543
hideBlanks,
1543-
component_name,
1544-
with_entities = False,):
1544+
component_name):
15451545
"""Upload manifest to Synapse as a csv only.
15461546
Args:
15471547
se: SchemaExplorer object
@@ -1557,8 +1557,8 @@ def upload_manifest_as_csv(
15571557
Return:
15581558
manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse.
15591559
"""
1560-
if with_entities:
1561-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks)
1560+
# remove with_entities parameter and rename add_annotations, as add_annototaions_to_files_entities.
1561+
manifest = self.add_annotations_to_entities_files(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks)
15621562

15631563
# Load manifest to synapse as a CSV File
15641564
manifest_synapse_file_id = self.upload_manifest_file(manifest,
@@ -1613,7 +1613,7 @@ def upload_manifest_combo(
16131613
useSchemaLabel=useSchemaLabel,
16141614
table_manipulation=table_manipulation,)
16151615

1616-
manifest = self.add_entities(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
1616+
manifest = self.add_annotations_to_entities_files(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
16171617

16181618
# Load manifest to synapse as a CSV File
16191619
manifest_synapse_file_id = self.upload_manifest_file(manifest, metadataManifestPath, datasetId, restrict, component_name)
@@ -1690,7 +1690,6 @@ def associateMetadataWithFiles(
16901690
hideBlanks=hideBlanks,
16911691
manifest_record_type=manifest_record_type,
16921692
component_name = component_name,
1693-
with_entities = False,
16941693
)
16951694
elif manifest_record_type == "table_and_file":
16961695
manifest_synapse_file_id = self.upload_manifest_as_table(
@@ -1718,7 +1717,6 @@ def associateMetadataWithFiles(
17181717
hideBlanks=hideBlanks,
17191718
manifest_record_type=manifest_record_type,
17201719
component_name = component_name,
1721-
with_entities=True,
17221720
)
17231721
elif manifest_record_type == "table_file_and_entities":
17241722
manifest_synapse_file_id = self.upload_manifest_combo(

tests/test_api.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def syn_token(config:Configuration):
9595

9696
@pytest.mark.schematic_api
9797
class TestSynapseStorage:
98+
@pytest.mark.synapse_credentials_needed
9899
@pytest.mark.parametrize("return_type", ["json", "csv"])
99100
def test_get_storage_assets_tables(self, client, syn_token, return_type):
100101
params = {
@@ -120,6 +121,8 @@ def test_get_storage_assets_tables(self, client, syn_token, return_type):
120121
os.remove(response_dt)
121122
else:
122123
pass
124+
125+
@pytest.mark.synapse_credentials_needed
123126
@pytest.mark.parametrize("full_path", [True, False])
124127
@pytest.mark.parametrize("file_names", [None, "Sample_A.txt"])
125128
def test_get_dataset_files(self,full_path, file_names, syn_token, client):
@@ -150,6 +153,7 @@ def test_get_dataset_files(self,full_path, file_names, syn_token, client):
150153
else:
151154
assert ["syn25705259","Boolean Test"] and ["syn23667202","DataTypeX_table"] in response_dt
152155

156+
@pytest.mark.synapse_credentials_needed
153157
def test_get_storage_project_dataset(self, syn_token, client):
154158
params = {
155159
"access_token": syn_token,
@@ -162,6 +166,7 @@ def test_get_storage_project_dataset(self, syn_token, client):
162166
response_dt = json.loads(response.data)
163167
assert ["syn26251193","Issue522"] in response_dt
164168

169+
@pytest.mark.synapse_credentials_needed
165170
def test_get_storage_project_manifests(self, syn_token, client):
166171

167172
params = {
@@ -174,6 +179,7 @@ def test_get_storage_project_manifests(self, syn_token, client):
174179

175180
assert response.status_code == 200
176181

182+
@pytest.mark.synapse_credentials_needed
177183
def test_get_storage_projects(self, syn_token, client):
178184

179185
params = {
@@ -185,6 +191,7 @@ def test_get_storage_projects(self, syn_token, client):
185191

186192
assert response.status_code == 200
187193

194+
@pytest.mark.synapse_credentials_needed
188195
@pytest.mark.parametrize("entity_id", ["syn34640850", "syn23643253", "syn24992754"])
189196
def test_get_entity_type(self, syn_token, client, entity_id):
190197
params = {
@@ -203,6 +210,7 @@ def test_get_entity_type(self, syn_token, client, entity_id):
203210
elif entity_id == "syn24992754":
204211
assert response_dt == "project"
205212

213+
@pytest.mark.synapse_credentials_needed
206214
@pytest.mark.parametrize("entity_id", ["syn30988314", "syn27221721"])
207215
def test_if_in_assetview(self, syn_token, client, entity_id):
208216
params = {
@@ -399,6 +407,7 @@ def ifPandasDataframe(self, response_dt):
399407
assert isinstance(df, pd.DataFrame)
400408

401409

410+
@pytest.mark.empty_token
402411
#@pytest.mark.parametrize("output_format", [None, "excel", "google_sheet", "dataframe (only if getting existing manifests)"])
403412
@pytest.mark.parametrize("output_format", ["excel"])
404413
@pytest.mark.parametrize("data_type", ["Biospecimen", "Patient", "all manifests", ["Biospecimen", "Patient"]])
@@ -455,6 +464,7 @@ def test_generate_existing_manifest(self, client, data_model_jsonld, data_type,
455464
self.ifGoogleSheetExists(response_dt)
456465

457466

467+
@pytest.mark.empty_token
458468
@pytest.mark.parametrize("output_format", ["excel", "google_sheet", "dataframe (only if getting existing manifests)", None])
459469
@pytest.mark.parametrize("data_type", ["all manifests", ["Biospecimen", "Patient"], "Patient"])
460470
def test_generate_new_manifest(self, caplog, client, data_model_jsonld, data_type, output_format):
@@ -621,6 +631,7 @@ def test_validate_manifest(self, data_model_jsonld, client, json_str, restrict_r
621631
assert "errors" in response_dt.keys()
622632
assert "warnings" in response_dt.keys()
623633

634+
@pytest.mark.synapse_credentials_needed
624635
def test_get_datatype_manifest(self, client, syn_token):
625636
params = {
626637
"access_token": syn_token,
@@ -642,6 +653,7 @@ def test_get_datatype_manifest(self, client, syn_token):
642653
"Year of Birth": "Int64",
643654
"entityId": "string"}
644655

656+
@pytest.mark.synapse_credentials_needed
645657
# small manifest: syn51078535; big manifest: syn51156998
646658
@pytest.mark.parametrize("manifest_id, expected_component, expected_file_name", [("syn51078535", "BulkRNA-seqAssay", "synapse_storage_manifest.csv"), ("syn51156998", "Biospecimen", "synapse_storage_manifest_biospecimen.csv")])
647659
@pytest.mark.parametrize("new_manifest_name",[None,"Example.csv"])
@@ -694,6 +706,8 @@ def test_manifest_download(self, config: Configuration, client, syn_token, manif
694706
os.remove(manifest_file_path)
695707
except:
696708
pass
709+
710+
@pytest.mark.synapse_credentials_needed
697711
# test downloading a manifest with access restriction and see if the correct error message got raised
698712
def test_download_access_restricted_manifest(self, client, syn_token):
699713
params = {
@@ -707,6 +721,7 @@ def test_download_access_restricted_manifest(self, client, syn_token):
707721
raise TypeError('the type error got raised')
708722
assert exc_info.value.args[0] == "the type error got raised"
709723

724+
@pytest.mark.synapse_credentials_needed
710725
@pytest.mark.parametrize("as_json", [None, True, False])
711726
@pytest.mark.parametrize("new_manifest_name", [None, "Test"])
712727
def test_dataset_manifest_download(self, client, as_json, syn_token, new_manifest_name):
@@ -735,6 +750,7 @@ def test_dataset_manifest_download(self, client, as_json, syn_token, new_manifes
735750
assert isinstance(response_path, str)
736751
assert response_path.endswith(".csv")
737752

753+
@pytest.mark.synapse_credentials_needed
738754
@pytest.mark.submission
739755
def test_submit_manifest_table_and_file_replace(self, client, syn_token, data_model_jsonld, test_manifest_submit):
740756
"""Testing submit manifest in a csv format as a table and a file. Only replace the table
@@ -755,6 +771,7 @@ def test_submit_manifest_table_and_file_replace(self, client, syn_token, data_mo
755771
response_csv = client.post('http://localhost:3001/v1/model/submit', query_string=params, data={"file_name": (open(test_manifest_submit, 'rb'), "test.csv")})
756772
assert response_csv.status_code == 200
757773

774+
@pytest.mark.synapse_credentials_needed
758775
@pytest.mark.submission
759776
def test_submit_manifest_file_only_replace(self, client, syn_token, data_model_jsonld, test_manifest_submit):
760777
"""Testing submit manifest in a csv format as a file
@@ -773,6 +790,7 @@ def test_submit_manifest_file_only_replace(self, client, syn_token, data_model_j
773790
response_csv = client.post('http://localhost:3001/v1/model/submit', query_string=params, data={"file_name": (open(test_manifest_submit, 'rb'), "test.csv")})
774791
assert response_csv.status_code == 200
775792

793+
@pytest.mark.synapse_credentials_needed
776794
@pytest.mark.submission
777795
def test_submit_manifest_json_str_replace(self, client, syn_token, data_model_jsonld):
778796
"""Submit json str as a file
@@ -794,6 +812,7 @@ def test_submit_manifest_json_str_replace(self, client, syn_token, data_model_js
794812
response = client.post('http://localhost:3001/v1/model/submit', query_string = params, data={"file_name":''})
795813
assert response.status_code == 200
796814

815+
@pytest.mark.synapse_credentials_needed
797816
@pytest.mark.submission
798817
def test_submit_manifest_w_file_and_entities(self, client, syn_token, data_model_jsonld, test_manifest_submit):
799818
params = {
@@ -812,6 +831,7 @@ def test_submit_manifest_w_file_and_entities(self, client, syn_token, data_model
812831
response_csv = client.post('http://localhost:3001/v1/model/submit', query_string=params, data={"file_name": (open(test_manifest_submit, 'rb'), "test.csv")})
813832
assert response_csv.status_code == 200
814833

834+
@pytest.mark.synapse_credentials_needed
815835
@pytest.mark.submission
816836
def test_submit_manifest_table_and_file_upsert(self, client, syn_token, data_model_jsonld, test_upsert_manifest_csv, ):
817837
params = {

tests/test_manifest.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,37 @@ def test_get_manifest_excel(self, helpers, sheet_url, output_format, dataset_id)
193193
if type(manifest) is str and os.path.exists(manifest):
194194
os.remove(manifest)
195195

196+
@pytest.mark.parametrize("dataset_id", [("syn27600056"), ("syn52397659")], ids=["Annotations present", "Annotations not present"])
197+
def test_get_manifest_no_annos(self, helpers, dataset_id):
198+
"""
199+
Test to cover manifest generation under the case where use_annotations is True
200+
but there are no annotations in the dataset
201+
"""
202+
203+
# Use a non-file based DataType
204+
data_type = "Patient"
205+
206+
# Instantiate object with use_annotations set to True
207+
generator = ManifestGenerator(
208+
path_to_json_ld=helpers.get_data_path("example.model.jsonld"),
209+
root=data_type,
210+
use_annotations=True,
211+
)
212+
213+
# Get manifest as a dataframe
214+
manifest = generator.get_manifest(dataset_id = dataset_id, sheet_url = False, output_format = "dataframe")
215+
216+
# Case where annotations are present in the dataset
217+
# manifest should have pulled in the annotations
218+
if dataset_id == "syn27600056":
219+
assert manifest["Patient ID"].size > 1
220+
221+
# Case where annotations are not present in the dataset
222+
# manifest should be empty (One column will have the component filled so not truly empty)
223+
elif dataset_id == "syn52397659":
224+
assert manifest["Patient ID"].size == 1
225+
226+
196227
# test all the functions used under get_manifest
197228
def test_create_empty_manifest_spreadsheet(self, simple_manifest_generator):
198229
'''

tests/test_schemas.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from schematic.schemas import df_parser
88
from schematic.utils.df_utils import load_df
9+
from schematic.schemas.generator import SchemaGenerator
910

1011
logging.basicConfig(level=logging.DEBUG)
1112
logger = logging.getLogger(__name__)
@@ -46,6 +47,13 @@ def extended_schema_path(helpers, tmp_path):
4647

4748
yield extended_schema_path
4849

50+
@pytest.fixture
51+
def sg(helpers):
52+
53+
inputModelLocation = helpers.get_data_path('example.model.jsonld')
54+
sg = SchemaGenerator(inputModelLocation)
55+
56+
yield sg
4957

5058
class TestDfParser:
5159
def test_get_class(self, helpers):
@@ -268,4 +276,19 @@ def test_get_class_label_from_display_name(self, helpers):
268276
assert(se_obj.get_class_label_from_display_name("model of manifestation") == "Modelofmanifestation")
269277
assert(se_obj.get_class_label_from_display_name("model of manifestation", strict_camel_case = True) == "ModelOfManifestation")
270278
assert(se_obj.get_class_label_from_display_name("model of manifestation") == "Modelofmanifestation")
271-
assert(se_obj.get_class_label_from_display_name("model of manifestation", strict_camel_case = True) == "ModelOfManifestation")
279+
assert(se_obj.get_class_label_from_display_name("model of manifestation", strict_camel_case = True) == "ModelOfManifestation")
280+
281+
class TestSchemaExplorer:
282+
@pytest.mark.parametrize("class_name, expected_in_schema", [("Patient",True), ("ptaient",False), ("Biospecimen",True), ("InvalidComponent",False)])
283+
def test_is_class_in_schema(self, sg, class_name, expected_in_schema):
284+
"""
285+
Test to cover checking if a given class is in a schema.
286+
`is_class_in_schema` should return `True` if the class is in the schema
287+
and `False` if it is not.
288+
"""
289+
290+
# Check if class is in schema
291+
class_in_schema = sg.se.is_class_in_schema(class_name)
292+
293+
# Assert value is as expected
294+
assert class_in_schema == expected_in_schema

0 commit comments

Comments
 (0)