Skip to content

Commit 37563a1

Browse files
authored
Merge pull request #1046 from Sage-Bionetworks/develop-table-upsert
Introduce tests to cover current table operations: creation and replacement
2 parents dc123b4 + d268af1 commit 37563a1

File tree

7 files changed

+205
-13
lines changed

7 files changed

+205
-13
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
env:
3030
POETRY_VERSION: 1.2.0rc1
3131
strategy:
32+
fail-fast: false
3233
matrix:
3334
python-version: ["3.7", "3.8", "3.9", "3.10"]
3435

@@ -108,7 +109,7 @@ jobs:
108109
run: >
109110
source .venv/bin/activate;
110111
pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
111-
-m "not google_credentials_needed"
112+
-m "not (google_credentials_needed or table_operations)"
112113
113114
- name: Run tests
114115
env:
@@ -118,7 +119,7 @@ jobs:
118119
run: >
119120
source .venv/bin/activate;
120121
pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
121-
-m "not (google_credentials_needed or rule_combos or schematic_api)"
122+
-m "not (google_credentials_needed or rule_combos or schematic_api or table_operations)"
122123
123124
- name: Upload pytest test results
124125
uses: actions/upload-artifact@v2

pyproject.toml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,5 +118,15 @@ markers = [
118118
"""\
119119
schematic_api: marks tests requiring \
120120
running API locally (skipped on GitHub CI)
121-
"""
121+
""",
122+
"""\
123+
rule_combos: marks tests covering \
124+
combinations of rules that aren't always necessary \
125+
and can add significantly to CI runtime (skipped on GitHub CI unless prompted to run in commit message)
126+
""",
127+
"""\
128+
table_operations: marks tests covering \
129+
table operations that pass locally \
130+
but fail on CI due to interactions with Synapse (skipped on GitHub CI)
131+
"""
122132
]

schematic/store/synapse.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# allows specifying explicit variable types
1010
from typing import Dict, List, Tuple, Sequence, Union
1111
from collections import OrderedDict
12+
from tenacity import retry, stop_after_attempt, wait_chain, wait_fixed, retry_if_exception_type
1213

1314
import numpy as np
1415
import pandas as pd
@@ -93,6 +94,9 @@ def __init__(
9394
except KeyError:
9495
raise MissingConfigValueError(("synapse", "manifest_basename"))
9596

97+
self._query_fileview()
98+
99+
def _query_fileview(self):
96100
try:
97101
self.storageFileview = CONFIG["synapse"]["master_fileview"]
98102
self.manifest = CONFIG["synapse"]["manifest_basename"]
@@ -108,7 +112,7 @@ def __init__(
108112
except AttributeError:
109113
raise AttributeError("storageFileview attribute has not been set.")
110114
except SynapseHTTPError:
111-
raise AccessCredentialsError(self.storageFileview)
115+
raise AccessCredentialsError(self.storageFileview)
112116

113117
@staticmethod
114118
def login(token=None, access_token=None, input_token=None):
@@ -725,16 +729,21 @@ def get_synapse_table(self, synapse_id: str) -> Tuple[pd.DataFrame, CsvFileTable
725729

726730
return df, results
727731

728-
def _get_tables(self, datasetId: str = None) -> List[Table]:
729-
project = self.syn.get(self.getDatasetProject(datasetId))
732+
def _get_tables(self, datasetId: str = None, projectId: str = None) -> List[Table]:
733+
if projectId:
734+
project = projectId
735+
elif datasetId:
736+
project = self.syn.get(self.getDatasetProject(datasetId))
737+
730738
return list(self.syn.getChildren(project, includeTypes=["table"]))
731739

732-
def get_table_info(self, datasetId: str = None) -> List[str]:
740+
def get_table_info(self, datasetId: str = None, projectId: str = None) -> List[str]:
733741
"""Gets the names of the tables in the schema
742+
Can pass in a synID for a dataset or project
734743
Returns:
735744
list[str]: A list of table names
736745
"""
737-
tables = self._get_tables(datasetId)
746+
tables = self._get_tables(datasetId = datasetId, projectId = projectId)
738747
if tables:
739748
return {table["name"]: table["id"] for table in tables}
740749
else:
@@ -743,7 +752,7 @@ def get_table_info(self, datasetId: str = None) -> List[str]:
743752
@missing_entity_handler
744753
def upload_format_manifest_table(self, se, manifest, datasetId, table_name, restrict, useSchemaLabel):
745754
# Rename the manifest columns to display names to match fileview
746-
table_info = self.get_table_info(datasetId)
755+
table_info = self.get_table_info(datasetId = datasetId)
747756

748757
blacklist_chars = ['(', ')', '.', ' ', '-']
749758
manifest_columns = manifest.columns.tolist()
@@ -1293,6 +1302,15 @@ def getDatasetAnnotations(
12931302
# Force all values as strings
12941303
return table.astype(str)
12951304

1305+
def raise_final_error(retry_state):
1306+
return retry_state.outcome.result()
1307+
1308+
@retry(stop = stop_after_attempt(5),
1309+
wait = wait_chain(*[wait_fixed(10) for i in range (2)] +
1310+
[wait_fixed(15) for i in range(2)] +
1311+
[wait_fixed(20)]),
1312+
retry=retry_if_exception_type(LookupError),
1313+
retry_error_callback = raise_final_error)
12961314
def getDatasetProject(self, datasetId: str) -> str:
12971315
"""Get parent project for a given dataset ID.
12981316
@@ -1306,6 +1324,9 @@ def getDatasetProject(self, datasetId: str) -> str:
13061324
Returns:
13071325
str: The Synapse ID for the parent project.
13081326
"""
1327+
1328+
self._query_fileview()
1329+
13091330
# Subset main file view
13101331
dataset_index = self.storageFileviewTable["id"] == datasetId
13111332
dataset_row = self.storageFileviewTable[dataset_index]

tests/conftest.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from multiprocessing.sharedctypes import Value
22
import os
33
import logging
4-
import platform
4+
import sys
55

66
import shutil
77
import pytest
@@ -66,10 +66,25 @@ def get_schema_explorer(path=None, *paths):
6666

6767
@staticmethod
6868
def get_python_version(self):
69-
version=platform.python_version()
69+
version=sys.version
7070
base_version=".".join(version.split('.')[0:2])
7171

7272
return base_version
73+
74+
@staticmethod
75+
def get_python_project(self):
76+
77+
version = self.get_python_version(Helpers)
78+
79+
python_projects = {
80+
"3.7": "syn47217926",
81+
"3.8": "syn47217967",
82+
"3.9": "syn47218127",
83+
"3.10": "syn47218347",
84+
}
85+
86+
return python_projects[version]
87+
7388
@pytest.fixture(scope="session")
7489
def helpers():
7590
yield Helpers
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Component,Days to Follow Up,Adverse Event,Progression or Recurrence,Barretts Esophagus Goblet Cells Present,BMI,Cause of Response,Comorbidity,Comorbidity Method of Diagnosis,Days to Adverse Event,Days to Comorbidity,Diabetes Treatment Type,Disease Response,DLCO Ref Predictive Percent,ECOG Performance Status,FEV1 FVC Post Bronch Percent,FEV 1 FVC Pre Bronch Percent,FEV1 Ref Post Bronch Percent,FEV1 Ref Pre Bronch Percent,Height,Hepatitis Sustained Virological Response,HPV Positive Type,Karnofsky Performance Status,Menopause Status,Pancreatitis Onset Year,Reflux Treatment Type,Risk Factor,Risk Factor Treatment,Viral Hepatitis Serologies,Weight,Days to Progression,Days to Progression Free,Days to Recurrence,Progression or Recurrence Anatomic Site,Progression or Recurrence Type,entityId,HTAN Participant ID
2+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249015,455.0
3+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249915,456.0
4+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22687161,457.0
5+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22688035,458.0
6+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978930,459.0
7+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978946,460.0
8+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978975,461.0
9+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22979177,462.0
10+
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn21989551,454.0
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Component,Days to Follow Up,Adverse Event,Progression or Recurrence,Barretts Esophagus Goblet Cells Present,BMI,Cause of Response,Comorbidity,Comorbidity Method of Diagnosis,Days to Adverse Event,Days to Comorbidity,Diabetes Treatment Type,Disease Response,DLCO Ref Predictive Percent,ECOG Performance Status,FEV1 FVC Post Bronch Percent,FEV 1 FVC Pre Bronch Percent,FEV1 Ref Post Bronch Percent,FEV1 Ref Pre Bronch Percent,Height,Hepatitis Sustained Virological Response,HPV Positive Type,Karnofsky Performance Status,Menopause Status,Pancreatitis Onset Year,Reflux Treatment Type,Risk Factor,Risk Factor Treatment,Viral Hepatitis Serologies,Weight,Days to Progression,Days to Progression Free,Days to Recurrence,Progression or Recurrence Anatomic Site,Progression or Recurrence Type,entityId,HTAN Participant ID,Uuid
2+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249015,455,318e2d8c-83be-4b19-9a9f-5e97ce9c9cf7
3+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249915,456,600666ee-1f96-49f8-b848-2be751cb59f9
4+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22687161,457,1fb32068-7b6c-4d12-8031-96d91229dda0
5+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22688035,458,964949f3-efe1-47cd-8113-dedb909fd312
6+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978930,459,c562590f-359a-4ff5-bbd7-47e7ad840a32
7+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978946,460,a77e0882-7b6f-42a9-9e60-c4d2bfd72bb7
8+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978975,461,e7705ed5-ade4-4451-ae26-9c790e35e878
9+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22979177,462,2897b73c-e225-4dc3-a093-96f645332ea3
10+
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn21989551,454,26a1cdba-dd37-483a-be6e-d1a4f5a9521b

tests/test_store.py

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
import math
44
import logging
55
import pytest
6-
import time
6+
from time import sleep
77
from tenacity import Retrying, RetryError, stop_after_attempt, wait_random_exponential
88

99
import pandas as pd
10-
from synapseclient import EntityViewSchema
10+
from synapseclient import EntityViewSchema, Folder
1111

12+
from schematic.models.metadata import MetadataModel
1213
from schematic.store.base import BaseStorage
1314
from schematic.store.synapse import SynapseStorage, DatasetFileView
1415
from schematic.utils.cli_utils import get_from_config
@@ -47,6 +48,28 @@ def dataset_fileview_table_tidy(dataset_fileview, dataset_fileview_table):
4748
table = dataset_fileview.tidy_table()
4849
yield table
4950

51+
@pytest.fixture
52+
def version(synapse_store, helpers):
53+
54+
yield helpers.get_python_version(helpers)
55+
56+
@pytest.fixture
57+
def projectId(synapse_store, helpers):
58+
projectId = helpers.get_python_project(helpers)
59+
yield projectId
60+
61+
@pytest.fixture
62+
def datasetId(synapse_store, projectId, helpers):
63+
dataset = Folder(
64+
name = 'Table Test Dataset ' + helpers.get_python_version(helpers),
65+
parent = projectId,
66+
)
67+
68+
datasetId = synapse_store.syn.store(dataset).id
69+
sleep(5)
70+
yield datasetId
71+
synapse_store.syn.delete(datasetId)
72+
5073

5174
def raise_final_error(retry_state):
5275
return retry_state.outcome.result()
@@ -243,3 +266,105 @@ def test_tidy_table(self, dataset_fileview_table_tidy):
243266
year_value = table.loc[sample_a_row, "YearofBirth"][0]
244267
assert isinstance(year_value, str)
245268
assert year_value == "1980"
269+
270+
@pytest.mark.table_operations
271+
class TestTableOperations:
272+
273+
def test_createTable(self, helpers, synapse_store, config, projectId, datasetId):
274+
275+
276+
# Check if FollowUp table exists if so delete
277+
existing_tables = synapse_store.get_table_info(projectId = projectId)
278+
279+
table_name='followup_synapse_storage_manifest_table'
280+
281+
if table_name in existing_tables.keys():
282+
synapse_store.syn.delete(existing_tables[table_name])
283+
sleep(10)
284+
# assert no table
285+
assert table_name not in synapse_store.get_table_info(projectId = projectId).keys()
286+
287+
# associate metadata with files
288+
manifest_path = "mock_manifests/table_manifest.csv"
289+
inputModelLocaiton = helpers.get_data_path(get_from_config(config.DATA, ("model", "input", "location")))
290+
sg = SchemaGenerator(inputModelLocaiton)
291+
292+
# updating file view on synapse takes a long time
293+
manifestId = synapse_store.associateMetadataWithFiles(
294+
schemaGenerator = sg,
295+
metadataManifestPath = helpers.get_data_path(manifest_path),
296+
datasetId = datasetId,
297+
manifest_record_type = 'table',
298+
useSchemaLabel = True,
299+
hideBlanks = True,
300+
restrict_manifest = False,
301+
)
302+
existing_tables = synapse_store.get_table_info(projectId = projectId)
303+
304+
# clean Up
305+
synapse_store.syn.delete(manifestId)
306+
# assert table exists
307+
assert table_name in existing_tables.keys()
308+
309+
def test_replaceTable(self, helpers, synapse_store, config, projectId, datasetId):
310+
table_name='followup_synapse_storage_manifest_table'
311+
manifest_path = "mock_manifests/table_manifest.csv"
312+
replacement_manifest_path = "mock_manifests/table_manifest_replacement.csv"
313+
column_of_interest="DaystoFollowUp"
314+
315+
# Check if FollowUp table exists if so delete
316+
existing_tables = synapse_store.get_table_info(projectId = projectId)
317+
318+
if table_name in existing_tables.keys():
319+
synapse_store.syn.delete(existing_tables[table_name])
320+
sleep(10)
321+
# assert no table
322+
assert table_name not in synapse_store.get_table_info(projectId = projectId).keys()
323+
324+
# associate org FollowUp metadata with files
325+
inputModelLocaiton = helpers.get_data_path(get_from_config(config.DATA, ("model", "input", "location")))
326+
sg = SchemaGenerator(inputModelLocaiton)
327+
328+
# updating file view on synapse takes a long time
329+
manifestId = synapse_store.associateMetadataWithFiles(
330+
schemaGenerator = sg,
331+
metadataManifestPath = helpers.get_data_path(manifest_path),
332+
datasetId = datasetId,
333+
manifest_record_type = 'table',
334+
useSchemaLabel = True,
335+
hideBlanks = True,
336+
restrict_manifest = False,
337+
)
338+
existing_tables = synapse_store.get_table_info(projectId = projectId)
339+
340+
# Query table for DaystoFollowUp column
341+
tableId = existing_tables[table_name]
342+
daysToFollowUp = synapse_store.syn.tableQuery(
343+
f"SELECT {column_of_interest} FROM {tableId}"
344+
).asDataFrame().squeeze()
345+
346+
# assert Days to FollowUp == 73
347+
assert (daysToFollowUp == '73.0').all()
348+
349+
# Associate replacement manifest with files
350+
manifestId = synapse_store.associateMetadataWithFiles(
351+
schemaGenerator = sg,
352+
metadataManifestPath = helpers.get_data_path(replacement_manifest_path),
353+
datasetId = datasetId,
354+
manifest_record_type = 'table',
355+
useSchemaLabel = True,
356+
hideBlanks = True,
357+
restrict_manifest = False,
358+
)
359+
existing_tables = synapse_store.get_table_info(projectId = projectId)
360+
361+
# Query table for DaystoFollowUp column
362+
tableId = existing_tables[table_name]
363+
daysToFollowUp = synapse_store.syn.tableQuery(
364+
f"SELECT {column_of_interest} FROM {tableId}"
365+
).asDataFrame().squeeze()
366+
367+
# assert Days to FollowUp == 89 now and not 73
368+
assert (daysToFollowUp == '89').all()
369+
# delete table
370+
synapse_store.syn.delete(tableId)

0 commit comments

Comments
 (0)