Skip to content

Commit 853f01b

Browse files
committed
revert changes in pipeline
1 parent 73296fb commit 853f01b

File tree

4 files changed, with 15 additions and 16 deletions.

4 files changed

+15
-16
lines changed

genie_registry/bed.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from genie.example_filetype_format import FileTypeFormat
1010
from genie import load, process_functions, validate
11-
from synapseclient.models import query
1211

1312
LOGGER = logging.getLogger(__name__)
1413

@@ -513,6 +512,7 @@ def _process(self, beddf, seq_assay_id, newpath, parentid, create_panel=True):
513512
"""
514513
seq_assay_id = seq_assay_id.upper()
515514
seq_assay_id = seq_assay_id.replace("_", "-")
515+
516516
# Add in 6th column which is the clinicalReported
517517
if len(beddf.columns) > 5:
518518
if all(beddf[5].apply(lambda x: x in [True, False])):
@@ -548,8 +548,9 @@ def _process(self, beddf, seq_assay_id, newpath, parentid, create_panel=True):
548548
# Change all start and end to int
549549
beddf["Start_Position"] = beddf["Start_Position"].apply(int)
550550
beddf["End_Position"] = beddf["End_Position"].apply(int)
551-
gene_position_table = query("SELECT * FROM syn11806563")
552-
gene_positiondf = gene_position_table.convert_dtypes()
551+
552+
gene_position_table = self.syn.tableQuery("SELECT * FROM syn11806563")
553+
gene_positiondf = gene_position_table.asDataFrame()
553554
beddf["ID"] = beddf["Hugo_Symbol"]
554555
# The apply function of a DataFrame is called twice on the first
555556
# row (known pandas behavior)
@@ -685,11 +686,12 @@ def _validate(self, beddf):
685686
total_error += error
686687

687688
if to_validate_symbol:
688-
gene_position_table = query("SELECT * FROM syn11806563")
689-
gene_positiondf = gene_position_table.convert_dtypes()
689+
gene_position_table = self.syn.tableQuery("SELECT * FROM syn11806563")
690+
gene_positiondf = gene_position_table.asDataFrame()
690691
# The apply function of a DataFrame is called twice on the first row (known
691692
# pandas behavior)
692693
beddf = beddf.apply(lambda x: remap_symbols(x, gene_positiondf), axis=1)
694+
693695
if any(beddf["Hugo_Symbol"].isnull()):
694696
warning += (
695697
"BED file: "

genie_registry/clinical.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from genie import extract, load, process_functions, validate
1414
from genie.database_to_staging import redact_phi
1515
from genie.example_filetype_format import FileTypeFormat
16-
from synapseclient.models import query
1716

1817
logger = logging.getLogger(__name__)
1918

@@ -474,10 +473,10 @@ def uploadMissingData(
474473
process_functions.SCRIPT_DIR, f"{self._fileType}_missing_{col}.csv"
475474
)
476475
# PLFM-7428 - there are limits on a "not in" function on Synapse tables
477-
center_samples = query(
476+
center_samples = self.syn.tableQuery(
478477
f"select {col} from {dbSynId} where " f"CENTER='{self.center}'"
479478
)
480-
center_samples_df = center_samples.convert_dtypes()
479+
center_samples_df = center_samples.asDataFrame()
481480
# Get all the samples that are in the database but missing from
482481
# the input file
483482
missing_df = center_samples_df[col][~center_samples_df[col].isin(df[col])]

genie_registry/cna.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
from genie.example_filetype_format import FileTypeFormat
99
from genie import process_functions
10-
from synapseclient.models import query
1110

1211
logger = logging.getLogger(__name__)
1312

@@ -132,10 +131,10 @@ def _process(self, cnaDf):
132131
del cnaDf[cnaDf.columns[index][0]]
133132

134133
bedSynId = self.genie_config["bed"]
135-
bed = query(
134+
bed = self.syn.tableQuery(
136135
f"select Hugo_Symbol, ID from {bedSynId} where CENTER = '{self.center}'"
137136
)
138-
bedDf = bed.convert_dtypes()
137+
bedDf = bed.asDataFrame()
139138
cnaDf["Hugo_Symbol"] = cnaDf["Hugo_Symbol"].apply(
140139
lambda x: validateSymbol(x, bedDf)
141140
)
@@ -223,11 +222,11 @@ def _validate(self, cnvDF, nosymbol_check):
223222
cnvDF["HUGO_SYMBOL"] = keepSymbols
224223
if haveColumn and not nosymbol_check:
225224
bedSynId = self.genie_config["bed"]
226-
bed = query(
225+
bed = self.syn.tableQuery(
227226
f"select Hugo_Symbol, ID from {bedSynId} "
228227
f"where CENTER = '{self.center}'"
229228
)
230-
bedDf = bed.convert_dtypes()
229+
bedDf = bed.asDataFrame()
231230
cnvDF["remapped"] = cnvDF["HUGO_SYMBOL"].apply(
232231
lambda x: validateSymbol(x, bedDf)
233232
)

genie_registry/mutationsInCis.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from genie.example_filetype_format import FileTypeFormat
77
from genie import load, process_functions
8-
from synapseclient.models import query
98

109
logger = logging.getLogger(__name__)
1110

@@ -49,12 +48,12 @@ def process_steps(self, mutationInCis, newPath, databaseSynId):
4948
def _validate(self, mutationInCisDf):
5049
mutationInCisSynId = self.genie_config["mutationsInCis"]
5150
# Pull down the correct database
52-
existingMergeCheck = query(
51+
existingMergeCheck = self.syn.tableQuery(
5352
"select * from {} where Center = '{}'".format(
5453
mutationInCisSynId, self.center
5554
)
5655
)
57-
existingMergeCheckDf = existingMergeCheck.convert_dtypes()
56+
existingMergeCheckDf = existingMergeCheck.asDataFrame()
5857

5958
total_error = ""
6059
warning = ""

0 commit comments

Comments
 (0)