Skip to content

Commit 29970c0

Browse files
committed
use unit test in test_database_to_staging instead
1 parent b54cfbe commit 29970c0

File tree

1 file changed: 2 additions (+2) and 64 deletions (-64).

1 file changed

+2
-64
lines changed

tests/test_filters.py

Lines changed: 2 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -8,18 +8,10 @@
88
from unittest.mock import patch
99

1010
import pandas as pd
11-
12-
from genie.process_functions import seqDateFilter
1311
from genie import database_to_staging
14-
from genie.database_to_staging import (
15-
seq_assay_id_filter,
16-
redact_phi,
17-
no_genepanel_filter,
18-
_to_redact_interval,
19-
_redact_year,
20-
_to_redact_difference,
21-
)
2212
from genie.consortium_to_public import commonVariantFilter
13+
from genie.database_to_staging import no_genepanel_filter, seq_assay_id_filter
14+
from genie.process_functions import seqDateFilter
2315

2416
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
2517
sys.path.append(os.path.join(SCRIPT_DIR, "../../genie"))
@@ -126,60 +118,6 @@ def test_seqdatefilter():
126118
assert all(samples == expected)
127119

128120

129-
def test__to_redact_interval():
130-
"""Tests the correct boolean vectors are returned for phi and pediatric
131-
redaction"""
132-
values = pd.Series([32850, 32485, 6570, 6569, "<foo", ">testing"])
133-
to_redact, to_redact_peds = _to_redact_interval(values)
134-
expected_redact = [True, False, False, False, False, True]
135-
expected_redact_peds = [False, False, False, True, True, False]
136-
assert to_redact.to_list() == expected_redact
137-
assert to_redact_peds.to_list() == expected_redact_peds
138-
139-
140-
def test__redact_year():
141-
"""Tests redaction of birth year based on < and >"""
142-
values = pd.Series([1923, 2003, "<foo", ">testing"])
143-
redacted = _redact_year(values)
144-
expected_redact = [1923, 2003, "withheld", "cannotReleaseHIPAA"]
145-
assert redacted.to_list() == expected_redact
146-
147-
148-
def test___to_redact_difference():
149-
"""Tests if a difference between two year columns is >89, redact"""
150-
year1 = pd.Series([1923, 2000, float("nan")])
151-
year2 = pd.Series([1926, 2100, 2000])
152-
redacted = _to_redact_difference(year1, year2)
153-
expected_redact = [False, True, False]
154-
assert redacted.to_list() == expected_redact
155-
156-
157-
def test_redact_phi():
158-
"""Redacts PHI interval and years"""
159-
return_bools = ([True, False, False], [False, False, True])
160-
clinicaldf = pd.DataFrame(["SAGE-TEST-1", "SAGE-TEST-2", "SAGE-TEST-3"])
161-
clinicaldf.rename(columns={0: "SAMPLE_ID"}, inplace=True)
162-
clinicaldf["AGE_AT_SEQ_REPORT"] = [32850, 32485, 6570]
163-
clinicaldf["BIRTH_YEAR"] = [1900, "<1900", 1902]
164-
# These are the years that are returned by the _redact_year
165-
# Use these against YEAR_CONTACT and YEAR_DEATH to calculate
166-
# expected_birth
167-
return_year = pd.Series([2000, 1903, 1904])
168-
clinicaldf["YEAR_CONTACT"] = [2100, 1904, float("nan")]
169-
clinicaldf["YEAR_DEATH"] = [2001, float("nan"), 2000]
170-
171-
expected_age = pd.Series([">32485", 32485, "<6570"])
172-
expected_birth = pd.Series(["cannotReleaseHIPAA", 1903, "cannotReleaseHIPAA"])
173-
with patch.object(
174-
database_to_staging, "_to_redact_interval", return_value=return_bools
175-
), patch.object(database_to_staging, "_redact_year", return_value=return_year):
176-
redacted_clin = redact_phi(
177-
clinicaldf, interval_cols_to_redact=["AGE_AT_SEQ_REPORT"]
178-
)
179-
assert all(redacted_clin["AGE_AT_SEQ_REPORT"] == expected_age)
180-
assert all(redacted_clin["BIRTH_YEAR"] == expected_birth)
181-
182-
183121
# def test_MAFinBED():
184122
# syn = mock.create_autospec(synapseclient.Synapse)
185123
# # MAF in BED filter (Make sure that the only Hugo symbol that

0 commit comments

Comments
 (0)