|
8 | 8 | from unittest.mock import patch |
9 | 9 |
|
10 | 10 | import pandas as pd |
11 | | - |
12 | | -from genie.process_functions import seqDateFilter |
13 | 11 | from genie import database_to_staging |
14 | | -from genie.database_to_staging import ( |
15 | | - seq_assay_id_filter, |
16 | | - redact_phi, |
17 | | - no_genepanel_filter, |
18 | | - _to_redact_interval, |
19 | | - _redact_year, |
20 | | - _to_redact_difference, |
21 | | -) |
22 | 12 | from genie.consortium_to_public import commonVariantFilter |
| 13 | +from genie.database_to_staging import no_genepanel_filter, seq_assay_id_filter |
| 14 | +from genie.process_functions import seqDateFilter |
23 | 15 |
|
24 | 16 | SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) |
25 | 17 | sys.path.append(os.path.join(SCRIPT_DIR, "../../genie")) |
@@ -126,60 +118,6 @@ def test_seqdatefilter(): |
126 | 118 | assert all(samples == expected) |
127 | 119 |
|
128 | 120 |
|
129 | | -def test__to_redact_interval(): |
130 | | - """Tests the correct boolean vectors are returned for phi and pediatric |
131 | | - redaction""" |
132 | | - values = pd.Series([32850, 32485, 6570, 6569, "<foo", ">testing"]) |
133 | | - to_redact, to_redact_peds = _to_redact_interval(values) |
134 | | - expected_redact = [True, False, False, False, False, True] |
135 | | - expected_redact_peds = [False, False, False, True, True, False] |
136 | | - assert to_redact.to_list() == expected_redact |
137 | | - assert to_redact_peds.to_list() == expected_redact_peds |
138 | | - |
139 | | - |
140 | | -def test__redact_year(): |
141 | | - """Tests redaction of birth year based on < and >""" |
142 | | - values = pd.Series([1923, 2003, "<foo", ">testing"]) |
143 | | - redacted = _redact_year(values) |
144 | | - expected_redact = [1923, 2003, "withheld", "cannotReleaseHIPAA"] |
145 | | - assert redacted.to_list() == expected_redact |
146 | | - |
147 | | - |
148 | | -def test___to_redact_difference(): |
149 | | - """Tests if a difference between two year columns is >89, redact""" |
150 | | - year1 = pd.Series([1923, 2000, float("nan")]) |
151 | | - year2 = pd.Series([1926, 2100, 2000]) |
152 | | - redacted = _to_redact_difference(year1, year2) |
153 | | - expected_redact = [False, True, False] |
154 | | - assert redacted.to_list() == expected_redact |
155 | | - |
156 | | - |
157 | | -def test_redact_phi(): |
158 | | - """Redacts PHI interval and years""" |
159 | | - return_bools = ([True, False, False], [False, False, True]) |
160 | | - clinicaldf = pd.DataFrame(["SAGE-TEST-1", "SAGE-TEST-2", "SAGE-TEST-3"]) |
161 | | - clinicaldf.rename(columns={0: "SAMPLE_ID"}, inplace=True) |
162 | | - clinicaldf["AGE_AT_SEQ_REPORT"] = [32850, 32485, 6570] |
163 | | - clinicaldf["BIRTH_YEAR"] = [1900, "<1900", 1902] |
164 | | - # These are the years that are returned by the _redact_year |
165 | | - # Use these against YEAR_CONTACT and YEAR_DEATH to calculate |
166 | | - # expected_birth |
167 | | - return_year = pd.Series([2000, 1903, 1904]) |
168 | | - clinicaldf["YEAR_CONTACT"] = [2100, 1904, float("nan")] |
169 | | - clinicaldf["YEAR_DEATH"] = [2001, float("nan"), 2000] |
170 | | - |
171 | | - expected_age = pd.Series([">32485", 32485, "<6570"]) |
172 | | - expected_birth = pd.Series(["cannotReleaseHIPAA", 1903, "cannotReleaseHIPAA"]) |
173 | | - with patch.object( |
174 | | - database_to_staging, "_to_redact_interval", return_value=return_bools |
175 | | - ), patch.object(database_to_staging, "_redact_year", return_value=return_year): |
176 | | - redacted_clin = redact_phi( |
177 | | - clinicaldf, interval_cols_to_redact=["AGE_AT_SEQ_REPORT"] |
178 | | - ) |
179 | | - assert all(redacted_clin["AGE_AT_SEQ_REPORT"] == expected_age) |
180 | | - assert all(redacted_clin["BIRTH_YEAR"] == expected_birth) |
181 | | - |
182 | | - |
183 | 121 | # def test_MAFinBED(): |
184 | 122 | # syn = mock.create_autospec(synapseclient.Synapse) |
185 | 123 | # # MAF in BED filter (Make sure that the only Hugo symbol that |
|
0 commit comments