Skip to content

Commit b7772ef

Browse files
authored
[GEN-1892] unmask pediatric data (#604)
* unmask ped data * add test suites for redaction function
1 parent 82816bf commit b7772ef

File tree

3 files changed

+611
-69
lines changed

3 files changed

+611
-69
lines changed

genie/database_to_staging.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@
106106
# TODO: Add to transform.py
107107
def _to_redact_interval(df_col):
108108
"""
109-
Determines interval values that are <18 and >89 that need to be redacted
109+
Determines interval values that centers pre-redacted with "<" (pediatric) and interval values that are >89 years (in days) or contain ">" that need to be redacted
110110
Returns bool because BIRTH_YEAR needs to be redacted as well based
111111
on the results
112112
@@ -119,15 +119,14 @@ def _to_redact_interval(df_col):
119119
120120
"""
121121
phi_cutoff = 365 * 89
122-
pediatric_cutoff = 365 * 18
123122
# Some centers pre-redact their values by adding < or >. These
124123
# must be redacted
125124
contain_greaterthan = df_col.astype(str).str.contains(">", na=False)
126125
contain_lessthan = df_col.astype(str).str.contains("<", na=False)
127126
# Add in errors='coerce' to turn strings into NaN
128127
col_int = pd.to_numeric(df_col, errors="coerce")
129128
to_redact = (col_int > phi_cutoff) | contain_greaterthan
130-
to_redact_pediatric = (col_int < pediatric_cutoff) | contain_lessthan
129+
to_redact_pediatric = contain_lessthan
131130
return to_redact, to_redact_pediatric
132131

133132

@@ -193,8 +192,7 @@ def redact_phi(
193192
to_redact, to_redactpeds = _to_redact_interval(clinicaldf[col])
194193
clinicaldf.loc[to_redact, "BIRTH_YEAR"] = "cannotReleaseHIPAA"
195194
clinicaldf.loc[to_redact, col] = ">32485"
196-
clinicaldf.loc[to_redactpeds, "BIRTH_YEAR"] = "withheld"
197-
clinicaldf.loc[to_redactpeds, col] = "<6570"
195+
clinicaldf.loc[to_redactpeds, col] = "withheld"
198196
# Redact BIRTH_YEAR values that have < or >
199197
# Birth year has to be done separately because it is not an interval
200198
clinicaldf["BIRTH_YEAR"] = _redact_year(clinicaldf["BIRTH_YEAR"])

0 commit comments

Comments
 (0)