106106# TODO: Add to transform.py
107107def _to_redact_interval (df_col ):
108108 """
109- Determines interval values that are <18 and >89 that need to be redacted
109+ Determines values pre-redacted with "<" (e.g. "<18") and interval values >89 years that need to be redacted
110110 Returns boolean masks because BIRTH_YEAR needs to be redacted as well,
111111 based on which rows are flagged
112112
@@ -119,15 +119,14 @@ def _to_redact_interval(df_col):
119119
120120 """
121121 phi_cutoff = 365 * 89
122- pediatric_cutoff = 365 * 18
123122 # Some centers pre-redact their values by adding < or >. These
124123 # must be redacted
125124 contain_greaterthan = df_col .astype (str ).str .contains (">" , na = False )
126125 contain_lessthan = df_col .astype (str ).str .contains ("<" , na = False )
127126 # errors='coerce' turns non-numeric strings into NaN
128127 col_int = pd .to_numeric (df_col , errors = "coerce" )
129128 to_redact = (col_int > phi_cutoff ) | contain_greaterthan
130- to_redact_pediatric = ( col_int < pediatric_cutoff ) | contain_lessthan
129+ to_redact_pediatric = contain_lessthan
131130 return to_redact , to_redact_pediatric
132131
133132
@@ -193,8 +192,7 @@ def redact_phi(
193192 to_redact , to_redactpeds = _to_redact_interval (clinicaldf [col ])
194193 clinicaldf .loc [to_redact , "BIRTH_YEAR" ] = "cannotReleaseHIPAA"
195194 clinicaldf .loc [to_redact , col ] = ">32485"
196- clinicaldf .loc [to_redactpeds , "BIRTH_YEAR" ] = "withheld"
197- clinicaldf .loc [to_redactpeds , col ] = "<6570"
195+ clinicaldf .loc [to_redactpeds , col ] = "withheld"
198196 # Redact BIRTH_YEAR values that have < or >
199197 # Birth year has to be done separately because it is not an interval
200198 clinicaldf ["BIRTH_YEAR" ] = _redact_year (clinicaldf ["BIRTH_YEAR" ])
0 commit comments