Skip to content

Commit e8dfa6a

Browse files
committed
Support non-numeric fields in input data.
1 parent 6af3a4b commit e8dfa6a

5 files changed

Lines changed: 20 additions & 13 deletions

File tree

nonlinear-covariate-gwas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,4 +26,4 @@
2626
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2727
"""DeepNull."""
2828

29-
__version__ = '0.1.2'
29+
__version__ = '0.1.3'

nonlinear-covariate-gwas/data.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -169,11 +169,17 @@ def write_plink_or_bolt_file(input_df: pd.DataFrame,
169169
for column in df.columns:
170170
values = df[column]
171171
mask = ~values.isnull()
172-
if (values[mask] == values[mask].astype(int)).all():
173-
# All non-null values are integers. Convert to the 'Int64' type that
174-
# allows nullable integers. This requires nulls to use the pd.NA value
175-
# rather than np.nan.
176-
df[column] = values.fillna(pd.NA).astype('Int64')
172+
try:
173+
int_values = values[mask].astype(int)
174+
except ValueError:
175+
# This is a non-numeric field, leave it as-is.
176+
continue
177+
else:
178+
if (values[mask] == int_values).all():
179+
# All non-null values are integers. Convert to the 'Int64' type that
180+
# allows nullable integers. This requires nulls to use the pd.NA value
181+
# rather than np.nan.
182+
df[column] = values.fillna(pd.NA).astype('Int64')
177183

178184
return df.to_csv(
179185
path_or_buf, sep='\t', index=False, na_rep=str(missing_value))

nonlinear-covariate-gwas/data_test.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ def test_load_invalid_plink_or_bolt_file(self, str_contents, msg):
164164
def test_write_plink_or_bolt_file(self):
165165
df = pd.DataFrame(
166166
{
167-
'FID': [1, 2, 3, 4, 5],
167+
'FID': ['fam1', 'fam1', 'fam1', 'fam2', 'fam3'],
168168
'IID': [1, 2, 3, 4, 5],
169169
'age': [45, 50, 55, 60, 65],
170170
'sex': [0, 0, 1, 1, 0],
@@ -175,11 +175,11 @@ def test_write_plink_or_bolt_file(self):
175175
binary_column_map = {'sex': {0: 1, 1: 2}, 'binary_miss': {0.: 1., 1.: 2.}}
176176

177177
expected = ('FID\tIID\tage\tsex\tbinary_miss\tcont_miss\n'
178-
'1\t1\t45\t1\t1\t0.5\n'
179-
'2\t2\t50\t1\t2\t1.5\n'
180-
'3\t3\t55\t2\t2\t2.5\n'
181-
'4\t4\t60\t2\tNA\t3.5\n'
182-
'5\t5\t65\t1\t1\tNA\n')
178+
'fam1\t1\t45\t1\t1\t0.5\n'
179+
'fam1\t2\t50\t1\t2\t1.5\n'
180+
'fam1\t3\t55\t2\t2\t2.5\n'
181+
'fam2\t4\t60\t2\tNA\t3.5\n'
182+
'fam3\t5\t65\t1\t1\tNA\n')
183183

184184
actual = data.write_plink_or_bolt_file(
185185
df,

nonlinear-covariate-gwas/setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
#
1818
# $ pip install deepnull
1919
name='deepnull',
20-
version='0.1.2', # Keep in sync with __init__.__version__.
20+
version='0.1.3', # Keep in sync with __init__.__version__.
2121
description='Models nonlinear interactions between covariates and phenotypes',
2222
long_description=long_description,
2323
long_description_content_type='text/markdown',

nonlinear-covariate-gwas/train_eval_test.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,7 @@ def test_create_deepnull_prediction(self, target_is_binary):
194194
design_df = _create_df(target_is_binary=target_is_binary, size=size)
195195
design_df['FID'] = np.arange(size)
196196
design_df['IID'] = np.arange(size)
197+
design_df['unused_str_column'] = np.random.choice(list('abcdefg'), size)
197198

198199
input_df = design_df.copy(deep=True)
199200
with tempfile.TemporaryDirectory() as tmpdir:

0 commit comments

Comments
 (0)