Skip to content

Commit 9222bf9

Browse files
authored
bug: filter clinvar missing conditions (#1083)
1 parent 619bfa1 commit 9222bf9

File tree

2 files changed

+77
-4
lines changed

2 files changed

+77
-4
lines changed

Diff for: v03_pipeline/lib/reference_datasets/clinvar.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,13 @@ def select_fields(ht):
154154
conflictingPathogenicities=parsed_and_mapped_clnsigconf(ht),
155155
goldStars=CLINVAR_GOLD_STARS_LOOKUP.get(hl.delimit(ht.info.CLNREVSTAT)),
156156
submitters=ht.submitters,
157-
# assumes the format 'MedGen#:condition', e.g.'C0023264:Leigh syndrome'
158-
conditions=hl.map(
159-
lambda p: p.split(r':')[1],
160-
ht.conditions,
157+
# assumes the format 'MedGen#:condition;MedGen#:condition', e.g. 'C0023264:Leigh syndrome'
158+
conditions=hl.filter(
159+
hl.is_defined,
160+
hl.flatmap(
161+
lambda p: p.split(';'),
162+
ht.conditions,
163+
).map(lambda p: p.split(':')[1]),
161164
),
162165
)
163166

Diff for: v03_pipeline/lib/reference_datasets/clinvar_test.py

+70
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,73 @@ def test_get_ht(self):
169169
),
170170
],
171171
)
172+
173+
# VariationID 9 tests Conditions parsing
174+
self.assertListEqual(
175+
ht.collect()[8].submitters,
176+
[
177+
'Hemochromatosis type 1',
178+
'Hereditary cancer-predisposing syndrome',
179+
'HFE-related disorder',
180+
'Hemochromatosis type 1',
181+
'Hemochromatosis type 1',
182+
'Bronze diabetes',
183+
'Hemochromatosis type 1',
184+
'HFE-related disorder',
185+
'Hemochromatosis type 1',
186+
'Abdominal pain',
187+
'Atypical behavior',
188+
'Pain',
189+
'Peripheral neuropathy',
190+
'Abnormality of the nervous system',
191+
'Abnormality of the male genitalia',
192+
'Abnormal peripheral nervous system morphology',
193+
'Hereditary hemochromatosis',
194+
'Hemochromatosis type 1',
195+
'not provided',
196+
'Hereditary hemochromatosis',
197+
'not provided',
198+
'Hemochromatosis type 1',
199+
'Hemochromatosis type 1',
200+
'Hemochromatosis type 1',
201+
'Hemochromatosis type 1',
202+
'Hemochromatosis type 1',
203+
'Hemochromatosis type 1',
204+
'Hemochromatosis type 1',
205+
'not provided',
206+
'not provided',
207+
'Hemochromatosis type 1',
208+
'Hemochromatosis type 1',
209+
'Hereditary hemochromatosis',
210+
'Cardiomyopathy',
211+
'not provided',
212+
'Juvenile hemochromatosis',
213+
'Hemochromatosis type 1',
214+
'not provided',
215+
'not provided',
216+
'Inborn genetic diseases',
217+
'Hemochromatosis type 1',
218+
'not provided',
219+
'Hemochromatosis type 1',
220+
'Hemochromatosis type 1',
221+
'Hemochromatosis type 1',
222+
'Hemochromatosis type 1',
223+
'not provided',
224+
'Porphyrinuria',
225+
'Cutaneous photosensitivity',
226+
'Hemochromatosis type 1',
227+
'Hereditary hemochromatosis',
228+
'Hemochromatosis type 1',
229+
'Hemochromatosis type 1',
230+
'Hemochromatosis type 1',
231+
'not provided',
232+
'Hemochromatosis type 1',
233+
'Variegate porphyria',
234+
'Familial porphyria cutanea tarda',
235+
'Alzheimer disease type 1',
236+
'Microvascular complications of diabetes, susceptibility to, 7',
237+
'Transferrin serum level quantitative trait locus 2',
238+
'Hemochromatosis type 1',
239+
'Hemochromatosis type 1',
240+
],
241+
)

0 commit comments

Comments
 (0)