Skip to content

Commit e0e3b77

Browse files
committed
refactor: improve code sharing for clickhouse migration tasks
1 parent adc1291 commit e0e3b77

File tree

3 files changed

+33
-25
lines changed

3 files changed

+33
-25
lines changed

v03_pipeline/lib/misc/family_entries.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,15 +100,17 @@ def globalize_ids(ht: hl.Table) -> hl.Table:
100100
)
101101

102102

103-
def deglobalize_ids(ht: hl.Table) -> hl.Table:
103+
def deglobalize_ids(ht: hl.Table, sample_id_field: str = 's') -> hl.Table:
104104
ht = ht.annotate(
105105
family_entries=(
106106
hl.enumerate(ht.family_entries).starmap(
107107
lambda i, fe: hl.enumerate(fe).starmap(
108108
lambda j, e: hl.Struct(
109109
**e,
110-
s=ht.family_samples[ht.family_guids[i]][j],
111-
family_guid=ht.family_guids[i],
110+
**{
111+
sample_id_field: ht.family_samples[ht.family_guids[i]][j],
112+
'family_guid': ht.family_guids[i],
113+
},
112114
),
113115
),
114116
)

v03_pipeline/lib/model/dataset_type.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,24 @@ def entries_fields(
101101
},
102102
}[self]
103103

104+
@property
105+
def entries_export_fields(
106+
self,
107+
) -> Callable[hl.StructExpression, hl.StructExpression]:
108+
return {
109+
DatasetType.SNV_INDEL: lambda fe: hl.Struct(
110+
sampleId=fe.sampleId,
111+
gt=hl.case()
112+
.when(fe.GT.is_hom_ref(), 0)
113+
.when(fe.GT.is_het(), 1)
114+
.when(fe.GT.is_hom_var(), 2)
115+
.default(hl.missing(hl.tint32)),
116+
gq=fe.GQ,
117+
ab=fe.AB,
118+
dp=fe.DP,
119+
),
120+
}[self]
121+
104122
@property
105123
def row_fields(
106124
self,

v03_pipeline/lib/tasks/exports/write_new_entries_parquet.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from v03_pipeline.lib.annotations.fields import get_fields
77
from v03_pipeline.lib.misc.family_entries import (
88
compute_callset_family_entries_ht,
9+
deglobalize_ids,
910
)
1011
from v03_pipeline.lib.paths import (
1112
new_entries_parquet_path,
@@ -90,16 +91,7 @@ def create_table(self) -> None:
9091
**self.param_kwargs,
9192
),
9293
)
93-
ht = ht.annotate(
94-
family_entries=hl.enumerate(ht.family_entries).starmap(
95-
lambda i, fs: hl.enumerate(fs).starmap(
96-
lambda j, e: e.annotate(
97-
family_guid=ht.family_guids[i], # noqa: B023
98-
sampleId=ht.family_samples[ht.family_guids[i]][j], # noqa: B023
99-
),
100-
),
101-
),
102-
)
94+
ht = deglobalize_ids(ht)
10395
annotations_ht = hl.read_table(
10496
self.input()[ANNOTATIONS_TABLE_TASK].path,
10597
)
@@ -124,20 +116,16 @@ def create_table(self) -> None:
124116
family_guid=ht.family_entries.family_guid[0],
125117
sample_type=self.sample_type.value,
126118
xpos=get_expr_for_xpos(ht.locus),
127-
is_gnomad_gt_5_percent=hl.is_defined(ht.is_gt_5_percent),
119+
**(
120+
{
121+
'is_gnomad_gt_5_percent': hl.is_defined(ht.is_gt_5_percent),
122+
}
123+
if hasattr(ht, 'is_gnomad_gt_5_percent')
124+
else {}
125+
),
128126
filters=ht.filters,
129127
calls=ht.family_entries.map(
130-
lambda fe: hl.Struct(
131-
sampleId=fe.sampleId,
132-
gt=hl.case()
133-
.when(fe.GT.is_hom_ref(), 0)
134-
.when(fe.GT.is_het(), 1)
135-
.when(fe.GT.is_hom_var(), 2)
136-
.default(hl.missing(hl.tint32)),
137-
gq=fe.GQ,
138-
ab=fe.AB,
139-
dp=fe.DP,
140-
),
128+
lambda fe: self.dataset_type.entries_export_fields(fe),
141129
),
142130
sign=1,
143131
)

0 commit comments

Comments
 (0)