Skip to content

Commit 138e047

Browse files
committed
add post qual gnomad filtration
1 parent 444aca2 commit 138e047

File tree

3 files changed

+50
-15
lines changed

3 files changed

+50
-15
lines changed

BALSAMIC/constants/variant_filters.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ class BaseSNVFilters:
159159
@classmethod
160160
def filter_criteria(
161161
cls,
162-
category: Literal["clinical", "research", "quality"],
162+
category: Literal["clinical", "research", "post_quality", "quality"],
163163
analysis_type: Optional[Enum] = None,
164164
variant_caller: Optional[Enum] = None,
165165
exclude_variantcaller_filters: Optional[bool] = False,
@@ -169,7 +169,7 @@ def filter_criteria(
169169
Shared filtering logic to get filters based on criteria.
170170
171171
Args:
172-
category (Literal["clinical", "research", "quality"]): The filter category to use.
172+
category (Literal["clinical", "research", "post_quality", "quality"]): The filter category to use.
173173
analysis_type (Optional[Enum]): Filter based on analysis type (default: None).
174174
variant_caller (Optional[Enum]): Filter based on variant caller (default: None).
175175
exclude_variantcaller_filters (Optional[bool]): If True, excludes the variantcaller filters.
@@ -208,7 +208,7 @@ def filter_matches(f: VCFFilter) -> bool:
208208
@classmethod
209209
def get_bcftools_filter_string(
210210
cls,
211-
category: Literal["clinical", "research", "quality"],
211+
category: Literal["clinical", "research", "post_quality", "quality"],
212212
analysis_type: Optional[Enum] = None,
213213
variant_caller: Optional[Enum] = None,
214214
soft_filter_normals: Optional[bool] = None,
@@ -218,7 +218,7 @@ def get_bcftools_filter_string(
218218
Get a set of filter names based on various attributes.
219219
220220
Args:
221-
category (Literal["clinical", "research", "quality"]): The filter category to use.
221+
category (Literal["clinical", "research", "post_quality", "quality"]): The filter category to use.
222222
analysis_type (Optional[Enum]): Filter based on analysis type (default: None).
223223
variant_caller (Optional[Enum]): Filter based on variant caller (default: None).
224224
soft_filter_normals (Optional[bool]): If True, removes filters in MATCHED_NORMAL_FILTER_NAMES.
@@ -256,7 +256,7 @@ def get_bcftools_filter_string(
256256
@classmethod
257257
def get_filters(
258258
cls,
259-
category: Literal["clinical", "research", "quality"],
259+
category: Literal["clinical", "research", "post_quality", "quality"],
260260
analysis_type: Optional[Enum] = None,
261261
variant_caller: Optional[Enum] = None,
262262
exclude_variantcaller_filters: Optional[bool] = True,
@@ -266,7 +266,7 @@ def get_filters(
266266
Get a list of filters matching the specified attributes.
267267
268268
Args:
269-
category (Literal["clinical", "research", "quality"]): The filter category to use.
269+
category (Literal["clinical", "research", "post_quality", "quality"]): The filter category to use.
270270
analysis_type (Optional[Enum]): Filter based on analysis type (default: None).
271271
variant_caller (Optional[Enum]): Filter based on variant caller (default: None).
272272
exclude_variantcaller_filters (Optional[bool]): If True, excludes the variantcaller filters.
@@ -285,12 +285,15 @@ def get_filters(
285285

286286

287287
class WgsSNVFilters(BaseSNVFilters):
288+
post_quality = [
289+
VCFFilter(tag_value=0.01, filter_name="gnomad_hard_threshold", field="INFO"),
290+
]
288291
research = [
289292
VCFFilter(tag_value=0.01, filter_name="SWEGENAF", field="INFO"),
290293
VCFFilter(tag_value=0.001, filter_name="balsamic_high_pop_freq", field="INFO"),
291294
]
292295
clinical = research + [
293-
VCFFilter(tag_value=0.01, filter_name="Frq", field="INFO"),
296+
VCFFilter(tag_value=0.005, filter_name="Frq", field="INFO"),
294297
VCFFilter(tag_value=0.1, filter_name="ArtefactFrq", field="INFO"),
295298
]
296299
quality = [
@@ -331,15 +334,18 @@ class WgsSNVFilters(BaseSNVFilters):
331334

332335

333336
class TgaSNVFilters(BaseSNVFilters):
337+
post_quality = [
338+
VCFFilter(tag_value=0.01, filter_name="gnomad_hard_threshold", field="INFO"),
339+
]
334340
research = [
335341
VCFFilter(
336342
filter_name="MERGED", Description="SNV Merged with neighboring variants"
337343
),
338344
VCFFilter(tag_value=0.01, filter_name="SWEGENAF", field="INFO"),
339-
VCFFilter(tag_value=0.005, filter_name="balsamic_high_pop_freq", field="INFO"),
345+
VCFFilter(tag_value=0.001, filter_name="balsamic_high_pop_freq", field="INFO"),
340346
]
341347
clinical = research + [
342-
VCFFilter(tag_value=0.01, filter_name="Frq", field="INFO"),
348+
VCFFilter(tag_value=0.005, filter_name="Frq", field="INFO"),
343349
VCFFilter(tag_value=0.1, filter_name="ArtefactFrq", field="INFO"),
344350
VCFFilter(tag_value=0.3, filter_name="HighOccurrenceFrq", field="INFO"),
345351
]
@@ -403,10 +409,10 @@ class TgaSNVFilters(BaseSNVFilters):
403409
class TgaUmiSNVFilters(BaseSNVFilters):
404410
research = [
405411
VCFFilter(tag_value=0.01, filter_name="SWEGENAF", field="INFO"),
406-
VCFFilter(tag_value=0.02, filter_name="balsamic_high_pop_freq", field="INFO"),
412+
VCFFilter(tag_value=0.001, filter_name="balsamic_high_pop_freq", field="INFO"),
407413
]
408414
clinical = research + [
409-
VCFFilter(tag_value=0.01, filter_name="Frq", field="INFO"),
415+
VCFFilter(tag_value=0.005, filter_name="Frq", field="INFO"),
410416
VCFFilter(tag_value=0.1, filter_name="ArtefactFrq", field="INFO"),
411417
]
412418
quality = [

BALSAMIC/snakemake_rules/annotation/somatic_snv_annotation.rule

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,36 @@
22
# coding: utf-8
33
# Somatic SNV annotation module.
44

5+
rule snv_gnomad_annotation_and_filtration:
    # Annotate the research SNV/INDEL VCF with vcfanno using the gnomAD
    # annotation config, then keep only records whose INFO/GNOMADAF_popmax is
    # at or below the "gnomad_hard_threshold" post-quality filter value, or is
    # missing. The filtered VCF is bgzipped and tabix-indexed.
    input:
        vcf_snv_research = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.research.vcf.gz",
    output:
        research_gnomadfiltered = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.research_gnomadfiltered.vcf.gz",
    benchmark:
        Path(benchmark_dir, "snv_gnomad_annotation_and_filtration.{case_name}.{var_caller}.tsv").as_posix()
    singularity:
        Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix()
    params:
        # presumably index 0 of the returned value is the threshold (tag_value)
        # and the filter name follows; verify against get_tag_and_filtername.
        pop_freq = get_tag_and_filtername(snv_post_quality_filters, "gnomad_hard_threshold"),
        gnomad_annotation = dump_toml(gnomad_annotation),
        tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
    message:
        "Annotating and filtering based on gnomad frequencies on SNVs and INDELs"
    # NOTE(review): the shell command uses {resources.threads} but this rule
    # declares no `resources:` block; this assumes a `threads` resource is
    # supplied elsewhere (e.g. default resources / profile) -- confirm.
    shell:
        """
mkdir -p {params.tmpdir};

# The vcfanno TOML is written to the scratch directory: the rule declares no
# TOML output, so referencing one under {{output}} would fail at format time.
echo \'{params.gnomad_annotation}\' > {params.tmpdir}/vcfanno_gnomad_annotation.toml;

vcfanno -p {resources.threads} {params.tmpdir}/vcfanno_gnomad_annotation.toml {input.vcf_snv_research} \
| bcftools view --threads {resources.threads} -O z -o {params.tmpdir}/SNV.somatic.research.gnomad_annotated.vcf.gz ;

bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' -O z -o {output.research_gnomadfiltered} {params.tmpdir}/SNV.somatic.research.gnomad_annotated.vcf.gz ;

tabix -p vcf -f {output.research_gnomadfiltered};

rm -rf {params.tmpdir};
        """
31+
532
rule bcftools_get_somaticINDEL_research:
633
input:
7-
vcf_research = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.research.vcf.gz",
34+
vcf_research = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.research_gnomadfiltered.vcf.gz",
835
output:
936
vcf_indel_research = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.indel.research.vcf.gz",
1037
benchmark:
@@ -43,7 +70,7 @@ CADD.sh -g GRCh37 -o {output.cadd_indel_research} {input.vcf_indel_research}
4370

4471
rule bcftools_annotate_somaticINDEL_research:
4572
input:
46-
vcf_research = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.research.vcf.gz",
73+
vcf_research = vcf_dir + "SNV.somatic.{case_name}.{var_caller}.research_gnomadfiltered.vcf.gz",
4774
cadd_indel_research = vep_dir + "SNV.somatic.{case_name}.{var_caller}.cadd_indel.research.tsv.gz",
4875
output:
4976
vcf_indel_research = vep_dir + "SNV.somatic.{case_name}.{var_caller}.cadd_indel.research.vcf.gz",

BALSAMIC/workflows/balsamic.smk

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,12 @@ else:
136136
if sequencing_type == "targeted":
137137
exome = config_model.panel.exome
138138
snv_quality_filters = SNV_FILTERS.get_filters(category="quality", analysis_type=analysis_type, exome=exome)
139+
snv_post_quality_filters = SNV_FILTERS.get_filters(category="post_quality", analysis_type=analysis_type, exome=exome)
139140
snv_research_filters = SNV_FILTERS.get_filters(category="research", analysis_type=analysis_type, exome=exome)
140141
snv_clinical_filters = SNV_FILTERS.get_filters(category="clinical", analysis_type=analysis_type, exome=exome)
141142
else:
142143
snv_quality_filters = SNV_FILTERS.get_filters(category="quality", analysis_type=analysis_type)
144+
snv_post_quality_filters = SNV_FILTERS.get_filters(category="post_quality",analysis_type=analysis_type)
143145
snv_research_filters = SNV_FILTERS.get_filters(category="research", analysis_type=analysis_type)
144146
snv_clinical_filters = SNV_FILTERS.get_filters(category="clinical", analysis_type=analysis_type)
145147

@@ -165,7 +167,7 @@ if config_model.custom_filters and config_model.custom_filters.umi_min_reads:
165167
params.umiconsensuscall.filter_minreads = config_model.custom_filters.umi_min_reads
166168

167169
# vcfanno annotations
168-
research_annotations.append(
170+
gnomad_annotation = [
169171
{
170172
"annotation": [
171173
{
@@ -176,7 +178,7 @@ research_annotations.append(
176178
}
177179
]
178180
}
179-
)
181+
]
180182

181183
research_annotations.append(
182184
{

0 commit comments

Comments
 (0)