@@ -11,6 +11,8 @@ rule vep_somatic_research_sv:
1111 vcf_research_vep = vep_dir + "SV.somatic." + config ["analysis" ]["case_id" ] + ".svdb.research.vep.vcf.gz" ,
1212 benchmark :
1313 Path (benchmark_dir , "vep_somatic_research_sv." + config ["analysis" ]["case_id" ] + ".svdb.tsv" ).as_posix ()
14+ resources :
15+ mem_mb = lambda wc : (140000 if config_model .analysis .sequencing_type == SequencingType .WGS else 25000 )
1416 singularity :
1517 Path (singularity_image , config ["bioinfo_tools" ].get ("ensembl-vep" ) + ".sif" ).as_posix ()
1618 params :
@@ -68,66 +70,52 @@ tabix -p vcf -f {output.vcf_research};
6870 """
6971
7072
# Annotate the research SV VCF with observation counts/frequencies from up to
# three SVDB databases, chained in this order:
#   1. clinical observations  -> clin_obs / Frq
#   2. somatic observations   -> Cancer_Somatic_Obs / Cancer_Somatic_Frq
#   3. artefact observations  -> ArtefactObs / ArtefactFrq
# Each stage is skipped (its input is copied through unchanged) when the
# corresponding database file does not exist. The final VCF is bgzipped and
# tabix-indexed.
rule svdb_annotate_clinical_sv:
    input:
        vcf_research = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vcf.gz",
    output:
        vcf_sv_clinical = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.clinical.vcf.gz",
    benchmark:
        Path(benchmark_dir, 'svdb_annotate_clinical_sv_.' + config["analysis"]["case_id"] + ".tsv")
    singularity:
        Path(singularity_image, config["bioinfo_tools"].get("svdb") + ".sif").as_posix()
    params:
        case_name = config["analysis"]["case_id"],
        clinical_sv_observations = clinical_sv,
        somatic_sv_observations = somatic_sv,
        artefact_sv_observations = artefact_sv_obs,
        # Scratch files shared by the stages below. These are plain paths:
        # temp() is only honoured on declared outputs, so wrapping a param in
        # it would not delete anything. They are removed explicitly at the end
        # of the shell block instead.
        vcf_clinical_obs = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.clinical_obs.vcf.gz",
        vcf_somatic_obs = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.somatic_obs.vcf.gz",
        vcf_intermediate = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.intermediate.vcf",
    message:
        "Annotating structural and copy number variants with clinical observations using SVDB for {params.case_name}",
    shell:
        """
# Stage 1: clinical observations.
if [[ -f "{params.clinical_sv_observations}" ]]; then
    svdb --query --bnd_distance 10000 --overlap 0.80 \
    --in_occ Obs --out_occ clin_obs --in_frq Frq --out_frq Frq \
    --db {params.clinical_sv_observations} --query_vcf {input.vcf_research} > {params.vcf_intermediate};
    bgzip -l 9 -c {params.vcf_intermediate} > {params.vcf_clinical_obs};
else
    cp {input.vcf_research} {params.vcf_clinical_obs};
fi

# Stage 2: somatic observations, applied on top of stage 1.
if [[ -f "{params.somatic_sv_observations}" ]]; then
    svdb --query --bnd_distance 10000 --overlap 0.80 \
    --in_occ Obs --out_occ Cancer_Somatic_Obs --in_frq Frq --out_frq Cancer_Somatic_Frq \
    --db {params.somatic_sv_observations} --query_vcf {params.vcf_clinical_obs} > {params.vcf_intermediate};
    bgzip -l 9 -c {params.vcf_intermediate} > {params.vcf_somatic_obs};
else
    cp {params.vcf_clinical_obs} {params.vcf_somatic_obs};
fi

# Stage 3: artefact observations (tighter breakend distance), producing the final output.
if [[ -f "{params.artefact_sv_observations}" ]]; then
    svdb --query --bnd_distance 500 --overlap 0.80 \
    --in_occ Obs --out_occ ArtefactObs --in_frq Frq --out_frq ArtefactFrq \
    --db {params.artefact_sv_observations} --query_vcf {params.vcf_somatic_obs} > {params.vcf_intermediate};
    bgzip -l 9 -c {params.vcf_intermediate} > {output.vcf_sv_clinical};
else
    cp {params.vcf_somatic_obs} {output.vcf_sv_clinical};
fi

tabix -p vcf -f {output.vcf_sv_clinical};

# Clean up scratch files. -f because the uncompressed intermediate is never
# created when none of the three databases is present.
rm -f {params.vcf_clinical_obs} {params.vcf_somatic_obs} {params.vcf_intermediate};
        """
0 commit comments