Skip to content

Commit 4136d3c

Browse files
author
Thomas Clarke
committed
Merge tag '3.5.7' into develop
Update to discarded variant script to allow sorting of vcf discarded variant file
2 parents de983c4 + bfcea15 commit 4136d3c

File tree

3 files changed

+29
-8
lines changed

3 files changed

+29
-8
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# CHANGES
22

3+
## 3.5.7
4+
* Update to discarded variant script to allow sorting of the discarded variants VCF file
5+
36
## 3.5.6
47
* Adding parameters to "bcftools mpileup" call to resolve issue for samples using multiple wells
58
* Correcting code version

perl/post_process_discarded_variants.pl

100644100755
Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,22 @@
3838
# fa8, Nov 2023
3939
# This script reads the discardedvariants.csv file and creates a summary VCF
4040
# Output is written to standard output
41-
# E.g. perl post_process_discarded_variants.pl tmpNanoSeq/post/discardedvariants.csv > tmpNanoSeq/post/discardedvariants.vcf
41+
# E.g. perl post_process_discarded_variants.pl tmpNanoSeq/post/discardedvariants.csv reference_genome.fa > tmpNanoSeq/post/discardedvariants.vcf
4242

43+
44+
45+
my $input_file = $ARGV[0];
46+
my $ref_genome = $ARGV[1];
47+
if(!defined($ref_genome) || $ref_genome eq "") {
48+
die "Reference genome fasta file needs to be provided. Exiting...\n".
49+
"Usage: perl post_process_discarded_variants.pl discardedvariants.csv reference_genome.fa > discardedvariants.vcf\n";
50+
}
51+
52+
# Prepare header:
4353
my $header =
4454
"##fileformat=VCFv4.2
4555
##source=NanoSeq pipeline (discarded variants from post_process_discarded_variants.pl)
56+
##reference=file://$ref_genome
4657
##INFO=<ID=PYR_SUB,Number=1,Type=String,Description=\"Pyrimidine-based trinucleotide substitution\">
4758
##INFO=<ID=N,Number=1,Type=Integer,Description=\"Number of times this variant has been discarded\">
4859
##INFO=<ID=dplx_clip_filter,Number=1,Type=Integer,Description=\"Number of times this variant has failed the dplx_clip_filter\">
@@ -55,20 +66,27 @@
5566
##INFO=<ID=five_prime_trim_filter,Number=1,Type=Integer,Description=\"Number of times this variant has failed the five_prime_trim_filter\">
5667
##INFO=<ID=three_prime_trim_filter,Number=1,Type=Integer,Description=\"Number of times this variant has failed the three_prime_trim_filter\">
5768
##INFO=<ID=proper_pair_filter,Number=1,Type=Integer,Description=\"Number of times this variant has failed the proper_pair_filter\">
69+
##INFO=<ID=vaf_filter,Number=1,Type=Integer,Description=\"VAF in matched normal higher than threshold\">
5870
##INFO=<ID=QPOS,Number=.,Type=Integer,Description=\"Read position closest to 5-prime end. Up to 10 QPOS are reported\">
5971
##INFO=<ID=NORM_VAF,Number=1,Type=Float,Description=\"VAF in matched normal\">
6072
##INFO=<ID=MEAN_DX_ASXS,Number=1,Type=Float,Description=\"mean AS-XS for duplex\">
6173
##INFO=<ID=MEAN_NORM_ASXS,Number=1,Type=Float,Description=\"mean AS-XS for normal\">
6274
##INFO=<ID=MEAN_DX_NM,Number=1,Type=Float,Description=\"mean NM for duplex\">
6375
##INFO=<ID=MEAN_NORM_NM,Number=1,Type=Float,Description=\"mean NM for normal\">
76+
##INFO=<ID=NORM_COV,Number=1,Type=Integer,Description=\"Coverage in the matched normal\">
6477
##FILTER=<ID=commonSNP,Description=\"Common SNP site\">
6578
##FILTER=<ID=shearwater,Description=\"Noisy site\">
66-
##FILTER=<ID=not_in_masks,Description=\"Not in the commonSNP and noise masks\">
67-
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
68-
79+
##FILTER=<ID=not_in_masks,Description=\"Not in the commonSNP and noise masks\">\n";
80+
# Open the faidx index next to the reference; 3-arg open stops the user-supplied path being parsed as an open mode, and $! reports why it failed.
open(I, '<', "$ref_genome.fai") || die "Error: cannot open file $ref_genome.fai: $!\n";
81+
while(<I>) {
82+
chomp;
83+
my($contig_name,$length) = (split)[0,1];
84+
$header .= "##contig=<ID=$contig_name,length=$length>\n";
85+
}
86+
close(I);
87+
$header .= "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
6988
print $header;
7089

71-
my $input_file = $ARGV[0];
7290
my %ds;
7391
my %complement;
7492
$complement{"A"} = "T";
@@ -163,8 +181,8 @@
163181
$ds{"$chrom:$pos:$ref:$mut"}->{"mean_min_BQ"} = $ds{"$chrom:$pos:$ref:$mut"}->{"mean_min_BQ" } + min($dplxCQfwdT,$dplxCQrevT);
164182
$ds{"$chrom:$pos:$ref:$mut"}->{"vaf_normal" } = ($bulkForwardT + $bulkReverseT)/$normal_coverage if($normal_coverage > 0);
165183
} else {
166-
print STDERR "Error: $call doesn't match A, C, G, T. Exiting...\n";
167-
exit;
184+
die "Error: $call doesn't match A, C, G, T. Exiting...\n";
185+
168186
}
169187
push(@{$ds{"$chrom:$pos:$ref:$mut"}->{"QPOS"}},$qpos);
170188
$ds{"$chrom:$pos:$ref:$mut"}->{"dplx_clip_filter" } = $ds{"$chrom:$pos:$ref:$mut"}->{"dplx_clip_filter" } + $dplx_clip_filter;

python/runNanoSeq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
import tempfile
4848
import copy
4949

50-
version = '3.5.6'
50+
version = '3.5.7'
5151

5252
parser = argparse.ArgumentParser()
5353
# arguments for all subcommands

0 commit comments

Comments
 (0)