|
38 | 38 | # fa8, Nov 2023 |
39 | 39 | # This script reads the discardedvariants.csv file and creates a summary VCF |
40 | 40 | # Output is written to standard output |
41 | | -# E.g. perl post_process_discarded_variants.pl tmpNanoSeq/post/discardedvariants.csv > tmpNanoSeq/post/discardedvariants.vcf |
| 41 | +# E.g. perl post_process_discarded_variants.pl tmpNanoSeq/post/discardedvariants.csv reference_genome.fa > tmpNanoSeq/post/discardedvariants.vcf |
42 | 42 |
|
| 43 | + |
| 44 | + |
| 45 | +my $input_file = $ARGV[0]; |
| 46 | +my $ref_genome = $ARGV[1]; |
| 47 | +if(!defined($ref_genome) || $ref_genome eq "") { |
| 48 | + die "Reference genome fasta file needs to be provided. Exiting...\n". |
| 49 | + "Usage: perl post_process_discarded_variants.pl discardedvariants.csv reference_genome.fa > discardedvariants.vcf\n"; |
| 50 | +} |
| 51 | + |
| 52 | +# Prepare header: |
43 | 53 | my $header = |
44 | 54 | "##fileformat=VCFv4.2 |
45 | 55 | ##source=NanoSeq pipeline (discarded variants from post_process_discarded_variants.pl) |
| 56 | +##reference=file://$ref_genome |
46 | 57 | ##INFO=<ID=PYR_SUB,Number=1,Type=String,Description=\"Pyrimidine-based trinucleotide substitution\"> |
47 | 58 | ##INFO=<ID=N,Number=1,Type=Integer,Description=\"Number of times this variant has been discarded\"> |
48 | 59 | ##INFO=<ID=dplx_clip_filter,Number=1,Type=Integer,Description=\"Number of times this variant has failed the dplx_clip_filter\"> |
|
55 | 66 | ##INFO=<ID=five_prime_trim_filter,Number=1,Type=Integer,Description=\"Number of times this variant has failed the five_prime_trim_filter\"> |
56 | 67 | ##INFO=<ID=three_prime_trim_filter,Number=1,Type=String,Description=\"Number of times this variant has failed the three_prime_trim_filter\"> |
57 | 68 | ##INFO=<ID=proper_pair_filter,Number=1,Type=String,Description=\"Number of times this variant has failed the proper_pair_filter\"> |
| 69 | +##INFO=<ID=vaf_filter,Description=\"VAF in matched normal higher than threshold\"> |
58 | 70 | ##INFO=<ID=QPOS,Number=.,Type=Integer,Description=\"Read position closest to 5-prime end. Up to 10 QPOS are reported\"> |
59 | 71 | ##INFO=<ID=NORM_VAF,Number=1,Type=Float,Description=\"VAF in matched normal\"> |
60 | 72 | ##INFO=<ID=MEAN_DX_ASXS,Number=1,Type=Float,Description=\"mean AS-XS for duplex\"> |
61 | 73 | ##INFO=<ID=MEAN_NORM_ASXS,Number=1,Type=Float,Description=\"mean AS-XS for normal\"> |
62 | 74 | ##INFO=<ID=MEAN_DX_NM,Number=1,Type=Float,Description=\"mean NM for duplex\"> |
63 | 75 | ##INFO=<ID=MEAN_NORM_NM,Number=1,Type=Float,Description=\"mean NM for normal\"> |
| 76 | +##INFO=<ID=NORM_COV,Number=1,Type=Integer,Description=\"Coverage in the matched normal\"> |
64 | 77 | ##FILTER=<ID=commonSNP,Description=\"Common SNP site\"> |
65 | 78 | ##FILTER=<ID=shearwater,Description=\"Noisy site\"> |
66 | | -##FILTER=<ID=not_in_masks,Description=\"Not in the commonSNP and noise masks\"> |
67 | | -#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"; |
68 | | - |
| 79 | +##FILTER=<ID=not_in_masks,Description=\"Not in the commonSNP and noise masks\">\n"; |
| 80 | +open(I, "$ref_genome.fai") || die "Error: cannot find file $ref_genome.fai\n"; |
| 81 | +while(<I>) { |
| 82 | + chomp; |
| 83 | + my($contig_name,$length) = (split)[0,1]; |
| 84 | + $header .= "##contig=<ID=$contig_name,length=$length>\n"; |
| 85 | +} |
| 86 | +close(I); |
| 87 | +$header .= "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"; |
69 | 88 | print $header; |
70 | 89 |
|
71 | | -my $input_file = $ARGV[0]; |
72 | 90 | my %ds; |
73 | 91 | my %complement; |
74 | 92 | $complement{"A"} = "T"; |
|
163 | 181 | $ds{"$chrom:$pos:$ref:$mut"}->{"mean_min_BQ"} = $ds{"$chrom:$pos:$ref:$mut"}->{"mean_min_BQ" } + min($dplxCQfwdT,$dplxCQrevT); |
164 | 182 | $ds{"$chrom:$pos:$ref:$mut"}->{"vaf_normal" } = ($bulkForwardT + $bulkReverseT)/$normal_coverage if($normal_coverage > 0); |
165 | 183 | } else { |
166 | | - print STDERR "Error: $call doesn't match A, C, G, T. Exiting...\n"; |
167 | | - exit; |
| 184 | + die "Error: $call doesn't match A, C, G, T. Exiting...\n"; |
| 185 | + |
168 | 186 | } |
169 | 187 | push(@{$ds{"$chrom:$pos:$ref:$mut"}->{"QPOS"}},$qpos); |
170 | 188 | $ds{"$chrom:$pos:$ref:$mut"}->{"dplx_clip_filter" } = $ds{"$chrom:$pos:$ref:$mut"}->{"dplx_clip_filter" } + $dplx_clip_filter; |
|
0 commit comments