1+ #! /bin/bash
2+
3+ set -Eeuo pipefail
4+
5+ cram_file=$1
6+ cram_index=$2
7+ sample_id=$3
8+ reference_fasta=$4
9+ reference_index=$5
10+ include_bed_file=$6
11+ primary_contigs_list=$7
12+ cpu_cores=${8:- 4}
13+
14+
15+ chr_list=$( paste -s -d ' ,' " ${primary_contigs_list} " )
16+
17+ # print some info that may be useful for debugging
18+ df -h
19+ echo " whamg $( whamg 2>&1 | grep Version) "
20+
21+ # necessary for getting permission to read from google bucket directly
22+ export GCS_OAUTH_TOKEN=` gcloud auth application-default print-access-token`
23+
24+ echo " Converting cram to bam ..."
25+ # covert cram to bam
26+ samtools view -b1@ ${cpu_cores} -T " ${reference_fasta} " " ${cram_file} " > sample.bam
27+ echo " Finished converting cram to bam."
28+
29+ # index bam file
30+ samtools index -@ ${cpu_cores} sample.bam
31+
32+ # ensure that index files are present in appropriate locations
33+ ln -s sample.bam.bai sample.bai
34+ if [ ! -e " ${reference_fasta} .fai" ]; then
35+ ln -s " ${reference_index} " " ${reference_fasta} .fai"
36+ fi
37+
38+ # run whamg on all specified intervals
39+ mkdir tmpVcfs
40+ cd tmpVcfs
41+ awk ' BEGIN{FS=OFS="\t"}{printf("%07d\t%s\n",NR,$1":"$2"-"$3)}' ${include_bed_file} | \
42+ while read -r line interval; do
43+ vcfFile=" $line .wham.vcf.gz"
44+ whamg \
45+ -c ${chr_list} " \
46+ -x ${cpu_cores} \
47+ -a " ${reference_fasta} " \
48+ -f ../sample.bam \
49+ -r $interval \
50+ | bgzip -c > $vcfFile
51+ bcftools index -t $vcfFile
52+ done
53+
54+ # We need to update both the VCF sample ID and the TAGS INFO field in the WHAM output VCFs.
55+ # WHAM uses both to store the sample identifier, and by default uses the SM identifier from the BAM file.
56+ # We need to update both to reflect the potentially-renamed sample identifier used by the pipeline (sample_id) --
57+ # svtk standardize_vcf uses the TAGS field to identify the sample for WHAM VCFs.
58+
59+ ls -1 *.wham.vcf.gz > vcf.list
60+ bcftools concat -a -Ov -f vcf.list | \
61+ sed -e 's/^#CHROM\t.*/#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t${sample_id} /' -e 's/;TAGS=[^;]*;/;TAGS=${sample_id} ;/' | \
62+ bgzip -c > " ../${sample_id} .wham.vcf.gz"
63+ cd ..
64+ bcftools index -t " ${sample_id} .wham.vcf.gz"
65+
66+ df -h
67+ ls -l
0 commit comments