-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path14_genotypeDB.sh
More file actions
66 lines (53 loc) · 1.75 KB
/
Copy path14_genotypeDB.sh
File metadata and controls
66 lines (53 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#############
## written 2025.04.08 RMPeery
## USE: ./genotypeDB.sh
## Note: this is a loop script to combine gVCFs per interval,
## with one interval per loop then call genotypes across all samples
#############
set -eou pipefail
# change to match your project
DIR=/home/user/path
output_vcfs="$DIR/vcfs/intervalVCFs_splitBed"
reference="$DIR/refs/Phyci1_AssemblyScaffolds.fasta"
interval_dir="$DIR/refs/beds"
gvcfs="$DIR/gvcf"
interval_dbs="$DIR/vcfs/interval_dbs_splitBed"
# safely make directories jic
mkdir -p "$output_vcfs"
mkdir -p "$interval_dbs"
# name the dataset
DATASET=Pcin
# put gvcfs into an array
gvcf_in=("$gvcfs"/*.g.vcf.gz)
# make the genotype call command
# input from ChatGPT3 on this
cmd=""
for file in "${gvcf_in[@]}"; do
cmd+="--variant $file "
done
# run intervals sequentially
for bed in "$interval_dir"/interval_batch*.bed; do
bedfile=$(basename "$bed")
db_path="$interval_dbs/${bedfile}_db"
out_vcf="$output_vcfs/${bedfile}.vcf.gz"
# write out to check inputs are found
echo "Processing: $bedfile"
echo "Creating GenomicsDB: $db_path"
echo "Output VCF: $out_vcf"
# remove db if it exists - otherwise the run fails
rm -rf "$db_path"
# run gatk GenomicsDBImport with extra java options for memory
gatk --java-options "-Xmx20g -Xms12g" GenomicsDBImport \
$cmd \
--genomicsdb-workspace-path "$db_path" \
--reader-threads 2 \
-L "$bed"
# run gatk GenotypeGVCFs with extra java options for memory
gatk --java-options "-Xmx32g -DGATK_STACKTRACE_ON_USER_EXCEPTION=true" GenotypeGVCFs \
-R "$reference" \
-V "gendb://$db_path" \
-O "$out_vcf"
echo "Finished processing: $bedfile"
done
echo "All interval genotyping completed."