-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathrun_SAPPER.sh
More file actions
113 lines (78 loc) · 3.64 KB
/
run_SAPPER.sh
File metadata and controls
113 lines (78 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/bin/bash
# Please read the descriptions and modify the following values for
# your own datasets.
# NOTE: ALL INPUT FILES SHOULD BE AT THE CURRENT WORKING DIRECTORY
# Name of the factor or the experiment
FACTORNAME="MyFactor";
# Path to the IP BAM file
CHIPBAM="IP.bam";
# Path to the control BAM file
CTRLBAM="CTRL.bam";
# For PE data, use 'PE', or for SE data, use 'SE'.
MODE="PE";
# Extra parameters for MACS2. Note, do not change '--broad' setting
# here. Do such in the next setting for MACS2MODE. And do not set '-f
# BAMPE' here if you are dealing with PE data, it will be set
# according to your previous MODE setting. If you plan to use "PE"
# MODE, you have to set '--broad-cutoff' together with '-q' or '-p'.
MACS2EXTPARAM="-g hs -q 0.01 --fe-cutoff 2";
#MACS2EXTPARAM="-g hs --broad-cutoff 0.1 -p 0.05 --fe-cutoff 5"; # this is for broad peak calling
# For broad region calling, use 'broad'; for narrow peak calling, use
# 'narrow'.
MACS2MODE="narrow";
# Filtering criteria for SAPPER
# Minimum depth for SAPPER
MINDEPTH=20
# Minimum Genotype Quality Score cutoff for heterozygous loci
MINHETEROGQ=100
# Minimum Genotype Quality Score cutoff for homozygous loci
MINHOMOGQ=10
###################DO NOT EDIT CODES BELOW################
# check settings
if [ ${MACS2MODE} = "narrow" ];then
MACS2OUTPUTFILE="${FACTORNAME}_peaks.narrowPeak";
elif [ ${MACS2MODE} = "broad" ];then
MACS2OUTPUTFILE="${FACTORNAME}_peaks.broadPeak";
MACS2EXTPARAM="${MACS2EXTPARAM} --broad";
else
echo "Wrong MACS2MODE setting! Please choose from 'narrow' and 'broad'!";
exit;
fi
if [ ${MODE} = "PE" ];then
MACS2EXTPARAM="${MACS2EXTPARAM} -f BAMPE";
elif [ ${MODE} = "SE" ];then
MACS2EXTPARAM="${MACS2EXTPARAM} -f BAM";
else
echo "Wrong MODE setting! Please choose from 'PE' and 'SE'!";
exit;
fi
# step 1 filter
samtools view -q 30 -F 4 -F 256 -F 2048 -b ${CHIPBAM} -o ${CHIPBAM/.bam/_clean.bam} &
samtools view -q 30 -F 4 -F 256 -F 2048 -b ${CTRLBAM} -o ${CTRLBAM/.bam/_clean.bam} &
wait;
# step 2 sort
samtools sort ${CHIPBAM/.bam/_clean.bam} ${CHIPBAM/.bam/_clean_sorted} &
samtools sort ${CTRLBAM/.bam/_clean.bam} ${CTRLBAM/.bam/_clean_sorted} &
wait;
# step 3 peak calling
macs2 callpeak -t ${CHIPBAM/.bam/_clean_sorted.bam} -c ${CTRLBAM/.bam/_clean_sorted.bam} -n ${FACTORNAME} ${MACS2EXTPARAM} &
wait;
# step 4 sort peak file
sort -k1,1 -k2,2n ${MACS2OUTPUTFILE} > ${FACTORNAME}_peaks.sorted.bed &
wait;
# step 5 extract reads in peak regions towards each side for 150bps then merge:
awk -v OFS="\t" '{print $1,(($2-150)>0?($2-150):0),$3+150}' ${FACTORNAME}_peaks.sorted.bed | bedtools merge -i - > ${FACTORNAME}_extended_peaks.bed
samtools view -b ${CHIPBAM/.bam/_clean_sorted.bam} -L ${FACTORNAME}_extended_peaks.bed -o ${FACTORNAME}_CHIP_peaks.bam &
samtools view -b ${CTRLBAM/.bam/_clean_sorted.bam} -L ${FACTORNAME}_extended_peaks.bed -o ${FACTORNAME}_CTRL_peaks.bam &
wait;
# step 6 run SAPPER
SAPPER call -b ${FACTORNAME}_extended_peaks.bed -t ${FACTORNAME}_CHIP_peaks.bam -c ${FACTORNAME}_CTRL_peaks.bam -o ${FACTORNAME}_SAPPER.vcf &
wait;
# step 7 run SAPPER_filter
SAPPER filter -i ${FACTORNAME}_SAPPER.vcf -d ${MINDEPTH} -t homo -q ${MINHOMOGQ} -o ${FACTORNAME}_SAPPER_homo.vcf
SAPPER filter -i ${FACTORNAME}_SAPPER.vcf -d ${MINDEPTH} -t hetero -q ${MINHETEROGQ} -o ${FACTORNAME}_SAPPER_hetero.vcf
echo "Finished! Please check:"
echo " " ${FACTORNAME}_SAPPER.vcf "for all the predicted SNVs."
echo " " ${FACTORNAME}_SAPPER_hetero.vcf "for the filtered heterozygous SNVs above cutoff."
echo " " ${FACTORNAME}_SAPPER_homo.vcf "for the filtered homozygous SNVs above cutoff."
echo "Please use SAPPER_filter script to further filter the results."