-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcalc.percent.core.genome.SNPs.bash
More file actions
executable file
·46 lines (38 loc) · 1.12 KB
/
calc.percent.core.genome.SNPs.bash
File metadata and controls
executable file
·46 lines (38 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
function usage {
echo "
Usage: `basename $0` -c input_coreSNPs.fasta -r input_ref.fasta
Given a SNPs FastA file and the corresponding reference
FastA file, reports the number of SNP sites per sample, the
total sites in the reference file, and fraction of reference
sites that are SNPs.
"
}
nopts=$#
for ((i=1 ; i <= nopts ; i++)); do
case $1 in
-c | --core) # FastA format
CORESNPS=$2
echo " core SNP file: $2"
shift 2
;;
-r | --ref) # FastA format
REF=$2
echo " ref genome file: $2"
shift 2
;;
-h | --help)
usage
exit 1
;;
esac
done
[[ -z $CORESNPS || -z $REF ]] && { usage; exit 1; }
REF_GENOME=$(grep -v '^>' $REF | tr -d [:space:] | wc -c)
echo " found $REF_GENOME sites in the reference file"
TOT_SITES=$(grep -v '^>' $CORESNPS | tr -d [:space:] | wc -c)
TOT_SMPL=$(grep -c '^>' $CORESNPS)
SITES_PER_SMPL=$((TOT_SITES / TOT_SMPL))
echo " found $SITES_PER_SMPL sites in each sample in $CORESNPS"
CORE_PER_REF=$(echo "scale=4;($SITES_PER_SMPL/$REF_GENOME)*100" | bc)
echo -e "\n ${CORE_PER_REF}% of the reference file was identified as core sites\n"