-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpolish_raw_medaka.sh
More file actions
executable file
·82 lines (75 loc) · 2.32 KB
/
polish_raw_medaka.sh
File metadata and controls
executable file
·82 lines (75 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env bash
###################
# HELP is triggered with no arguments, or with -h or --help
if [ "$1" == "-h" ] || [ "$1" == "--help" ] || [ -z $1 ]
then
echo "Argument 1: Input folder path containing clean FASTQ files"
echo "Argument 2: Input folder path containing Canu de novo assemblies"
echo "Argument 3: Output folder, destination for polished assemblies"
echo ""
echo "Usage example:"
echo "polish_raw_medaka.sh 02_bbduk/run001 03_canu/run001 04_medaka/run001"
exit 0
fi
###################
# Exit if Input directories don't exist
if [ ! -d $1 ]
then
echo "Input directory not found, verify the path "$1
exit 0
fi
if [ ! -d $2 ]
then
echo "Input directory not found, verify the path "$2
exit 0
fi
###################
# Create OUTPUT directory if it doesn't exist
mkdir -p $3
###################
# Set variables from command arguments
FASTQ_DIR=(`realpath $1`)
CANU_DIR=(`realpath $2`)
OUTPUT=(`realpath $3`)
###################
# Run Medaka on previously created BAM, or map reads first if BAM was not created
echo ""
echo ""
echo "Polishing de novo assemblies for sequencing run "$CANU_DIR" ..."
echo ""
cd $CANU_DIR
for ASM in *_canu
do
SAMPLE=${ASM//_canu/}
echo "-> Verifying sample "$SAMPLE" ..."
ASSEMBLY=$CANU_DIR"/"$ASM"/"$SAMPLE".contigs.fasta"
BAM=$CANU_DIR"/"$ASM"/"$SAMPLE".bam"
if [ ! -f $BAM ]
then
FASTQ=$FASTQ_DIR"/"$SAMPLE".fastq.gz"
echo "-> Mapping reads to assembly "$ASSEMBLY" ..."
samtools faidx $ASSEMBLY
minimap2 -x map-ont -a --MD --secondary=no \
$ASSEMBLY $FASTQ > $CANU_DIR"/"$ASM"/"$SAMPLE".sam"
samtools view -u $CANU_DIR"/"$ASM"/"$SAMPLE".sam" | \
samtools sort -@4 -o $CANU_DIR"/"$ASM"/"$SAMPLE".bam"
samtools index $CANU_DIR"/"$ASM"/"$SAMPLE".bam"
rm $CANU_DIR"/"$ASM"/"$SAMPLE".sam"
echo "-> Mapping completed for sample "$SAMPLE
fi
echo medaka consensus \
$BAM $OUTPUT"/"$SAMPLE".hdf" \
--model r941_min_sup_g507 \
--threads 2 "&>"$OUTPUT"/"$SAMPLE"_medaka.log &&" \
medaka stitch $OUTPUT"/"$SAMPLE".hdf" $ASSEMBLY $OUTPUT"/"$SAMPLE"_medaka.fasta" \
"&>>"$OUTPUT"/"$SAMPLE"_medaka.log" \
>> $OUTPUT"/polish_raw_medaka.params"
done
echo "-> Polishing assemblies in parallel ..."
parallel -j 12 -a $OUTPUT"/polish_raw_medaka.params"
rm $OUTPUT"/"*.hdf $OUTPUT"/"*.bed
echo ""
echo "Assembly polishing completed for sequencing run "$CANU_DIR" ..."
echo ""
echo ""
echo ""