forked from Oshlack/superTranscript_paper_code
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbash_commands.txt
More file actions
121 lines (88 loc) · 5.15 KB
/
bash_commands.txt
File metadata and controls
121 lines (88 loc) · 5.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
########################################################################
############################# Author: Anthony Hawkins ##################
# Description: A collection of command line arguments, used to produce #
# various parts of the analysis. #
########################################################################
#########################################
# Running Lace ###############
#########################################
# Build superTranscripts with Lace.
#   Trinity.fasta            assembled transcripts from Trinity
#   D_inf-clusters-0.3.txt   transcript clustering from Corset
#   --cores 20               run on 20 cores
#   --alternate              also produce the transcript annotation
#   --outputDir              (optional) directory that receives the output
python Lace.py \
  Trinity.fasta \
  D_inf-clusters-0.3.txt \
  --cores 20 \
  --alternate \
  --outputDir ST_20120709_trinity_corset
#########################################
# Making dynamic blocks ###############
#########################################
# Merge the per-sample splice-junction tables written by STAR into a single
# splice-junction list.
# NOTE(review): the STAR loop at the end of this file uses an output prefix
# without a trailing dot, which would name the tables <sample>SJ.out.tab;
# confirm the inputs here really are named <sample>.SJ.out.tab.
merged_junctions=SJ.out.tab
cat *.SJ.out.tab > "$merged_junctions"
# Build the dynamic-blocks annotation from the merged splice junctions and the
# superTranscript sequences (SuperDuper.fasta).
python Mobius.py "$merged_junctions" SuperDuper.fasta
#########################################
# Running featureCounts ###############
#########################################
# Count fragments per feature with featureCounts.
#   -T 20        use 20 threads
#   -p           the input BAM files contain paired-end reads
#   -B           require both reads of a pair to be aligned
#   -O           allow reads which overlap multiple exons/blocks
#   --fraction   together with -O, give each overlapping feature a fractional
#                count instead of a full count
#   -a FILE      annotation file, here SuperDuper.gff. GTF (GFF-compatible) is
#                the default annotation format; a SAF file additionally needs
#                -F SAF. The genome annotation or the dynamic-blocks annotation
#                could be supplied instead.
#   -f           count at feature level (exon/blocks) rather than gene level
#   -o FILE      write the counts table to Counts/counts.txt
# Make sure the output directory exists before featureCounts writes into it.
mkdir -p Counts
featureCounts -T 20 -p -B -O --fraction -a SuperDuper.gff -f -o Counts/counts.txt *.out.bam
#########################################
# Running BLAT to find cluster gene #####
# correspondence for DTU analysis #####
#########################################
# Align HumanSuperDuper.fasta (query) against SuperDuper.fasta (database) and
# write the hits to output.psl, keeping alignments with at least 98% identity.
# BLAT documents its options with a single leading dash (-minIdentity=N), so
# the double-dash spelling is not used here.
blat SuperDuper.fasta HumanSuperDuper.fasta -minIdentity=98 output.psl
#########################################
# Running Salmon ######################
#########################################
# Create the Salmon transcript index (transcripts.idx) from the Trinity assembly.
salmon index -i transcripts.idx -t ../20120709_trinity/Trinity.fasta --type quasi -k 31
# Quantify transcript abundances per sample with Salmon, using the index
# created above. Samples are discovered by globbing for the read-1 files
# instead of parsing `ls` output, so no intermediate file list is written and
# paths are never word-split.
for read1 in /mnt/storage/shared/public_data/cuffdiff2_data/SR*_1.trimmed.fastq; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$read1" ] || continue
  # Sample path with the "_1.trimmed.fastq" suffix removed.
  sample=${read1%_1.trimmed.fastq}
  echo "Performing Salmon quant on file $sample"
  # Output directory: the sample name without its leading directories.
  outname=${sample##*/}
  salmon quant -i transcripts.idx -p 12 -l A \
    -1 "${sample}_1.trimmed.fastq" -2 "${sample}_2.trimmed.fastq" \
    -o "$outname"
done
#########################################
# Running Kallisto ######################
#########################################
# Create the kallisto transcript index (transcripts.idx) from the Trinity assembly.
# NOTE(review): the Salmon section of this file also writes to transcripts.idx;
# run the two sections in different working directories or rename one index.
kallisto index -i transcripts.idx ../20120709_trinity/Trinity.fasta
# Quantify transcript abundances per sample with kallisto, using the index
# created above (-t 8 threads, -b 100 bootstrap samples). Samples are
# discovered by globbing for the read-1 files instead of parsing `ls` output,
# so no intermediate file list is written and paths are never word-split.
for read1 in /mnt/storage/shared/public_data/cuffdiff2_data/SR*_1.trimmed.fastq; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$read1" ] || continue
  # Sample path with the "_1.trimmed.fastq" suffix removed.
  sample=${read1%_1.trimmed.fastq}
  echo "Performing kallisto quant on file $sample"
  # Output directory: the sample name without its leading directories.
  outname=${sample##*/}
  kallisto quant -i transcripts.idx -t 8 -o "$outname" -b 100 \
    "${sample}_1.trimmed.fastq" "${sample}_2.trimmed.fastq"
done
##########################################
# Running STAR ##########################
##########################################
# Make the STAR index for the superTranscripts (SuperDuper.fasta).
# The --genomeDir directory has to exist before genomeGenerate is run.
mkdir -p STARIndex
STAR --runMode genomeGenerate --runThreadN 16 --genomeDir STARIndex --genomeFastaFiles SuperDuper.fasta --limitGenomeGenerateRAM 75070378709
# Locations used by the two-pass mapping below.
# NOTE(review): these are relative paths and they are used after the `cd`
# below, so they resolve inside the read directory -- confirm that is where
# ST_20120709_trinity_corset lives. Also note the index above is written to
# ./STARIndex, while GenomeDir points at ST_20120709_trinity_corset/STARIndex.
firstpass=ST_20120709_trinity_corset/1pass
secondpass=ST_20120709_trinity_corset/2pass
GenomeDir=ST_20120709_trinity_corset/STARIndex
outdir=ST_20120709_trinity_corset/
GenomeFasta=ST_20120709_trinity_corset/SuperDuper.fasta
# GenomeAnno is not referenced by any command in this section.
GenomeAnno=ST_20120709_trinity_corset/SuperDuper.gtf
# First pass mapping: all samples are aligned together to collect junctions.
# Stop here if the read directory is missing, rather than running everything
# that follows in the wrong directory.
cd /group/bioi1/shared/public_data/cuffdiff2_data/ || exit 1
# STAR writes into these directories but they must already exist.
mkdir -p "$firstpass" "$secondpass"
STAR --genomeDir "$GenomeDir" \
  --readFilesIn SRR493366_1.trimmed.fastq,SRR493367_1.trimmed.fastq,SRR493368_1.trimmed.fastq,SRR493369_1.trimmed.fastq,SRR493370_1.trimmed.fastq,SRR493371_1.trimmed.fastq SRR493366_2.trimmed.fastq,SRR493367_2.trimmed.fastq,SRR493368_2.trimmed.fastq,SRR493369_2.trimmed.fastq,SRR493370_2.trimmed.fastq,SRR493371_2.trimmed.fastq \
  --outFileNamePrefix "$firstpass/SRR" \
  --outSAMtype BAM Unsorted --runThreadN 24
# Generate a new genome index that includes the junctions found in the 1st
# pass (the "SRR" prefix above yields $firstpass/SRRSJ.out.tab).
STAR --genomeDir "$secondpass" --runMode genomeGenerate --genomeFastaFiles "$GenomeFasta" --sjdbFileChrStartEnd "$firstpass/SRRSJ.out.tab" --sjdbOverhang 100 --runThreadN 20 --limitGenomeGenerateRAM 75070378709
# Run 2nd pass mapping, one sample at a time, against the junction-aware index.
# Samples are discovered by globbing for the read-1 files instead of parsing
# `ls` output, so no intermediate file list is needed.
for read1 in *_1.trimmed.fastq; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$read1" ] || continue
  # Sample name with the "_1.trimmed.fastq" suffix removed.
  sample=${read1%_1.trimmed.fastq}
  echo "Performing alignment on file $sample"
  # outdir already ends in "/", so strip it before joining to avoid "//".
  STAR --genomeDir "$secondpass" \
    --readFilesIn "${sample}_1.trimmed.fastq" "${sample}_2.trimmed.fastq" \
    --outFileNamePrefix "${outdir%/}/${sample}" \
    --outSAMtype BAM Unsorted --runThreadN 20 --outSJfilterOverhangMin 12 12 12 12
done