Skip to content

Commit faa7dd6

Browse files
Jon Palmer authored and committed
update to v0.1.9
1 parent 6e454e6 commit faa7dd6

File tree

4 files changed, with 73 additions and 18 deletions.

bin/funannotate-compare.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,7 @@ def __init__(self,prog):
425425
base = f.replace('.txt', '')
426426
goa_out = os.path.join(args.out, 'go_enrichment', base+'.go.enrichment.txt')
427427
with open(goa_out, 'w') as output:
428-
subprocess.call(['find_enrichment.py', '--obo', os.path.join(parentdir, 'DB', 'go.obo'), '--pval', '0.001', '--alpha', '0.001', '--fdr', file, os.path.join(go_folder, 'population.txt'), os.path.join(go_folder, 'associations.txt')], stderr=FNULL, stdout=output)
428+
subprocess.call(['find_enrichment.py', '--obo', os.path.join(parentdir, 'DB', 'go.obo'), '--pval', '0.001', '--alpha', '0.001', '--method', 'fdr', file, os.path.join(go_folder, 'population.txt'), os.path.join(go_folder, 'associations.txt')], stderr=FNULL, stdout=output)
429429

430430
#load into pandas and write to html
431431
with open(os.path.join(args.out, 'go.html'), 'w') as output:

bin/funannotate-predict.py

+64-9
Original file line number | Diff line number | Diff line change
@@ -170,12 +170,14 @@ def __init__(self,prog):
170170
RepeatMasker = os.path.abspath(RepeatMasker)
171171
MaskGenome = os.path.abspath(MaskGenome)
172172

173+
#final output for augustus hints
174+
hints_all = os.path.join(args.out, 'predict_misc', 'hints.PE.gff')
175+
173176
#check for masked genome here
174177
if not os.path.isfile(MaskGenome) or lib.getSize(MaskGenome) < 10:
175178
lib.log.error("RepeatMasking failed, check log files.")
176179
os._exit(1)
177180

178-
179181
#if maker_gff passed, use that info and move directly to EVM
180182
if args.maker_gff:
181183
lib.log.info("Maker2 GFF passed, parsing results and proceeding directly to EVidence Modeler")
@@ -228,6 +230,26 @@ def __init__(self,prog):
228230
if not os.path.isfile(trans_out):
229231
lib.runGMAP(trans_temp, MaskGenome, args.cpus, args.max_intronlen, os.path.join(args.out, 'predict_misc'), trans_out)
230232
Transcripts = os.path.abspath(trans_out)
233+
#now run BLAT for Augustus hints
234+
blat_out = os.path.join(args.out, 'predict_misc', 'blat.psl')
235+
blat_filt = os.path.join(args.out, 'predict_misc', 'blat.filt.psl')
236+
blat_sort1 = os.path.join(args.out, 'predict_misc', 'blat.sort.tmp.psl')
237+
blat_sort2 = os.path.join(args.out, 'predict_misc', 'blat.sort.psl')
238+
hintsE = os.path.join(args.out, 'predict_misc', 'hints.E.gff')
239+
maxINT = '-maxIntron='+str(args.max_intronlen)
240+
lib.log.info("Aligning transcript evidence to genome with BLAT")
241+
if not os.path.isfile(hints_all):
242+
subprocess.call(['blat', '-noHead', '-minIdentity=80', maxINT, MaskGenome, trans_temp, blat_out], stdout=FNULL, stderr=FNULL)
243+
subprocess.call(['pslCDnaFilter', '-minId=0.9', '-localNearBest=0.005', '-ignoreNs', '-bestOverlap', blat_out, blat_filt], stdout=FNULL, stderr=FNULL)
244+
with open(blat_sort1, 'w') as output:
245+
subprocess.call(['sort', '-n', '-k', '16,16', blat_filt], stdout=output, stderr=FNULL)
246+
with open(blat_sort2, 'w') as output:
247+
subprocess.call(['sort', '-s', '-k', '14,14', blat_sort1], stdout=output, stderr=FNULL)
248+
#run blat2hints
249+
blat2hints = os.path.join(AUGUSTUS_BASE, 'scripts', 'blat2hints.pl')
250+
b2h_input = '--in='+blat_sort2
251+
b2h_output = '--out='+hintsE
252+
subprocess.call([blat2hints, b2h_input, b2h_output, '--minintronlen=20', '--trunkSS'], stdout=FNULL, stderr=FNULL)
231253
else:
232254
Transcripts = False
233255
else:
@@ -254,6 +276,14 @@ def __init__(self,prog):
254276
if not os.path.isfile(p2g_out):
255277
subprocess.call(p2g_cmd)
256278
exonerate_out = os.path.abspath(p2g_out)
279+
#now run exonerate2 hints for Augustus
280+
exonerate2hints = os.path.join(AUGUSTUS_BASE, 'scripts', 'exonerate2hints.pl')
281+
hintsP = os.path.join(args.out, 'predict_misc', 'hints.P.gff')
282+
e2h_in = '--in='+p2g_out
283+
e2h_out = '--out='+hintsP
284+
e2h_minINT = '--minintronlen='+str(args.min_intronlen)
285+
e2h_maxINT = '--maxintronlen='+str(args.max_intronlen)
286+
subprocess.call([exonerate2hints, e2h_in, e2h_out, e2h_minINT, e2h_maxINT], stdout=FNULL, stderr=FNULL)
257287
else:
258288
exonerate_out = False
259289
else:
@@ -265,7 +295,20 @@ def __init__(self,prog):
265295
with open(Exonerate, 'w') as output:
266296
subprocess.call([ExoConverter, exonerate_out], stdout = output, stderr = FNULL)
267297
Exonerate = os.path.abspath(Exonerate)
268-
298+
299+
#combine hints for Augustus
300+
if os.path.isfile(hintsP) or os.path.isfile(hintsE):
301+
with open(hints_all, 'a') as out:
302+
if os.path.isfile(hintsP):
303+
with open(hintsP) as input:
304+
out.write(input.read())
305+
if os.path.isfile(hintsE):
306+
with open(hintsE) as input2:
307+
out.write(input2.read())
308+
#setup hints and extrinsic input
309+
hints_input = '--hintsfile='+hints_all
310+
extrinsic = '--extrinsicCfgFile='+os.path.join(AUGUSTUS_BASE, 'config', 'extrinsic', 'extrinsic.E.XNT.cfg')
311+
269312
Augustus = ''
270313
GeneMark = ''
271314

@@ -345,7 +388,10 @@ def __init__(self,prog):
345388
lib.log.info("Running Augustus gene prediction")
346389
if not os.path.isfile(aug_out):
347390
with open(aug_out, 'w') as output:
348-
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
391+
if os.path.isfile(hints_all):
392+
subprocess.call(['augustus', species, hints_input, extrinsic, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
393+
else:
394+
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
349395
Augustus = os.path.join(args.out, 'predict_misc', 'augustus.evm.gff3')
350396
with open(Augustus, 'w') as output:
351397
subprocess.call(['perl', Converter, aug_out], stdout = output, stderr = FNULL)
@@ -363,7 +409,10 @@ def __init__(self,prog):
363409
else:
364410
subprocess.call([AutoAug, '--noutr', '--singleCPU', cDNA, species, genome, training], stdout=logfile, stderr=logfile)
365411
with open(aug_out, 'w') as output:
366-
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
412+
if os.path.isfile(hints_all):
413+
subprocess.call(['augustus', species, hints_input, extrinsic, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
414+
else:
415+
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
367416
Augustus = os.path.join(args.out, 'predict_misc', 'augustus.evm.gff3')
368417
with open(Augustus, 'w') as output:
369418
subprocess.call(['perl', Converter, aug_out], stdout = output, stderr = FNULL)
@@ -406,7 +455,10 @@ def __init__(self,prog):
406455
lib.log.info("Running Augustus gene prediction")
407456
if not os.path.isfile(aug_out):
408457
with open(aug_out, 'w') as output:
409-
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
458+
if os.path.isfile(hints_all):
459+
subprocess.call(['augustus', species, hints_input, extrinsic, '--gff3=on', MaskGenome], stdout = output, stderr= FNULL)
460+
else:
461+
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
410462
Augustus = os.path.join(args.out, 'predict_misc', 'augustus.evm.gff3')
411463
with open(Augustus, 'w') as output:
412464
subprocess.call(['perl', Converter, aug_out], stdout = output, stderr = FNULL)
@@ -428,7 +480,10 @@ def __init__(self,prog):
428480
lib.log.info("BUSCO mediated Augustus training is complete, now running Augustus on whole genome.")
429481
if not os.path.isfile(aug_out):
430482
with open(aug_out, 'w') as output:
431-
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
483+
if os.path.isfile(hints_all):
484+
subprocess.call(['augustus', species, hints_input, extrinsic, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
485+
else:
486+
subprocess.call(['augustus', species, '--gff3=on', MaskGenome], stdout = output, stderr = FNULL)
432487
Augustus = os.path.join(args.out, 'predict_misc', 'augustus.evm.gff3')
433488
with open(Augustus, 'w') as output:
434489
subprocess.call(['perl', Converter, aug_out], stdout = output, stderr = FNULL)
@@ -444,7 +499,7 @@ def __init__(self,prog):
444499
gmc = 1
445500
if GM_check < 3:
446501
gmc = 0
447-
lib.log.error("GeneMark predictions failed, proceeding with just Augustus")
502+
lib.log.error("GeneMark predictions failed, proceeding with only Augustus")
448503

449504
#EVM related input tasks, find all predictions and concatenate together
450505
if args.pasa_gff:
@@ -457,7 +512,7 @@ def __init__(self,prog):
457512
with open(f) as input:
458513
output.write(input.read())
459514

460-
#set Weights file dependent on which data is present. I have yet to find an example of where Augustus outperforms GeneMark for fungi, hence the weightings are tilted towards genemark
515+
#set Weights file dependent on which data is present. I have yet to find an example of where Augustus outperforms GeneMark for fungi, but i don't have too much evidence to think that genemark is perfect either....
461516
Weights = os.path.join(args.out, 'predict_misc', 'weights.evm.txt')
462517
with open(Weights, 'w') as output:
463518
if args.pasa_gff:
@@ -466,7 +521,7 @@ def __init__(self,prog):
466521
output.write("ABINITIO_PREDICTION\tGeneMark\t1\n")
467522
else:
468523
output.write("ABINITIO_PREDICTION\tAugustus\t1\n")
469-
output.write("ABINITIO_PREDICTION\tGeneMark\t2\n")
524+
output.write("ABINITIO_PREDICTION\tGeneMark\t1\n")
470525
if exonerate_out:
471526
output.write("PROTEIN\texonerate\t1\n")
472527
if Transcripts:

funannotate.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
3131
for i in range(0,num_lines))
3232
return "\n".join(lines)
3333

34-
version = '0.1.8'
34+
version = '0.1.9'
3535

3636
default_help = """
3737
Usage: funannotate <command> <arguments>

setup.sh

+7-7
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ if [ "$db" = 'pass' ]; then
117117
makeblastdb -in uniprot_sprot.fasta -input_type fasta -dbtype prot -title uniprot -parse_seqids -out uniprot
118118
echo "-----------------------------------------------"
119119
else
120-
echo "UniProt DB found, skipping download"
120+
echo "UniProt DB found, skipping download. To update delete uniprot_sprot.fasta."
121121
echo "-----------------------------------------------"
122122
fi
123123

@@ -129,7 +129,7 @@ if [ "$db" = 'pass' ]; then
129129
hmmpress Pfam-A.hmm
130130
echo "-----------------------------------------------"
131131
else
132-
echo "Pfam-A DB found, skipping download"
132+
echo "Pfam-A DB found, skipping download. To update delete Pfam-A.hmm"
133133
echo "-----------------------------------------------"
134134
fi
135135

@@ -140,7 +140,7 @@ if [ "$db" = 'pass' ]; then
140140
gunzip Pfam-A.clans.tsv.gz
141141
echo "-----------------------------------------------"
142142
else
143-
echo "PFAM mapping found, skipping download"
143+
echo "PFAM mapping found, skipping download. To update delete Pfam-A.clans.tsv"
144144
echo "-----------------------------------------------"
145145
fi
146146

@@ -212,7 +212,7 @@ if [ "$db" = 'pass' ]; then
212212
wget -c --tries=0 --read-timeout=20 http://geneontology.org/ontology/go.obo
213213
echo "-----------------------------------------------"
214214
else
215-
echo "Gene Ontology already exists, if you want to update it, delete go.obo first and re-run setup.sh script"
215+
echo "Gene Ontology already exists, skipping download. To update delete go.obo."
216216
echo "-----------------------------------------------"
217217
fi
218218

@@ -235,7 +235,7 @@ if [ "$db" = 'pass' ]; then
235235
gunzip interpro.xml.gz
236236
echo "-----------------------------------------------"
237237
else
238-
echo "InterPro mapping file already exists, skipping download"
238+
echo "InterPro mapping file already exists, skipping download. To update delete interpro.xml"
239239
echo "-----------------------------------------------"
240240
fi
241241
fi
@@ -286,13 +286,13 @@ if [ "$dep" = 'pass' ]; then
286286

287287
if ! [ "$EVM_HOME" ]; then
288288
echo -e "${RED}ERROR:${NC} EVM_HOME variable has not been set
289-
example: export EVM_HOME=/usr/local/Cellar/evidencemodeler/1.1.2"
289+
example: export EVM_HOME=/usr/local/opt/evidencemodeler"
290290
check='fail'
291291
fi
292292

293293
if ! [ "$AUGUSTUS_CONFIG_PATH" ]; then
294294
echo -e "${RED}ERROR:${NC} AUGUSTUS_CONFIG_PATH variable has not been set
295-
example: export AUGUSTUS_CONFIG_PATH=/usr/local/Cellar/opt/augustus/config/"
295+
example: export AUGUSTUS_CONFIG_PATH=/usr/local/opt/augustus/libexec/config"
296296
check='fail'
297297
fi
298298

0 commit comments

Comments
 (0)