-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtabulateHHpred.py
More file actions
executable file
·111 lines (83 loc) · 3.31 KB
/
tabulateHHpred.py
File metadata and controls
executable file
·111 lines (83 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# This script takes the overly verbose HHpred output
# and converts it into a tabulated summary of the top hit.
import os
import subprocess
import sys
import argparse
import traceback
import warnings
import pandas as pd
from io import StringIO
# Ruler and header row modelled on HHpred's verbose hit table.
# Only the ruler (the second line of the string) is used by hhparse(): the
# positions of its '|' characters mark the right-hand edge of each
# fixed-width column, so the spacing of that line must not be altered.
template = \
u"""
---|------|------------------------|----|-------|-------|------|-----|----|---------|--------------|
No Hit Short Desc Prob E-value P-value Score SS Cols Query HMM Template HMM
"""
def hhparse(hhresult_file, verbose):
    '''Read the hit table of an HHpred result file and return its first row.

    The column widths are derived from the ruler line of the module-level
    ``template``: each '|' marks where one fixed-width column ends. The file
    is read with ``pandas.read_fwf``, skipping the first eight lines, using
    the next line as the header and reading at most ten rows.

    Parameters:
        hhresult_file: path (or file-like object) of the HHpred output.
        verbose: when exactly ``True``, print the parsed table and a short
            summary of the best hit.

    Returns:
        A tuple ``(top_hit, top_hit_full, top_prob, top_eval, top_pval,
        top_score)`` taken from row 0 of the table, where ``top_hit`` is the
        first four characters of the 'Hit' value as a string.
    '''
    # Line 0 of the template is empty (the string starts with a newline);
    # line 1 is the ruler whose '|' characters delimit the columns.
    ruler = template.splitlines()[1]
    pipe_positions = [pos for pos, char in enumerate(ruler) if char == '|']
    left_edges = [0] + pipe_positions[:-1]
    widths = [right - left for left, right in zip(left_edges, pipe_positions)]

    hhtable = pd.read_fwf(
        hhresult_file,
        skiprows=8,
        nrows=10,
        header=0,
        widths=widths,
    )
    if verbose is True:
        print(hhtable)

    # Row label 0 is the first (best) hit in the table.
    top_hit_full = hhtable.loc[0, 'Hit']
    top_hit = str(top_hit_full)[0:4]
    top_prob = hhtable.loc[0, 'Prob']
    top_eval = hhtable.loc[0, 'E-value']
    top_pval = hhtable.loc[0, 'P-value']
    top_score = hhtable.loc[0, 'Score']

    if verbose is True:
        print("Your best hit: (PDB ID | Probability | E-Value | P-Value | Score)")
        summary = (top_hit, top_prob, top_eval, top_pval, top_score)
        print("\t" + "\t".join(str(value) for value in summary))

    return top_hit, top_hit_full, top_prob, top_eval, top_pval, top_score
def getFullDesc(hhresult_file, top_hit_full, verbose):
    '''Return the description line of the top hit from an HHpred result file.

    The file is scanned for the first line that starts with '>' followed by
    ``top_hit_full``. Returning the first match matters because the prefix
    test would also match any later hit whose identifier merely begins with
    the same characters.

    Parameters:
        hhresult_file: path of the HHpred output file.
        top_hit_full: identifier of the top hit as read from the 'Hit'
            column; it is converted to ``str`` before matching.
        verbose: when truthy, print the matching line.

    Returns:
        The matching line exactly as read, including its trailing newline.

    Raises:
        ValueError: if no line in the file starts with '>' + top_hit_full.
    '''
    header_prefix = '>' + str(top_hit_full)
    with open(hhresult_file, 'r') as hrh:
        for line in hrh:
            if line.startswith(header_prefix):
                if verbose:
                    print(line)
                return line
    raise ValueError(
        "No description line starting with '%s' found in %s"
        % (header_prefix, hhresult_file))
def main():
    '''Command-line entry point: tabulate the top hit of an HHpred output.

    Parses the command-line options, extracts the best hit with hhparse()
    and its description line with getFullDesc(), then writes one
    tab-separated row to the output file and prints the same row to stdout.

    Options:
        -i/--infile   HHpred output file to parse (required).
        -v/--verbose  print additional messages.
        -o/--outfile  output file; defaults to the input path with its
                      extension replaced by '.tsv'.

    argparse reports invalid options itself and exits, so no exception
    handling is wrapped around the parsing step.
    '''
    parser = argparse.ArgumentParser(description='This script converts HHpreds verbose output in to a full table for subsequent analysis.')
    parser.add_argument(
        '-i',
        '--infile',
        action='store',
        required=True,
        help='The HHpred output file to parse.')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Print additional messages to screen. Default behaviour is false, only the result would be printed to screen for piping etc.')
    parser.add_argument(
        '-o',
        '--outfile',
        action='store',
        default=None,
        help='Output file name to store results in. If none provided, the default will be infile.tsv.')
    args = parser.parse_args()

    verbose = args.verbose
    hhresult_file = args.infile
    indir = os.path.dirname(hhresult_file)
    basename = os.path.basename(os.path.splitext(hhresult_file)[0])

    if args.outfile is None:
        # os.path.join keeps the path relative when indir is empty, instead
        # of producing '/<basename>.tsv' in the filesystem root.
        outfile = os.path.join(indir, basename + '.tsv')
    else:
        outfile = args.outfile

    # Main code begins:
    top_hit, top_hit_full, top_prob, top_eval, top_pval, top_score = hhparse(hhresult_file, verbose)
    full_desc = getFullDesc(hhresult_file, top_hit_full, verbose)

    # Build the row once; full_desc is the last field and is appended as
    # returned by getFullDesc().
    fields = [basename, top_hit, top_hit_full, top_prob, top_eval, top_pval, top_score]
    row = "\t".join(str(field) for field in fields) + "\t" + full_desc

    with open(outfile, 'w') as ofh:
        ofh.write(row)
    print(row)
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()