-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter.blast.py
More file actions
executable file
·58 lines (50 loc) · 1.88 KB
/
filter.blast.py
File metadata and controls
executable file
·58 lines (50 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
import os
import sys
from argparse import ArgumentParser
def parseArgs(argv=None):
    """Define the command-line interface and parse the arguments.

    Args:
        argv: optional list of argument strings to parse. When ``None``
            (the default) the process command line is used, exactly as
            before, so existing ``parseArgs()`` callers are unaffected.

    Returns:
        An ``argparse.Namespace`` with the attributes ``infile`` (str),
        ``column`` (1 or 2), ``outfile`` (str, or ``None`` for stdout)
        and ``bitscore`` (float minimum bit score).
    """
    # add_help=False so that -h/--help can be listed under the 'Optional'
    # group below instead of argparse's default group.
    parser = ArgumentParser(
        description='Filters BLAST output format 6 '
        '(tab-delimited) for best hit based on bitscore. '
        'Handles additional data columns following bitscore.',
        add_help=False)

    req = parser.add_argument_group('Required')
    req.add_argument('-i', '--infile', required=True, metavar='FILE',
        help='input file in NCBI\'s BLAST -outfmt 6 format')

    opt = parser.add_argument_group('Optional')
    opt.add_argument('-h', '--help', action='help',
        help='show this help message and exit')
    opt.add_argument('-c', '--column', type=int, metavar='{1,2}',
        choices=[1, 2], default=1, help='report best hit per query label '
        '(1st column; \'1\') or target (2nd column; \'2\') [1]')
    opt.add_argument('-o', '--outfile', metavar='FILE',
        default=None, help='output file [stdout]')
    opt.add_argument('-s', '--bitscore', type=float, metavar='FLOAT',
        default=0.0, help='minimum alignment Bit score [0.0]')

    # parse_args(None) falls back to sys.argv[1:].
    return parser.parse_args(argv)
# Zero-based index of the bitscore field: the 12th column of a tabular
# BLAST row, which is the field the filter compares.
_BITSCORE_COL = 11


def _best_hits(lines, key_index, min_bitscore):
    """Return the highest-bitscore row for every label seen in *lines*.

    Args:
        lines: iterable of tab-delimited text lines (e.g. an open file).
        key_index: zero-based column whose value groups the rows.
        min_bitscore: rows scoring below this value are ignored.

    Returns:
        dict mapping each label to a ``(bitscore, fields)`` tuple, where
        ``fields`` is the row split on tabs. On ties the first row
        encountered is kept.

    Raises:
        IndexError: a non-blank row has fewer than 12 columns.
        ValueError: the bitscore field is not a number.
    """
    best = {}
    for ln in lines:
        row = ln.rstrip('\n')
        if not row:
            # Blank lines (e.g. a trailing newline) carry no hit.
            continue
        data = row.split('\t')
        score = float(data[_BITSCORE_COL])
        label = data[key_index]
        kept = best.get(label)
        # '>=' is written positively so a NaN score is rejected; strict '>'
        # against the kept score preserves first-seen-wins on ties.
        if score >= min_bitscore and (kept is None or score > kept[0]):
            best[label] = (score, data)
    return best


def main():
    """Filter a tabular BLAST file down to the best hit per label.

    Reads the options from the command line, keeps for each query (or
    target) label the row with the highest bitscore that meets the
    minimum, and writes the surviving rows, sorted, to the output file
    or to stdout. An empty input yields empty output.
    """
    opts = parseArgs()
    infile = os.path.abspath(os.path.expanduser(opts.infile))

    # Single pass; the file does not need to be sorted by label.
    with open(infile, 'r') as ifh:
        best = _best_hits(ifh, opts.column - 1, opts.bitscore)

    # Rows are ordered by plain comparison of their string fields.
    rows = sorted(data for _, data in best.values())
    out_lines = ['\t'.join(data) + '\n' for data in rows]

    # Write output
    if opts.outfile is None:
        sys.stdout.writelines(out_lines)
    else:
        outfile = os.path.abspath(os.path.expanduser(opts.outfile))
        # The context manager guarantees the file is flushed and closed.
        with open(outfile, 'w') as ofh:
            ofh.writelines(out_lines)
# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()