Skip to content

Commit f76afd0

Browse files
v5.1.
1 parent 43bd6c1 commit f76afd0

File tree

12 files changed

+2669
-2602
lines changed

12 files changed

+2669
-2602
lines changed

CAT_pack/CAT

Lines changed: 33 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -12,43 +12,49 @@ import summarise
1212

1313

1414
def usage():
15-
message = ('usage: CAT (prepare | contigs | bin | bins | add_names | '
16-
'summarise) [-v / --version] [-h / --help]\n'
17-
'CAT: error: one of the arguments prepare contigs bin bins '
18-
'add_names summarise is required')
15+
message = (
16+
'usage: CAT (prepare | contigs | bin | bins | add_names | '
17+
'summarise) [-v / --version] [-h / --help]\n'
18+
'CAT: error: one of the arguments prepare contigs bin bins '
19+
'add_names summarise is required')
1920

2021
sys.stdout.write('{0}\n'.format(message))
22+
23+
return
2124

2225

2326
def version():
24-
message = ('CAT v{0} ({1}) by {2}.'
25-
''.format(about.__version__, about.__date__, about.__author__))
27+
message = ('CAT v{0} ({1}) by {2}.'.format(
28+
about.__version__, about.__date__, about.__author__))
2629

2730
sys.stdout.write('{0}\n'.format(message))
31+
32+
return
2833

2934

3035
def help():
31-
message = ('usage: CAT (prepare | contigs | bin | bins | add_names | '
32-
'summarise) [-v / --version] [-h / --help]\n\n'
33-
'Run Contig Annotation Tool (CAT) or '
34-
'Bin Annotation Tool (BAT).\n\n'
35-
'Required choice:\n'
36-
' prepare\t\tDownload database files and construct '
37-
'databases.\n'
38-
' contigs\t\tRun CAT.\n'
39-
' bin\t\t\tRun BAT on a single bin.\n'
40-
' bins\t\t\tRun BAT on a set of bins.\n'
41-
' add_names\t\tAdd taxonomic names to CAT or BAT output '
42-
'files.\n'
43-
' summarise\t\tSummarise a named CAT or BAT classification '
44-
'file.\n\n'
45-
'Optional arguments:\n'
46-
' -v, --version\t\tPrint version information and exit.\n'
47-
' -h, --help\t\tShow this help message and exit.')
36+
message = (
37+
'usage: CAT (prepare | contigs | bin | bins | add_names | '
38+
'summarise) [-v / --version] [-h / --help]\n\n'
39+
'Run Contig Annotation Tool (CAT) or '
40+
'Bin Annotation Tool (BAT).\n\n'
41+
'Required choice:\n'
42+
' prepare\t\tDownload database files and construct databases.\n'
43+
' contigs\t\tRun CAT.\n'
44+
' bin\t\t\tRun BAT on a single bin.\n'
45+
' bins\t\t\tRun BAT on a set of bins.\n'
46+
' add_names\t\tAdd taxonomic names to CAT or BAT output files.\n'
47+
' summarise\t\tSummarise a named CAT or BAT classification file.'
48+
'\n\n'
49+
'Optional arguments:\n'
50+
' -v, --version\t\tPrint version information and exit.\n'
51+
' -h, --help\t\tShow this help message and exit.')
4852

4953
sys.stdout.write('{0}\n'.format(message))
50-
51-
54+
55+
return
56+
57+
5258
def main():
5359
if len(sys.argv) == 1:
5460
usage()
@@ -71,6 +77,8 @@ def main():
7177
else:
7278
usage()
7379

80+
return
81+
7482

7583
if __name__ == '__main__':
7684
main()

CAT_pack/about.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python3
22

33
__author__ = 'F. A. Bastiaan von Meijenfeldt'
4-
__version__ = '5.0.5.1'
5-
__date__ = '18 June, 2020'
4+
__version__ = '5.1'
5+
__date__ = '22 June, 2020'

CAT_pack/add_names.py

Lines changed: 108 additions & 113 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python3
22

33
import argparse
4-
import os
54
import sys
65

76
import about
@@ -11,115 +10,121 @@
1110

1211

1312
def parse_arguments():
14-
parser = argparse.ArgumentParser(prog='CAT add_names',
15-
description='Add taxonomic names to CAT '
16-
'or BAT output files.',
17-
usage='CAT add_names -i -o -t '
18-
'[options] [-h / --help]',
19-
add_help=False)
13+
parser = argparse.ArgumentParser(
14+
prog='CAT add_names',
15+
description='Add taxonomic names to CAT or BAT output files.',
16+
usage='CAT add_names -i -o -t [options] [-h / --help]',
17+
add_help=False)
2018

2119
required = parser.add_argument_group('Required arguments')
2220

23-
required.add_argument('-i',
24-
'--input_file',
25-
dest='input_file',
26-
metavar='',
27-
required=True,
28-
type=str,
29-
help='Path to input file. Can be either '
30-
'classification output file or ORF2LCA output '
31-
'file.')
32-
required.add_argument('-o',
33-
'--output_file',
34-
dest='output_file',
35-
metavar='',
36-
required=True,
37-
type=str,
38-
help='Path to output file.')
39-
required.add_argument('-t',
40-
'--taxonomy_folder',
41-
dest='taxonomy_folder',
42-
metavar='',
43-
required=True,
44-
type=str,
45-
help='Path to folder that contains taxonomy files.')
21+
required.add_argument(
22+
'-i',
23+
'--input_file',
24+
dest='input_file',
25+
metavar='',
26+
required=True,
27+
type=str,
28+
action=shared.PathAction,
29+
help=('Path to input file. Can be either classification output '
30+
'file or ORF2LCA output file.'))
31+
required.add_argument(
32+
'-o',
33+
'--output_file',
34+
dest='output_file',
35+
metavar='',
36+
required=True,
37+
type=str,
38+
action=shared.PathAction,
39+
help='Path to output file.')
40+
required.add_argument(
41+
'-t',
42+
'--taxonomy_folder',
43+
dest='taxonomy_folder',
44+
metavar='',
45+
required=True,
46+
type=str,
47+
action=shared.PathAction,
48+
help='Path to folder that contains taxonomy files.')
4649

4750
optional = parser.add_argument_group('Optional arguments')
4851

49-
optional.add_argument('--only_official',
50-
dest='only_official',
51-
required=False,
52-
action='store_true',
53-
help='Only output official level names.')
54-
optional.add_argument('--exclude_scores',
55-
dest='exclude_scores',
56-
required=False,
57-
action='store_true',
58-
help='Do not include bit-score support scores in '
59-
'the lineage.')
60-
optional.add_argument('--force',
61-
dest='force',
62-
required=False,
63-
action='store_true',
64-
help='Force overwrite existing files.')
65-
optional.add_argument('-q',
66-
'--quiet',
67-
dest='quiet',
68-
required=False,
69-
action='store_true',
70-
help='Suppress verbosity.')
71-
optional.add_argument('-h',
72-
'--help',
73-
action='help',
74-
help='Show this help message and exit.')
52+
optional.add_argument(
53+
'--only_official',
54+
dest='only_official',
55+
required=False,
56+
action='store_true',
57+
help=('Only output official rank names (i.e., superkingdom, '
58+
'phylum, class, order, family, genus, species).'))
59+
optional.add_argument(
60+
'--exclude_scores',
61+
dest='exclude_scores',
62+
required=False,
63+
action='store_true',
64+
help=('Do not include bit-score support scores in the lineage of '
65+
'a classification output file.'))
66+
optional.add_argument(
67+
'--force',
68+
dest='force',
69+
required=False,
70+
action='store_true',
71+
help='Force overwrite existing files.')
72+
optional.add_argument(
73+
'-q',
74+
'--quiet',
75+
dest='quiet',
76+
required=False,
77+
action='store_true',
78+
help='Suppress verbosity.')
79+
optional.add_argument(
80+
'-h',
81+
'--help',
82+
action='help',
83+
help='Show this help message and exit.')
7584

7685
(args, extra_args) = parser.parse_known_args()
7786

7887
extra_args = [arg for (i, arg) in enumerate(extra_args) if
7988
(i, arg) != (0, 'add_names')]
8089
if len(extra_args) > 0:
81-
sys.exit('error: too much arguments supplied:\n{0}'
82-
''.format('\n'.join(extra_args)))
90+
sys.exit('error: too much arguments supplied:\n{0}'.format(
91+
'\n'.join(extra_args)))
92+
93+
# Add extra arguments.
94+
shared.expand_arguments(args)
8395

8496
return args
8597

8698

87-
def add_names(args):
88-
(input_file,
89-
output_file,
90-
taxonomy_folder,
91-
only_official,
92-
exclude_scores,
93-
force,
94-
quiet) = check.convert_arguments(args)
99+
def run():
100+
args = parse_arguments()
95101

96-
# Currently add_names does not allow for a log file.
97-
log_file = None
98-
99102
message = '# CAT v{0}.'.format(about.__version__)
100-
shared.give_user_feedback(message, log_file, quiet, show_time=False)
103+
shared.give_user_feedback(message, args.log_file, args.quiet,
104+
show_time=False)
101105

102106
errors = []
103107

104-
errors.append(check.check_input_file(input_file, log_file, quiet))
108+
errors.append(
109+
check.check_input_file(args.input_file, args.log_file, args.quiet))
105110

106-
if not force:
107-
errors.append(check.check_output_file(output_file, log_file, quiet))
111+
if not args.force:
112+
errors.append(
113+
check.check_output_file(
114+
args.output_file, args.log_file, args.quiet))
108115

109116
if True in errors:
110117
sys.exit(1)
111-
112-
(nodes_dmp,
113-
names_dmp,
114-
prot_accession2taxid_file) = check.inspect_taxonomy_folder(taxonomy_folder)
115118

116-
(taxid2parent, taxid2rank) = tax.import_nodes(nodes_dmp, log_file, quiet)
117-
taxid2name = tax.import_names(names_dmp, log_file, quiet)
119+
(taxid2parent,
120+
taxid2rank) = tax.import_nodes(
121+
args.nodes_dmp, args.log_file, args.quiet)
122+
taxid2name = tax.import_names(args.names_dmp, args.log_file, args.quiet)
118123

119124
message = 'Appending names...'
120-
shared.give_user_feedback(message, log_file, quiet)
125+
shared.give_user_feedback(message, args.log_file, args.quiet)
121126

122-
with open(input_file, 'r') as f1:
127+
with open(args.input_file, 'r') as f1:
123128
for line in f1:
124129
if line.startswith('#'):
125130
line = line.rstrip().split('\t')
@@ -128,11 +133,9 @@ def add_names(args):
128133
lineage_index = line.index('lineage')
129134
except:
130135
message = ('{0} is not a supported classification file.'
131-
''.format(input_file))
132-
shared.give_user_feedback(message,
133-
log_file,
134-
quiet,
135-
error=True)
136+
''.format(input_file))
137+
shared.give_user_feedback(
138+
message, args.log_file, args.quiet, error=True)
136139

137140
sys.exit(1)
138141

@@ -145,20 +148,20 @@ def add_names(args):
145148

146149
break
147150
else:
148-
message = ('{0} is not a supported classification file.'
149-
''.format(input_file))
151+
message = ('{0} is not a supported classification file.'.format(
152+
args.input_file))
150153
shared.give_user_feedback(message, log_file, quiet, error=True)
151154

152155
sys.exit(1)
153156

154-
with open(input_file, 'r') as f1, open(output_file, 'w') as outf1:
157+
with open(args.input_file, 'r') as f1, open(args.output_file, 'w') as outf1:
155158
for line in f1:
156159
line = line.rstrip()
157160

158161
if line.startswith('#'):
159-
if only_official:
162+
if args.only_official:
160163
outf1.write('{0}\tsuperkingdom\tphylum\tclass\torder\t'
161-
'family\tgenus\tspecies\n'.format(line))
164+
'family\tgenus\tspecies\n'.format(line))
162165
else:
163166
outf1.write('{0}\tfull lineage names\n'.format(line))
164167

@@ -173,7 +176,7 @@ def add_names(args):
173176
continue
174177

175178
if (line[1].startswith('no taxid found') or
176-
line[2].startswith('no taxid found')):
179+
line[2].startswith('no taxid found')):
177180
# ORF has database hits but the accession number is not found
178181
# in the taxonomy files.
179182
outf1.write('{0}\n'.format('\t'.join(line)))
@@ -182,34 +185,26 @@ def add_names(args):
182185

183186
lineage = line[lineage_index].split(';')
184187

185-
if scores_index and not exclude_scores:
188+
if scores_index and not args.exclude_scores:
186189
scores = line[scores_index].split(';')
187190
else:
188191
scores = None
189192

190-
if only_official:
191-
names = tax.convert_to_official_names(lineage,
192-
taxid2rank,
193-
taxid2name,
194-
scores)
193+
if args.only_official:
194+
names = tax.convert_to_official_names(
195+
lineage, taxid2rank, taxid2name, scores)
195196
else:
196-
names = tax.convert_to_names(lineage,
197-
taxid2rank,
198-
taxid2name,
199-
scores)
197+
names = tax.convert_to_names(
198+
lineage, taxid2rank, taxid2name, scores)
200199

201200
outf1.write('{0}\t{1}\n'.format('\t'.join(line), '\t'.join(names)))
202201

203-
message = 'Names written to {0}!'.format(output_file)
204-
shared.give_user_feedback(message, log_file, quiet)
205-
206-
207-
def run():
208-
args = parse_arguments()
202+
message = 'Names written to {0}!'.format(args.output_file)
203+
shared.give_user_feedback(message, args.log_file, args.quiet)
204+
205+
return
206+
209207

210-
add_names(args)
211-
212-
213208
if __name__ == '__main__':
214-
sys.exit('Please run \'CAT add_names\' to add taxonomic names to CAT or '
215-
'BAT output files.')
209+
sys.exit('Run \'CAT add_names\' to add taxonomic names to CAT or BAT '
210+
'output files.')

0 commit comments

Comments
 (0)