11#!/usr/bin/env python3
22
33import argparse
4- import os
54import sys
65
76import about
1110
1211
def parse_arguments():
    """Build the ``CAT add_names`` command-line parser and parse the arguments.

    Three options are required (``-i/--input_file``, ``-o/--output_file``,
    ``-t/--taxonomy_folder``); the flags ``--only_official``,
    ``--exclude_scores``, ``--force`` and ``-q/--quiet`` are optional
    switches that default to False.

    Any unrecognised argument aborts the program via ``sys.exit`` with an
    error message, except for a leading ``add_names`` token, which is
    tolerated.

    Returns:
        argparse.Namespace: the parsed arguments, after being passed through
        ``shared.expand_arguments``.
    """
    # The default -h/--help is disabled so that help can be listed in the
    # 'Optional arguments' group with custom wording (added below).
    parser = argparse.ArgumentParser(
        prog='CAT add_names',
        description='Add taxonomic names to CAT or BAT output files.',
        usage='CAT add_names -i -o -t [options] [-h / --help]',
        add_help=False)

    required = parser.add_argument_group('Required arguments')

    # NOTE(review): shared.PathAction is defined outside this file;
    # presumably it normalises the supplied path -- confirm in shared.
    required.add_argument(
        '-i',
        '--input_file',
        dest='input_file',
        metavar='',
        required=True,
        type=str,
        action=shared.PathAction,
        help=('Path to input file. Can be either classification output '
              'file or ORF2LCA output file.'))
    required.add_argument(
        '-o',
        '--output_file',
        dest='output_file',
        metavar='',
        required=True,
        type=str,
        action=shared.PathAction,
        help='Path to output file.')
    required.add_argument(
        '-t',
        '--taxonomy_folder',
        dest='taxonomy_folder',
        metavar='',
        required=True,
        type=str,
        action=shared.PathAction,
        help='Path to folder that contains taxonomy files.')

    optional = parser.add_argument_group('Optional arguments')

    optional.add_argument(
        '--only_official',
        dest='only_official',
        required=False,
        action='store_true',
        help=('Only output official rank names (i.e., superkingdom, '
              'phylum, class, order, family, genus, species).'))
    optional.add_argument(
        '--exclude_scores',
        dest='exclude_scores',
        required=False,
        action='store_true',
        help=('Do not include bit-score support scores in the lineage of '
              'a classification output file.'))
    optional.add_argument(
        '--force',
        dest='force',
        required=False,
        action='store_true',
        help='Force overwrite existing files.')
    optional.add_argument(
        '-q',
        '--quiet',
        dest='quiet',
        required=False,
        action='store_true',
        help='Suppress verbosity.')
    optional.add_argument(
        '-h',
        '--help',
        action='help',
        help='Show this help message and exit.')

    (args, extra_args) = parser.parse_known_args()

    # A leading 'add_names' token is not an error: only the very first
    # leftover argument is forgiven, and only if it is exactly that word.
    if extra_args and extra_args[0] == 'add_names':
        extra_args = extra_args[1:]

    if extra_args:
        sys.exit('error: too much arguments supplied:\n {0}'.format(
            '\n '.join(extra_args)))

    # Add extra arguments.
    # NOTE(review): the rest of this module reads args.log_file,
    # args.nodes_dmp and args.names_dmp, none of which are declared above;
    # presumably shared.expand_arguments sets them -- confirm in shared.
    shared.expand_arguments(args)

    return args
8597
8698
87- def add_names (args ):
88- (input_file ,
89- output_file ,
90- taxonomy_folder ,
91- only_official ,
92- exclude_scores ,
93- force ,
94- quiet ) = check .convert_arguments (args )
99+ def run ():
100+ args = parse_arguments ()
95101
96- # Currently add_names does not allow for a log file.
97- log_file = None
98-
99102 message = '# CAT v{0}.' .format (about .__version__ )
100- shared .give_user_feedback (message , log_file , quiet , show_time = False )
103+ shared .give_user_feedback (message , args .log_file , args .quiet ,
104+ show_time = False )
101105
102106 errors = []
103107
104- errors .append (check .check_input_file (input_file , log_file , quiet ))
108+ errors .append (
109+ check .check_input_file (args .input_file , args .log_file , args .quiet ))
105110
106- if not force :
107- errors .append (check .check_output_file (output_file , log_file , quiet ))
111+ if not args .force :
112+ errors .append (
113+ check .check_output_file (
114+ args .output_file , args .log_file , args .quiet ))
108115
109116 if True in errors :
110117 sys .exit (1 )
111-
112- (nodes_dmp ,
113- names_dmp ,
114- prot_accession2taxid_file ) = check .inspect_taxonomy_folder (taxonomy_folder )
115118
116- (taxid2parent , taxid2rank ) = tax .import_nodes (nodes_dmp , log_file , quiet )
117- taxid2name = tax .import_names (names_dmp , log_file , quiet )
119+ (taxid2parent ,
120+ taxid2rank ) = tax .import_nodes (
121+ args .nodes_dmp , args .log_file , args .quiet )
122+ taxid2name = tax .import_names (args .names_dmp , args .log_file , args .quiet )
118123
119124 message = 'Appending names...'
120- shared .give_user_feedback (message , log_file , quiet )
125+ shared .give_user_feedback (message , args . log_file , args . quiet )
121126
122- with open (input_file , 'r' ) as f1 :
127+ with open (args . input_file , 'r' ) as f1 :
123128 for line in f1 :
124129 if line .startswith ('#' ):
125130 line = line .rstrip ().split ('\t ' )
@@ -128,11 +133,9 @@ def add_names(args):
128133 lineage_index = line .index ('lineage' )
129134 except :
130135 message = ('{0} is not a supported classification file.'
131- '' .format (input_file ))
132- shared .give_user_feedback (message ,
133- log_file ,
134- quiet ,
135- error = True )
136+ '' .format (input_file ))
137+ shared .give_user_feedback (
138+ message , args .log_file , args .quiet , error = True )
136139
137140 sys .exit (1 )
138141
@@ -145,20 +148,20 @@ def add_names(args):
145148
146149 break
147150 else :
148- message = ('{0} is not a supported classification file.'
149- '' . format ( input_file ))
151+ message = ('{0} is not a supported classification file.' . format (
152+ args . input_file ))
150153 shared .give_user_feedback (message , log_file , quiet , error = True )
151154
152155 sys .exit (1 )
153156
154- with open (input_file , 'r' ) as f1 , open (output_file , 'w' ) as outf1 :
157+ with open (args . input_file , 'r' ) as f1 , open (args . output_file , 'w' ) as outf1 :
155158 for line in f1 :
156159 line = line .rstrip ()
157160
158161 if line .startswith ('#' ):
159- if only_official :
162+ if args . only_official :
160163 outf1 .write ('{0}\t superkingdom\t phylum\t class\t order\t '
161- 'family\t genus\t species\n ' .format (line ))
164+ 'family\t genus\t species\n ' .format (line ))
162165 else :
163166 outf1 .write ('{0}\t full lineage names\n ' .format (line ))
164167
@@ -173,7 +176,7 @@ def add_names(args):
173176 continue
174177
175178 if (line [1 ].startswith ('no taxid found' ) or
176- line [2 ].startswith ('no taxid found' )):
179+ line [2 ].startswith ('no taxid found' )):
177180 # ORF has database hits but the accession number is not found
178181 # in the taxonomy files.
179182 outf1 .write ('{0}\n ' .format ('\t ' .join (line )))
@@ -182,34 +185,26 @@ def add_names(args):
182185
183186 lineage = line [lineage_index ].split (';' )
184187
185- if scores_index and not exclude_scores :
188+ if scores_index and not args . exclude_scores :
186189 scores = line [scores_index ].split (';' )
187190 else :
188191 scores = None
189192
190- if only_official :
191- names = tax .convert_to_official_names (lineage ,
192- taxid2rank ,
193- taxid2name ,
194- scores )
193+ if args .only_official :
194+ names = tax .convert_to_official_names (
195+ lineage , taxid2rank , taxid2name , scores )
195196 else :
196- names = tax .convert_to_names (lineage ,
197- taxid2rank ,
198- taxid2name ,
199- scores )
197+ names = tax .convert_to_names (
198+ lineage , taxid2rank , taxid2name , scores )
200199
201200 outf1 .write ('{0}\t {1}\n ' .format ('\t ' .join (line ), '\t ' .join (names )))
202201
203- message = 'Names written to {0}!' .format (output_file )
204- shared .give_user_feedback (message , log_file , quiet )
205-
206-
207- def run ():
208- args = parse_arguments ()
202+ message = 'Names written to {0}!' .format (args .output_file )
203+ shared .give_user_feedback (message , args .log_file , args .quiet )
204+
205+ return
206+
209207
210- add_names (args )
211-
212-
if __name__ == '__main__':
    # Running this file directly is not supported: exit with a message that
    # points the user to the 'CAT add_names' command instead.
    sys.exit('Run \'CAT add_names\' to add taxonomic names to CAT or BAT '
             'output files.')
0 commit comments