#!/usr/bin/env python3
# coding=utf-8

# This file is part of MGnify genome analysis pipeline.
#
# MGnify genome analysis pipeline is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# MGnify genome analysis pipeline is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with MGnify genome analysis pipeline. If not, see <https://www.gnu.org/licenses/>.
18+
19+ import argparse
20+ import os
21+
22+
def main(remove_list_file, add_list_file, message, output_file):
    """Write an updated remove list that also contains the newly failed genomes.

    Lines of ``remove_list_file`` that start with ``MGYG`` are copied to
    ``output_file`` unchanged; any other line (for example a header) is
    dropped. Each accession from ``add_list_file`` that is not already in the
    remove list is then appended as ``<accession>\\t<message>``.

    Args:
        remove_list_file: Path to the existing tab-delimited remove list
            (accession in the first column). If the path is not a regular
            file, the existing list is treated as empty.
        add_list_file: Path to a file with one accession per line.
        message: Reason for removal written in the second column of every
            newly added row.
        output_file: Path of the file to write. It is always overwritten and
            may be the same path as ``remove_list_file``.

    Raises:
        OSError: If ``add_list_file`` cannot be read or ``output_file`` cannot
            be written.
    """
    kept_lines = []
    already_in_remove_list = set()

    # Both inputs are read completely before the output is opened, so the
    # output can safely point at the same path as the remove list and is not
    # touched at all when an input turns out to be unreadable.
    if os.path.isfile(remove_list_file):
        with open(remove_list_file, 'r') as file_in:
            for line in file_in:
                if not line.startswith("MGYG"):
                    continue
                # A final line without a newline would otherwise be glued to
                # the first appended record.
                kept_lines.append(line if line.endswith("\n") else line + "\n")
                already_in_remove_list.add(line.split('\t')[0].strip())

    new_lines = []
    with open(add_list_file, 'r') as file_in:
        for line in file_in:
            accession = line.strip()
            # Skip blank lines and anything already listed, including
            # accessions repeated within the add list itself.
            if not accession or accession in already_in_remove_list:
                continue
            already_in_remove_list.add(accession)
            new_lines.append(f"{accession}\t{message}\n")

    # 'w' rather than 'a': stale content from a previous run must not survive
    # when the remove list is missing.
    with open(output_file, 'w') as file_out:
        file_out.writelines(kept_lines)
        file_out.writelines(new_lines)
38+
39+
def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads the arguments from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``remove_list``,
        ``add_list``, ``message`` and ``output``; all four options are
        required.
    """
    parser = argparse.ArgumentParser(
        description='Script adds genomes that existed in the previous version of a '
                    'catalogue but failed QC checks during the update to the remove '
                    'list. Only genomes that are not already present in the remove '
                    'list are added.')
    parser.add_argument(
        '-r', '--remove-list', required=True,
        help='Path to the file containing the list of genomes to remove. File should be tab-delimited '
             'with the MGYG accession in the first column and reason for removal in the second.')
    parser.add_argument(
        '-a', '--add-list', required=True,
        help='Path to the file containing the list of genomes to add to the remove list.')
    parser.add_argument(
        '-m', '--message', required=True,
        help='Reason for removal that will be printed to the remove file.')
    parser.add_argument(
        '-o', '--output', required=True,
        help='Name of the output file.')
    return parser.parse_args(argv)
55+
56+
# Script entry point: parse the command-line options and build the updated
# remove list. Nothing runs when the module is imported.
if __name__ == '__main__':
    args = parse_args()
    main(args.remove_list, args.add_list, args.message, args.output)