Skip to content

Commit 4343025

Browse files
authored
Merge pull request #128 from EBI-Metagenomics/dev
Dev
2 parents acd4a6f + 6753b02 commit 4343025

File tree

128 files changed

+8863
-1615
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+8863
-1615
lines changed

README.md

Lines changed: 82 additions & 39 deletions
Large diffs are not rendered by default.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"ftp": [
3+
"genomes-all_metadata.tsv",
4+
"all_genomes",
5+
"species_catalogue",
6+
"all_genomes.msh"
7+
],
8+
"additional_data": [
9+
"panaroo_output",
10+
"mgyg_genomes",
11+
{
12+
"intermediate_files": [
13+
"extra_weight_table.txt",
14+
"renamed_genomes_name_mapping.tsv"
15+
]
16+
}
17+
]
18+
}
19+

bin/add_genomes_to_remove_list.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/usr/bin/env python3
2+
# coding=utf-8
3+
4+
# This file is part of MGnify genome analysis pipeline.
5+
#
6+
# MGnify genome analysis pipeline is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation, either version 3 of the License, or
9+
# (at your option) any later version.
10+
11+
# MGnify genome analysis pipeline is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
16+
# You should have received a copy of the GNU General Public License
17+
# along with MGnify genome analysis pipeline. If not, see <https://www.gnu.org/licenses/>.
18+
19+
import argparse
20+
import os
21+
22+
23+
def main(remove_list_file, add_list_file, message, output_file):
    """Write an updated remove list that includes newly failed genomes.

    Lines of the existing remove list that start with ``MGYG`` are copied to
    the output unchanged; all other lines (headers, comments, blanks) are
    dropped. Each accession from the add list that is not yet present is then
    appended as ``<accession>\\t<message>``.

    :param remove_list_file: Path to the existing tab-delimited remove list
        (accession in the first column). If the path is not a file, the
        output contains only the entries from the add list.
    :param add_list_file: Path to a file with one accession per line.
    :param message: Reason for removal written next to each added accession.
    :param output_file: Path of the file to write. It is always created
        from scratch, so content left over from an earlier run is never kept.
    """
    already_in_remove_list = set()
    kept_lines = []

    # The existing list is read completely before the output is opened, so
    # pointing the output at the same path as the remove list does not
    # truncate the file while it is still being read.
    if os.path.isfile(remove_list_file):
        with open(remove_list_file, 'r') as file_in:
            for line in file_in:
                if line.startswith("MGYG"):
                    # A final line without a newline would otherwise be
                    # glued to the first appended entry.
                    kept_lines.append(line if line.endswith('\n') else line + '\n')
                    already_in_remove_list.add(line.split('\t')[0].strip())

    with open(add_list_file, 'r') as file_in, open(output_file, 'w') as file_out:
        file_out.writelines(kept_lines)
        for line in file_in:
            accession = line.strip()
            # Skip blank lines and anything already listed, including
            # accessions repeated within the add list itself.
            if not accession or accession in already_in_remove_list:
                continue
            file_out.write(f"{accession}\t{message}\n")
            already_in_remove_list.add(accession)
39+
40+
def parse_args():
41+
parser = argparse.ArgumentParser(description='Script adds genomes that existed in the previous version of a '
42+
'catalogue but failed QC checks during the update to the remove '
43+
'list. Only genomes that are not already present in the remove '
44+
'list are added.')
45+
parser.add_argument('-r', '--remove-list', required=True,
46+
help='Path to the file containing the list of genomes to remove. File should be tab-delimited '
47+
'with the MGYG accession in the first column and reason for removal in the second.')
48+
parser.add_argument('-a', '--add-list', required=True,
49+
help='Path to the file containing the list of genomes to add to the remove list.')
50+
parser.add_argument('-m', '--message', required=True,
51+
help='Reason for removal that will be printed to the remove file.')
52+
parser.add_argument('-o', '--output', required=True,
53+
help='Name of the output file.')
54+
return parser.parse_args()
55+
56+
57+
if __name__ == '__main__':
    # Script entry point: read the command-line options and hand them to main().
    cli_options = parse_args()
    main(
        remove_list_file=cli_options.remove_list,
        add_list_file=cli_options.add_list,
        message=cli_options.message,
        output_file=cli_options.output,
    )
60+

0 commit comments

Comments
 (0)