-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_pairs_orthofinder_qfo.py
More file actions
executable file
·114 lines (84 loc) · 4.11 KB
/
extract_pairs_orthofinder_qfo.py
File metadata and controls
executable file
·114 lines (84 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
# See the NOTICE file distributed with this work for additional information
# regarding copyright ownership.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Write OrthoFinder pairwise predictions to a file compatible with Orthology Benchmark Service.

Typical usage examples::

    $ python extract_pairs_orthofinder_qfo.py \
        --predictions /path/to/OrthoFinder/Results_XXXXX/Orthologues \
        --out_file /path/to/output/file.txt

"""
import argparse
import os
from typing import List
import warnings
def extract_orthologous_pairs(input_file: str) -> List[tuple]:
    """Extracts pairs of putative orthologues from a single OrthoFinder output file.

    Any line containing the text "Orthogroup" is treated as a header and skipped. Every other line is
    split on tabs; the second column holds comma-separated reference genes and the third column holds
    comma-separated target genes. Each gene identifier is split on "|" and its second field is used as
    the accession. Every reference accession on a line is paired with every target accession on that
    line.

    Args:
        input_file: Path to the orthologues spreadsheet produced by OrthoFinder.

    Returns:
        A list of tuples where each tuple contains UniProtKB accessions for a pair of putative
        orthologues. The list is empty if `input_file` cannot be read. A corrupted line contributes
        no pairs at all.

    Warns:
        UserWarning: If `input_file` cannot be opened or if a line in `input_file` is corrupted, i.e.
            it has fewer than three tab-separated columns or a gene identifier without a "|".
    """
    orthologues = []
    try:
        with open(input_file) as file_handler:
            for line in file_handler:
                if "Orthogroup" in line:
                    continue
                try:
                    columns = line.replace("\n", "").split("\t")
                    # Parse every accession before emitting any pair, so that a malformed gene
                    # identifier is reported like any other corrupted line instead of raising an
                    # uncaught IndexError half-way through the line.
                    reference_ids = [
                        gene.split("|")[1] for gene in columns[1].replace(" ", "").split(",")
                    ]
                    target_ids = [
                        gene.split("|")[1] for gene in columns[2].replace(" ", "").split(",")
                    ]
                except IndexError:
                    warnings.warn(f"There is an issue with file '{input_file}' in line '{line}'.")
                    continue
                # Reference-major order: all targets for the first reference, then the next one.
                orthologues.extend(
                    (reference_id, target_id)
                    for reference_id in reference_ids
                    for target_id in target_ids
                )
    except OSError:  # EnvironmentError is an alias of OSError in Python 3
        warnings.warn(f"Could not open file '{input_file}'.")
    return orthologues
def write_orthologous_pairs(orthologous_pairs: List[tuple], output_file: str) -> None:
    """Appends pairs of orthologues to the end of a specified file.

    Each pair is written on its own line as its first two elements separated by a tab. The file is
    opened in append mode, so existing content is kept and the file is created if it does not exist
    (even when `orthologous_pairs` is empty).

    Args:
        orthologous_pairs: A list of tuples representing pairs of putative orthologues.
        output_file: Path to the output file containing pairs of putative orthologues.
    """
    with open(output_file, "a") as sink:
        sink.writelines(
            "\t".join((entry[0], entry[1])) + "\n" for entry in orthologous_pairs
        )
def process_orthofinder_predictions(predictions: str, out_file: str) -> None:
    """Extracts OrthoFinder pairwise predictions and writes them to a single output file.

    Predictions are made on a QfO dataset and the code relies on the formatting of QfO fasta files.
    Output file is compatible with Orthology Benchmark Service.

    Every ".tsv" file found directly inside each immediate subdirectory of `predictions` is parsed
    with `extract_orthologous_pairs` and its pairs are passed to `write_orthologous_pairs` for
    `out_file`. Subdirectories and files are visited in sorted path order so that repeated runs on
    the same input produce the same output ordering.

    Args:
        predictions: Path to OrthoFinder's 'Orthologues' directory.
        out_file: Path to the output file containing pairs of putative orthologues.
    """
    # os.scandir yields entries in arbitrary order; sort for reproducible output.
    with os.scandir(predictions) as entries:
        species_dirs = sorted(entry.path for entry in entries if entry.is_dir())
    for species_dir in species_dirs:
        with os.scandir(species_dir) as entries:
            tsv_files = sorted(
                entry.path for entry in entries if entry.is_file() and entry.path.endswith(".tsv")
            )
        for tsv_file in tsv_files:
            orthologs = extract_orthologous_pairs(tsv_file)
            write_orthologous_pairs(orthologs, out_file)
if __name__ == '__main__':
    # Command-line entry point: both options are mandatory and are handed straight to
    # process_orthofinder_predictions.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--predictions", "Path to OrthoFinder's 'Orthologues' directory"),
        ("--out_file", "Output file"),
    ):
        cli.add_argument(flag, required=True, type=str, help=description)
    options = cli.parse_args()
    process_orthofinder_predictions(options.predictions, options.out_file)