1
1
import re
2
2
3
3
from Bio import SeqIO
4
+
4
5
from .bidirectionalmap .bidirectionalmap import BidirectionalMap
5
6
7
+
6
8
def get_contig_lengths_spades (contigs_file ):
7
9
# Get length and coverage of contigs
8
10
contig_lengths = {}
@@ -11,26 +13,25 @@ def get_contig_lengths_spades(contigs_file):
11
13
my_map = BidirectionalMap ()
12
14
13
15
for index , record in enumerate (SeqIO .parse (contigs_file , "fasta" )):
14
- start = ' NODE_'
15
- end = ' _length'
16
- contig_num = int (re .search (' %s(.*)%s' % (start , end ), record .id ).group (1 ))
17
-
18
- start = ' _length_'
19
- end = ' _cov'
20
- length = int (re .search (' %s(.*)%s' % (start , end ), record .id ).group (1 ))
21
-
22
- start = ' _cov_'
23
- end = ''
24
- coverage = int (float (re .search (' %s(.*)%s' % (start , end ), record .id ).group (1 )))
25
-
16
+ start = " NODE_"
17
+ end = " _length"
18
+ contig_num = int (re .search (" %s(.*)%s" % (start , end ), record .id ).group (1 ))
19
+
20
+ start = " _length_"
21
+ end = " _cov"
22
+ length = int (re .search (" %s(.*)%s" % (start , end ), record .id ).group (1 ))
23
+
24
+ start = " _cov_"
25
+ end = ""
26
+ coverage = int (float (re .search (" %s(.*)%s" % (start , end ), record .id ).group (1 )))
27
+
26
28
contig_lengths [contig_num ] = length
27
29
coverages [contig_num ] = coverage
28
30
29
31
return contig_lengths , coverages
30
32
31
33
32
34
def get_contig_paths_spades (contig_paths ):
33
-
34
35
paths = {}
35
36
segment_contigs = {}
36
37
node_count = 0
@@ -44,34 +45,33 @@ def get_contig_paths_spades(contig_paths):
44
45
with open (contig_paths ) as file :
45
46
name = file .readline ()
46
47
path = file .readline ()
47
-
48
+
48
49
while name != "" and path != "" :
49
-
50
50
while ";" in path :
51
- path = path [:- 2 ]+ "," + file .readline ()
52
-
53
- start = ' NODE_'
54
- end = ' _length_'
55
- contig_num = str (int (re .search (' %s(.*)%s' % (start , end ), name ).group (1 )))
56
-
51
+ path = path [:- 2 ] + "," + file .readline ()
52
+
53
+ start = " NODE_"
54
+ end = " _length_"
55
+ contig_num = str (int (re .search (" %s(.*)%s" % (start , end ), name ).group (1 )))
56
+
57
57
segments = path .rstrip ().split ("," )
58
58
59
59
if current_contig_num != contig_num :
60
60
my_map [node_count ] = int (contig_num )
61
61
contig_names [node_count ] = name .strip ()
62
62
current_contig_num = contig_num
63
63
node_count += 1
64
-
64
+
65
65
if contig_num not in paths :
66
66
paths [contig_num ] = [segments [0 ], segments [- 1 ]]
67
-
67
+
68
68
for segment in segments :
69
69
if segment not in segment_contigs :
70
70
segment_contigs [segment ] = set ([contig_num ])
71
71
else :
72
72
segment_contigs [segment ].add (contig_num )
73
-
73
+
74
74
name = file .readline ()
75
75
path = file .readline ()
76
76
77
- return my_map ,
77
+ return ( my_map ,)
0 commit comments