-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmerge_kmer2node.pl
More file actions
executable file
·95 lines (86 loc) · 2.76 KB
/
merge_kmer2node.pl
File metadata and controls
executable file
·95 lines (86 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl -w
# Loads multiple kmer2node outputs and merges them.
# This is a simple summation, but we may wish to use weighted sums
# or a union in some manner.
use strict;
my %node; # keyed on node name
my %edge; # keyed on node1,dir1,node2,dir2
# Parse GFA edges
my $gfa = shift(@ARGV);
open(my $fh, "<", $gfa) || die;
while (<$fh>) {
if (/^L/) {
my ($n1,$d1,$n2,$d2) = m/^L\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/;
$edge{"$n1.$d1.$n2.$d2"} = 0;
#print "Set edge $n1.$d1.$n2.$d2\n";
}
}
close($fh);
foreach my $file (@ARGV) {
open(my $fh, "<", $file) || die;
while (<$fh>) {
chomp();
if (/Node/) {
my @F = split(/\s+/, $_);
if (exists($node{$F[1]})) {
$node{$F[1]}[-1] = $F[-1] if ($F[-1] > $node{$F[1]}[-1]);
#$node{$F[1]}[-1] += $F[-1];
} else {
$node{$F[1]} = \@F;
}
} else {
if (/Edge/) {
my ($n1,$d1,$n2,$d2,$c)
= m/^Edge\s+(\S+)(\S)\s+(\S+)(\S)\s+(\d+)/;
my $d1m = $d2 eq "+" ? "-" : ($d2 eq "-" ? "+" : "b");
my $d2m = $d1 eq "+" ? "-" : ($d1 eq "-" ? "+" : "b");
my $e1 = "$n1.$d1.$n2.$d2";
my $e2 = "$n2.$d1m.$n1.$d2m";
#print "Edge\t$e1\t$c\t",exists($edge{$e1}) ? 1 : 0,"\n";
#print "Edge\t$e2\t$c\t",exists($edge{$e2}) ? 1 : 0,"\n";
if (exists($edge{$e1})) {
# Basic fwd strand case
$edge{$e1} += $c;
} elsif (exists($edge{$e2})) {
# Basic reverse-complement case
$edge{$e2} += $c;
} elsif ($d2 eq "b" && $d1 ne "b") {
# Second element in both orientations but we can resolve.
# We distribute evenly if both are found.
my $p = exists($edge{"$n1.$d1.$n2.+"});
my $m = exists($edge{"$n1.$d1.$n2.-"});
$edge{"$n1.$d1.$n2.+"} += $c / ($p+$m) if $p;
$edge{"$n1.$d1.$n2.-"} += $c / ($p+$m) if $m;
$p = exists($edge{"$n2.$d1m.$n1.+"});
$m = exists($edge{"$n2.$d1m.$n1.-"});
$edge{"$n2.$d1m.$n2.+"} += $c / ($p+$m) if $p;
$edge{"$n2.$d1m.$n1.-"} += $c / ($p+$m) if $m;
} elsif ($d1 eq "b" && $d2 ne "b") {
# First element in both orientations but we can resolve.
# We distribute evenly if both are found.
my $p = exists($edge{"$n1.+.$n2.$d2"});
my $m = exists($edge{"$n1.-.$n2.$d2"});
$edge{"$n1.+.$n2.$d2"} += $c / ($p+$m) if $p;
$edge{"$n1.-.$n2.$d2"} += $c / ($p+$m) if $m;
$p = exists($edge{"$n2.+.$n1.$d2m"});
$m = exists($edge{"$n2.-.$n1.$d2m"});
$edge{"$n2.+.$n2.$d2m"} += $c / ($p+$m) if $p;
$edge{"$n2.-.$n1.$d2m"} += $c / ($p+$m) if $m;
} else {
# Both first and second elements existing in two dirs.
# Not implemented yet.
}
}
}
}
close($fh);
}
$"="\t";
foreach (sort keys %node) {
print "@{$node{$_}}\n";
}
foreach (sort keys %edge) {
next unless $edge{$_} > 0;
m/(\S+)\.(\S+)\.(\S+)\.(\S+)/;
print "Edge\t$1$2\t$3$4\t$edge{$_}\n";
}