-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpathfinder2seq.pl
More file actions
executable file
·59 lines (50 loc) · 1.18 KB
/
pathfinder2seq.pl
File metadata and controls
executable file
·59 lines (50 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/perl -w
# Turn a GFA file plus pathfinder PATH output into sequences by copying the
# path elements out of the GFA and stitching them together.
# Usage: pathfinder foo.gfa | pathfinder2seq.pl foo.gfa > foo.consensus.fa
# Parse GFA; minimally.
# Only segment ("S") records are read: the second field is the segment name
# and the third its sequence, which is stored upper-cased in
# $gfa{name}{seq}.  The GFA filename is taken from the first command line
# argument; whatever remains in @ARGV is left for the path reader.
my $gfa_file = shift(@ARGV);
die "Usage: pathfinder foo.gfa | $0 foo.gfa > foo.consensus.fa\n"
    unless defined($gfa_file);
open(my $gfa, "<", $gfa_file)
    or die "Cannot open GFA file '$gfa_file': $!\n";
while (<$gfa>) {
    chomp($_);
    my @F = split(/\s+/, $_);
    # Skip other record types for now, and S records that lack a name or
    # a sequence field (these would otherwise store an undefined sequence).
    next unless scalar(@F) >= 3 && $F[0] eq "S";
    # NOTE(review): GFA permits "*" as a placeholder for an absent sequence;
    # it is stored verbatim here -- confirm such segments never occur in paths.
    $gfa{$F[1]}{seq} = uc($F[2]);
}
close($gfa);
# State shared by the path reader and print_path
my @path;            # [direction, node] pairs of the path being collected
my $in_path = 0;     # set to 1 by a PATH line, reset by other non-"[" lines
my $contig  = 0;     # incremented by print_path for every contig printed
# Print one contig in FASTA form: a ">contig_N" header line followed by the
# stitched sequence on a single line.
#
# Arguments: a list of [direction, node] array refs.  The sequence of each
# node is looked up in the global %gfa; a direction of "-" means the
# sequence is reverse-complemented before being appended.
# Side effects: increments the global $contig counter and prints to the
# currently selected output handle.
sub print_path {
    $contig++;
    print ">contig_$contig\n";
    my $seq = "";
    foreach my $p (@_) {
        my ($dir, $node) = @{$p};

        # Look the node up without autovivifying an entry for unknown names.
        my $gseq;
        if (defined($node) && exists($gfa{$node})) {
            $gseq = $gfa{$node}{seq};
        }
        unless (defined($gseq)) {
            # Keep going (best effort) but say explicitly what is missing,
            # since the resulting contig will lack this segment.
            my $name = defined($node) ? $node : "(undefined)";
            print STDERR "Warning: no sequence for node '$name' in GFA; "
                . "omitted from contig_$contig\n";
            next;
        }

        if (defined($dir) && $dir eq "-") {
            # Complement including IUPAC ambiguity codes; S, W and N are
            # their own complements and therefore need no mapping.
            $gseq =~ tr/ACGTRYKMBVDH/TGCAYRMKVBHD/;
            $gseq = reverse($gseq);
        }
        $seq .= $gseq;
    }
    print "$seq\n";
}
# Read the pathfinder output from the remaining command line arguments or
# STDIN.  A line starting with "PATH" begins a new path; the lines starting
# with "[" that follow it are its elements, each ending in a node name
# immediately followed by its direction ("+" or "-").  Any other line ends
# the current path.  Each collected path is printed as one contig.
while (my $line = <>) {
    # Remove the line ending together with any trailing blanks, so that
    # CRLF input still matches the end-anchored element pattern below.
    $line =~ s/\s+\z//;

    if ($line !~ /^\[/) {
        if ($line =~ /^PATH/) {
            # New path header: flush the previous path before starting over.
            print_path(@path) if (scalar(@path));
            $in_path = 1;
            @path = ();
        } else {
            $in_path = 0;
        }
        next;
    }
    next unless $in_path;

    print STDERR "in path $contig: $line\n";
    my ($node, $dir) = ($line =~ m/(\S+)([-+])$/);
    unless (defined($node)) {
        # Do not push an undefined element; it would silently drop a
        # segment from the contig.
        print STDERR "Warning: cannot parse path element: $line\n";
        next;
    }
    push(@path, [$dir, $node]);
}
# Flush the last path, which has no following PATH line to trigger it.
print_path(@path) if (scalar(@path));