-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfasta_to_separated_union.py
executable file
·59 lines (49 loc) · 1.46 KB
/
fasta_to_separated_union.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
import argparse
from fastaq import *
# Spacer inserted after every input sequence so that reads cannot map across
# the junction between two neighbouring sequences in the union.
FILLER_SEQ = 'n' * 100 + 'g' * 100 + 'n' * 100


def _gff_line(seqname, source, feature, start, end, attributes):
    """Return one tab-separated 9-column line; the three middle columns are '.'."""
    return '\t'.join([
        seqname,
        source,
        feature,
        str(start),
        str(end),
        '.',
        '.',
        '.',
        attributes,
    ])


def build_union(records, seqname, filler=FILLER_SEQ):
    """Concatenate records into one sequence, with a filler after each record.

    Args:
        records: iterable of objects exposing ``.id``, ``.seq`` (a string) and
            ``len()``, as the records yielded by ``sequences.file_reader`` are
            used here.
        seqname: value written in the first column of every annotation line.
        filler: spacer string appended after every record, including the
            last one.

    Returns:
        A ``(union_sequence, gff_lines)`` tuple. ``gff_lines`` holds, for each
        record, one 'Contig' line followed by one 'Fake' line for the filler.
        Coordinates are 1-based and inclusive.
    """
    pieces = []
    gff_lines = []
    total_bases = 0

    for record in records:
        record_length = len(record)
        pieces.append(record.seq)
        gff_lines.append(_gff_line(
            seqname, 'Contig', record.id,
            total_bases + 1, total_bases + record_length, 'color=3'))
        total_bases += record_length

        pieces.append(filler)
        gff_lines.append(_gff_line(
            seqname, 'Fake', 'Fake',
            total_bases + 1, total_bases + len(filler), 'color=2'))
        total_bases += len(filler)

    # Join once at the end instead of growing a string with += per record.
    return ''.join(pieces), gff_lines


def _write_lines(filename, items):
    """Print each item on its own line to filename, always closing the handle."""
    f = utils.open_file_write(filename)
    try:
        for item in items:
            print(item, file=f)
    finally:
        utils.close(f)


def main():
    """Parse the command line, then write <outprefix>.fa and <outprefix>.gff."""
    parser = argparse.ArgumentParser(
        description='Makes union file from FASTA, with separator between sequences to stop reads mapping',
        usage='%(prog)s [options] <fasta in> <outfiles prefix>')
    parser.add_argument('infile', help='Name of input fasta file')
    parser.add_argument('outprefix', help='Prefix of output files')
    parser.add_argument('--seqname', help='Name of output sequence [%(default)s]', default='union')
    options = parser.parse_args()

    union_seq, gff_lines = build_union(
        sequences.file_reader(options.infile), options.seqname)

    # Name the FASTA record with --seqname so it matches the first column of
    # the GFF lines (it was previously hard-coded to 'union').
    seq_out = sequences.Fasta(options.seqname, '')
    seq_out.seq = union_seq

    _write_lines(options.outprefix + '.fa', [seq_out])
    _write_lines(options.outprefix + '.gff', gff_lines)


if __name__ == '__main__':
    main()