-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFeatur.py
More file actions
executable file
·89 lines (89 loc) · 2.61 KB
/
Featur.py
File metadata and controls
executable file
·89 lines (89 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#! /usr/bin/env python
# -*- coding: utf-8 -*-
'''''''''''''''''''''''''''''''''''''''''''''
此程序是为了提取出断裂区域的序列,可把出来的文件直接覆盖V1的Feature的
CDS和nucl两个文件。
'''''''''''''''''''''''''''''''''''''''''''''
import sys
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
scgene,faf,tblf=sys.argv
def main(faf,tblf):
fa=SeqIO.read(faf,'fasta')
seq=str(fa.seq)
tbl=open(tblf,'r')
out=open("CDS.tbl",'w')
out1=open("nucl.tbl",'w')
tlist=[]
for line in tbl:
tlist.append(line.strip().split())
tlist=tlist[1:]
list=[]
for i in tlist:
if i != []:
list.append(i)
a=[i for i,x in enumerate(list) if list[i][0]=='gene']
b=[i for i ,x in enumerate(list) if list[i][0]=='product']
for i in range(len(b)):
c=int(a[i])
d=int(b[i])
type=list[c+1][2]
gene=list[c][1]
product="%s=%s" %(list[d][0],' '.join(list[d][1:]))
if int(d)-int(c)<=2:
if int(list[c-1][0]) <int(list[c-1][1]):
start=int(list[c-1][0])-1
end=int(list[c-1][1])
seq2=str(seq[start:end])
start1=start+1
end1=end
if int(list[c-1][1]) <int(list[c-1][0]):
start=int(list[c-1][1])-1
end=int(list[c-1][0])
seq1=Seq(seq[start:end],IUPAC.unambiguous_dna)
seq2=seq1.reverse_complement()
start1="complment[("+str(start+1)+".."
end1=str(end)+")]"
product="%s=%s" %(list[d][0],' '.join(list[d][1:]))
out1.write(">%s [%s] [(%s..%s)]\n%s\n" %(gene,product,start1,end1,seq2))
if type=="CDS":
out.write(">%s [%s] [(%s..%s)]\n%s\n" %(gene,product,start1,end1,seq2))
if int(d)-int(c)>2:
e=[]
f=[]
z=[]
g="no"
type=list[c+1][2]
gene=list[c][1]
product="%s=%s" %(list[d][0],' '.join(list[d][1:]))
for x in range(c+1,d,1):
if int(list[x][0]) < int(list[x][1]):
start=int(list[x][0])-1
end=int(list[x][1])
f.append(seq[start:end])
e.append("(%s..%s)" % (start+1,end))
if int(list[x][0]) > int(list[x][1]):
start=int(list[x][1])-1
end=int(list[x][0])
seq1=Seq(seq[start:end],IUPAC.unambiguous_dna)
seq2=seq1.reverse_complement()
f.append(str(seq2))
z.append([start+1,end])
z.sort()
g='yes'
for y in z :
e.append('(%s..%s)' % (y[0],y[1]))
seq3=str(''.join(f))
sub=' '.join(e)
if g =="no":
out1.write(">%s [%s] [%s]\n%s\n" %(gene,product,sub,seq3))
if type=="CDS":
out.write(">%s [%s] [%s]\n%s\n" %(gene,product,sub,seq3))
if g =="yes":
out1.write(">%s [%s] complment[%s]\n%s\n" %(gene,product,sub,seq3))
if type=="CDS":
out.write(">%s [%s] complment[%s]\n%s\n" %(gene,product,sub,seq3))
out.close()
out1.close()
main(faf,tblf)