@@ -36,7 +36,7 @@ def MEROPSBlast(input, cpus, evalue, tmpdir, output, diamond=True):
36
36
lib .runSubprocess (cmd , '.' , lib .log )
37
37
# parse results
38
38
with open (output , 'w' ) as out :
39
- with open (blast_tmp , 'rU ' ) as results :
39
+ with open (blast_tmp , 'r ' ) as results :
40
40
for qresult in SearchIO .parse (results , "blast-xml" ):
41
41
hits = qresult .hits
42
42
ID = qresult .id
@@ -66,7 +66,7 @@ def SwissProtBlast(input, cpus, evalue, tmpdir, GeneDict, diamond=True):
66
66
# parse results
67
67
counter = 0
68
68
total = 0
69
- with open (blast_tmp , 'rU ' ) as results :
69
+ with open (blast_tmp , 'r ' ) as results :
70
70
for qresult in SearchIO .parse (results , "blast-xml" ):
71
71
hits = qresult .hits
72
72
qlen = qresult .seq_len
@@ -155,7 +155,7 @@ def getEggNogHeaders(input):
155
155
12 eggNOG annot
156
156
'''
157
157
IDi , DBi , OGi , Genei , COGi , Desci = (None ,)* 6
158
- with open (input , 'rU ' ) as infile :
158
+ with open (input , 'r ' ) as infile :
159
159
for line in infile :
160
160
if line .startswith ('#query_name' ): # this is HEADER
161
161
line = line .rstrip ()
@@ -171,14 +171,57 @@ def getEggNogHeaders(input):
171
171
IDi , DBi , OGi , Genei , COGi , Desci = (0 , 8 , 9 , 4 , 11 , 12 )
172
172
return IDi , DBi , OGi , Genei , COGi , Desci
173
173
174
def getEggNogHeadersv2(input):
    '''
    function to get the column indexes of interest from an eggnog-mapper
    (v2) annotations file.

    The file is scanned for the header line (the one that starts with
    '#query_name'). When it is present, each column is located by name via
    item2index(); any column that cannot be located, or every column when
    there is no header at all, falls back to a fixed positional guess.

    web-based eggnog mapper has no header....
    1.  query_name
    2.  seed eggNOG ortholog
    3.  seed ortholog evalue
    4.  seed ortholog score
    5.  Predicted taxonomic group
    6.  Predicted protein name
    7.  Gene Ontology terms
    8.  EC number
    9.  KEGG_ko
    10. KEGG_Pathway
    11. KEGG_Module
    12. KEGG_Reaction
    13. KEGG_rclass
    14. BRITE
    15. KEGG_TC
    16. CAZy
    17. BiGG Reaction
    18. tax_scope: eggNOG taxonomic level used for annotation
    19. eggNOG OGs
    20. bestOG (deprecated, use smallest from eggnog OGs)
    21. COG Functional Category
    22. eggNOG free text description

    input: path to the eggnog-mapper annotations file
    returns: tuple of zero-based indexes (IDi, DBi, OGi, Genei, COGi, Desci)
    '''
    # positional guesses, in return order: IDi, DBi, OGi, Genei, COGi, Desci
    # NOTE(review): these positions do not obviously line up with the
    # 22-column layout listed in the docstring -- confirm against a real
    # header-less file before relying on them.
    guessed = (0, 6, 9, 4, 11, 12)
    IDi, DBi, OGi, Genei, COGi, Desci = (None,) * 6
    with open(input, 'r') as infile:
        for line in infile:
            if line.startswith('#query_name'):  # this is HEADER
                headerCols = line.rstrip().split('\t')
                IDi = item2index(headerCols, 'query_name')
                Genei = item2index(headerCols, 'Preferred_name')
                DBi = item2index(headerCols, 'taxonomic scope')
                OGi = item2index(headerCols, 'eggNOG OGs')
                COGi = item2index(headerCols, 'COG Functional cat.')
                Desci = item2index(headerCols, 'eggNOG free text desc.')
                break
    # Compare against None rather than relying on truthiness: the ID column
    # is normally index 0, and "not 0" would wrongly discard a parsed header.
    # assumes item2index() returns None for a missing column -- TODO confirm
    located = (IDi, DBi, OGi, Genei, COGi, Desci)
    return tuple(
        fallback if index is None else index
        for index, fallback in zip(located, guessed)
    )
174
217
175
218
def parseEggNoggMapper (input , output , GeneDict ):
176
219
Definitions = {}
177
220
# indexes from header file
178
221
IDi , DBi , OGi , Genei , COGi , Desci = getEggNogHeaders (input )
179
222
# take annotations file from eggnog-mapper and create annotations
180
223
with open (output , 'w' ) as out :
181
- with open (input , 'rU ' ) as infile :
224
+ with open (input , 'r ' ) as infile :
182
225
for line in infile :
183
226
line = line .replace ('\n ' , '' )
184
227
if line .startswith ('#' ):
@@ -330,7 +373,7 @@ def __init__(self, prog):
330
373
lib .log .error ('Database not properly configured, %s missing. Run funannotate database and/or funannotate setup.' %
331
374
os .path .join (FUNDB , 'funannotate-db-info.txt' ))
332
375
sys .exit (1 )
333
- with open (os .path .join (FUNDB , 'funannotate-db-info.txt' ), 'rU ' ) as dbfile :
376
+ with open (os .path .join (FUNDB , 'funannotate-db-info.txt' ), 'r ' ) as dbfile :
334
377
for line in dbfile :
335
378
line = line .strip ()
336
379
name , type , file , version , date , num_records , mdchecksum = line .split (
@@ -526,7 +569,7 @@ def __init__(self, prog):
526
569
genbank )
527
570
# since can't find a way to propagate the WGS_accession, writing it to a file and then parsing it here
528
571
if os .path .isfile (os .path .join (outputdir , 'update_results' , 'WGS_accession.txt' )):
529
- with open (os .path .join (outputdir , 'update_results' , 'WGS_accession.txt' ), 'rU ' ) as infile :
572
+ with open (os .path .join (outputdir , 'update_results' , 'WGS_accession.txt' ), 'r ' ) as infile :
530
573
for line in infile :
531
574
line = line .replace ('\n ' , '' )
532
575
if line == 'None' :
@@ -646,7 +689,7 @@ def __init__(self, prog):
646
689
lib .log .info ("Combining UniProt/EggNog gene and product names using Gene2Product version %s" %
647
690
versDB .get ('gene2product' ))
648
691
CuratedNames = {}
649
- with open (os .path .join (FUNDB , 'ncbi_cleaned_gene_products.txt' ), 'rU ' ) as input :
692
+ with open (os .path .join (FUNDB , 'ncbi_cleaned_gene_products.txt' ), 'r ' ) as input :
650
693
for line in input :
651
694
line = line .strip ()
652
695
if line .startswith ('#' ):
@@ -937,7 +980,7 @@ def __init__(self, prog):
937
980
938
981
# to update annotations, user can pass --fix or --remove, update Annotations here
939
982
if args .fix :
940
- with open (args .fix , 'rU ' ) as fixfile :
983
+ with open (args .fix , 'r ' ) as fixfile :
941
984
for line in fixfile :
942
985
line = line .strip ()
943
986
if line .startswith ('#' ):
@@ -958,7 +1001,7 @@ def __init__(self, prog):
958
1001
Gene2ProdFinal [cols [0 ]] = (cols [1 ], cols [2 ])
959
1002
960
1003
if args .remove :
961
- with open (args .remove , 'rU ' ) as removefile :
1004
+ with open (args .remove , 'r ' ) as removefile :
962
1005
for line in removefile :
963
1006
line = line .strip ()
964
1007
if line .startswith ('#' ):
@@ -988,13 +1031,13 @@ def __init__(self, prog):
988
1031
if args .p2g :
989
1032
p2gfile = args .p2g
990
1033
if p2gfile :
991
- with open (p2gfile , 'rU ' ) as input :
1034
+ with open (p2gfile , 'r ' ) as input :
992
1035
for line in input :
993
1036
cols = line .split ('\t ' )
994
1037
if not cols [0 ] in p2g :
995
1038
p2g [cols [0 ]] = cols [1 ]
996
1039
with open (os .path .join (outputdir , 'annotate_misc' , 'tbl2asn' , 'genome.tbl' ), 'w' ) as outfile :
997
- with open (os .path .join (outputdir , 'annotate_misc' , 'tbl2asn' , 'genome.tbl.bak' ), 'rU ' ) as infile :
1040
+ with open (os .path .join (outputdir , 'annotate_misc' , 'tbl2asn' , 'genome.tbl.bak' ), 'r ' ) as infile :
998
1041
for line in infile :
999
1042
line = line .replace ('\n ' , '' )
1000
1043
if line .startswith ('\t \t \t protein_id' ) or line .startswith ('\t \t \t transcript_id' ):
@@ -1147,7 +1190,7 @@ def __init__(self, prog):
1147
1190
AntiSmashFolder , 'smcluster.MIBiG.blast.txt' )
1148
1191
mibig_db = os .path .join (FUNDB , 'mibig.dmnd' )
1149
1192
with open (mibig_fasta , 'w' ) as output :
1150
- with open (Proteins , 'rU ' ) as input :
1193
+ with open (Proteins , 'r ' ) as input :
1151
1194
SeqRecords = SeqIO .parse (Proteins , 'fasta' )
1152
1195
for record in SeqRecords :
1153
1196
genename = record .id
@@ -1160,7 +1203,7 @@ def __init__(self, prog):
1160
1203
lib .runSubprocess (cmd , '.' , lib .log )
1161
1204
# now parse blast results to get {qseqid: hit}
1162
1205
MIBiGBlast = {}
1163
- with open (mibig_blast , 'rU ' ) as input :
1206
+ with open (mibig_blast , 'r ' ) as input :
1164
1207
for line in input :
1165
1208
cols = line .split ('\t ' )
1166
1209
if '-T' in cols [0 ]:
@@ -1180,15 +1223,15 @@ def __init__(self, prog):
1180
1223
1181
1224
# load in antismash cluster bed file to slice record
1182
1225
slicing = []
1183
- with open (AntiSmashBed , 'rU ' ) as antibed :
1226
+ with open (AntiSmashBed , 'r ' ) as antibed :
1184
1227
for line in antibed :
1185
1228
cols = line .split ('\t ' )
1186
1229
# chr, cluster, start, stop in a tuple
1187
1230
cluster = (cols [0 ], cols [3 ], cols [1 ], cols [2 ])
1188
1231
slicing .append (cluster )
1189
1232
Offset = {}
1190
1233
# Get each cluster + 15 Kb in each direction to make sure you can see the context of the cluster
1191
- with open (os .path .join (ResultsFolder , organism_name + '.gbk' ), 'rU ' ) as gbk :
1234
+ with open (os .path .join (ResultsFolder , organism_name + '.gbk' ), 'r ' ) as gbk :
1192
1235
SeqRecords = SeqIO .parse (gbk , 'genbank' )
1193
1236
for record in SeqRecords :
1194
1237
for f in record .features :
@@ -1221,7 +1264,7 @@ def __init__(self, prog):
1221
1264
output .write ("#%s\n " % base )
1222
1265
output .write (
1223
1266
"#GeneID\t Chromosome:start-stop\t Strand\t ClusterPred\t Backbone Enzyme\t Backbone Domains\t Product\t smCOGs\t EggNog\t InterPro\t PFAM\t GO terms\t Notes\t MIBiG Blast\t Protein Seq\t DNA Seq\n " )
1224
- with open (file , 'rU ' ) as input :
1267
+ with open (file , 'r ' ) as input :
1225
1268
SeqRecords = SeqIO .parse (input , 'genbank' )
1226
1269
for record in SeqRecords :
1227
1270
for f in record .features :
@@ -1333,7 +1376,7 @@ def __init__(self, prog):
1333
1376
finallist .append (file )
1334
1377
with open (ClustersOut , 'w' ) as output :
1335
1378
for file in natsorted (finallist ):
1336
- with open (file , 'rU ' ) as input :
1379
+ with open (file , 'r ' ) as input :
1337
1380
output .write (input .read ())
1338
1381
output .write ('\n \n ' )
1339
1382
0 commit comments