@@ -1482,7 +1482,7 @@ def sortBedproper(input, output):
1482
1482
cols = line .split ('\t ' )
1483
1483
data .append (cols )
1484
1484
# we can now sort
1485
- sort_data = sorted (data , key = lambda x : (x [0 ], int (x [1 ])))
1485
+ sort_data = natsorted (data , key = lambda x : (x [0 ], int (x [1 ])))
1486
1486
# now we can write back out to file
1487
1487
with open (output , 'w' ) as outfile :
1488
1488
for x in sort_data :
@@ -1516,7 +1516,7 @@ def sortGFFproper(input, output):
1516
1516
order_map [x ] = idx
1517
1517
idx += 1
1518
1518
# we can now sort
1519
- sort_data = sorted (data , key = lambda x : (x [0 ], int (x [3 ]), order_map [x [2 ]]))
1519
+ sort_data = natsorted (data , key = lambda x : (x [0 ], int (x [3 ]), order_map [x [2 ]]))
1520
1520
# now we can write back out to file
1521
1521
with open (output , 'w' ) as outfile :
1522
1522
for y in comments :
@@ -5710,16 +5710,29 @@ def SortRenameHeaders(input, output):
5710
5710
def _bedtools_sort(src, dest):
    """Write the output of ``bedtools sort -i src`` to the file *dest*.

    Raises:
        subprocess.CalledProcessError: if bedtools exits with a non-zero
            status, so a failed sort is never mistaken for an empty
            annotation file by the steps that consume *dest*.
    """
    with open(dest, 'w') as outfile:
        subprocess.check_call(['bedtools', 'sort', '-i', src], stdout=outfile)


def validate_tRNA(input, genes, gaps, output):
    """Keep only the records of *input* that overlap neither genes nor gaps.

    Args:
        input: path to the annotation file to filter (a tRNA GFF3, going by
            the function name and the ``.sorted.gff3`` suffix used here).
        genes: path to the gene annotation file to test overlaps against.
        gaps: path to a gaps annotation file, or a falsy value to skip it.
        output: path that receives the filtered, re-sorted result.

    Side effects:
        Creates ``<abspath>.sorted.gff3`` next to each of *input*, *genes*
        and (if given) *gaps*; these files are left in place. A temporary
        ``<output>.tmp`` is created and removed again.

    Raises:
        subprocess.CalledProcessError: if one of the ``bedtools sort`` runs
            fails.
    """
    # `bedtools intersect -sorted` expects its inputs in the order produced
    # by `bedtools sort`, so the inputs are sorted with bedtools itself
    # rather than with sortGFFproper.
    sortedInput = os.path.abspath(input) + '.sorted.gff3'
    _bedtools_sort(input, sortedInput)
    sortedGenes = os.path.abspath(genes) + '.sorted.gff3'
    _bedtools_sort(genes, sortedGenes)

    # -v: report only the records of -a that have no overlap in any -b file
    cmd = ['bedtools', 'intersect', '-sorted', '-v',
           '-a', sortedInput, '-b', sortedGenes]
    if gaps:
        sortedGaps = os.path.abspath(gaps) + '.sorted.gff3'
        _bedtools_sort(gaps, sortedGaps)
        cmd.append(sortedGaps)

    tmpOut = os.path.abspath(output) + '.tmp'
    try:
        # NOTE(review): runSubprocess2 and log are defined elsewhere in this
        # module; presumably the last argument is the file that receives the
        # command's stdout -- confirm against its definition.
        runSubprocess2(cmd, '.', log, tmpOut)
        # now sort properly: the final output uses the project's own GFF
        # ordering, not the bedtools one
        sortGFFproper(tmpOut, output)
    finally:
        # Clean up the intermediate file on failure too, without masking the
        # original error if it was never created.
        if os.path.exists(tmpOut):
            os.remove(tmpOut)
5723
5736
5724
5737
5725
5738
# via https://stackoverflow.com/questions/2154249/identify-groups-of-continuous-numbers-in-a-list
@@ -6592,8 +6605,14 @@ def RemoveBadModels(proteins, gff, length, repeats, BlastResults, tmpdir, method
6592
6605
repeat_temp = os .path .join (tmpdir , 'genome.repeats.to.remove.gff' )
6593
6606
gffSorted = os .path .abspath (gff )+ '.sorted.gff'
6594
6607
bedSorted = os .path .abspath (repeats )+ '.sorted.bed'
6595
- sortBedproper (repeats , bedSorted )
6596
- sortGFFproper (gff , gffSorted )
6608
+ #sortBedproper(repeats, bedSorted)
6609
+ cmd1 = ['bedtools' , 'sort' , '-i' , repeats ]
6610
+ with open (bedSorted , 'w' ) as bedout :
6611
+ subprocess .call (cmd1 , stdout = bedout )
6612
+ #sortGFFproper(gff, gffSorted)
6613
+ cmd2 = ['bedtools' , 'sort' , '-i' , gff ]
6614
+ with open (gffSorted , 'w' ) as gffout :
6615
+ subprocess .call (cmd2 , stdout = gffout )
6597
6616
cmd = ['bedtools' , 'intersect' , '-sorted' , '-f' , '0.9' , '-a' , gffSorted , '-b' , bedSorted ]
6598
6617
runSubprocess2 (cmd , '.' , log , repeat_temp )
6599
6618
# parse the results from bedtools and add to remove list
0 commit comments