1515except ImportError :
1616 from urllib import urlopen , urlretrieve
1717import argparse
18- version = """ stringMLST v0.4.2 (updated : September 13, 2017 ) """
18+ version = """ stringMLST v0.5 (updated : January 5, 2018 ) """
1919"""
2020
2121stringMLST free for academic users and requires permission before any commercial
@@ -515,6 +515,7 @@ def goodReads(read, k, non_overlapping_window):
515515 s = str (line [n :n + k ])
516516 if s in kmerDict [k ]:
517517 for probLoc in kmerDict [k ][s ]:
518+ # print(probLoc)
518519 if probLoc not in alleleCount :
519520 alleleCount [probLoc ] = {}
520521 a = kmerDict [k ][s ][probLoc ]
@@ -558,6 +559,8 @@ def getMaxCount(alleleCount, fileName):
558559 maxSupport = {}
559560 secondSupport = {}
560561 finalProfileCount = {}
562+ for locus in alleleNames :
563+ finalProfileCount [locus ] = {}
561564 num = ''
562565 for loc in alleleCount :
563566 n = 0
@@ -567,32 +570,42 @@ def getMaxCount(alleleCount, fileName):
567570 m = n
568571 n = alleleCount [loc ][num ]
569572 if n - m < fuzzy :
570- alleleCount [loc ][num ] = str (alleleCount [loc ][num ])+ '*'
571- max_n [loc ] = str (n )+ '*'
573+ try :
574+ alleleCount [loc ][num ]
575+ except :
576+ pass
577+ else :
578+ alleleCount [loc ][num ] = str (alleleCount [loc ][num ])+ '*'
579+ max_n [loc ] = str (n )+ '*'
572580 else :
573581 max_n [loc ] = n
574582 secondMax [loc ] = m
575583 for loc in alleleCount :
576- maxSupport [loc ] = {}
577- secondSupport [loc ] = {}
578- num_max = []
579- num_max2 = []
580- compare = float (re .sub ("\*$" , "" , str (max_n [loc ])))
581- for num in alleleCount [loc ]:
582- if alleleCount [loc ][num ] == compare :
583- if "\*" in str (max_n [loc ]):
584- insert = num + '*'
585- num_max .append (insert )
586- else :
587- num_max .append (num )
588- maxSupport [loc ][num ] = max_n [loc ]
589- if alleleCount [loc ][num ] == secondMax [loc ]:
590- num_max2 .append (num )
591- secondSupport [loc ][num ] = secondMax [loc ]
592584 try :
593- finalProfileCount [loc ] = num_max [0 ]
594- except LookupError :
595- finalProfileCount [loc ] = '0'
585+ max_n [loc ]
586+ except :
587+ pass
588+ else :
589+ maxSupport [loc ] = {}
590+ secondSupport [loc ] = {}
591+ num_max = []
592+ num_max2 = []
593+ compare = float (re .sub ("\*$" , "" , str (max_n [loc ])))
594+ for num in alleleCount [loc ]:
595+ if alleleCount [loc ][num ] == compare :
596+ if "\*" in str (max_n [loc ]):
597+ insert = num + '*'
598+ num_max .append (insert )
599+ else :
600+ num_max .append (num )
601+ maxSupport [loc ][num ] = max_n [loc ]
602+ if alleleCount [loc ][num ] == secondMax [loc ]:
603+ num_max2 .append (num )
604+ secondSupport [loc ][num ] = secondMax [loc ]
605+ try :
606+ finalProfileCount [loc ] = num_max [0 ]
607+ except LookupError :
608+ finalProfileCount [loc ] = '0'
596609 msgs = "Max Support :" + fileName + " : " + str (maxSupport )
597610 logging .debug (msgs )
598611 msgs = "Second Max Support :" + fileName + " : " + str (secondSupport )
@@ -630,7 +643,8 @@ def findST(finalProfile, stProfile):
630643 exit (0 )
631644 transformedFinalProfile = {}
632645 for gene , allele in finalProfile .items ():
633- allele = re .sub ("\*" , "" , allele )
646+ if allele :
647+ allele = re .sub ("\*" , "" , allele )
634648 transformedFinalProfile [finalGeneToSTGene [gene ]] = allele
635649 # Check to see if the dictionary is empty, if so then means no allele were found at all
636650 if bool (transformedFinalProfile ) is False :
@@ -698,10 +712,13 @@ def loadKmerDict(dbFile):
698712 kmerTableDict = {}
699713 with open (dbFile , 'r' ) as kmerTableFile :
700714 lines = kmerTableFile .readlines ()
715+ global alleleNames
716+ alleleNames = set ()
701717 for line in lines :
702718 array = line .rstrip ().rsplit ('\t ' )
703719 kmerTableDict [array [0 ]] = {}
704720 kmerTableDict [array [0 ]][array [1 ]] = array [2 ][1 :- 1 ].rsplit (',' )
721+ alleleNames .add (array [1 ])
705722 return kmerTableDict
706723#############################################################
707724# Function : loadWeightDict
@@ -820,7 +837,10 @@ def printResults(results, output_filename, overwrite, timeDisp):
820837 for l in sorted (results [s ]):
821838 if l == 'ST' or l == 't' :
822839 continue
823- sample += '\t ' + results [s ][l ]
840+ if results [s ][l ]:
841+ sample += '\t ' + results [s ][l ]
842+ else :
843+ sample += '\t NA'
824844 if timeDisp is True :
825845 sample += '\t ' + str (results [s ]['ST' ]) + '\t %.2f ' % results [s ]['t' ]
826846 else :
0 commit comments