Skip to content

Commit 16abada

Browse files
committed
fixed spotyping gui
1 parent 34739c8 commit 16abada

File tree

4 files changed

+35
-83
lines changed

4 files changed

+35
-83
lines changed

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ For Linux installs, you require Python 3 and the following packages. These will
3737
* biopython
3838
* pyvcf
3939
* pyfaidx
40-
* pyside2 (only if using GUI)
40+
* pyside2 (GUI only)
41+
* toytree (GUI only)
4142

4243
Other binaries required:
4344

notebooks/spotyping.ipynb

+24-76
Original file line numberDiff line numberDiff line change
@@ -80,18 +80,18 @@
8080
" <tr>\n",
8181
" <th>1</th>\n",
8282
" <td>19</td>\n",
83-
" <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz</td>\n",
8483
" <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz</td>\n",
85-
" <td>19-MBovis_S32_L001-4_R1_001</td>\n",
84+
" <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz</td>\n",
8685
" <td>19-MBovis_S32_L001-4_R2_001</td>\n",
86+
" <td>19-MBovis_S32_L001-4_R1_001</td>\n",
8787
" </tr>\n",
8888
" <tr>\n",
8989
" <th>2</th>\n",
9090
" <td>26</td>\n",
91-
" <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz</td>\n",
9291
" <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz</td>\n",
93-
" <td>26-MBovis_S43_L001-4_R1_001</td>\n",
92+
" <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz</td>\n",
9493
" <td>26-MBovis_S43_L001-4_R2_001</td>\n",
94+
" <td>26-MBovis_S43_L001-4_R1_001</td>\n",
9595
" </tr>\n",
9696
" </tbody>\n",
9797
"</table>\n",
@@ -100,13 +100,13 @@
100100
"text/plain": [
101101
" sample filename1 \\\n",
102102
"0 17 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R1_001.fastq.gz \n",
103-
"1 19 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz \n",
104-
"2 26 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz \n",
103+
"1 19 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz \n",
104+
"2 26 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz \n",
105105
"\n",
106106
" filename2 name1 name2 \n",
107107
"0 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R2_001.fastq.gz 17-MBovis_S21_L001-4_R1_001 17-MBovis_S21_L001-4_R2_001 \n",
108-
"1 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz 19-MBovis_S32_L001-4_R1_001 19-MBovis_S32_L001-4_R2_001 \n",
109-
"2 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz 26-MBovis_S43_L001-4_R1_001 26-MBovis_S43_L001-4_R2_001 "
108+
"1 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz 19-MBovis_S32_L001-4_R2_001 19-MBovis_S32_L001-4_R1_001 \n",
109+
"2 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz 26-MBovis_S43_L001-4_R2_001 26-MBovis_S43_L001-4_R1_001 "
110110
]
111111
},
112112
"execution_count": 2,
@@ -212,20 +212,11 @@
212212
},
213213
{
214214
"cell_type": "code",
215-
"execution_count": 5,
215+
"execution_count": 35,
216216
"metadata": {},
217-
"outputs": [
218-
{
219-
"name": "stdout",
220-
"output_type": "stream",
221-
"text": [
222-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
223-
"1100101000000110111111111011101111111100000\n"
224-
]
225-
}
226-
],
217+
"outputs": [],
227218
"source": [
228-
"def get_spoligotype(filename, reads_limit=500000, threshold=0):\n",
219+
"def get_spoligotype(filename, reads_limit=3000000, threshold=0, threads=4):\n",
229220
" \"\"\"Get spoligotype from reads. Returns a binary string.\"\"\"\n",
230221
" \n",
231222
" ref = '../snipgenie/data/dr_spacers.fa'\n",
@@ -234,9 +225,10 @@
234225
" #make blast db from reads\n",
235226
" tools.make_blast_database('temp.fa')\n",
236227
" #blast spacers to db\n",
237-
" bl = tools.blast_fasta('temp.fa', ref, evalue=.1, \n",
238-
" maxseqs=reads_limit, show_cmd=True) \n",
239-
" bl=bl[(bl.qcovs>95) & (bl.mismatch<3)]\n",
228+
" bl = tools.blast_fasta('temp.fa', ref, evalue=.1, threads=threads,\n",
229+
" maxseqs=reads_limit, show_cmd=False) \n",
230+
" bl=bl[(bl.qcovs>80) & (bl.mismatch<2)]\n",
231+
" #print (bl)\n",
240232
" x = bl.groupby('qseqid').agg({'pident':np.size}).reset_index()\n",
241233
" #print (x)\n",
242234
" x = x[x.pident>=threshold] \n",
@@ -252,8 +244,8 @@
252244
" print (s)\n",
253245
" return s\n",
254246
"\n",
255-
"s = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_09-07-18/48-MBovis_S17_L001-4_R1_001.fastq.gz')\n",
256-
"get_sb_number(s)"
247+
"#s = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_09-07-18/48-MBovis_S17_L001-4_R1_001.fastq.gz')\n",
248+
"#get_sb_number(s)"
257249
]
258250
},
259251
{
@@ -295,68 +287,24 @@
295287
},
296288
{
297289
"cell_type": "code",
298-
"execution_count": 64,
290+
"execution_count": null,
299291
"metadata": {},
300-
"outputs": [
301-
{
302-
"name": "stdout",
303-
"output_type": "stream",
304-
"text": [
305-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
306-
"1100101000000110111011111011111011111100000\n",
307-
"0 None\n",
308-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
309-
"1100101000000110111011111011111011111100000\n",
310-
"1 None\n",
311-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
312-
"1000001000000010111010101001111011110100000\n",
313-
"2 None\n",
314-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
315-
"1000001000000010101000101000010000010000000\n",
316-
"3 None\n"
317-
]
318-
}
319-
],
292+
"outputs": [],
320293
"source": [
321294
"for t in range(0,4):\n",
322-
" b = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_09-07-18/36-MBovis_S38_L001-4_R1_001.fastq.gz', threshold=t)\n",
295+
" b = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz', threshold=t)\n",
323296
" print (t,get_sb_number(b))"
324297
]
325298
},
326299
{
327300
"cell_type": "code",
328-
"execution_count": 6,
301+
"execution_count": null,
329302
"metadata": {},
330-
"outputs": [
331-
{
332-
"name": "stdout",
333-
"output_type": "stream",
334-
"text": [
335-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
336-
"1100101000000010011111111101111111011100000\n",
337-
"/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R1_001.fastq.gz None\n",
338-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
339-
"1100001000001100011010111111111111000100000\n",
340-
"/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz None\n",
341-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
342-
"1100001000000110110110111100110111111000000\n",
343-
"/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz None\n",
344-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
345-
"1100001000001110101101111111101110011000000\n",
346-
"/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R2_001.fastq.gz None\n",
347-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
348-
"1100001000000100101100111111111100011100000\n",
349-
"/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz None\n",
350-
"blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n",
351-
"1100101000001110011000111111000111110000000\n",
352-
"/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz None\n"
353-
]
354-
}
355-
],
303+
"outputs": [],
356304
"source": [
357305
"res=[]\n",
358306
"for f in files:\n",
359-
" s = get_spoligotype(f, threshold=0)\n",
307+
" s = get_spoligotype(f, threads=12)\n",
360308
" sb = get_sb_number(s)\n",
361309
" print (f, sb)\n",
362310
" res.append([f,sb]) "
@@ -386,7 +334,7 @@
386334
"name": "python",
387335
"nbconvert_exporter": "python",
388336
"pygments_lexer": "ipython3",
389-
"version": "3.9.5"
337+
"version": "3.9.7"
390338
}
391339
},
392340
"nbformat": 4,

snipgenie/gui.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,9 @@ def snp_typing(self, progress_callback):
12471247
def spoligotyping(self, progress_callback):
12481248
"""Mbovis spo typing tool"""
12491249

1250+
self.running == True
1251+
self.opts.applyOptions()
1252+
kwds = self.opts.kwds
12501253
df = self.fastq_table.model.df
12511254
data = self.get_selected()
12521255
if data is None or len(data) == 0:
@@ -1255,7 +1258,7 @@ def spoligotyping(self, progress_callback):
12551258
cols = ['sample','spotype','sb']
12561259
for i,r in data.iterrows():
12571260
name = r['sample']
1258-
s = tools.get_spoligotype(r.filename1, reads_limit=500000, threshold=2)
1261+
s = tools.get_spoligotype(r.filename1, threads=kwds['threads'])
12591262
sb = tools.get_sb_number(s)
12601263
print (name, s, sb)
12611264
#set new values in place

snipgenie/tools.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -835,7 +835,7 @@ def gff_bcftools_format(in_file, out_file):
835835
GFF.write([new], out_handle)
836836
return
837837

838-
def get_spoligotype(filename, reads_limit=500000, threshold=2):
838+
def get_spoligotype(filename, reads_limit=3000000, threshold=2, threads=4):
839839
"""Get mtb spoligotype from WGS reads"""
840840

841841
ref = os.path.join(datadir, 'dr_spacers.fa')
@@ -844,9 +844,9 @@ def get_spoligotype(filename, reads_limit=500000, threshold=2):
844844
#make blast db from reads
845845
make_blast_database('temp.fa')
846846
#blast spacers to db
847-
bl = blast_fasta('temp.fa', ref, evalue=0.1,
848-
maxseqs=100000, show_cmd=False)
849-
bl=bl[(bl.qcovs>95) & (bl.mismatch<2)]
847+
bl = blast_fasta('temp.fa', ref, evalue=0.1, threads=threads,
848+
maxseqs=reads_limit, show_cmd=False)
849+
bl=bl[(bl.qcovs>90) & (bl.mismatch<=threshold)]
850850
x = bl.groupby('qseqid').agg({'pident':np.size}).reset_index()
851851
x = x[x.pident>=threshold]
852852
found = list(x.qseqid)
@@ -862,7 +862,7 @@ def get_spoligotype(filename, reads_limit=500000, threshold=2):
862862

863863
def get_sb_number(binary_str):
864864
"""Get SB number from binary pattern usinf database reference"""
865-
865+
866866
df = pd.read_csv(os.path.join(datadir, 'Mbovis.org_db.csv'))
867867
x = df[df['binary'] == binary_str]
868868
if len(x) == 0:

0 commit comments

Comments
 (0)