|
80 | 80 | " <tr>\n",
|
81 | 81 | " <th>1</th>\n",
|
82 | 82 | " <td>19</td>\n",
|
83 |
| - " <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz</td>\n", |
84 | 83 | " <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz</td>\n",
|
85 |
| - " <td>19-MBovis_S32_L001-4_R1_001</td>\n", |
| 84 | + " <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz</td>\n", |
86 | 85 | " <td>19-MBovis_S32_L001-4_R2_001</td>\n",
|
| 86 | + " <td>19-MBovis_S32_L001-4_R1_001</td>\n", |
87 | 87 | " </tr>\n",
|
88 | 88 | " <tr>\n",
|
89 | 89 | " <th>2</th>\n",
|
90 | 90 | " <td>26</td>\n",
|
91 |
| - " <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz</td>\n", |
92 | 91 | " <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz</td>\n",
|
93 |
| - " <td>26-MBovis_S43_L001-4_R1_001</td>\n", |
| 92 | + " <td>/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz</td>\n", |
94 | 93 | " <td>26-MBovis_S43_L001-4_R2_001</td>\n",
|
| 94 | + " <td>26-MBovis_S43_L001-4_R1_001</td>\n", |
95 | 95 | " </tr>\n",
|
96 | 96 | " </tbody>\n",
|
97 | 97 | "</table>\n",
|
|
100 | 100 | "text/plain": [
|
101 | 101 | " sample filename1 \\\n",
|
102 | 102 | "0 17 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R1_001.fastq.gz \n",
|
103 |
| - "1 19 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz \n", |
104 |
| - "2 26 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz \n", |
| 103 | + "1 19 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz \n", |
| 104 | + "2 26 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz \n", |
105 | 105 | "\n",
|
106 | 106 | " filename2 name1 name2 \n",
|
107 | 107 | "0 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R2_001.fastq.gz 17-MBovis_S21_L001-4_R1_001 17-MBovis_S21_L001-4_R2_001 \n",
|
108 |
| - "1 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz 19-MBovis_S32_L001-4_R1_001 19-MBovis_S32_L001-4_R2_001 \n", |
109 |
| - "2 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz 26-MBovis_S43_L001-4_R1_001 26-MBovis_S43_L001-4_R2_001 " |
| 108 | + "1 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz 19-MBovis_S32_L001-4_R2_001 19-MBovis_S32_L001-4_R1_001 \n", |
| 109 | + "2 /storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz 26-MBovis_S43_L001-4_R2_001 26-MBovis_S43_L001-4_R1_001 " |
110 | 110 | ]
|
111 | 111 | },
|
112 | 112 | "execution_count": 2,
|
|
212 | 212 | },
|
213 | 213 | {
|
214 | 214 | "cell_type": "code",
|
215 |
| - "execution_count": 5, |
| 215 | + "execution_count": 35, |
216 | 216 | "metadata": {},
|
217 |
| - "outputs": [ |
218 |
| - { |
219 |
| - "name": "stdout", |
220 |
| - "output_type": "stream", |
221 |
| - "text": [ |
222 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
223 |
| - "1100101000000110111111111011101111111100000\n" |
224 |
| - ] |
225 |
| - } |
226 |
| - ], |
| 217 | + "outputs": [], |
227 | 218 | "source": [
|
228 |
| - "def get_spoligotype(filename, reads_limit=500000, threshold=0):\n", |
| 219 | + "def get_spoligotype(filename, reads_limit=3000000, threshold=0, threads=4):\n", |
229 | 220 | " \"\"\"Get spoligotype from reads. Returns a binary string.\"\"\"\n",
|
230 | 221 | " \n",
|
231 | 222 | " ref = '../snipgenie/data/dr_spacers.fa'\n",
|
|
234 | 225 | " #make blast db from reads\n",
|
235 | 226 | " tools.make_blast_database('temp.fa')\n",
|
236 | 227 | " #blast spacers to db\n",
|
237 |
| - " bl = tools.blast_fasta('temp.fa', ref, evalue=.1, \n", |
238 |
| - " maxseqs=reads_limit, show_cmd=True) \n", |
239 |
| - " bl=bl[(bl.qcovs>95) & (bl.mismatch<3)]\n", |
| 228 | + " bl = tools.blast_fasta('temp.fa', ref, evalue=.1, threads=threads,\n", |
| 229 | + " maxseqs=reads_limit, show_cmd=False) \n", |
| 230 | + " bl=bl[(bl.qcovs>80) & (bl.mismatch<2)]\n", |
| 231 | + " #print (bl)\n", |
240 | 232 | " x = bl.groupby('qseqid').agg({'pident':np.size}).reset_index()\n",
|
241 | 233 | " #print (x)\n",
|
242 | 234 | " x = x[x.pident>=threshold] \n",
|
|
252 | 244 | " print (s)\n",
|
253 | 245 | " return s\n",
|
254 | 246 | "\n",
|
255 |
| - "s = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_09-07-18/48-MBovis_S17_L001-4_R1_001.fastq.gz')\n", |
256 |
| - "get_sb_number(s)" |
| 247 | + "#s = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_09-07-18/48-MBovis_S17_L001-4_R1_001.fastq.gz')\n", |
| 248 | + "#get_sb_number(s)" |
257 | 249 | ]
|
258 | 250 | },
|
259 | 251 | {
|
|
295 | 287 | },
|
296 | 288 | {
|
297 | 289 | "cell_type": "code",
|
298 |
| - "execution_count": 64, |
| 290 | + "execution_count": null, |
299 | 291 | "metadata": {},
|
300 |
| - "outputs": [ |
301 |
| - { |
302 |
| - "name": "stdout", |
303 |
| - "output_type": "stream", |
304 |
| - "text": [ |
305 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
306 |
| - "1100101000000110111011111011111011111100000\n", |
307 |
| - "0 None\n", |
308 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
309 |
| - "1100101000000110111011111011111011111100000\n", |
310 |
| - "1 None\n", |
311 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
312 |
| - "1000001000000010111010101001111011110100000\n", |
313 |
| - "2 None\n", |
314 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
315 |
| - "1000001000000010101000101000010000010000000\n", |
316 |
| - "3 None\n" |
317 |
| - ] |
318 |
| - } |
319 |
| - ], |
| 292 | + "outputs": [], |
320 | 293 | "source": [
|
321 | 294 | "for t in range(0,4):\n",
|
322 |
| - " b = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_09-07-18/36-MBovis_S38_L001-4_R1_001.fastq.gz', threshold=t)\n", |
| 295 | + " b = get_spoligotype('/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz', threshold=t)\n", |
323 | 296 | " print (t,get_sb_number(b))"
|
324 | 297 | ]
|
325 | 298 | },
|
326 | 299 | {
|
327 | 300 | "cell_type": "code",
|
328 |
| - "execution_count": 6, |
| 301 | + "execution_count": null, |
329 | 302 | "metadata": {},
|
330 |
| - "outputs": [ |
331 |
| - { |
332 |
| - "name": "stdout", |
333 |
| - "output_type": "stream", |
334 |
| - "text": [ |
335 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
336 |
| - "1100101000000010011111111101111111011100000\n", |
337 |
| - "/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R1_001.fastq.gz None\n", |
338 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
339 |
| - "1100001000001100011010111111111111000100000\n", |
340 |
| - "/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R1_001.fastq.gz None\n", |
341 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
342 |
| - "1100001000000110110110111100110111111000000\n", |
343 |
| - "/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R1_001.fastq.gz None\n", |
344 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
345 |
| - "1100001000001110101101111111101110011000000\n", |
346 |
| - "/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/17-MBovis_S21_L001-4_R2_001.fastq.gz None\n", |
347 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
348 |
| - "1100001000000100101100111111111100011100000\n", |
349 |
| - "/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/19-MBovis_S32_L001-4_R2_001.fastq.gz None\n", |
350 |
| - "blastn -out ../snipgenie/data/dr_spacers_blast.txt -outfmt \"6 qseqid sseqid qseq sseq pident qcovs length mismatch gapopen qstart qend sstart send evalue bitscore stitle\" -query ../snipgenie/data/dr_spacers.fa -db temp.fa -evalue 0.1 -max_target_seqs 500000 -num_threads 4 -task blastn\n", |
351 |
| - "1100101000001110011000111111000111110000000\n", |
352 |
| - "/storage/btbgenie/mbovis_ireland/Wicklow/Fastqs_07-01-18/26-MBovis_S43_L001-4_R2_001.fastq.gz None\n" |
353 |
| - ] |
354 |
| - } |
355 |
| - ], |
| 303 | + "outputs": [], |
356 | 304 | "source": [
|
357 | 305 | "res=[]\n",
|
358 | 306 | "for f in files:\n",
|
359 |
| - " s = get_spoligotype(f, threshold=0)\n", |
| 307 | + " s = get_spoligotype(f, threads=12)\n", |
360 | 308 | " sb = get_sb_number(s)\n",
|
361 | 309 | " print (f, sb)\n",
|
362 | 310 | " res.append([f,sb]) "
|
|
386 | 334 | "name": "python",
|
387 | 335 | "nbconvert_exporter": "python",
|
388 | 336 | "pygments_lexer": "ipython3",
|
389 |
| - "version": "3.9.5" |
| 337 | + "version": "3.9.7" |
390 | 338 | }
|
391 | 339 | },
|
392 | 340 | "nbformat": 4,
|
|
0 commit comments