|
21 | 21 | import numpy as np
|
22 | 22 | import pandas as pd
|
23 | 23 | import pyfastx
|
24 |
| -from Bio import SeqIO |
| 24 | +from Bio import SeqIO,Seq |
25 | 25 | #from pyfaidx import Fasta
|
26 | 26 | from scipy.stats import truncnorm
|
27 | 27 | ## application
|
@@ -399,7 +399,10 @@ def read_fasta(fasta_file):
|
399 | 399 | """
|
400 | 400 | seqs = {h:seq for h,seq in pyfastx.Fasta(fasta_file, build_index=False)}
|
401 | 401 | return seqs
|
402 |
| - |
| 402 | + |
| 403 | +def revcomp(seq): |
| 404 | + return Seq.Seq(seq).reverse_complement() |
| 405 | + |
403 | 406 | def parse_frags(refs, barcode, outFr, outFt):
|
404 | 407 | """Parsing fragment from a genome and writing them to a file.
|
405 | 408 | Giving each simulated fragment a UUID.
|
@@ -438,9 +441,14 @@ def parse_frags(refs, barcode, outFr, outFt):
|
438 | 441 | frag_max_end = 1 if frag_max_end < 1 else frag_max_end
|
439 | 442 | frag_start = np.random.randint(0, frag_max_end)
|
440 | 443 | frag_end = frag_start + frag_size
|
| 444 | + ## randomly selecting strand |
| 445 | + if np.random.randint(0,2,1)[0] == 1: |
| 446 | + seq = revcomp(f[contig_id][frag_start:frag_end]) |
| 447 | + frag_start,frag_end = frag_end,frag_start |
| 448 | + else: |
| 449 | + seq = f[contig_id][frag_start:frag_end] |
441 | 450 | ## writing sequence
|
442 |
| - outFr.write('>{}\n{}\n'.format(frag_uuid, |
443 |
| - f[contig_id][frag_start:frag_end])) |
| 451 | + outFr.write('>{}\n{}\n'.format(frag_uuid,seq)) |
444 | 452 | contigs.append(contig_id)
|
445 | 453 | # writing tsv of positions
|
446 | 454 | outFt.write('\t'.join([str(barcode),
|
|
0 commit comments