Skip to content

Commit b6e54c2

Browse files
author
Nicholas Youngblut
committed
added strandedness for htsim frag simulation
1 parent 75cc9c5 commit b6e54c2

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

MGSIM/SimHtReads.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import numpy as np
2222
import pandas as pd
2323
import pyfastx
24-
from Bio import SeqIO
24+
from Bio import SeqIO,Seq
2525
#from pyfaidx import Fasta
2626
from scipy.stats import truncnorm
2727
## application
@@ -399,7 +399,10 @@ def read_fasta(fasta_file):
399399
"""
400400
seqs = {h:seq for h,seq in pyfastx.Fasta(fasta_file, build_index=False)}
401401
return seqs
402-
402+
403+
def revcomp(seq):
404+
return Seq.Seq(seq).reverse_complement()
405+
403406
def parse_frags(refs, barcode, outFr, outFt):
404407
"""Parsing fragments from a genome and writing them to a file.
405408
Giving each simulated fragment a UUID.
@@ -438,9 +441,14 @@ def parse_frags(refs, barcode, outFr, outFt):
438441
frag_max_end = 1 if frag_max_end < 1 else frag_max_end
439442
frag_start = np.random.randint(0, frag_max_end)
440443
frag_end = frag_start + frag_size
444+
## randomly selecting strand; a reverse-strand fragment is reverse-complemented and its start/end are swapped
445+
if np.random.randint(0,2,1)[0] == 1:
446+
seq = revcomp(f[contig_id][frag_start:frag_end])
447+
frag_start,frag_end = frag_end,frag_start
448+
else:
449+
seq = f[contig_id][frag_start:frag_end]
441450
## writing sequence
442-
outFr.write('>{}\n{}\n'.format(frag_uuid,
443-
f[contig_id][frag_start:frag_end]))
451+
outFr.write('>{}\n{}\n'.format(frag_uuid,seq))
444452
contigs.append(contig_id)
445453
# writing tsv of positions
446454
outFt.write('\t'.join([str(barcode),

0 commit comments

Comments
 (0)