From b12edccca949545a44a0de8fb8eaa21adf733ce3 Mon Sep 17 00:00:00 2001 From: Martin Mokrejs Date: Thu, 24 May 2018 08:58:34 +0200 Subject: [PATCH] Support description in FASTA/FASTQ input when creating perfect reads to_perfect_reads.py does not support input that has description text after an identifier in the input file. It blindly appends the numbers to the identifier, but if the line contains description words separated from the identifier by spaces, it appends the values after the description. This commit ensures the values are appended to the FASTA/FASTQ identifier even if it is followed by description text. --- pyfastaq/runners/to_perfect_reads.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pyfastaq/runners/to_perfect_reads.py b/pyfastaq/runners/to_perfect_reads.py index eb12d34..eec1922 100644 --- a/pyfastaq/runners/to_perfect_reads.py +++ b/pyfastaq/runners/to_perfect_reads.py @@ -52,7 +52,12 @@ def run(description): read_start1 = int(middle_pos - ceil(0.5 * isize)) read_start2 = read_start1 + isize - options.readlength - readname = ':'.join([ref.id, str(pair_counter), str(read_start1+1), str(read_start2+1)]) + if ' ' in ref.id: + readname = ':'.join([ref.id.split(' ')[0], str(pair_counter), str(read_start1+1), str(read_start2+1)]) + description = ' '.join(ref.id.split(' ')[1:]) + else: + readname = ':'.join([ref.id, str(pair_counter), str(read_start1+1), str(read_start2+1)]) + description = '' fragment = (middle_pos, isize) if fragment in used_fragments: @@ -61,8 +66,12 @@ def run(description): else: used_fragments[fragment] = 1 - read1 = sequences.Fastq(readname + '/1', ref.seq[read_start1:read_start1 + options.readlength], 'I' * options.readlength) - read2 = sequences.Fastq(readname + '/2', ref.seq[read_start2:read_start2 + options.readlength], 'I' * options.readlength) + if description: + read1 = sequences.Fastq(readname + '/1' + ' ' + description, ref.seq[read_start1:read_start1 + 
options.readlength], 'I' * options.readlength) + read2 = sequences.Fastq(readname + '/2' + ' ' + description, ref.seq[read_start2:read_start2 + options.readlength], 'I' * options.readlength) + else: + read1 = sequences.Fastq(readname + '/1', ref.seq[read_start1:read_start1 + options.readlength], 'I' * options.readlength) + read2 = sequences.Fastq(readname + '/2', ref.seq[read_start2:read_start2 + options.readlength], 'I' * options.readlength) if options.no_n and ('n' in read1.seq or 'N' in read1.seq or 'n' in read2.seq or 'N' in read2.seq):