Skip to content

Commit c8a1356

Browse files
committed
Hammer support for tell-seq format
1 parent f73f552 commit c8a1356

File tree

5 files changed

+38
-7
lines changed

5 files changed

+38
-7
lines changed

assembler/src/common/io/reads/ireadstream.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ ireadstream &operator>>(Read &r) {
100100
return *this;
101101
}
102102
r.setName(seq_->name.s);
103-
r.setComment(seq_->comment.s);
103+
if (seq_->comment.l != 0) {
104+
r.setComment(seq_->comment.s);
105+
}
104106
if (seq_->qual.s) {
105107
r.setQuality(seq_->qual.s, offset_);
106108
}

assembler/src/projects/hammer/hammer_tools.cpp

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,23 +143,35 @@ CorrectionStats CorrectReadFile(const KMerData &data,
143143
}
144144

145145
CorrectionStats CorrectPairedReadFiles(const KMerData &data,
146-
const std::filesystem::path &fnamel, const std::filesystem::path &fnamer,
147-
ofstream * ofbadl, ofstream * ofcorl, ofstream * ofbadr, ofstream * ofcorr,
148-
ofstream * ofunp) {
146+
const std::filesystem::path &fnamel,
147+
const std::filesystem::path &fnamer,
148+
const std::filesystem::path &fnamea,
149+
ofstream *ofbadl,
150+
ofstream *ofcorl,
151+
ofstream *ofbadr,
152+
ofstream *ofcorr,
153+
ofstream *ofunp,
154+
ofstream *ofcora,
155+
bool has_aux) {
149156
int qvoffset = cfg::get().input_qvoffset;
150157
int trim_quality = cfg::get().input_trim_quality;
151158

152159
unsigned correct_nthreads = min(cfg::get().correct_nthreads, cfg::get().general_max_nthreads);
153160
size_t read_buffer_size = correct_nthreads * cfg::get().correct_readbuffer;
154161
std::vector<Read> l(read_buffer_size);
155162
std::vector<Read> r(read_buffer_size);
163+
std::vector<Read> a(read_buffer_size);
156164
std::vector<bool> left_res(read_buffer_size, false);
157165
std::vector<bool> right_res(read_buffer_size, false);
158166

159167
unsigned buffer_no = 0;
160168

161169
ireadstream irsl(fnamel, qvoffset), irsr(fnamer, qvoffset);
162170
VERIFY(irsl.is_open()); VERIFY(irsr.is_open());
171+
ireadstream irsa(fnamea, qvoffset);
172+
if (has_aux) {
173+
VERIFY(irsa.is_open());
174+
}
163175
CorrectionStats stats;
164176

165177
while (!irsl.eof() && !irsr.eof()) {
@@ -168,6 +180,9 @@ CorrectionStats CorrectPairedReadFiles(const KMerData &data,
168180
irsl >> l[buf_size]; irsr >> r[buf_size];
169181
l[buf_size].trimNsAndBadQuality(trim_quality);
170182
r[buf_size].trimNsAndBadQuality(trim_quality);
183+
if (has_aux) {
184+
irsa >> a[buf_size];
185+
}
171186
}
172187
INFO("Prepared batch " << buffer_no << " of " << buf_size << " reads.");
173188

@@ -181,6 +196,9 @@ CorrectionStats CorrectPairedReadFiles(const KMerData &data,
181196
if (left_res[i] && right_res[i]) {
182197
l[i].print(*ofcorl, qvoffset);
183198
r[i].print(*ofcorr, qvoffset);
199+
if (has_aux) {
200+
a[i].print(*ofcora, qvoffset);
201+
}
184202
} else {
185203
l[i].print(*(left_res[i] ? ofunp : ofbadl), qvoffset);
186204
r[i].print(*(right_res[i] ? ofunp : ofbadr), qvoffset);
@@ -234,6 +252,7 @@ size_t CorrectAllReads() {
234252
outlib.clear();
235253

236254
size_t iread = 0;
255+
auto aux_iter = lib.aux_begin();
237256
for (auto I = lib.paired_begin(), E = lib.paired_end(); I != E; ++I, ++iread) {
238257
INFO("Correcting pair of reads: " << I->first << " and " << I->second);
239258
std::filesystem::path usuffix = std::to_string(ilib) + "_" +
@@ -253,11 +272,22 @@ size_t CorrectAllReads() {
253272
std::ios::out | std::ios::ate);
254273
std::ofstream ofunp (outcoru);
255274

275+
std::filesystem::path aux_path = getLargestPrefix(I->first, I->second) + "_mock.fastq";
276+
if (lib.has_aux()) {
277+
aux_path = *aux_iter;
278+
++aux_iter;
279+
}
280+
std::filesystem::path outcora = getReadsFilename(cfg::get().output_dir, aux_path, Globals::iteration_no, usuffix);
281+
std::ofstream ofcora(outcora);
282+
256283
stats += CorrectPairedReadFiles(*Globals::kmer_data,
257-
I->first, I->second,
258-
&ofbadl, &ofcorl, &ofbadr, &ofcorr, &ofunp);
284+
I->first, I->second, aux_path,
285+
&ofbadl, &ofcorl, &ofbadr, &ofcorr, &ofunp, &ofcora, lib.has_aux());
259286
outlib.push_back_paired(outcorl, outcorr);
260287
outlib.push_back_single(outcoru);
288+
if (lib.has_aux()) {
289+
outlib.push_back_aux(outcora);
290+
}
261291
}
262292

263293
for (auto I = lib.merged_begin(), E = lib.merged_end(); I != E; ++I, ++iread) {
@@ -269,7 +299,6 @@ size_t CorrectAllReads() {
269299
INFO("Correcting single reads: " << *I);
270300
outlib.push_back_single(CorrectSingleReadSet(ilib, iread, *I, stats));
271301
}
272-
273302
outdataset.push_back(outlib);
274303
ilib += 1;
275304
}
Binary file not shown.
0 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)