Skip to content

Commit 08ccdb6

Browse files
committed
Fix fast eulertigs bug
1 parent bcb8698 commit 08ccdb6

File tree

5 files changed

+66
-21
lines changed

5 files changed

+66
-21
lines changed

crates/assembler/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -427,20 +427,24 @@ pub fn run_assembler<
427427
let (reorganized_reads, _final_unitigs_bucket) = if step
428428
<= AssemblerStartingStep::ReorganizeReads
429429
{
430-
if generate_maximal_unitigs_links || compute_tigs_mode.needs_matchtigs_library() {
430+
if generate_maximal_unitigs_links || compute_tigs_mode.needs_temporary_tigs() {
431431
reorganize_reads::<MergingHash, AssemblerColorsManager, StructSeqBinaryWriter<_, _>>(
432+
k,
432433
sequences,
433434
reads_map,
434435
temp_dir.as_path(),
435436
compressed_temp_unitigs_file.as_ref().unwrap(),
437+
circular_temp_unitigs_file.as_ref(),
436438
buckets_count,
437439
)
438440
} else {
439441
reorganize_reads::<MergingHash, AssemblerColorsManager, OutputMode::Backend<_, _>>(
442+
k,
440443
sequences,
441444
reads_map,
442445
temp_dir.as_path(),
443446
&final_unitigs_file,
447+
None,
444448
buckets_count,
445449
)
446450
}

crates/assembler/src/pipeline/compute_matchtigs.rs

+8
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ pub enum MatchtigMode {
317317

318318
pub trait MatchtigHelperTrait {
319319
fn needs_simplitigs(&self) -> bool;
320+
fn needs_temporary_tigs(&self) -> bool;
320321
fn needs_matchtigs_library(&self) -> bool;
321322
fn get_matchtigs_mode(&self) -> Self;
322323
}
@@ -326,6 +327,13 @@ impl MatchtigHelperTrait for Option<MatchtigMode> {
326327
*self == Some(MatchtigMode::FastSimpliTigs) || *self == Some(MatchtigMode::FastEulerTigs)
327328
}
328329

330+
fn needs_temporary_tigs(&self) -> bool {
331+
*self == Some(MatchtigMode::EulerTigs)
332+
|| *self == Some(MatchtigMode::GreedyTigs)
333+
|| *self == Some(MatchtigMode::PathTigs)
334+
|| *self == Some(MatchtigMode::FastEulerTigs)
335+
}
336+
329337
fn needs_matchtigs_library(&self) -> bool {
330338
*self == Some(MatchtigMode::EulerTigs)
331339
|| *self == Some(MatchtigMode::GreedyTigs)

crates/assembler/src/pipeline/eulertigs.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ impl CircularUnitig {
162162
last.rc ^= self.rc;
163163

164164
let end_offset = if !last.rc && !write_full { 0 } else { k - 1 };
165+
let start_offset = if last.rc && !write_full { k - 1 } else { 0 };
165166

166167
let last_part_entry = unitigs.get(&last.orig_index).unwrap();
167168

@@ -176,7 +177,8 @@ impl CircularUnitig {
176177
};
177178
}
178179

179-
let last_part_slice = last.start_pos..last.start_pos + last.length + end_offset;
180+
let last_part_slice =
181+
last.start_pos + start_offset..last.start_pos + last.length + end_offset;
180182
let last_part = last_part_entry
181183
.0
182184
.as_reference(unitigs_kmers)

crates/assembler/src/pipeline/reorganize_reads.rs

+49-18
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,12 @@ pub fn reorganize_reads<
156156
CX: ColorsManager,
157157
BK: StructuredSequenceBackend<PartialUnitigsColorStructure<CX>, ()>,
158158
>(
159+
k: usize,
159160
mut reads: Vec<SingleBucket>,
160161
mut mapping_files: Vec<SingleBucket>,
161162
temp_path: &Path,
162163
out_file: &StructuredSequenceWriter<PartialUnitigsColorStructure<CX>, (), BK>,
164+
circular_out_file: Option<&StructuredSequenceWriter<PartialUnitigsColorStructure<CX>, (), BK>>,
163165
buckets_count: usize,
164166
) -> (Vec<SingleBucket>, PathBuf) {
165167
PHASES_TIMES_MONITOR
@@ -199,10 +201,13 @@ pub fn reorganize_reads<
199201
NoMultiplicity,
200202
>,
201203
>::new(&buckets, buffers.take());
202-
203204
let mut tmp_lonely_unitigs_buffer =
204205
FastaWriterConcurrentBuffer::new(out_file, DEFAULT_OUTPUT_BUFFER_SIZE, true);
205206

207+
let mut tmp_circular_unitigs_buffer = circular_out_file.map(|out_file| {
208+
FastaWriterConcurrentBuffer::new(out_file, DEFAULT_OUTPUT_BUFFER_SIZE, true)
209+
});
210+
206211
let mut mappings = Vec::new();
207212

208213
assert_eq!(read_file.index, mapping_file.index);
@@ -268,23 +273,49 @@ pub fn reorganize_reads<
268273
);
269274
map_index += 1;
270275
} else {
271-
// No mapping, write unitig to file
272-
273-
tmp_lonely_unitigs_buffer.add_read(
274-
seq,
275-
None,
276-
extra_data.colors,
277-
color_buffer,
278-
(),
279-
&(),
280-
#[cfg(feature = "support_kmer_counters")]
281-
SequenceAbundance {
282-
first: extra_data.counters.first,
283-
sum: extra_data.counters.sum,
284-
last: extra_data.counters.last,
285-
},
286-
);
287-
276+
// Loop to allow skipping code parts with break
277+
'skip_writing: loop {
278+
let first_kmer_node = &seq[0..k - 1];
279+
let last_kmer_node = &seq[seq.len() - k + 1..];
280+
if let Some(circular_unitigs_buffer) = &mut tmp_circular_unitigs_buffer {
281+
// Check if unitig is circular
282+
if first_kmer_node == last_kmer_node {
283+
circular_unitigs_buffer.add_read(
284+
seq,
285+
None,
286+
extra_data.colors,
287+
color_buffer,
288+
(),
289+
&(),
290+
#[cfg(feature = "support_kmer_counters")]
291+
SequenceAbundance {
292+
first: extra_data.counters.first,
293+
sum: extra_data.counters.sum,
294+
last: extra_data.counters.last,
295+
},
296+
);
297+
break 'skip_writing;
298+
}
299+
}
300+
301+
// No mapping, write unitig to file
302+
tmp_lonely_unitigs_buffer.add_read(
303+
seq,
304+
None,
305+
extra_data.colors,
306+
color_buffer,
307+
(),
308+
&(),
309+
#[cfg(feature = "support_kmer_counters")]
310+
SequenceAbundance {
311+
first: extra_data.counters.first,
312+
sum: extra_data.counters.sum,
313+
last: extra_data.counters.last,
314+
},
315+
);
316+
317+
break;
318+
}
288319
// write_fasta_entry::<MH, CX, _>(
289320
// &mut fasta_temp_buffer,
290321
// &mut tmp_lonely_unitigs_buffer,

libs-crates/instrumenter-rs

0 commit comments

Comments
 (0)