@@ -93,6 +93,7 @@ impl ReadSet {
93
93
const SPACE : u8 = b' ' ;
94
94
const COLON : u8 = b':' ;
95
95
const PLUS : u8 = b'+' ;
96
+ const READ_NUMBERS : & [ u8 ] = b"12345678" ;
96
97
97
98
/// Produces an iterator over references to the template segments stored in this ``ReadSet``.
98
99
fn template_segments ( & self ) -> SegmentIter {
@@ -213,7 +214,12 @@ impl ReadSet {
213
214
None => {
214
215
// If no pre-existing comment, assume the read is a passing filter, non-control
215
216
// read and generate a comment for it (sample barcode is added below).
216
- write ! ( writer, "{}:N:0:" , read_num) ?;
217
+ if read_num < Self :: READ_NUMBERS . len ( ) {
218
+ writer. write_all ( & [ Self :: READ_NUMBERS [ read_num - 1 ] ] ) ?;
219
+ write ! ( writer, ":N:0:" ) ?;
220
+ } else {
221
+ write ! ( writer, "{}:N:0:" , read_num) ?;
222
+ }
217
223
}
218
224
Some ( chars) => {
219
225
// Else check it's a 4-part name... fix the read number at the front and
@@ -239,7 +245,11 @@ impl ReadSet {
239
245
& chars[ first_colon_idx + 1 ..chars. len ( ) ]
240
246
} ;
241
247
242
- write ! ( writer, "{}:" , read_num) ?;
248
+ if read_num < Self :: READ_NUMBERS . len ( ) {
249
+ writer. write_all ( & [ Self :: READ_NUMBERS [ read_num - 1 ] , b':' ] ) ?;
250
+ } else {
251
+ write ! ( writer, "{}:" , read_num) ?;
252
+ }
243
253
writer. write_all ( remainder) ?;
244
254
245
255
if * remainder. last ( ) . unwrap ( ) != Self :: COLON {
@@ -518,13 +528,13 @@ impl DemuxMetric {
518
528
/// default). Similarly, the sample barcode bases from the given read will be placed in the `BC`
519
529
/// tag.
520
530
///
521
- /// Metadata about the samples should be given as a headered metadata TSV file with at least the
531
+ /// Metadata about the samples should be given as a headered metadata TSV file with at least the
522
532
/// following two columns present:
523
- ///
524
- /// 1. `sample_id` - the id of the sample or library.
533
+ ///
534
+ /// 1. `sample_id` - the id of the sample or library.
525
535
/// 2. `barcode` - the expected barcode sequence associated with the `sample_id`.
526
- ///
527
- /// For reads containing multiple barcodes (such as dual-indexed reads), all barcodes should be
536
+ ///
537
+ /// For reads containing multiple barcodes (such as dual-indexed reads), all barcodes should be
528
538
/// concatenated together in the order they are read and stored in the `barcode` field.
529
539
///
530
540
/// The read structures will be used to extract the observed sample barcode, template bases, and
@@ -536,6 +546,7 @@ impl DemuxMetric {
536
546
/// mismatches (see `--max-mismatches`).
537
547
/// 2. The difference between number of mismatches in the best and second best barcodes is greater
538
548
/// than or equal to the minimum mismatch delta (`--min-mismatch-delta`).
549
+ ///
539
550
/// The expected barcode sequence may contain Ns, which are not counted as mismatches regardless
540
551
/// of the observed base (e.g. the expected barcode `AAN` will have zero mismatches relative to
541
552
/// both the observed barcodes `AAA` and `AAN`).
@@ -612,7 +623,7 @@ pub(crate) struct Demux {
612
623
#[ clap( long, short = 'd' , default_value = "2" ) ]
613
624
min_mismatch_delta : usize ,
614
625
615
- /// The number of threads to use. Cannot be less than 3 .
626
+ /// The number of threads to use. Cannot be less than 5 .
616
627
#[ clap( long, short = 't' , default_value = "8" ) ]
617
628
threads : usize ,
618
629
@@ -659,9 +670,12 @@ impl Demux {
659
670
read_structures. iter ( ) . map ( |s| s. segments_by_type ( * output_type) . count ( ) ) . sum ( ) ;
660
671
661
672
for idx in 1 ..=segment_count {
662
- output_type_writers. push ( BufWriter :: new ( File :: create (
663
- output_dir. join ( format ! ( "{}.{}{}.fq.gz" , prefix, file_type_code, idx) ) ,
664
- ) ?) ) ;
673
+ output_type_writers. push ( BufWriter :: with_capacity (
674
+ 65_536usize ,
675
+ File :: create (
676
+ output_dir. join ( format ! ( "{}.{}{}.fq.gz" , prefix, file_type_code, idx) ) ,
677
+ ) ?,
678
+ ) ) ;
665
679
}
666
680
667
681
match output_type {
@@ -741,7 +755,7 @@ impl Demux {
741
755
let mut new_sample_barcode_writers = None ;
742
756
let mut new_molecular_barcode_writers = None ;
743
757
744
- for ( optional_ws, target) in vec ! [
758
+ for ( optional_ws, target) in [
745
759
( template_writers, & mut new_template_writers) ,
746
760
( barcode_writers, & mut new_sample_barcode_writers) ,
747
761
( mol_writers, & mut new_molecular_barcode_writers) ,
@@ -894,7 +908,7 @@ impl Command for Demux {
894
908
) ;
895
909
896
910
let mut fq_iterators = fq_sources
897
- . zip ( self . read_structures . clone ( ) . into_iter ( ) )
911
+ . zip ( self . read_structures . clone ( ) )
898
912
. map ( |( source, read_structure) | {
899
913
ReadSetIterator :: new ( read_structure, source, self . skip_reasons . clone ( ) )
900
914
. read_ahead ( 1000 , 1000 )
@@ -1181,6 +1195,7 @@ mod tests {
1181
1195
skip_reasons : vec ! [ ] ,
1182
1196
} ;
1183
1197
let demux_result = demux_inputs. execute ( ) ;
1198
+ #[ allow( clippy:: permissions_set_readonly_false) ]
1184
1199
permissions. set_readonly ( false ) ;
1185
1200
fs:: set_permissions ( tmp. path ( ) , permissions) . unwrap ( ) ;
1186
1201
demux_result. unwrap ( ) ;
@@ -1875,11 +1890,11 @@ mod tests {
1875
1890
let read_structures =
1876
1891
vec ! [ ReadStructure :: from_str( "+T" ) . unwrap( ) , ReadStructure :: from_str( "7B" ) . unwrap( ) ] ;
1877
1892
1878
- let records = vec ! [
1893
+ let records = [
1879
1894
vec ! [ "AAAAAAA" , & SAMPLE1_BARCODE [ 0 ..7 ] ] , // barcode too short
1880
1895
vec ! [ "CCCCCCC" , SAMPLE1_BARCODE ] , // barcode the correct length
1881
1896
vec ! [ "" , SAMPLE1_BARCODE ] , // template bases too short
1882
- vec![ "G" , SAMPLE1_BARCODE ] , // barcode the correct length
1897
+ vec ! [ "G" , SAMPLE1_BARCODE ] ,
1883
1898
] ;
1884
1899
1885
1900
let input_files = vec ! [
@@ -1911,11 +1926,11 @@ mod tests {
1911
1926
let read_structures =
1912
1927
vec ! [ ReadStructure :: from_str( "+T" ) . unwrap( ) , ReadStructure :: from_str( "7B" ) . unwrap( ) ] ;
1913
1928
1914
- let records = vec ! [
1929
+ let records = [
1915
1930
vec ! [ "AAAAAAA" , & SAMPLE1_BARCODE [ 0 ..7 ] ] , // barcode too short
1916
1931
vec ! [ "CCCCCCC" , SAMPLE1_BARCODE ] , // barcode the correct length
1917
1932
vec ! [ "" , SAMPLE1_BARCODE ] , // template bases too short
1918
- vec![ "G" , SAMPLE1_BARCODE ] , // barcode the correct length
1933
+ vec ! [ "G" , SAMPLE1_BARCODE ] ,
1919
1934
] ;
1920
1935
1921
1936
let input_files = vec ! [
0 commit comments