@@ -25,7 +25,9 @@ import (
2525 "io"
2626 "os"
2727 "path/filepath"
28+ "regexp"
2829 "runtime"
30+ "strconv"
2931 "strings"
3032 "sync"
3133
@@ -113,6 +115,8 @@ If you want to cut a sequence into multiple segments.
113115
114116 extension := getFlagString (cmd , "extension" )
115117
118+ reRead := regexp .MustCompile (`\{read\}` )
119+
116120 prefixBySize := getFlagString (cmd , "by-size-prefix" )
117121 prefixByPart := getFlagString (cmd , "by-part-prefix" )
118122 prefixByLength := getFlagString (cmd , "by-length-prefix" )
@@ -134,6 +138,7 @@ If you want to cut a sequence into multiple segments.
134138 }
135139
136140 var source string
141+ var pairedEnd bool
137142 if read1 == "" {
138143 if read2 == "" {
139144 // single end from file or stdin
@@ -165,6 +170,17 @@ If you want to cut a sequence into multiple segments.
165170 }
166171 files = []string {read1 , read2 }
167172 source = read1 + " and " + read2
173+
174+ pairedEnd = true
175+ if prefixBySizeSet && ! reRead .MatchString (prefixBySize ) {
176+ checkError (fmt .Errorf (`--by-size-prefix should contains the placeholder "{read}" when paired-end files are given, such as "sample_{read}.fq.gz` ))
177+ }
178+ if prefixByPartSet && ! reRead .MatchString (prefixByPart ) {
179+ checkError (fmt .Errorf (`--by-part-prefix should contains the placeholder "{read}" when paired-end files are given, such as "sample_{read}.fq.gz` ))
180+ }
181+ if prefixByLengthSet && ! reRead .MatchString (prefixByLength ) {
182+ checkError (fmt .Errorf (`--by-size-prefix should contains the placeholder "{read}" when paired-end files are given, such as "sample_{read}.fq.gz` ))
183+ }
168184 }
169185 }
170186
@@ -180,7 +196,7 @@ If you want to cut a sequence into multiple segments.
180196 }
181197
182198 var wg sync.WaitGroup
183- for _ , file := range files {
199+ for i , file := range files {
184200 isstdin := isStdin (file )
185201 var fileName , fileExt , fileExt2 string
186202 if isstdin {
@@ -226,7 +242,7 @@ If you want to cut a sequence into multiple segments.
226242 }
227243
228244 wg .Add (1 )
229- go func (file string ) {
245+ go func (file string , pairedEnd bool , r int ) {
230246 defer wg .Done ()
231247
232248 renameFileExt := true
@@ -299,6 +315,9 @@ If you want to cut a sequence into multiple segments.
299315
300316 if prefixBySizeSet {
301317 prefix = prefixBySize
318+ if pairedEnd {
319+ prefix = reRead .ReplaceAllString (prefix , strconv .Itoa (r ))
320+ }
302321 } else {
303322 prefix = fmt .Sprintf ("%s.part_" , filepath .Base (fileName ))
304323 }
@@ -315,6 +334,9 @@ If you want to cut a sequence into multiple segments.
315334 var outfh2 * xopen.Writer
316335 if prefixByLengthSet {
317336 prefix = prefixByLength
337+ if pairedEnd {
338+ prefix = reRead .ReplaceAllString (prefix , strconv .Itoa (r ))
339+ }
318340 } else {
319341 prefix = fmt .Sprintf ("%s.part_" , filepath .Base (fileName ))
320342 }
@@ -342,6 +364,9 @@ If you want to cut a sequence into multiple segments.
342364
343365 if prefixByLengthSet {
344366 prefix = prefixByLength
367+ if pairedEnd {
368+ prefix = reRead .ReplaceAllString (prefix , strconv .Itoa (r ))
369+ }
345370 } else {
346371 prefix = fmt .Sprintf ("%s.part_" , filepath .Base (fileName ))
347372 }
@@ -365,6 +390,9 @@ If you want to cut a sequence into multiple segments.
365390 if outfhPre == nil {
366391 if prefixBySizeSet {
367392 prefix = prefixBySize
393+ if pairedEnd {
394+ prefix = reRead .ReplaceAllString (prefix , strconv .Itoa (r ))
395+ }
368396 } else {
369397 prefix = fmt .Sprintf ("%s.part_" , filepath .Base (fileName ))
370398 }
@@ -390,6 +418,9 @@ If you want to cut a sequence into multiple segments.
390418 var outfh2 * xopen.Writer
391419 if prefixByPartSet {
392420 prefix = prefixByPart
421+ if pairedEnd {
422+ prefix = reRead .ReplaceAllString (prefix , strconv .Itoa (r ))
423+ }
393424 } else {
394425 prefix = fmt .Sprintf ("%s.part_" , filepath .Base (fileName ))
395426 }
@@ -437,7 +468,7 @@ If you want to cut a sequence into multiple segments.
437468 }
438469 }
439470
440- }(file )
471+ }(file , pairedEnd , i + 1 )
441472 }
442473
443474 wg .Wait ()
@@ -455,9 +486,9 @@ func init() {
455486 split2Cmd .Flags ().StringP ("out-dir" , "O" , "" , "output directory (default value is $infile.split)" )
456487 split2Cmd .Flags ().BoolP ("force" , "f" , false , "overwrite output directory" )
457488
458- split2Cmd .Flags ().StringP ("by-size-prefix" , "" , "" , " file prefix for --by-size" )
459- split2Cmd .Flags ().StringP ("by-part-prefix" , "" , "" , " file prefix for --by-part" )
460- split2Cmd .Flags ().StringP ("by-length-prefix" , "" , "" , " file prefix for --by-length" )
489+ split2Cmd .Flags ().StringP ("by-size-prefix" , "" , "" , ` file prefix for --by-size. The placeholder "{read}" is needed for paired-end files.` )
490+ split2Cmd .Flags ().StringP ("by-part-prefix" , "" , "" , ` file prefix for --by-part. The placeholder "{read}" is needed for paired-end files.` )
491+ split2Cmd .Flags ().StringP ("by-length-prefix" , "" , "" , ` file prefix for --by-length. The placeholder "{read}" is needed for paired-end files.` )
461492
462493 split2Cmd .Flags ().StringP ("extension" , "e" , "" , `set output file extension, e.g., ".gz", ".xz", or ".zst"` )
463494}
0 commit comments