Skip to content

Commit 3815cc1

Browse files
committed
feat: split large FASTA records into chunks for parallel processing
Adds a --chunk flag to control the size (in bp) of the sequence chunks passed to each worker, improving load distribution and memory efficiency.
1 parent f9b5ad0 commit 3815cc1

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

cmd/radigest/main.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ func main() {
2424
maxLen := flag.Int("max", 1<<30, "maximum fragment length")
2525
gffPath := flag.String("gff", "fragments.gff3", "output GFF3")
2626
jsonPath := flag.String("json", "", "write run summary JSON")
27+
chunkSz := flag.Int("chunk", 8<<20, "chunk size (bp) sent to each worker")
2728
threads := flag.Int("threads", runtime.NumCPU(), "worker goroutines")
2829
flag.Parse()
2930

@@ -74,6 +75,18 @@ func main() {
7475
}
7576
}()
7677

78+
for rec := range faCh {
79+
// split sequence into windows of *chunkSz bases
80+
for from := 0; from < len(rec.Seq); from += *chunkSz {
81+
to := from + *chunkSz
82+
if to > len(rec.Seq) { to = len(rec.Seq) }
83+
jobs <- fasta.Record{
84+
ID: rec.ID,
85+
Seq: rec.Seq[from:to],
86+
}
87+
}
88+
}
89+
7790
for rec := range faCh {
7891
jobs <- rec
7992
}

radigest

13.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)