11package  main
22
33import  (
4+ 	"encoding/json" 
45	"flag" 
56	"fmt" 
67	"log" 
8+ 	"os" 
79	"runtime" 
10+ 	"sort" 
811	"strings" 
912	"sync" 
10- 	"encoding/json" 
11- 	"os" 
1213
1314	"radigest/internal/collector" 
1415	"radigest/internal/digest" 
1516	"radigest/internal/enzyme" 
1617	"radigest/internal/fasta" 
1718)
1819
// Build metadata printed by the -version flag.
//
// NOTE(review): these are variables rather than constants, presumably so a
// release build can override them at link time (e.g. -ldflags "-X ...");
// confirm against the build scripts. The values below are what an ordinary
// `go build` reports.
var (
	version = "dev"     // release version string
	commit  = "none"    // source revision the binary was built from
	date    = "unknown" // build date
)
25+ 
26+ func  produceChunks (faCh  <- chan  fasta.Record , jobs  chan <-  fasta.Record , chunkSz  int ) {
27+ 	defer  close (jobs )
28+ 	for  rec  :=  range  faCh  {
29+ 		seq  :=  rec .Seq 
30+ 		n  :=  len (seq )
31+ 		for  from  :=  0 ; from  <  n ; from  +=  chunkSz  {
32+ 			to  :=  from  +  chunkSz 
33+ 			if  to  >  n  {
34+ 				to  =  n 
35+ 			}
36+ 			jobs  <-  fasta.Record {
37+ 				ID :  rec .ID ,
38+ 				Seq : seq [from :to ],
39+ 			}
40+ 		}
41+ 	}
42+ }
43+ 
1944func  main () {
2045	// ---- CLI flags ---------------------------------------------------------- 
2146	fastaPath  :=  flag .String ("fasta" , "" , "reference FASTA file (required)" )
22- 	enzFlag    :=  flag .String ("enzymes" , "" , "comma-separated enzyme names (≥2, first two form the AB pair)" )
23- 	minLen     :=  flag .Int ("min" , 0 , "minimum fragment length" )
24- 	maxLen     :=  flag .Int ("max" , 1 << 30 , "maximum fragment length" )
25- 	gffPath    :=  flag .String ("gff" ,  "fragments.gff3" , "output GFF3" )
26- 	jsonPath   :=  flag .String ("json" , "" , "write run summary JSON" )
27- 	chunkSz    :=  flag .Int ("chunk" , 8 << 20 , "chunk size (bp) sent to each worker" )
28- 	threads    :=  flag .Int ("threads" , runtime .NumCPU (), "worker goroutines" )
47+ 	enzFlag  :=  flag .String ("enzymes" , "" , "comma-separated enzyme names (≥2, first two form the AB pair)" )
48+ 	minLen  :=  flag .Int ("min" , 0 , "minimum fragment length (bp)" )
49+ 	maxLen  :=  flag .Int ("max" , 1 << 30 , "maximum fragment length (bp)" )
50+ 	gffPath  :=  flag .String ("gff" , "fragments.gff3" , "output GFF3 file" )
51+ 	jsonPath  :=  flag .String ("json" , "" , "optional: write run summary JSON here" )
52+ 	chunkSz  :=  flag .Int ("chunk" , 8 << 20 , "chunk size (bp) sent to each worker" )
53+ 	threads  :=  flag .Int ("threads" , runtime .NumCPU (), "number of worker goroutines" )
54+ 	showVer  :=  flag .Bool ("version" , false , "print version and exit" )
55+ 	listEns  :=  flag .Bool ("list-enzymes" , false , "list available enzyme names and exit" )
56+ 
57+ 	flag .Usage  =  func () {
58+ 		b  :=  & strings.Builder {}
59+ 		fmt .Fprintln (b , "radigest — in-silico double-digest and GFF3 fragment export" )
60+ 		fmt .Fprintln (b )
61+ 		fmt .Fprintln (b , "Usage:" )
62+ 		fmt .Fprintln (b , "  radigest -fasta <ref.fa> -enzymes <E1,E2[,E3...]> [options]" )
63+ 		fmt .Fprintln (b )
64+ 		fmt .Fprintln (b , "Required flags:" )
65+ 		fmt .Fprintln (b , "  -fasta, -enzymes" )
66+ 		fmt .Fprintln (b )
67+ 		fmt .Fprintln (b , "Options:" )
68+ 		flag .CommandLine .SetOutput (b )
69+ 		flag .PrintDefaults ()
70+ 		flag .CommandLine .SetOutput (os .Stderr )
71+ 		fmt .Fprintln (b )
72+ 		fmt .Fprintln (b , "Examples:" )
73+ 		fmt .Fprintln (b , "  # Basic EcoRI/MseI digest to GFF3" )
74+ 		fmt .Fprintln (b , "  radigest -fasta ref.fa -enzymes EcoRI,MseI -gff out.gff3" )
75+ 		fmt .Fprintln (b , "  # Restrict fragment size and emit JSON summary" )
76+ 		fmt .Fprintln (b , "  radigest -fasta ref.fa -enzymes EcoRI,MseI -min 100 -max 800 -json run.json" )
77+ 		fmt .Fprintln (b , "  # See supported enzymes" )
78+ 		fmt .Fprintln (b , "  radigest -list-enzymes" )
79+ 		fmt .Fprintln (os .Stderr , b .String ())
80+ 	}
81+ 
2982	flag .Parse ()
3083
84+ 	if  * showVer  {
85+ 		fmt .Printf ("radigest %s (commit %s, %s)\n " , version , commit , date )
86+ 		return 
87+ 	}
88+ 	if  * listEns  {
89+ 		names  :=  make ([]string , 0 , len (enzyme .DB ))
90+ 		for  name  :=  range  enzyme .DB  {
91+ 			names  =  append (names , name )
92+ 		}
93+ 		sort .Strings (names )
94+ 		for  _ , n  :=  range  names  {
95+ 			fmt .Println (n )
96+ 		}
97+ 		return 
98+ 	}
3199	if  * fastaPath  ==  ""  ||  * enzFlag  ==  ""  {
32- 		log .Fatal ("flags --fasta and --enzymes are required" )
100+ 		fmt .Fprintln (os .Stderr , "error: flags -fasta and -enzymes are required\n " )
101+ 		flag .Usage ()
102+ 		os .Exit (2 )
33103	}
34104
35105	// ---- build enzyme slice ------------------------------------------------- 
@@ -68,53 +138,44 @@ func main() {
68138	}
69139
70140	// ---- stream FASTA into jobs -------------------------------------------- 
71- 	faCh  :=  make (chan  fasta.Record ,  2 )
141+ 	faCh  :=  make (chan  fasta.Record )
72142	go  func () {
73143		if  err  :=  fasta .Stream (* fastaPath , faCh ); err  !=  nil  {
74144			log .Fatalf ("fasta stream: %v" , err )
75145		}
146+ 		// NOTE: assume fasta.Stream closes faCh when it returns. 
76147	}()
77148
78- 	for  rec  :=  range  faCh  {
79- 		// split sequence into windows of *chunkSz bases 
80- 		for  from  :=  0 ; from  <  len (rec .Seq ); from  +=  * chunkSz  {
81- 			to  :=  from  +  * chunkSz 
82- 			if  to  >  len (rec .Seq ) { to  =  len (rec .Seq ) }
83- 			jobs  <-  fasta.Record {
84- 				ID :  rec .ID ,
85- 				Seq : rec .Seq [from :to ],
86- 			}
87- 		}
88- 	}
89- 	
90- 	for  rec  :=  range  faCh  {
91- 		jobs  <-  rec 
92- 	}
93- 	close (jobs ) // no more work 
94- 	wg .Wait ()   // workers done 
95- 	close (cIn )  // tell collector to finish 
149+ 	// single consumer / producer path 
150+ 	go  produceChunks (faCh , jobs , * chunkSz )
151+ 
152+ 	// wait for workers, finish collector 
153+ 	wg .Wait ()  // jobs closed by produceChunks 
154+ 	close (cIn ) // tell collector to finish 
96155
97156	// ---- summary ------------------------------------------------------------ 
98157	stats  :=  <- done 
99158	fmt .Printf ("Fragments kept: %d\n Bases covered: %d\n Chromosomes: %d\n " ,
100- 	     stats .TotalFragments , stats .TotalBases , len (stats .PerChr ))	 
159+ 		 stats .TotalFragments , stats .TotalBases , len (stats .PerChr ))
101160	if  * jsonPath  !=  ""  {
102- 	    out  :=  struct  {
103- 	        Enzymes      []string                `json:"enzymes"` 
104- 	        MinLength    int                     `json:"min_length"` 
105- 	        MaxLength    int                     `json:"max_length"` 
106- 	        collector.Stats 
107- 	    }{
108- 	        Enzymes :   strings .Split (* enzFlag , "," ),
109- 	        MinLength : * minLen ,
110- 	        MaxLength : * maxLen ,
111- 	        Stats :     stats ,
112- 	    }
113- 	    f , err  :=  os .Create (* jsonPath )
114- 	    if  err  !=  nil  { log .Fatalf ("write json: %v" , err ) }
115- 	    if  err  :=  json .NewEncoder (f ).Encode (out ); err  !=  nil  {
116- 	        log .Fatalf ("encode json: %v" , err )
117- 	    }
118- 	    f .Close ()
161+ 		out  :=  struct  {
162+ 			Enzymes    []string  `json:"enzymes"` 
163+ 			MinLength  int       `json:"min_length"` 
164+ 			MaxLength  int       `json:"max_length"` 
165+ 			collector.Stats 
166+ 		}{
167+ 			Enzymes :   strings .Split (* enzFlag , "," ),
168+ 			MinLength : * minLen ,
169+ 			MaxLength : * maxLen ,
170+ 			Stats :     stats ,
171+ 		}
172+ 		f , err  :=  os .Create (* jsonPath )
173+ 		if  err  !=  nil  {
174+ 			log .Fatalf ("write json: %v" , err )
175+ 		}
176+ 		if  err  :=  json .NewEncoder (f ).Encode (out ); err  !=  nil  {
177+ 			log .Fatalf ("encode json: %v" , err )
178+ 		}
179+ 		_  =  f .Close ()
119180	}
120- }
181+ }
0 commit comments