@@ -2,7 +2,10 @@ use crate::utils::data_ingest;
22use either:: Either ;
33use glob:: glob;
44use serde:: { self , Deserialize , Serialize , de:: DeserializeOwned } ;
5- use std:: { collections:: HashSet , error:: Error } ;
5+ use std:: {
6+ collections:: { HashMap , HashSet } ,
7+ error:: Error ,
8+ } ;
69
710use super :: data_ingest:: DaisSeqData ;
811
@@ -26,6 +29,46 @@ pub fn append_with_comma(base: &mut String, new_entry: &str) {
2629 }
2730}
2831
/// Derives segment names, segment labels, and a label-to-color lookup from reference names.
///
/// # Arguments
///
/// * `reference_names` - Reference names, possibly containing duplicates. When a name
///   contains `_`, the text between the first and second underscore is used as its
///   segment label; otherwise the whole name is the label.
///
/// # Returns
///
/// A tuple `(segments, segset, segcolor)`:
///
/// * `segments` - the input names, sorted and deduplicated.
/// * `segset` - the distinct segment labels, sorted.
/// * `segcolor` - maps each label in `segset` to a hex color from a fixed ten-color
///   palette, assigned in sorted label order. Labels beyond the tenth receive no entry.
pub fn return_seg_data(
    reference_names: Vec<String>,
) -> (Vec<String>, Vec<String>, HashMap<String, &'static str>) {
    const COLOR_PALETTE: [&str; 10] = [
        "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f",
        "#bcbd22", "#17becf",
    ];

    let mut segments = reference_names;
    segments.sort();
    segments.dedup();

    let mut segset: Vec<String> = segments
        .iter()
        .map(|segment| {
            segment
                .split('_')
                .nth(1)
                .unwrap_or(segment.as_str())
                .to_string()
        })
        .collect();
    // Sort + dedup rather than round-tripping through a HashSet: hash iteration order is
    // unspecified, which would make both `segset` and the color assignment vary per run.
    segset.sort();
    segset.dedup();

    // `zip` stops at the shorter side, so labels past the palette length get no color.
    let segcolor: HashMap<String, &'static str> = segset
        .iter()
        .cloned()
        .zip(COLOR_PALETTE.iter().copied())
        .collect();

    (segments, segset, segcolor)
}
70+
71+ // Function to calculate the aa variants - this is specifically for flu right now
2972pub fn compute_dais_variants (
3073 ref_seqs_data : & Vec < DaisSeqData > ,
3174 sample_seqs_data : & Vec < DaisSeqData > ,
0 commit comments