sc2 subtype handling

mandysulli · mandysulli · commit 29ed12a037b4 · 2025-09-03T14:51:15.000-04:00
diff --git a/src/processes/prepare_mira_reports.rs b/src/processes/prepare_mira_reports.rs
@@ -148,7 +148,7 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
     let vtype_data = create_vtype_data(&read_data);
     let allele_data = allele_data_collection(&args.irma_path)?;
     let indel_data = indels_data_collection(&args.irma_path)?;
-    //let seq_data = amended_consensus_data_collection(&args.irma_path, virus);
+    let seq_data = amended_consensus_data_collection(&args.irma_path, &args.virus);
     let ref_lengths = match get_reference_lens(&args.irma_path) {
         Ok(data) => data,
         Err(e) => {
@@ -172,7 +172,7 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
         dais_ref_data = dais_ref_seq_data_collection(&args.workdir_path, "sc2")?;
     }
     //TODO: remove print statements at end
-    //println!("{vtype_data:?}");
+    println!("{vtype_data:?}");
     //println!("{qc_config:?}")
     //println!("cov data: {coverage_data:?}");
     //println!("Allele data: {allele_data:?}");
@@ -218,8 +218,17 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
             process_position_coverage_data(&coverage_data, &ref_lengths, 21563, 25384);
     }
 
-    let subtype_data = extract_subtype_flu(&dais_vars_data)?;
-    let irma_summary = create_irma_summary(
+    //Gather subtype information
+    //todo: add rsv handling
+    let mut subtype_data: Vec<Subtype> = Vec::new();
+    if args.virus.to_lowercase() == "flu" {
+        subtype_data = extract_subtype_flu(&dais_vars_data)?;
+    } else if args.virus.to_lowercase() == "sc2-wgs" || args.virus.to_lowercase() == "sc2-spike" {
+        subtype_data = extract_subtype_sc2(&dais_vars_data)?;
+    }
+    //Build the preliminary IRMA summary "dataframe"
+    //More data will be added and analyzed before the final IRMA summary is created
+    let irma_summary = create_prelim_irma_summary_df(
         &sample_list,
         &melted_reads_df,
         &calculated_cov_df,
@@ -228,11 +237,12 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
         &subtype_data,
     )?;
 
+    //TODO: remove the debug print statements below at end
     //println!("{dais_vars_data:?}");
     //println!("{melted_reads_df:?}");
     //println!("{calculated_cov_df:?}");
     //println!("{calculated_position_cov_df:?}");
-    println!("{irma_summary:?}");
+    //println!("{irma_summary:?}");
 
     /////////////////////////////////////////////////////////////////////////////
     /////////////// Write the structs to JSON files and CSV files ///////////////
diff --git a/src/utils/data_processing.rs b/src/utils/data_processing.rs
@@ -47,7 +47,7 @@ pub struct ProcessedCoverage {
     pub percent_reference_covered: Option<f64>,
 }
 
-/// vtype struct
+/// IRMA summary struct
 #[derive(Serialize, Debug, Clone)]
 pub struct IRMASummary {
     pub sample_id: Option<String>,
@@ -525,6 +525,20 @@ pub fn extract_subtype_flu(dais_vars: &Vec<DaisVarsData>) -> Result<Vec<Subtype>
     Ok(subtype_data)
 }
 
+/// Build SARS-CoV-2 subtype records from DAIS variant entries.
+///
+/// Returns one `Subtype` per entry of `dais_vars`, in input order, copying the
+/// entry's `sample_id` and using its `reference_id` as the `subtype` value.
+/// Entries are not deduplicated. No path here fails, so this is always `Ok`.
+pub fn extract_subtype_sc2(dais_vars: &Vec<DaisVarsData>) -> Result<Vec<Subtype>, Box<dyn Error>> {
+    let to_subtype = |entry: &DaisVarsData| Subtype {
+        sample_id: entry.sample_id.clone(),
+        subtype: entry.reference_id.clone(),
+    };
+
+    Ok(dais_vars.iter().map(to_subtype).collect())
+}
+
 //////////////// Functions used to create irma_summary ///////////////
 /// Flip orientation of the reads structs
 pub fn melt_reads_data(records: &Vec<ReadsData>) -> Vec<MeltedRecord> {
@@ -776,7 +790,7 @@ pub fn count_minority_indels(data: &Vec<IndelsData>) -> Vec<VariantCountData> {
 }
 
 /// Combine all df to create IRMA summary
-pub fn create_irma_summary(
+pub fn create_prelim_irma_summary_df(
     sample_list: &Vec<String>,
     reads_count_df: &Vec<MeltedRecord>,
     calc_cov_df: &Vec<ProcessedCoverage>,