adding metadata into irma_summary

mandysulli · mandysulli · commit aa71572d62cd · 2025-09-04T11:18:14.000-04:00
diff --git a/src/processes/prepare_mira_reports.rs b/src/processes/prepare_mira_reports.rs
@@ -44,9 +44,9 @@ pub struct ReportsArgs {
     /// The file path to the working directory
     workdir_path: PathBuf,
 
-    #[arg(short = 'c', long, default_value = "default config")]
+    #[arg(short = 'c', long, default_value = "default-config")]
     /// the irma config used for IRMA
-    irma_config: Option<String>,
+    irma_config: String,
 }
 
 #[derive(Serialize, Deserialize, Clone, Debug)]
@@ -59,8 +59,11 @@ pub struct SamplesheetI {
 
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct SamplesheetO {
+    #[serde(rename = "Barcode #")]
     pub barcode: String,
+    #[serde(rename = "Sample ID")]
     pub sample_id: String,
+    #[serde(rename = "Sample Type")]
     pub sample_type: Option<String>,
 }
 
@@ -130,7 +133,8 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
         Samplesheet::Illumina(ref sheet) => collect_sample_id(sheet),
         Samplesheet::ONT(ref sheet) => collect_sample_id(sheet),
     };
-    println!("samples: {sample_list:?}");
+
+    //println!("samples: {sample_list:?}");
     // Get the negative controls from the samplesheet
     let neg_control_list = match samplesheet {
         Samplesheet::Illumina(ref sheet) => collect_negatives(sheet),
@@ -148,6 +152,7 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
     let vtype_data = create_vtype_data(&read_data);
     let allele_data = allele_data_collection(&args.irma_path)?;
     let indel_data = indels_data_collection(&args.irma_path)?;
+
     let seq_data = amended_consensus_data_collection(&args.irma_path, &args.virus);
     let ref_lengths = match get_reference_lens(&args.irma_path) {
         Ok(data) => data,
@@ -226,6 +231,16 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
     } else if args.virus.to_lowercase() == "sc2-wgs" || args.virus.to_lowercase() == "sc2-spike" {
         subtype_data = extract_subtype_sc2(&dais_vars_data)?;
     }
+
+    //Gather Analysis Metadata for irma_summary
+    let analysis_metadata = collect_analysis_metadata(
+        &args.workdir_path,
+        &args.platform,
+        &args.virus,
+        &args.irma_config,
+        &args.runid,
+    )?;
+
     //Build prelim irma summary "dataframe"
     //More will be added and analyzed before final irma summary created
     let irma_summary = create_prelim_irma_summary_df(
@@ -235,6 +250,7 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
         &allele_data,
         &indel_data,
         &subtype_data,
+        analysis_metadata,
     )?;
 
     //todo:remove before end
diff --git a/src/utils/data_processing.rs b/src/utils/data_processing.rs
@@ -2,6 +2,9 @@ use serde::{self, Deserialize, Serialize};
 use std::{
     collections::{HashMap, HashSet},
     error::Error,
+    fs::File,
+    io::{self, BufRead},
+    path::PathBuf,
 };
 
 use crate::processes::prepare_mira_reports::SamplesheetI;
@@ -28,6 +31,14 @@ pub struct Subtype {
     pub subtype: String,
 }
 
+/// Analysis Metadata
+#[derive(Debug)]
+pub struct Metadata {
+    pub module: String,
+    pub runid: String,
+    pub instrument: String,
+}
+
 //Melted Reads df
 #[derive(Debug)]
 pub struct MeltedRecord {
@@ -202,7 +213,6 @@ where
 
     // Skip the first element (header) and iterate over the rest
     for entry in samples.iter() {
-        println!("{}", entry.sample_id());
         sample_list.push(entry.sample_id().clone());
     }
 
@@ -530,7 +540,6 @@ pub fn extract_subtype_sc2(dais_vars: &Vec<DaisVarsData>) -> Result<Vec<Subtype>
     let mut subtype_data: Vec<Subtype> = Vec::new();
 
     for entry in dais_vars {
-        println!("{}", entry.reference_id);
         subtype_data.push(Subtype {
             sample_id: entry.sample_id.clone(),
             subtype: entry.reference_id.clone(),
@@ -790,6 +799,45 @@ pub fn count_minority_indels(data: &Vec<IndelsData>) -> Vec<VariantCountData> {
     result
 }
 
+pub fn collect_analysis_metadata(
+    work_path: &PathBuf,
+    platform: &str,
+    virus: &str,
+    irma_config: &String,
+    input_runid: &str,
+) -> Result<Metadata, Box<dyn Error>> {
+    let mut descript_dict = HashMap::new();
+    let description_path = format!("{}/DESCRIPTION", work_path.display());
+
+    // Open the file for reading
+    let file = File::open(&description_path)?;
+    let reader = io::BufReader::new(file);
+
+    // Read the file line by line
+    for line in reader.lines() {
+        if let Ok(line) = line {
+            let parts: Vec<&str> = line.split(':').collect();
+            if parts.len() == 2 {
+                descript_dict.insert(parts[0].trim().to_string(), parts[1].trim().to_string());
+            }
+        }
+    }
+
+    // Construct the modulestring
+    let version = descript_dict
+        .get("Version")
+        .ok_or("Version key not found in DESCRIPTION file")?;
+
+    let modulestring = format!("MIRA-NF-v{version} {platform}-{virus} {irma_config}");
+
+    let analysis_metadata = Metadata {
+        module: modulestring,
+        runid: input_runid.to_owned(),
+        instrument: platform.to_owned(),
+    };
+    Ok(analysis_metadata)
+}
+
 /// Combine all df to create IRMA summary
 pub fn create_prelim_irma_summary_df(
     sample_list: &Vec<String>,
@@ -798,6 +846,7 @@ pub fn create_prelim_irma_summary_df(
     alleles_df: &Vec<AllelesData>,
     indels_df: &Vec<IndelsData>,
     subtype_df: &Vec<Subtype>,
+    metadata: Metadata,
 ) -> Result<Vec<IRMASummary>, Box<dyn Error>> {
     let mut irma_summary: Vec<IRMASummary> = Vec::new();
     let allele_count_data = count_minority_alleles(alleles_df);
@@ -823,9 +872,9 @@ pub fn create_prelim_irma_summary_df(
                     spike_median_coverage: None,
                     pass_fail_reason: None,
                     subtype: None,
-                    mira_module: None,
-                    runid: None,
-                    instrument: None,
+                    mira_module: Some(metadata.module.to_owned()),
+                    runid: Some(metadata.runid.to_owned()),
+                    instrument: Some(metadata.instrument.to_owned()),
                 });
             }
         }