Skip to content

Commit aa71572

Browse files
committed
adding metadata into irma_summary
1 parent 3008f94 commit aa71572

File tree

2 files changed

+73
-8
lines changed

2 files changed

+73
-8
lines changed

src/processes/prepare_mira_reports.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ pub struct ReportsArgs {
4444
/// The file path to the working directory
4545
workdir_path: PathBuf,
4646

47-
#[arg(short = 'c', long, default_value = "default config")]
47+
#[arg(short = 'c', long, default_value = "default-config")]
4848
/// the irma config used for IRMA
49-
irma_config: Option<String>,
49+
irma_config: String,
5050
}
5151

5252
#[derive(Serialize, Deserialize, Clone, Debug)]
@@ -59,8 +59,11 @@ pub struct SamplesheetI {
5959

6060
#[derive(Serialize, Deserialize, Clone, Debug)]
6161
pub struct SamplesheetO {
62+
#[serde(rename = "Barcode #")]
6263
pub barcode: String,
64+
#[serde(rename = "Sample ID")]
6365
pub sample_id: String,
66+
#[serde(rename = "Sample Type")]
6467
pub sample_type: Option<String>,
6568
}
6669

@@ -130,7 +133,8 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
130133
Samplesheet::Illumina(ref sheet) => collect_sample_id(sheet),
131134
Samplesheet::ONT(ref sheet) => collect_sample_id(sheet),
132135
};
133-
println!("samples: {sample_list:?}");
136+
137+
//println!("samples: {sample_list:?}");
134138
// Get the negative controls from the samplesheet
135139
let neg_control_list = match samplesheet {
136140
Samplesheet::Illumina(ref sheet) => collect_negatives(sheet),
@@ -148,6 +152,7 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
148152
let vtype_data = create_vtype_data(&read_data);
149153
let allele_data = allele_data_collection(&args.irma_path)?;
150154
let indel_data = indels_data_collection(&args.irma_path)?;
155+
151156
let seq_data = amended_consensus_data_collection(&args.irma_path, &args.virus);
152157
let ref_lengths = match get_reference_lens(&args.irma_path) {
153158
Ok(data) => data,
@@ -226,6 +231,16 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
226231
} else if args.virus.to_lowercase() == "sc2-wgs" || args.virus.to_lowercase() == "sc2-spike" {
227232
subtype_data = extract_subtype_sc2(&dais_vars_data)?;
228233
}
234+
235+
//Gather Analysis Metadata for irma_summary
236+
let analysis_metadata = collect_analysis_metadata(
237+
&args.workdir_path,
238+
&args.platform,
239+
&args.virus,
240+
&args.irma_config,
241+
&args.runid,
242+
)?;
243+
229244
//Build prelim irma summary "dataframe"
230245
//More will be added and analyzed before final irma summary created
231246
let irma_summary = create_prelim_irma_summary_df(
@@ -235,6 +250,7 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
235250
&allele_data,
236251
&indel_data,
237252
&subtype_data,
253+
analysis_metadata,
238254
)?;
239255

240256
//todo:remove before end

src/utils/data_processing.rs

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ use serde::{self, Deserialize, Serialize};
22
use std::{
33
collections::{HashMap, HashSet},
44
error::Error,
5+
fs::File,
6+
io::{self, BufRead},
7+
path::PathBuf,
58
};
69

710
use crate::processes::prepare_mira_reports::SamplesheetI;
@@ -28,6 +31,14 @@ pub struct Subtype {
2831
pub subtype: String,
2932
}
3033

34+
/// Analysis metadata attached to every row of the IRMA summary.
///
/// Produced by `collect_analysis_metadata` and copied into the
/// `mira_module`, `runid` and `instrument` fields of each `IRMASummary`.
/// `Clone`/`PartialEq`/`Eq` are derived so the value can be reused across
/// rows and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Metadata {
    /// Module string of the form `MIRA-NF-v{version} {platform}-{virus} {irma_config}`.
    pub module: String,
    /// Run identifier supplied by the caller.
    pub runid: String,
    /// Sequencing platform the run was produced on.
    pub instrument: String,
}
41+
3142
//Melted Reads df
3243
#[derive(Debug)]
3344
pub struct MeltedRecord {
@@ -202,7 +213,6 @@ where
202213

203214
// Collect the sample ID of every samplesheet entry
204215
for entry in samples.iter() {
205-
println!("{}", entry.sample_id());
206216
sample_list.push(entry.sample_id().clone());
207217
}
208218

@@ -530,7 +540,6 @@ pub fn extract_subtype_sc2(dais_vars: &Vec<DaisVarsData>) -> Result<Vec<Subtype>
530540
let mut subtype_data: Vec<Subtype> = Vec::new();
531541

532542
for entry in dais_vars {
533-
println!("{}", entry.reference_id);
534543
subtype_data.push(Subtype {
535544
sample_id: entry.sample_id.clone(),
536545
subtype: entry.reference_id.clone(),
@@ -790,6 +799,45 @@ pub fn count_minority_indels(data: &Vec<IndelsData>) -> Vec<VariantCountData> {
790799
result
791800
}
792801

802+
pub fn collect_analysis_metadata(
803+
work_path: &PathBuf,
804+
platform: &str,
805+
virus: &str,
806+
irma_config: &String,
807+
input_runid: &str,
808+
) -> Result<Metadata, Box<dyn Error>> {
809+
let mut descript_dict = HashMap::new();
810+
let description_path = format!("{}/DESCRIPTION", work_path.display());
811+
812+
// Open the file for reading
813+
let file = File::open(&description_path)?;
814+
let reader = io::BufReader::new(file);
815+
816+
// Read the file line by line
817+
for line in reader.lines() {
818+
if let Ok(line) = line {
819+
let parts: Vec<&str> = line.split(':').collect();
820+
if parts.len() == 2 {
821+
descript_dict.insert(parts[0].trim().to_string(), parts[1].trim().to_string());
822+
}
823+
}
824+
}
825+
826+
// Construct the modulestring
827+
let version = descript_dict
828+
.get("Version")
829+
.ok_or("Version key not found in DESCRIPTION file")?;
830+
831+
let modulestring = format!("MIRA-NF-v{version} {platform}-{virus} {irma_config}");
832+
833+
let analysis_metadata = Metadata {
834+
module: modulestring,
835+
runid: input_runid.to_owned(),
836+
instrument: platform.to_owned(),
837+
};
838+
Ok(analysis_metadata)
839+
}
840+
793841
/// Combine all df to create IRMA summary
794842
pub fn create_prelim_irma_summary_df(
795843
sample_list: &Vec<String>,
@@ -798,6 +846,7 @@ pub fn create_prelim_irma_summary_df(
798846
alleles_df: &Vec<AllelesData>,
799847
indels_df: &Vec<IndelsData>,
800848
subtype_df: &Vec<Subtype>,
849+
metadata: Metadata,
801850
) -> Result<Vec<IRMASummary>, Box<dyn Error>> {
802851
let mut irma_summary: Vec<IRMASummary> = Vec::new();
803852
let allele_count_data = count_minority_alleles(alleles_df);
@@ -823,9 +872,9 @@ pub fn create_prelim_irma_summary_df(
823872
spike_median_coverage: None,
824873
pass_fail_reason: None,
825874
subtype: None,
826-
mira_module: None,
827-
runid: None,
828-
instrument: None,
875+
mira_module: Some(metadata.module.to_owned()),
876+
runid: Some(metadata.runid.to_owned()),
877+
instrument: Some(metadata.instrument.to_owned()),
829878
});
830879
}
831880
}

0 commit comments

Comments
 (0)