Skip to content

Commit 44dc783

Browse files
committed
read in dais-ribosome data to structs
1 parent 8aa305d commit 44dc783

File tree

2 files changed

+121
-1
lines changed

2 files changed

+121
-1
lines changed

src/processes/prepare_mira_reports.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
112112
let indel_data = indels_data_collection(&args.irma_path)?;
113113
let seq_data = amended_consensus_data_collection(&args.irma_path, "flu");
114114

115+
//Read in DAIS-ribosome data
116+
let dais_ins_data = dias_insertion_data_collection(&args.irma_path);
117+
let dais_del_data = dias_deletion_data_collection(&args.irma_path);
118+
let dais_seq_data = dias_sequence_data_collection(&args.irma_path);
119+
115120
//println!("{samplesheet:?}");
116121
//println!("{qc_config:?}");
117122
//println!("Coverage data: {coverage_data:?}");
@@ -120,6 +125,9 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
120125
//println!("Allele data: {allele_data:?}");
121126
//println!("Indel data: {indel_data:?}");
122127
//println!("Seq data: {seq_data:#?}");
128+
//println!("dais ins data: {dais_ins_data:#?}");
129+
//println!("dais del data: {dais_del_data:#?}");
130+
//println!("dais seq data: {dais_seq_data:#?}");
123131

124132
// Write the structs to JSON files and CSV files
125133
let reads_struct_values = vec![

src/utils/data_ingest.rs

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ pub fn indels_data_collection(
466466
Ok(reads_data)
467467
}
468468

469-
/// Read in IRMA amended consensus fasta files to FastaNT struct from zoe crate
469+
/// Read in IRMA amended consensus fasta files to SeqData struct
470470
pub fn amended_consensus_data_collection(
471471
irma_path: &PathBuf,
472472
organism: &str,
@@ -573,3 +573,115 @@ pub fn create_vtype_data(reads_data: &Vec<ReadsData>) -> Vec<ProcessedRecord> {
573573
}
574574

575575
/////////////// Data reading functions for DAIS-ribosome ///////////////
576+
/// Read tab-delimited data a withouot including sample name
577+
pub fn process_txt<R, T>(reader: R, has_headers: bool) -> Result<Vec<T>, Box<dyn std::error::Error>>
578+
where
579+
R: Read,
580+
T: for<'de> Deserialize<'de>,
581+
{
582+
let mut rdr = ReaderBuilder::new()
583+
.has_headers(has_headers)
584+
.delimiter(b'\t')
585+
.from_reader(reader);
586+
587+
let mut records: Vec<T> = Vec::new();
588+
for result in rdr.deserialize() {
589+
let record: T = result?;
590+
records.push(record);
591+
}
592+
593+
Ok(records)
594+
}
595+
596+
/// Read in dais-ribosome ins file fto InsertionData struct
597+
pub fn dias_insertion_data_collection(
598+
dais_path: &PathBuf,
599+
) -> Result<Vec<InsertionData>, Box<dyn std::error::Error>> {
600+
// Construct the glob pattern for matching files
601+
//If using * situation, you will have to use glob
602+
let pattern = format!(
603+
"{}/aggregate_outputs/dais-ribosome/*.ins",
604+
dais_path.to_string_lossy()
605+
);
606+
607+
let mut dais_ins_data: Vec<InsertionData> = Vec::new();
608+
609+
// Use the glob crate to find all matching files
610+
for entry in glob(&pattern)? {
611+
match entry {
612+
Ok(path) => {
613+
let file = File::open(&path)?;
614+
let reader = BufReader::new(file);
615+
let mut records: Vec<InsertionData> = process_txt(reader, false)?;
616+
dais_ins_data.append(&mut records);
617+
}
618+
Err(e) => {
619+
eprintln!("Error processing file: {e}");
620+
}
621+
}
622+
}
623+
624+
Ok(dais_ins_data)
625+
}
626+
627+
/// Read in dais-ribosome ins file fto DeletionsData struct
628+
pub fn dias_deletion_data_collection(
629+
dais_path: &PathBuf,
630+
) -> Result<Vec<DeletionsData>, Box<dyn std::error::Error>> {
631+
// Construct the glob pattern for matching files
632+
//If using * situation, you will have to use glob
633+
let pattern = format!(
634+
"{}/aggregate_outputs/dais-ribosome/*.del",
635+
dais_path.to_string_lossy()
636+
);
637+
638+
let mut dais_del_data: Vec<DeletionsData> = Vec::new();
639+
640+
// Use the glob crate to find all matching files
641+
for entry in glob(&pattern)? {
642+
match entry {
643+
Ok(path) => {
644+
let file = File::open(&path)?;
645+
let reader = BufReader::new(file);
646+
let mut records: Vec<DeletionsData> = process_txt(reader, false)?;
647+
dais_del_data.append(&mut records);
648+
}
649+
Err(e) => {
650+
eprintln!("Error processing file: {e}");
651+
}
652+
}
653+
}
654+
655+
Ok(dais_del_data)
656+
}
657+
658+
/// Read in dais-ribosome ins file fto DeletionsData struct
659+
pub fn dias_sequence_data_collection(
660+
dais_path: &PathBuf,
661+
) -> Result<Vec<DaisSeqData>, Box<dyn std::error::Error>> {
662+
// Construct the glob pattern for matching files
663+
//If using * situation, you will have to use glob
664+
let pattern = format!(
665+
"{}/aggregate_outputs/dais-ribosome/*.seq",
666+
dais_path.to_string_lossy()
667+
);
668+
669+
let mut dais_seq_data: Vec<DaisSeqData> = Vec::new();
670+
671+
// Use the glob crate to find all matching files
672+
for entry in glob(&pattern)? {
673+
match entry {
674+
Ok(path) => {
675+
let file = File::open(&path)?;
676+
let reader = BufReader::new(file);
677+
let mut records: Vec<DaisSeqData> = process_txt(reader, false)?;
678+
dais_seq_data.append(&mut records);
679+
}
680+
Err(e) => {
681+
eprintln!("Error processing file: {e}");
682+
}
683+
}
684+
}
685+
686+
Ok(dais_seq_data)
687+
}

0 commit comments

Comments
 (0)