Skip to content

Commit 87bb8d6

Browse files
mandysullinbx0
authored and committed
working sample name back into df
1 parent ddc4f3f commit 87bb8d6

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ plotly = { git = "https://github.com/plotly/plotly.rs.git", branch = "main" }
1717
polars = { version = "0.33", default-features = false, features = [
1818
"csv",
1919
"lazy",
20+
"fmt",
21+
"fmt_no_tty",
2022
] }
2123

2224
zoe = { version = "0.0.19", default-features = false, features = [

src/processes/prepare_mira_reports.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ pub fn prepare_mira_reports_process(args: ReportsArgs) -> Result<(), Box<dyn Err
121121
*/
122122
println!("{samplesheet:?}");
123123
//println!("{:?}", qc_config);
124-
//println!("{:?}", cov_df);
124+
println!("{:?}", cov_df);
125125
println!("{reads_df:?}");
126126
println!("{vtype_df:?}");
127127

src/utils/dataframes.rs

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,21 @@ pub fn read_csv_to_dataframe(file_path: &PathBuf) -> Result<DataFrame, Box<dyn E
1414
Ok(df)
1515
}
1616

17+
/// Extract the sample name from the file path.
///
/// The sample name is taken to be the name of the directory two levels above
/// the file, i.e. for `<root>/<sample>/<subdir>/<file>` this returns `<sample>`.
/// Non-UTF-8 directory names are converted lossily.
///
/// # Errors
///
/// Returns an error when the path has no grandparent directory, or when the
/// grandparent component has no usable name (for example `""`, `/` or `..`).
/// These cases are reported as an error rather than a panic.
fn extract_sample_name(path: &Path) -> Result<String, Box<dyn Error>> {
    path.parent()
        .and_then(Path::parent)
        // `file_name` is `None` for an empty path, the root, or a trailing `..`.
        .and_then(Path::file_name)
        .map(|name| name.to_string_lossy().into_owned())
        .ok_or_else(|| "Failed to extract sample name from path.".into())
}
31+
1732
///Read in the coverage files made by irma and convert to df
1833
pub fn coverage_df(irma_path: impl AsRef<Path>) -> Result<DataFrame, Box<dyn Error>> {
1934
// Define the pattern to match text files
@@ -29,14 +44,18 @@ pub fn coverage_df(irma_path: impl AsRef<Path>) -> Result<DataFrame, Box<dyn Err
2944
for entry in glob(&pattern).expect("Failed to read glob pattern") {
3045
match entry {
3146
Ok(path) => {
32-
// Read the CSV file into a DataFrame
47+
let sample = extract_sample_name(&path)?;
3348
let file_path = path.to_str().unwrap();
3449

35-
let df = CsvReader::from_path(file_path)?
50+
let mut df = CsvReader::from_path(file_path)?
3651
.has_header(true)
3752
.with_delimiter(b'\t')
3853
.finish()?;
3954

55+
// Add the "Sample" column to the DataFrame
56+
let sample_series = Series::new("Sample", vec![sample; df.height()]);
57+
df = df.hstack(&[sample_series])?;
58+
4059
// Combine the DataFrame with the existing one
4160
combined_cov_df = match combined_cov_df {
4261
Some(existing_df) => Some(existing_df.vstack(&df)?),
@@ -70,14 +89,18 @@ pub fn readcount_df(irma_path: impl AsRef<Path>) -> Result<DataFrame, Box<dyn Er
7089
for entry in glob(&pattern).expect("Failed to read glob pattern") {
7190
match entry {
7291
Ok(path) => {
73-
// Read the CSV file into a DataFrame
92+
let sample = extract_sample_name(&path)?;
7493
let file_path = path.to_str().unwrap();
7594

76-
let df = CsvReader::from_path(file_path)?
95+
let mut df = CsvReader::from_path(file_path)?
7796
.has_header(true)
7897
.with_delimiter(b'\t')
7998
.finish()?;
8099

100+
// Add the "Sample" column to the DataFrame
101+
let sample_series = Series::new("Sample", vec![sample; df.height()]);
102+
df = df.hstack(&[sample_series])?;
103+
81104
// Combine the DataFrame with the existing one
82105
combined_reads_df = match combined_reads_df {
83106
Some(existing_df) => Some(existing_df.vstack(&df)?),
@@ -122,7 +145,7 @@ pub fn read_record2type(record: &str) -> Vec<String> {
122145

123146
/// Processes the DataFrame to extract sample types based on the `Record` column.
124147
pub fn dash_irma_sample_type(reads_df: &DataFrame) -> Result<DataFrame, PolarsError> {
125-
println!("{reads_df:?}");
148+
//println!("{reads_df:?}");
126149

127150
// Filter rows where the first character of the 'Record' column is '4'
128151
let mask = reads_df
@@ -132,6 +155,7 @@ pub fn dash_irma_sample_type(reads_df: &DataFrame) -> Result<DataFrame, PolarsEr
132155
.map(|record| record.map(|r| r.starts_with('4')))
133156
.collect::<ChunkedArray<BooleanType>>();
134157
let type_df = reads_df.filter(&mask)?;
158+
// NOTE: the mask above keeps rows whose 'Record' value starts with '4' (it does not match '4' anywhere in the string)
135159

136160
// Create new columns: 'vtype', 'ref_type', 'subtype'
137161
let new_cols = ["vtype", "ref_type", "subtype"];
@@ -167,7 +191,7 @@ pub fn dash_irma_sample_type(reads_df: &DataFrame) -> Result<DataFrame, PolarsEr
167191
new_columns.push(Series::new("Reference", reference_col));
168192

169193
// Create a new DataFrame with the selected columns
170-
let new_df = DataFrame::new(new_columns)?;
194+
let mut new_df = DataFrame::new(new_columns)?;
171195
//new_df = new_df.select(&["Sample", "vtype", "ref_type", "subtype"])?;
172196
Ok(new_df)
173197
}

0 commit comments

Comments
 (0)