@@ -14,6 +14,21 @@ pub fn read_csv_to_dataframe(file_path: &PathBuf) -> Result<DataFrame, Box<dyn E
1414 Ok ( df)
1515}
1616
/// Extract the sample name from a file path.
///
/// The sample name is taken as the file name of the grandparent directory of
/// `path` — i.e. for `.../<sample>/tables/coverage.txt` this returns `<sample>`.
/// (Assumes IRMA's `<sample>/tables/<file>` layout — TODO confirm against callers.)
///
/// # Errors
///
/// Returns an error if the path has no grandparent directory, or if that
/// directory has no final component (e.g. it ends in `..`) — previously the
/// latter case panicked via `unwrap()`; it now reports the same error instead.
fn extract_sample_name(path: &Path) -> Result<String, Box<dyn Error>> {
    path.parent()
        .and_then(|p| p.parent())
        // file_name() is None for paths like `/` or ones ending in `..`;
        // funnel that into the error arm rather than panicking.
        .and_then(|dir| dir.file_name())
        .map(|name| name.to_string_lossy().into_owned())
        .ok_or_else(|| "Failed to extract sample name from path.".into())
}
31+
1732/// Read the per-sample coverage files produced by IRMA and combine them into one DataFrame
1833pub fn coverage_df ( irma_path : impl AsRef < Path > ) -> Result < DataFrame , Box < dyn Error > > {
1934 // Define the pattern to match text files
@@ -29,14 +44,18 @@ pub fn coverage_df(irma_path: impl AsRef<Path>) -> Result<DataFrame, Box<dyn Err
2944 for entry in glob ( & pattern) . expect ( "Failed to read glob pattern" ) {
3045 match entry {
3146 Ok ( path) => {
32- // Read the CSV file into a DataFrame
47+ let sample = extract_sample_name ( & path ) ? ;
3348 let file_path = path. to_str ( ) . unwrap ( ) ;
3449
35- let df = CsvReader :: from_path ( file_path) ?
50+ let mut df = CsvReader :: from_path ( file_path) ?
3651 . has_header ( true )
3752 . with_delimiter ( b'\t' )
3853 . finish ( ) ?;
3954
55+ // Add the "Sample" column to the DataFrame
56+ let sample_series = Series :: new ( "Sample" , vec ! [ sample; df. height( ) ] ) ;
57+ df = df. hstack ( & [ sample_series] ) ?;
58+
4059 // Combine the DataFrame with the existing one
4160 combined_cov_df = match combined_cov_df {
4261 Some ( existing_df) => Some ( existing_df. vstack ( & df) ?) ,
@@ -70,14 +89,18 @@ pub fn readcount_df(irma_path: impl AsRef<Path>) -> Result<DataFrame, Box<dyn Er
7089 for entry in glob ( & pattern) . expect ( "Failed to read glob pattern" ) {
7190 match entry {
7291 Ok ( path) => {
73- // Read the CSV file into a DataFrame
92+ let sample = extract_sample_name ( & path ) ? ;
7493 let file_path = path. to_str ( ) . unwrap ( ) ;
7594
76- let df = CsvReader :: from_path ( file_path) ?
95+ let mut df = CsvReader :: from_path ( file_path) ?
7796 . has_header ( true )
7897 . with_delimiter ( b'\t' )
7998 . finish ( ) ?;
8099
100+ // Add the "Sample" column to the DataFrame
101+ let sample_series = Series :: new ( "Sample" , vec ! [ sample; df. height( ) ] ) ;
102+ df = df. hstack ( & [ sample_series] ) ?;
103+
81104 // Combine the DataFrame with the existing one
82105 combined_reads_df = match combined_reads_df {
83106 Some ( existing_df) => Some ( existing_df. vstack ( & df) ?) ,
@@ -122,7 +145,7 @@ pub fn read_record2type(record: &str) -> Vec<String> {
122145
123146/// Processes the DataFrame to extract sample types based on the `Record` column.
124147pub fn dash_irma_sample_type ( reads_df : & DataFrame ) -> Result < DataFrame , PolarsError > {
125- println ! ( "{reads_df:?}" ) ;
148+ // println!("{reads_df:?}");
126149
127150 // Filter rows where the first character of the 'Record' column is '4'
128151 let mask = reads_df
@@ -132,6 +155,7 @@ pub fn dash_irma_sample_type(reads_df: &DataFrame) -> Result<DataFrame, PolarsEr
132155 . map ( |record| record. map ( |r| r. starts_with ( '4' ) ) )
133156 . collect :: < ChunkedArray < BooleanType > > ( ) ;
134157 let type_df = reads_df. filter ( & mask) ?;
158+ // `type_df` now holds only rows whose "Record" value starts with '4'
135159
136160 // Create new columns: 'vtype', 'ref_type', 'subtype'
137161 let new_cols = [ "vtype" , "ref_type" , "subtype" ] ;
@@ -167,7 +191,7 @@ pub fn dash_irma_sample_type(reads_df: &DataFrame) -> Result<DataFrame, PolarsEr
167191 new_columns. push ( Series :: new ( "Reference" , reference_col) ) ;
168192
169193 // Create a new DataFrame with the selected columns
170- let new_df = DataFrame :: new ( new_columns) ?;
194+ let mut new_df = DataFrame :: new ( new_columns) ?;
171195 //new_df = new_df.select(&["Sample", "vtype", "ref_type", "subtype"])?;
172196 Ok ( new_df)
173197}
0 commit comments