1111from projman_filler .models .db_models import FlowcellRunfolder
1212from projman_filler .bcl2fastq_run_stats_parser import Bcl2fastqRunStatsParser
1313from projman_filler .interop_run_stats_parser import InteropRunStatsParser
14+ from projman_filler .qc_data_parser import QCDataParser
1415
1516from projman_filler .repositories .sample_results_repo import SampleResultRepo
1617from projman_filler .repositories .flowcell_lane_results_repo import FlowcellLaneResultsRepo
1718from projman_filler .repositories .flowcell_runfolder_repo import FlowcellRunfolderRepo
18-
19+ from checkQC .qc_data import QCData
20+ from checkQC .config import ConfigFactory
1921
2022class App (object ):
2123
@@ -43,8 +45,8 @@ def delete_existing_flowcell_from_db(self, flowcell_name, force):
4345 return
4446
4547 if force :
46- print ("Found the specified runfolder in the db, but got a force option, so will proceed to "
47- " delete it and insert new values." )
48+ print ("Found the specified runfolder in the db, but got a force "
49+ "option, so will proceed to delete it and insert new values." )
4850 self .flowcell_lane_results_repo .delete_by_flowcell_name (flowcell_name )
4951 self .flowcell_runfolder_repo .delete_by_flowcell_name (flowcell_name )
5052 self .sample_results_repo .delete_by_flowcell_name (flowcell_name )
@@ -59,50 +61,131 @@ def insert_flowcell_runfolder_into_db(self, runfolder, flowcell_name):
5961 run_date = runfolder_date )
6062 self .flowcell_runfolder_repo .add (flowcell_runfolder )
6163
def insert_runfolder_into_db(self, runfolder, bcl2fastq_stats_dir,
                             demultiplexer, ch_config_path, force=False,
                             atac_seq_mode=False, olink_mode=False):
    """
    Insert the statistics of a runfolder into the database.

    The processing path is selected in this order:

    1. ``olink_mode`` is set: lane-level statistics are read from the
       InterOp files and the whole insert is delegated to
       ``insert_olink_runfolder_into_db``.
    2. ``demultiplexer`` is ``"bcl2fastq"``: handled by ``_handle_bcl2fastq``.
    3. Any other demultiplexer: handled by ``_handle_other_demultiplexer``.

    For paths 2 and 3 the flowcell/runfolder record is added afterwards via
    ``insert_flowcell_runfolder_into_db``.

    :param runfolder (str): Path to the runfolder directory.
    :param bcl2fastq_stats_dir (str): Subdirectory containing bcl2fastq
        statistics. Only used on the bcl2fastq path.
    :param demultiplexer (str): Demultiplexer used (e.g. 'bcl2fastq' or
        'bclconvert').
    :param ch_config_path (Path): Path to the checkQC configuration file.
        Only used for demultiplexers other than bcl2fastq.
    :param force (bool, optional): If True, existing flowcell data will be
        overwritten.
    :param atac_seq_mode (bool, optional): If True, enables ATAC-seq specific
        processing. Only forwarded on the bcl2fastq path.
    :param olink_mode (bool, optional): If True, enables Olink-specific
        processing, regardless of the chosen demultiplexer.

    :return: The result of ``insert_olink_runfolder_into_db`` in Olink mode,
        otherwise None.
    """
    # Olink mode has to be checked before the demultiplexer: it replaces the
    # bcl2fastq Stats.json input altogether, so a "bcl2fastq" demultiplexer
    # value must not shadow it.
    if olink_mode:
        print("Olink mode activated. Will read lane-level statistics from "
              "InterOp files instead of bcl2fastq Stats.json.")
        return self.insert_olink_runfolder_into_db(runfolder, force)

    if demultiplexer == "bcl2fastq":
        flowcell_name = self._handle_bcl2fastq(
            runfolder, bcl2fastq_stats_dir, force, atac_seq_mode
        )
    else:
        flowcell_name = self._handle_other_demultiplexer(
            runfolder, demultiplexer, ch_config_path, force
        )

    self.insert_flowcell_runfolder_into_db(runfolder, flowcell_name)
95+
def _handle_bcl2fastq(self, runfolder, stats_dir, force, atac_seq_mode):
    """
    Process a runfolder that was demultiplexed with bcl2fastq.

    Lane results and sample results are computed and added to their
    repositories; the flowcell/runfolder record itself is left to the caller.

    :param runfolder (str): Path to the runfolder directory.
    :param stats_dir (str): Subdirectory of the runfolder containing the
        bcl2fastq statistics.
    :param force (bool): If True, existing flowcell data will be overwritten.
    :param atac_seq_mode (bool): If True, the non-index reads reported by
        bcl2fastq are used instead of the InterOp defaults.

    :return flowcell_name (str): The flowcell name extracted from the
        bcl2fastq statistics (after saving flowcell lane and sample data
        in the DB).
    """
    stats = Bcl2fastqRunStatsParser(os.path.join(runfolder, stats_dir))

    flowcell_name = stats.get_flowcell_name()
    reads_and_cycles = stats.get_reads_and_cycles()
    conversion_results = stats.get_conversion_results()

    # Check if the flowcell already exists and should be overridden.
    self.delete_existing_flowcell_from_db(flowcell_name, force)

    # For ATAC-seq, bcl2fastq is run with special parameters declaring that
    # the second index should be interpreted as a non-index read, so the
    # InterOp list of non-index reads is overridden with the list obtained
    # from the bcl2fastq stats.
    if atac_seq_mode:
        print("ATAC-seq mode activated. Will re-map read numbers according "
              "to settings used by bcl2fastq.")
        read_override = stats.get_non_index_reads()
    else:
        read_override = None

    interop_stats = InteropRunStatsParser(runfolder, read_override)
    lane_rows = list(
        calculate_lane_statistics(interop_stats, flowcell_name, conversion_results)
    )
    self.flowcell_lane_results_repo.add(lane_rows)

    samplesheet_path = os.path.join(runfolder, "SampleSheet.csv")
    sheet = Samplesheet(samplesheet_path)
    sample_rows = list(
        calculate_sample_statistics(
            flowcell_name, conversion_results, reads_and_cycles, sheet
        )
    )
    self.sample_results_repo.add(sample_rows)

    return flowcell_name
96136
def insert_olink_runfolder_into_db(self, runfolder, force=False):
    """
    Insert a runfolder into the database using Olink-specific processing.

    Everything is read from the InterOp files of the runfolder: the flowcell
    name, the conversion results and the lane statistics. Only lane results
    and the flowcell/runfolder record are added; no sample results are
    written on this path.

    :param runfolder (str): Path to the runfolder directory.
    :param force (bool, optional): If True, existing flowcell data will be
        overwritten. Defaults to False.
    """
    interop_stats = InteropRunStatsParser(runfolder)
    flowcell = interop_stats.get_flowcell_name()

    # Check if the flowcell already exists and should be overridden.
    self.delete_existing_flowcell_from_db(flowcell, force)

    lane_rows = list(
        calculate_lane_statistics(
            interop_stats, flowcell, interop_stats.get_conversion_results()
        )
    )
    self.flowcell_lane_results_repo.add(lane_rows)

    self.insert_flowcell_runfolder_into_db(runfolder, flowcell)
158+
def _handle_other_demultiplexer(self, runfolder, demultiplexer, ch_config_path,
                                force):
    """
    Process a runfolder for demultiplexers other than bcl2fastq.

    The QC data is loaded through the ``QCData`` constructor named
    ``from_<demultiplexer>``, using the matching entry (if any) under
    ``parser_configurations`` in the checkQC configuration. Lane and sample
    results built from it are added to their repositories; the
    flowcell/runfolder record itself is left to the caller.

    :param runfolder (str): Path to the runfolder directory.
    :param demultiplexer (str): Demultiplexer used (e.g. 'bclconvert').
    :param ch_config_path (Path): Path to the checkQC configuration file.
    :param force (bool): If True, existing flowcell data will be overwritten.

    :return flowcell_name (str): The flowcell name extracted from the
        runfolder (after saving flowcell lane and sample data in the DB).
    """
    # Both the QCData constructor and its parser configuration are keyed by
    # the same "from_<demultiplexer>" name.
    loader_name = f"from_{demultiplexer}"

    checkqc_config = ConfigFactory.from_config_path(ch_config_path)._config
    load_qc_data = getattr(QCData, loader_name)
    parser_config = checkqc_config.get("parser_configurations", {}).get(
        loader_name, {}
    )
    qc_data = load_qc_data(runfolder_path=runfolder, parser_config=parser_config)

    parser = QCDataParser(qc_data, runfolder)
    flowcell_name = parser.flowcell_id

    # Check if the flowcell already exists and should be overridden.
    self.delete_existing_flowcell_from_db(flowcell_name, force)

    lane_rows = parser._build_lane_results()
    sample_rows = parser._build_sample_results()
    self.flowcell_lane_results_repo.add(lane_rows)
    self.sample_results_repo.add(sample_rows)

    return flowcell_name
0 commit comments