Skip to content

Commit c81ef18

Browse files
committed
Testing using checkqc illumina parser for bclconvert
1 parent 18e7feb commit c81ef18

26 files changed

+703
-39
lines changed

.github/workflows/unit_tests.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,14 @@ jobs:
1313
with:
1414
python-version: '3.13.1'
1515

16+
# checkQC currently only accepts Python versions between 3.10 and 3.11, but
17+
# projman uses 3.13, thus this is temporarily fixed here by cloning it
1618
- name: Install dependencies
1719
run: |
1820
python3 -m pip install --upgrade pip
1921
pip install -e . -r requirements_dev.txt
22+
git clone https://github.com/Molmed/checkQC.git
23+
2024
2125
- name: Launch tests
2226
run: |

projman_filler/app.py

Lines changed: 56 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from projman_filler.repositories.sample_results_repo import SampleResultRepo
1616
from projman_filler.repositories.flowcell_lane_results_repo import FlowcellLaneResultsRepo
1717
from projman_filler.repositories.flowcell_runfolder_repo import FlowcellRunfolderRepo
18-
18+
from checkQC.qc_data import QCData
1919

2020
class App(object):
2121

@@ -59,39 +59,61 @@ def insert_flowcell_runfolder_into_db(self, runfolder, flowcell_name):
5959
run_date=runfolder_date)
6060
self.flowcell_runfolder_repo.add(flowcell_runfolder)
6161

62-
def insert_runfolder_into_db(self, runfolder, bcl2fastq_stats_dir, force=False, atac_seq_mode=False, olink_mode=False):
63-
if olink_mode:
64-
print("Olink mode activated. Will read lane-level statistics from InterOp files instead of bcl2fastq Stats.json.")
65-
return self.insert_olink_runfolder_into_db(runfolder, force)
66-
67-
bcl2fastq_stats = Bcl2fastqRunStatsParser(os.path.join(runfolder, bcl2fastq_stats_dir))
68-
flowcell_name = bcl2fastq_stats.get_flowcell_name()
69-
reads_and_cycles = bcl2fastq_stats.get_reads_and_cycles()
70-
conversion_results = bcl2fastq_stats.get_conversion_results()
71-
72-
# Check if flowcell exists and should be overriden
73-
self.delete_existing_flowcell_from_db(flowcell_name, force)
74-
75-
# For atac-seq we run bcl2fastq with special parameters declaring
76-
# that the second index should be interpreted as a non-index read.
77-
# So we allow overriding the Interop list of non-index-reads with
78-
# a custom list obtained from bcl2fastq stats. /ML 2021-09
79-
non_index_reads = None
80-
if atac_seq_mode:
81-
print("ATAC-seq mode activated. Will re-map read numbers according to settings used by bcl2fastq.")
82-
non_index_reads = bcl2fastq_stats.get_non_index_reads()
83-
84-
interop = InteropRunStatsParser(runfolder, non_index_reads)
85-
lane_stats = calculate_lane_statistics(interop, flowcell_name, conversion_results)
86-
self.flowcell_lane_results_repo.add(list(lane_stats))
87-
88-
samplesheet_file = os.path.join(runfolder, "SampleSheet.csv")
89-
samplesheet = Samplesheet(samplesheet_file)
90-
91-
sample_stats = calculate_sample_statistics(flowcell_name, conversion_results, reads_and_cycles, samplesheet)
92-
self.sample_results_repo.add(list(sample_stats))
93-
94-
self.insert_flowcell_runfolder_into_db(runfolder, flowcell_name)
62+
def insert_runfolder_into_db(self, runfolder, bcl2fastq_stats_dir, demultiplexer, force=False, atac_seq_mode=False, olink_mode=False):
63+
interop = InteropRunStatsParser(runfolder)
64+
if demultiplexer == "bclconvert":
65+
66+
qc_data_constructor = getattr(QCData, f"from_{demultiplexer}")
67+
qc_data = qc_data_constructor(
68+
runfolder_path=runfolder,
69+
parser_config={
70+
"reports_location": "Reports"
71+
}
72+
)
73+
flowcell_lane_results, sample_results = \
74+
interop.get_checkqc_interop_stats(qc_data, runfolder)
75+
76+
flowcell_name = interop.get_flowcell_name()
77+
# Check if flowcell exists and should be overridden
78+
self.delete_existing_flowcell_from_db(flowcell_name, force)
79+
80+
self.flowcell_lane_results_repo.add(flowcell_lane_results)
81+
self.sample_results_repo.add(sample_results)
82+
83+
self.insert_flowcell_runfolder_into_db(runfolder, flowcell_name)
84+
85+
else:
86+
if olink_mode:
87+
print("Olink mode activated. Will read lane-level statistics from InterOp files instead of bcl2fastq Stats.json.")
88+
return self.insert_olink_runfolder_into_db(runfolder, force)
89+
90+
bcl2fastq_stats = Bcl2fastqRunStatsParser(os.path.join(runfolder, bcl2fastq_stats_dir))
91+
flowcell_name = bcl2fastq_stats.get_flowcell_name()
92+
reads_and_cycles = bcl2fastq_stats.get_reads_and_cycles()
93+
conversion_results = bcl2fastq_stats.get_conversion_results()
94+
95+
# Check if flowcell exists and should be overridden
96+
self.delete_existing_flowcell_from_db(flowcell_name, force)
97+
98+
# For atac-seq we run bcl2fastq with special parameters declaring
99+
# that the second index should be interpreted as a non-index read.
100+
# So we allow overriding the Interop list of non-index-reads with
101+
# a custom list obtained from bcl2fastq stats. /ML 2021-09
102+
non_index_reads = None
103+
if atac_seq_mode:
104+
print("ATAC-seq mode activated. Will re-map read numbers according to settings used by bcl2fastq.")
105+
non_index_reads = bcl2fastq_stats.get_non_index_reads()
106+
107+
lane_stats = calculate_lane_statistics(interop, flowcell_name, conversion_results)
108+
self.flowcell_lane_results_repo.add(list(lane_stats))
109+
110+
samplesheet_file = os.path.join(runfolder, "SampleSheet.csv")
111+
samplesheet = Samplesheet(samplesheet_file)
112+
113+
sample_stats = calculate_sample_statistics(flowcell_name, conversion_results, reads_and_cycles, samplesheet)
114+
self.sample_results_repo.add(list(sample_stats))
115+
116+
self.insert_flowcell_runfolder_into_db(runfolder, flowcell_name)
95117

96118

97119
def insert_olink_runfolder_into_db(self, runfolder, force=False):

projman_filler/cli.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@
1919
@click.option('--atac-seq-mode', is_flag=True)
2020
@click.option('--olink-mode', is_flag=True)
2121
@click.option('-b', '--bcl2fastq-stats', default="Unaligned/Stats", type=click.Path())
22+
@click.option('-d', '--demultiplexer', default ="bcl2fastq", type=click.Choice(['bcl2fastq', 'bclconvert']))
2223
@click.argument('runfolder', type=click.Path())
23-
def main(runfolder, force, atac_seq_mode, olink_mode, bcl2fastq_stats, debug):
24+
def main(runfolder, force, atac_seq_mode, olink_mode, bcl2fastq_stats, demultiplexer, debug):
2425
"""Console script for projman_filler."""
2526
print("projman_filler v{}".format(projman_filler_version))
2627
try:
2728
db_connection_string = os.environ["PROJMAN_DB"]
2829
app = App(db_connection_string, debug)
29-
app.insert_runfolder_into_db(runfolder, bcl2fastq_stats, force=force, atac_seq_mode=atac_seq_mode, olink_mode=olink_mode)
30+
app.insert_runfolder_into_db(runfolder, bcl2fastq_stats, demultiplexer, force=force, atac_seq_mode=atac_seq_mode, olink_mode=olink_mode)
3031
except FlowcellAlreadyInDb:
3132
print("Flowcell was already present in db.")
3233
sys.exit(1)

projman_filler/interop_run_stats_parser.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from math import isnan
55
from projman_filler.run_stats_parser_interface import RunStatsParserInterface
66
from projman_filler.lane import Lane
7+
from projman_filler.models.db_models import FlowcellLaneResult, SampleResult
78

89

910
class InteropRunStatsParser(RunStatsParserInterface):
@@ -128,5 +129,67 @@ def _get_conversion_results(self) -> list:
128129

129130
lanes.append(Lane(l, total_clusters_raw, total_clusters_pf))
130131
return lanes
131-
132-
132+
133+
def get_checkqc_interop_stats(self, qc_data, runfolder):
134+
"""
135+
Gets run stats from the checkQC Illumina parser and the rest from InterOp
136+
137+
Params:
138+
:qc_data (a checkQC QCData object): results from checkQC illumina parser
139+
for specified demultiplexer
140+
:runfolder (str): Runfolder path string
141+
142+
Returns:
143+
: tuple of (flowcell_lane_results, sample_results) to be added to the respective DB
144+
"""
145+
from checkQC.parsers.illumina import _read_interop_summary
146+
flowcell_lane_results = []
147+
sample_results = []
148+
149+
run_summary, index_summary = _read_interop_summary(runfolder)
150+
flowcell_id = self.get_flowcell_name() #self._run_info.flowcell_id()
151+
152+
for lane_no, lane_dict in qc_data.sequencing_metrics.items():
153+
for read_no, read_dict in lane_dict["reads"].items():
154+
if read_dict["is_index"]:
155+
continue
156+
interop_lane = run_summary.at(read_no-1).at(lane_no-1)
157+
cycles = run_summary.at(read_no-1).read().total_cycles()
158+
error_rate = run_summary.at(0).at(lane_no-1).error_rate().mean() # or interop_lane.error_rate().mean() ?,
159+
160+
flowcell_lane_results.append(
161+
FlowcellLaneResult(
162+
flowcell_id=flowcell_id,
163+
lane_num=lane_no,
164+
read_num=read_no,
165+
raw_density=interop_lane.density().mean(),
166+
pf_density=interop_lane.density_pf().mean(),
167+
error_rate= None if isnan(error_rate) else error_rate,
168+
raw_clusters=run_summary.at(0).at(lane_no-1).cluster_count().mean(), #Not sure if this should be cluster_count,
169+
pf_clusters= lane_dict["total_cluster_pf"],
170+
cycles=cycles,
171+
pct_q30=float((interop_lane.percent_gt_q30()) / 100),
172+
)
173+
)
174+
175+
sample_reads = index_summary.at(lane_no-1).at(read_no-1)
176+
lane_summary = run_summary.at(read_no-1).at(lane_no-1)
177+
sample_results.append(
178+
SampleResult(
179+
flowcell_id=flowcell_id,
180+
project_id=sample_reads.project_name(),
181+
sample_name=sample_reads.project_name(),
182+
tag_seq=f"{sample_reads.index1()}-{sample_reads.index2()}",
183+
lane_num=lane_no,
184+
read_num=read_no,
185+
cycles=cycles,
186+
pct_lane="",
187+
pf_clusters=sample_reads.cluster_count(),
188+
pct_q30=read_dict["percent_q30"],
189+
pct_tag_err="", # (float(mismatch_counts) / float(number_of_reads))*100
190+
library_name="",
191+
mean_q=None,
192+
)
193+
)
194+
195+
return flowcell_lane_results, sample_results

setup.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,14 @@
1616
'SQLAlchemy~=2.0.40',
1717
'pymssql~=2.3.4',
1818
'pandas~=2.2.3',
19-
'numpy~=2.2.4'
19+
'numpy~=2.2.4',
20+
# checkQC must be installed manually
21+
# git clone https://github.com/Molmed/checkQC.git
22+
# cd checkQC
23+
# sed -i 's/python_requires=.*$/python_requires=">=3.10",/' setup.py
24+
# sed -i 's/^interop~=.*$/interop==1.4.0/' requirements/prod
25+
# pip install --no-deps -r requirements/dev .
26+
2027
]
2128

2229
setup_requirements = [
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)