Molmed · nkongenelly · Oct 28, 2025 · Aug 21, 2025 · Aug 27, 2025 · Sep 3, 2025
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -5,13 +5,17 @@ on: [push, pull_request]
 jobs:
   build:
     runs-on: ubuntu-22.04
+    strategy:  # fan the job out over every supported interpreter
+      matrix:
+        python-version: ['3.10', '3.11', '3.12', '3.13']  # quoted: bare 3.10 parses as 3.1
+    name: Unit tests (Python ${{ matrix.python-version }})
     steps:
     - uses: actions/checkout@v4
 
-    - name: Set up Python 3.10
+    - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
       with:
-        python-version: '3.10'
+        python-version: ${{ matrix.python-version }}
 
     - name: Install dependencies
       run: |

diff --git a/checkQC/parsers/illumina.py b/checkQC/parsers/illumina.py
@@ -13,23 +13,32 @@ def from_bclconvert(cls, runfolder_path, parser_config):
     assert runfolder_path.is_dir()
 
     summary, index_summary = _read_interop_summary(runfolder_path)
-    quality_metrics = _read_quality_metrics(
+    quality_metrics = _read_demultiplexing_metrics(
         runfolder_path
         / parser_config["reports_location"]
         / "Quality_Metrics.csv"
     )
-    top_unknown_barcodes = _read_top_unknown_barcodes(
+    top_unknown_barcodes = _read_demultiplexing_metrics(
         runfolder_path
         / parser_config["reports_location"]
         / "Top_Unknown_Barcodes.csv"
     )
+
+    demultiplex_stats = _read_demultiplexing_metrics(
+        runfolder_path
+        / parser_config["reports_location"]
+        / "Demultiplex_Stats.csv"
+    )
     samplesheet = _read_samplesheet(runfolder_path)
 
     instrument, read_length = _read_run_metadata(runfolder_path)
 
     sequencing_metrics = {
         lane + 1: {
-            "total_cluster_pf": summary.at(0).at(lane).reads_pf(),
+            "total_reads_pf": summary.at(0).at(lane).reads_pf(),
+            "total_reads": summary.at(0).at(lane).reads(),
+            "raw_density": summary.at(0).at(lane).density().mean(),  # mean of the lane's density stat
+            "pf_density": summary.at(0).at(lane).density_pf().mean(),  # same, for density_pf
             "yield": sum(
                 int(row["Yield"])
                 for row in quality_metrics
@@ -69,6 +78,36 @@ def from_bclconvert(cls, runfolder_path, parser_config):
                         sample_summary := index_summary.at(lane).at(sample_no)
                     ).sample_id(),
                     "cluster_count": sample_summary.cluster_count(),
+                    "percent_of_lane": next(  # first matching row; StopIteration if none matches
+                        round(float(sample_stat["% Reads"]) * 100, 2)  # CSV holds a fraction (e.g. 0.0029)
+                        for sample_stat in demultiplex_stats
+                        if sample_stat["Lane"] == str(lane + 1) and
+                        sample_stat["SampleID"] == sample_summary.sample_id()
+                    ),
+                    "percent_perfect_index_reads": next(  # same lookup as percent_of_lane
+                        round(float(sample_stat["% Perfect Index Reads"]) * 100, 2)
+                        for sample_stat in demultiplex_stats
+                        if sample_stat["Lane"] == str(lane + 1) and
+                        sample_stat["SampleID"] == sample_summary.sample_id()
+                    ),
+                    "mean_q30": next(  # NOTE(review): key says q30, value is the mean quality score column
+                        float(row["Mean Quality Score (PF)"])
+                        for row in quality_metrics  # NOTE(review): only the first match is used - confirm one row per lane/sample
+                        if (
+                            row["Lane"] == str(lane + 1)
+                            and row["SampleID"] == sample_summary.sample_id()
+                        )
+                    ),
+                    "percent_q30": next(  # raises StopIteration when no row matches
+                        float(row["% Q30"]) * 100  # scaled by 100 but, unlike the stats above, not rounded
+                        for row in quality_metrics
+                        if (
+                            row["Lane"] == str(lane + 1)
+                            and row["SampleID"] == sample_summary.sample_id()
+                        )
+                    )
+
+
                 }
                 for sample_no in range(index_summary.at(lane).size())
             ],
@@ -107,21 +146,13 @@ def _read_interop_summary(runfolder_path):
     return run_summary, index_summary
 
 
-def _read_quality_metrics(quality_metrics_path):
+def _read_demultiplexing_metrics(metrics_path):
     """
-    Read quality metrics file
+    Read demultiplexing metrics file
     """
-    with open(quality_metrics_path, encoding="utf-8") as csvfile:
+    with open(metrics_path, encoding="utf-8") as csvfile:
         return list(csv.DictReader(csvfile))
-
-
-def _read_top_unknown_barcodes(top_unknown_barcodes_path):
-    """
-    Read top unknown barcodes file
-    """
-    with open(top_unknown_barcodes_path, encoding="utf-8") as csvfile:
-        return list(csv.DictReader(csvfile))
-
+
 
 def _read_run_metadata(runfolder_path):
     """

diff --git a/checkQC/qc_checkers/cluster_pf.py b/checkQC/qc_checkers/cluster_pf.py
@@ -17,33 +17,33 @@ def cluster_pf(
     if warning_threshold != "unknown":
         warning_threshold = int(warning_threshold * 10**6)
 
-    def format_msg(total_cluster_pf, threshold, lane, **kwargs):
-        return f"Clusters PF {total_cluster_pf / 10**6}M < {threshold / 10**6}M on lane {lane}"
+    def format_msg(total_reads_pf, threshold, lane, **kwargs):
+        return f"Clusters PF {total_reads_pf / 10**6}M < {threshold / 10**6}M on lane {lane}"
 
-    def _qualify_error(total_cluster_pf, lane):
+    def _qualify_error(total_reads_pf, lane):
         data = {
             "lane": lane,
-            "total_cluster_pf": total_cluster_pf,
+            "total_reads_pf": total_reads_pf,
             "qc_checker": "cluster_pf",
         }
 
-        match total_cluster_pf:
-            case total_cluster_pf if (
+        match total_reads_pf:
+            case total_reads_pf if (
                     error_threshold != "unknown"
-                    and total_cluster_pf < error_threshold
+                    and total_reads_pf < error_threshold
                 ):
                     data["threshold"] = error_threshold
                     return QCErrorFatal(format_msg(**data), data=data)
-            case total_cluster_pf if (
+            case total_reads_pf if (
                     warning_threshold != "unknown"
-                    and total_cluster_pf < warning_threshold
+                    and total_reads_pf < warning_threshold
                 ):
                     data["threshold"] = warning_threshold
                     return QCErrorWarning(format_msg(**data), data=data)
 
     return [
         qc_report
         for lane, lane_data in qc_data.sequencing_metrics.items()
-        if (qc_report := _qualify_error(lane_data["total_cluster_pf"], lane))
+        if (qc_report := _qualify_error(lane_data["total_reads_pf"], lane))
     ]
 
diff --git a/checkQC/qc_checkers/unidentified_index.py b/checkQC/qc_checkers/unidentified_index.py
@@ -40,7 +40,7 @@ def unidentified_index(
     qc_errors = []
     for lane, lane_data in qc_data.sequencing_metrics.items():
         for barcode in lane_data["top_unknown_barcodes"]:
-            significance = barcode["count"] / lane_data["total_cluster_pf"] * 100.
+            significance = barcode["count"] / lane_data["total_reads_pf"] * 100.
             if significance < significance_threshold:
                 continue
             index = (

diff --git a/requirements/prod b/requirements/prod
@@ -1,10 +1,10 @@
 click~=8.1.1
 PyYAML~=6.0
-interop~=1.3.2
+interop~=1.4.0
 xmltodict~=0.13.0
 tornado~=6.3.2
 sample_sheet~=0.13.0
 pandas~=2.2.2
-numpy~=1.26.4
+numpy~=2.2.4
 samshee~=0.2.3
 jsonschema~=4.23.0
diff --git a/setup.py b/setup.py
@@ -11,14 +11,19 @@
     author_email='[email protected]',
     url="https://www.github.com/Molmed/checkQC",
     download_url='https://github.com/Molmed/checkQC/archive/{}.tar.gz'.format(__version__),
-    python_requires='>3.10, <3.11',
+    python_requires='>=3.10',  # '>3.10' would exclude 3.10.0 itself
     install_requires=[
-        "click",
-        "PyYAML>=6.0",
-        "interop>=1.2.4",
-        "xmltodict",
-        "tornado",
-        "sample_sheet"],
+        "click~=8.1.1",  # pins mirror requirements/prod
+        "PyYAML~=6.0",
+        "interop~=1.4.0",
+        "xmltodict~=0.13.0",
+        "tornado~=6.3.2",
+        "sample_sheet~=0.13.0",
+        "pandas~=2.2.2",
+        "numpy~=2.2.4",
+        "samshee~=0.2.3",
+        "jsonschema~=4.23.0",
+    ],
     packages=find_packages(exclude=["tests*"]),
     test_suite="tests",
     package_data={

diff --git a/tests/parsers/test_illumina_parser.py b/tests/parsers/test_illumina_parser.py
@@ -4,8 +4,7 @@
 
 from checkQC.parsers.illumina import (
     _read_interop_summary,
-    _read_quality_metrics,
-    _read_top_unknown_barcodes,
+    _read_demultiplexing_metrics,
     _read_run_metadata,
     _read_samplesheet,
 )
@@ -22,15 +21,15 @@ def runfolder_path():
 def test_read_interop_summary(runfolder_path):
     run_summary, index_summary = _read_interop_summary(runfolder_path)
 
-    total_cluster_pf = run_summary.at(0).at(0).reads_pf()
-    assert total_cluster_pf == 532464327
+    total_reads_pf = run_summary.at(0).at(0).reads_pf()
+    assert total_reads_pf == 532464327
 
     sample_id = index_summary.at(0).at(0).sample_id()
     assert sample_id == "Sample_14574-Qiagen-IndexSet1-SP-Lane1"
 
 
 def test_read_quality_metrics(runfolder_path):
-    quality_metrics = _read_quality_metrics(
+    quality_metrics = _read_demultiplexing_metrics(
             runfolder_path / "Reports/Quality_Metrics.csv")
 
     assert len(quality_metrics) == 6
@@ -50,7 +49,7 @@ def test_read_quality_metrics(runfolder_path):
 
 
 def test_read_to_unknown_barcodes(runfolder_path):
-    top_unknown_barcodes = _read_top_unknown_barcodes(
+    top_unknown_barcodes = _read_demultiplexing_metrics(
             runfolder_path / "Reports/Top_Unknown_Barcodes.csv")
 
     assert len(top_unknown_barcodes) == 2084
@@ -108,4 +107,5 @@ def test_read_samplesheet(runfolder_path):
         'lane': 1,
         'sample_id': 'Sample_14574-Qiagen-IndexSet1-SP-Lane1',
         'sample_project': 'AB-1234',
+        'custom_description': 'LIBRARY_NAME:test',
     }
diff --git a/tests/qc_checkers/test_cluster_pf.py b/tests/qc_checkers/test_cluster_pf.py
@@ -8,16 +8,16 @@
 def qc_data():
     return namedtuple("QCData", "sequencing_metrics")(
         {
-            1: {"total_cluster_pf":  1_000_000_000},
-            2: {"total_cluster_pf":     10_000_000},
-            3: {"total_cluster_pf":    100_000_000},
-            4: {"total_cluster_pf": 10_000_000_000},
+            1: {"total_reads_pf":  1_000_000_000},
+            2: {"total_reads_pf":     10_000_000},
+            3: {"total_reads_pf":    100_000_000},
+            4: {"total_reads_pf": 10_000_000_000},
         }
     )
 
 
-def format_msg(total_cluster_pf, threshold, lane, **kwargs):
-    return f"Clusters PF {total_cluster_pf / 10**6}M < {threshold / 10**6}M on lane {lane}"
+def format_msg(total_reads_pf, threshold, lane, **kwargs):
+    return f"Clusters PF {total_reads_pf / 10**6}M < {threshold / 10**6}M on lane {lane}"
 
 
 def test_cluster_pf(qc_data):
@@ -34,7 +34,7 @@ def test_cluster_pf(qc_data):
         match lane:
             case 2:
                 exp_data = {
-                    "total_cluster_pf": qc_data.sequencing_metrics[lane]["total_cluster_pf"],
+                    "total_reads_pf": qc_data.sequencing_metrics[lane]["total_reads_pf"],
                     "threshold": 50_000_000,
                     "lane": lane,
                     "qc_checker": "cluster_pf",
@@ -44,7 +44,7 @@ def test_cluster_pf(qc_data):
                 assert report.data == exp_data
             case 3:
                 exp_data = {
-                    "total_cluster_pf": qc_data.sequencing_metrics[lane]["total_cluster_pf"],
+                    "total_reads_pf": qc_data.sequencing_metrics[lane]["total_reads_pf"],
                     "threshold": 500_500_000,
                     "lane": lane,
                     "qc_checker": "cluster_pf",
@@ -69,7 +69,7 @@ def test_cluster_pf_error_unknown(qc_data):
         match lane:
             case 2:
                 exp_data = {
-                    "total_cluster_pf": qc_data.sequencing_metrics[lane]["total_cluster_pf"],
+                    "total_reads_pf": qc_data.sequencing_metrics[lane]["total_reads_pf"],
                     "threshold": 500_000_000,
                     "lane": lane,
                     "qc_checker": "cluster_pf",
@@ -79,7 +79,7 @@ def test_cluster_pf_error_unknown(qc_data):
                 assert report.data == exp_data
             case 3:
                 exp_data = {
-                    "total_cluster_pf": qc_data.sequencing_metrics[lane]["total_cluster_pf"],
+                    "total_reads_pf": qc_data.sequencing_metrics[lane]["total_reads_pf"],
                     "threshold": 500_000_000,
                     "lane": lane,
                     "qc_checker": "cluster_pf",
@@ -104,7 +104,7 @@ def test_cluster_pf_warning_unknown(qc_data):
     match lane:
         case 2:
             exp_data = {
-                "total_cluster_pf": qc_data.sequencing_metrics[lane]["total_cluster_pf"],
+                "total_reads_pf": qc_data.sequencing_metrics[lane]["total_reads_pf"],
                 "threshold": 50_000_000,
                 "lane": lane,
                 "qc_checker": "cluster_pf",

diff --git a/tests/qc_checkers/test_unidentified_index.py b/tests/qc_checkers/test_unidentified_index.py
@@ -165,7 +165,7 @@ def qc_data():
     return namedtuple("QCData", ["sequencing_metrics", "samplesheet"])(
         {
             1: {
-                "total_cluster_pf": 100,
+                "total_reads_pf": 100,
                 "top_unknown_barcodes": [
                     {"lane": 1, "index": "ACCT", "count": 10},
                     {"lane": 1, "index": "AC", "count": 50},

diff --git a/tests/resources/bclconvert/200624_A00834_0183_BHMTFYTINY/Reports/Demultiplex_Stats.csv b/tests/resources/bclconvert/200624_A00834_0183_BHMTFYTINY/Reports/Demultiplex_Stats.csv
@@ -0,0 +1,7 @@
+Lane,SampleID,Sample_Project,Index,# Reads,# Perfect Index Reads,# One Mismatch Index Reads,# Two Mismatch Index Reads,% Reads,% Perfect Index Reads,% One Mismatch Index Reads,% Two Mismatch Index Reads
+1,Sample_14574-Qiagen-IndexSet1-SP-Lane1,AB-1234,GAACTGAGCG-TCGTGGAGCG,9920,9718,202,0,0.0029,0.9796,0.0204,0.0000
+1,Sample_14575-Qiagen-IndexSet1-SP-Lane1,CD-5678,AGGTCAGATA-CTACAAGATA,8560,8402,158,0,0.0025,0.9815,0.0185,0.0000
+1,Undetermined,Undetermined,,3387226,3387226,0,0,0.9946,1.0000,0.0000,0.0000
+2,Sample_14574-Qiagen-IndexSet1-SP-Lane2,AB-1234,GAACTGAGCG-TCGTGGAGCG,10208,10024,184,0,0.0030,0.9820,0.0180,0.0000
+2,Sample_14575-Qiagen-IndexSet1-SP-Lane2,CD-5678,AGGTCAGATA-CTACAAGATA,8672,8524,148,0,0.0025,0.9829,0.0171,0.0000
+2,Undetermined,Undetermined,,3439373,3439373,0,0,0.9945,1.0000,0.0000,0.0000
diff --git a/tests/resources/bclconvert/200624_A00834_0183_BHMTFYTINY/SampleSheet.csv b/tests/resources/bclconvert/200624_A00834_0183_BHMTFYTINY/SampleSheet.csv
@@ -4,10 +4,10 @@ Date,6/24/2020,,
 Application,Illumina DRAGEN COVIDSeq Test Pipeline,,
 Instrument Type,NovaSeq6000,,
 Assay,Illumina COVIDSeq Test,,
-Index Adapters,IDT-ILMN DNA-RNA UDP Indexes ,,
+Index Adapters,"IDT-ILMN DNA-RNA UDP Indexes ",,
 Chemistry,Amplicon,,
 ,,,
-[Reads],,,,,,
+[Reads],,,,,
 Read1Cycles,36,,
 Index1Cycles,10,,
 Index2Cycles,10,,
@@ -18,8 +18,8 @@ FastqCompressionFormat,gzip,,
 SoftwareVersion,4.1.5,,
 ,,,
 [BCLConvert_Data],,,
-Lane,Sample_ID,Index,Index2,Sample_Project
-1,Sample_14574-Qiagen-IndexSet1-SP-Lane1,GAACTGAGCG,TCGTGGAGCG,AB-1234
-1,Sample_14575-Qiagen-IndexSet1-SP-Lane1,AGGTCAGATA,CTACAAGATA,CD-5678
-2,Sample_14574-Qiagen-IndexSet1-SP-Lane2,GAACTGAGCG,TCGTGGAGCG,AB-1234
-2,Sample_14575-Qiagen-IndexSet1-SP-Lane2,AGGTC  AGATA,C TACAA GATA,CD-5678
+Lane,Sample_ID,Index,Index2,Sample_Project,custom_Description
+1,Sample_14574-Qiagen-IndexSet1-SP-Lane1,GAACTGAGCG,TCGTGGAGCG,AB-1234,LIBRARY_NAME:test
+1,Sample_14575-Qiagen-IndexSet1-SP-Lane1,AGGTCAGATA,CTACAAGATA,CD-5678,LIBRARY_NAME:test
+2,Sample_14574-Qiagen-IndexSet1-SP-Lane2,GAACTGAGCG,TCGTGGAGCG,AB-1234,LIBRARY_NAME:test
+2,Sample_14575-Qiagen-IndexSet1-SP-Lane2,AGGTC  AGATA,C TACAA GATA,CD-5678,LIBRARY_NAME:test