Skip to content

Commit 18e7feb

Browse files
authored
Merge pull request #17 from nkongenelly/DATAOPS-832_olink_cluster_info
Implementing total_clusters_raw in the same way as total_clusters_pf
2 parents 3cb8425 + 9cb6f19 commit 18e7feb

File tree

2 files changed

+49
-16
lines changed

2 files changed

+49
-16
lines changed

projman_filler/interop_run_stats_parser.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,24 +114,18 @@ def _get_non_index_reads(self) -> list:
114114
def _get_conversion_results(self) -> list:
115115
ar = iop.summary(self._run_metrics, 'Lane')
116116
df = pd.DataFrame(ar)
117-
118117
# Get statistics per-lane
119118
n_lanes = self._run_summary.lane_count()
120119
lanes = []
120+
121121
for l in range(1, n_lanes+1):
122122
rows = df.loc[df['Lane'] == l][['ReadNumber','Reads', 'Reads Pf', 'IsIndex']]
123123
rows = rows.reset_index()
124-
# Each 'Reads Pf' value represents the entire lane,
125-
# while 'Reads' (total clusters raw) must be summed over the non-index reads
126-
# See: https://github.com/Illumina/interop/issues/271
127-
total_clusters_raw = 0
128-
total_clusters_pf = 0
129-
for index, row in rows.iterrows():
130-
if index in self._non_index_reads:
131-
# These are the same for the lane across all reads
132-
total_clusters_pf = row['Reads Pf']
133-
# These must be summed
134-
total_clusters_raw += row['Reads']
124+
125+
# These are the same for the lane across all reads
126+
total_clusters_pf = rows.iloc[0].get('Reads Pf', 0)
127+
total_clusters_raw = rows.iloc[0].get('Reads', 0)
128+
135129
lanes.append(Lane(l, total_clusters_raw, total_clusters_pf))
136130
return lanes
137131

tests/test_interop_run_stats_parser.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,68 @@
22
from unittest.mock import MagicMock
33
from projman_filler.interop_run_stats_parser import InteropRunStatsParser
44
from interop import py_interop_run, py_interop_run_metrics, py_interop_summary
5+
import pandas as pd
56
import interop
7+
from projman_filler.lane import Lane
8+
69

710
class TestRunStatsParsers(unittest.TestCase):
811
def test_interop_standardize_read_numbers(self):
912
runfolder = "tests/resources/200624_A00834_0183_BHMTFYTINY"
1013

11-
non_index_reads = [0, 2, 3]
14+
non_index_reads = [0]
1215
iop = InteropRunStatsParser(runfolder, non_index_reads)
1316
remapped = iop._standardize_read_numbers()
14-
expected = {1: 0, 2: 2, 3: 3}
17+
expected = {1: 0}
1518
self.assertEqual(remapped, expected)
1619

1720
reads = iop.get_reads()
18-
expected = [1, 2, 3]
21+
expected = [1]
1922
self.assertEqual(reads, expected)
2023

2124
def test_lanes_total_clusters(self):
22-
non_index_reads = [0, 2, 3]
25+
non_index_reads = [0]
2326
runfolder = "tests/resources/200624_A00834_0183_BHMTFYTINY"
2427
iop = InteropRunStatsParser(runfolder, non_index_reads)
2528
for lane in iop._conversion_results:
2629
assert lane._total_clusters_pf is not None and lane._total_clusters_pf != 0
2730
assert lane._total_clusters_raw is not None and lane._total_clusters_raw != 0
2831

32+
def test_clusters_same_across_lanes(self):
33+
"""
34+
Verify that 'Reads' and 'Reads Pf' are consistently the same across all reads within the same lane
35+
"""
36+
non_index_reads = [0]
37+
runfolder = "tests/resources/200624_A00834_0183_BHMTFYTINY"
38+
iop = InteropRunStatsParser(runfolder, non_index_reads)
39+
40+
interop_lane_summary = interop.summary(iop._run_metrics, 'Lane')
41+
df = pd.DataFrame(interop_lane_summary)
42+
43+
for lane_index, (lane, rows) in enumerate(df.groupby('Lane')):
44+
rows = rows.reset_index()
45+
# Assert all 'Reads' and 'Reads Pf' values are consistent within the lane
46+
assert rows['Reads'].nunique() == 1, (
47+
f"Inconsistent 'Reads' in lane {lane}"
48+
)
49+
assert rows['Reads Pf'].nunique() == 1, (
50+
f"Inconsistent 'Reads Pf' in lane {lane}"
51+
)
52+
53+
# These are the same for the lane across all reads
54+
total_clusters_pf = rows.at[0, 'Reads Pf']
55+
total_clusters_raw = rows.at[0, 'Reads']
56+
57+
58+
# Compare with InteropRunStatsParser results
59+
lane_results = iop._conversion_results[lane_index]
60+
assert lane_results._total_clusters_pf == total_clusters_pf, (
61+
f"Mismatch in total_clusters_pf for lane {lane}"
62+
)
63+
assert lane_results._total_clusters_raw == total_clusters_raw, (
64+
f"Mismatch in total_clusters_raw for lane {lane}"
65+
)
66+
2967
if __name__ == '__main__':
3068
unittest.main()
69+

0 commit comments

Comments
 (0)