Skip to content

Commit 906386f

Browse files
committed
DATAOPS-832: Optimize code and add unit test
1 parent b4f3b10 commit 906386f

File tree

2 files changed

+55
-12
lines changed

2 files changed

+55
-12
lines changed

projman_filler/interop_run_stats_parser.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,24 +114,19 @@ def _get_non_index_reads(self) -> list:
114114
def _get_conversion_results(self) -> list:
115115
ar = iop.summary(self._run_metrics, 'Lane')
116116
df = pd.DataFrame(ar)
117-
117+
118118
# Get statistics per-lane
119119
n_lanes = self._run_summary.lane_count()
120120
lanes = []
121+
121122
for l in range(1, n_lanes+1):
122123
rows = df.loc[df['Lane'] == l][['ReadNumber','Reads', 'Reads Pf', 'IsIndex']]
123124
rows = rows.reset_index()
124-
# Each 'Reads Pf' value represents the entire lane,
125-
# while 'Reads' (total clusters raw) must be summed over the non-index reads
126-
# See: https://github.com/Illumina/interop/issues/271
127-
total_clusters_raw = 0
128-
total_clusters_pf = 0
129-
for index, row in rows.iterrows():
130-
if index in self._non_index_reads:
131-
# These are the same for the lane across all reads
132-
total_clusters_pf = row['Reads Pf']
133-
# These must be summed
134-
total_clusters_raw = row['Reads']
125+
126+
# 'Reads' and 'Reads Pf' are lane-level values repeated on every read row, so the first row is used
127+
total_clusters_pf = rows.iloc[0].get('Reads Pf', 0)
128+
total_clusters_raw = rows.iloc[0].get('Reads', 0)
129+
135130
lanes.append(Lane(l, total_clusters_raw, total_clusters_pf))
136131
return lanes
137132

tests/test_interop_run_stats_parser.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
from unittest.mock import MagicMock
33
from projman_filler.interop_run_stats_parser import InteropRunStatsParser
44
from interop import py_interop_run, py_interop_run_metrics, py_interop_summary
5+
import pandas as pd
56
import interop
7+
from projman_filler.lane import Lane
8+
69

710
class TestRunStatsParsers(unittest.TestCase):
811
def test_interop_standardize_read_numbers(self):
@@ -26,5 +29,50 @@ def test_lanes_total_clusters(self):
2629
assert lane._total_clusters_pf is not None and lane._total_clusters_pf != 0
2730
assert lane._total_clusters_raw is not None and lane._total_clusters_raw != 0
2831

32+
def test_clusters_same_across_lanes(self):
33+
"""
34+
Verify that 'Reads' and 'Reads Pf' are identical across all reads within a lane, and that the parser reports those same per-lane values
35+
"""
36+
non_index_reads = [0, 2, 3]
37+
runfolder = "tests/resources/200624_A00834_0183_BHMTFYTINY"
38+
iop = InteropRunStatsParser(runfolder, non_index_reads)
39+
40+
41+
data = {
42+
'ReadNumber': [1, 1, 2, 2, 3, 3],
43+
'IsIndex': [78, 78, 89, 89, 89, 89],
44+
'Lane': [1, 2, 1, 2, 1, 2],
45+
'Reads': [638337024.0] * 6,
46+
'Reads Pf': [532464320.0, 530917568.0] * 3,
47+
}
48+
49+
df = pd.DataFrame(data)
50+
51+
for lane_index, (lane, rows) in enumerate(df.groupby('Lane')):
52+
rows = rows.reset_index()
53+
# Assert all 'Reads' and 'Reads Pf' values are consistent within the lane
54+
assert rows['Reads'].nunique() == 1, (
55+
f"Inconsistent 'Reads' in lane {lane}"
56+
)
57+
assert rows['Reads Pf'].nunique() == 1, (
58+
f"Inconsistent 'Reads Pf' in lane {lane}"
59+
)
60+
61+
# These are the same for the lane across all reads
62+
total_clusters_pf = rows.at[0, 'Reads Pf']
63+
total_clusters_raw = rows.at[0, 'Reads']
64+
65+
66+
# Compare with InteropRunStatsParser results
67+
lane_results = iop._conversion_results[lane_index]
68+
assert lane_results._total_clusters_pf == total_clusters_pf, (
69+
f"Mismatch in total_clusters_pf for lane {lane}"
70+
)
71+
assert lane_results._total_clusters_raw == total_clusters_raw, (
72+
f"Mismatch in total_clusters_raw for lane {lane}"
73+
)
74+
75+
2976
if __name__ == '__main__':
3077
unittest.main()
78+

0 commit comments

Comments
 (0)