Skip to content

Commit c0fa068

Browse files
Increase parsing robustness (#34)
* Use `parametrize` instead of `for` iterations
* Fix some deprecation warnings
* Extract measurement data selection to a variable
* Add dataset to test issue #35
* De-duplicate measurement lines during parsing

Closes #35
1 parent 6935ea7 commit c0fa068

File tree

4 files changed

+2840
-79
lines changed

4 files changed

+2840
-79
lines changed

bletl/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@
1919
NoMeasurementData,
2020
)
2121

22-
__version__ = "1.2.1"
22+
__version__ = "1.2.2"

bletl/parsing/blpro.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import re
88
import warnings
99
import xml.etree.ElementTree
10-
from xml.etree.ElementTree import Element
1110

1211
import numpy
1312
import pandas
@@ -409,28 +408,41 @@ def transform_into_filtertimeseries(
409408
values = None
410409
# test if any filterset is not available in measurements due to invalid data #issue24
411410
if filter_number not in measurements.index.get_level_values("filterset"):
412-
logger.warn(
411+
logger.warning(
413412
'Skipped channel %s with name "%s" because no valid measurements are available.',
414413
fs.filter_type,
415414
fs.filter_name,
416415
)
417416
continue
418-
elif fs.filter_type == "Intensity" and ("Biomass" in fs.filter_name or "BS" in fs.filter_name):
417+
418+
dfm = measurements.xs(filter_number, level="filterset")
419+
# De-duplicate based on the index because in long-running experiments
420+
# the BioLector sometimes duplicates parts of the data.
421+
mask = dfm.index.duplicated(keep="first")
422+
if any(mask):
423+
logger.warning(
424+
"Duplicate filter %s measurements for (cycles, wells) %s.",
425+
filter_number,
426+
dfm[mask].index.to_list(),
427+
)
428+
dfm = dfm[~mask]
429+
430+
if fs.filter_type == "Intensity" and ("Biomass" in fs.filter_name or "BS" in fs.filter_name):
419431
key = f"BS{int(fs.gain_1)}"
420-
times = measurements.xs(filter_number, level="filterset")["time"].unstack()
421-
values = measurements.xs(filter_number, level="filterset")["amp_ref_1"].unstack()
432+
times = dfm["time"].unstack()
433+
values = dfm["amp_ref_1"].unstack()
422434
elif fs.filter_type in {"pH", "DO"} and not return_uncalibrated_optode_data:
423435
key = fs.filter_type
424-
times = measurements.xs(filter_number, level="filterset")["time"].unstack()
425-
values = measurements.xs(filter_number, level="filterset")["cal"].unstack()
436+
times = dfm["time"].unstack()
437+
values = dfm["cal"].unstack()
426438
elif fs.filter_type in {"pH", "DO"} and return_uncalibrated_optode_data:
427439
key = fs.filter_type
428-
times = measurements.xs(filter_number, level="filterset")["time"].unstack()
429-
values = measurements.xs(filter_number, level="filterset")["phase"].unstack()
440+
times = dfm["time"].unstack()
441+
values = dfm["phase"].unstack()
430442
elif fs.filter_type == "Intensity":
431443
key = fs.filter_name
432-
times = measurements.xs(filter_number, level="filterset")["time"].unstack()
433-
values = measurements.xs(filter_number, level="filterset")["amp_ref_1"].unstack()
444+
times = dfm["time"].unstack()
445+
values = dfm["amp_ref_1"].unstack()
434446
else:
435447
logger.warn(
436448
f'Skipped {fs.filter_type} channel with name "{fs.filter_name}" because no processing routine is implemented.'

0 commit comments

Comments
 (0)