Skip to content

Commit bbbd381

Browse files
authored
Feature/issue 277/allow duplicate time (#279)
* Initial setup for allow_duplicates in readers
* Add cli/entrypoint for allow_duplicates
* Update poetry.lock (bump for security)
* Fixing mypy/ruff
* Fixed some typos
* Modify tests to use fixture
1 parent 59e3647 commit bbbd381

12 files changed

Lines changed: 168 additions & 37 deletions

File tree

poetry.lock

Lines changed: 15 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "wristpy"
3-
version = "0.2.2"
3+
version = "0.2.3"
44
description = "wristpy is a Python package designed for processing and analyzing wrist-worn accelerometer data."
55
authors = [
66
"Adam Santorelli <adam.santorelli@childmind.org>",

src/wristpy/core/cli.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,18 @@ def main(
153153
"Must be greater than or equal to 1.",
154154
min=1,
155155
),
156+
allow_duplicates: bool = typer.Option(
157+
False,
158+
"-d",
159+
"--allow-duplicates",
160+
help="Whether to allow duplicate timestamps in the sensor data. "
161+
"If this flag is set, no error will be raised during Measurement validation "
162+
"and processing can continue. Only unique timestamps and their corresponding "
163+
"sensor values will be kept. "
164+
"The first occurrence of each timestamp is retained. Defaults to False. "
165+
"Note that the presence of duplicate timestamps may indicate sensor "
166+
"malfunction. Modifying this parameter should be done with caution.",
167+
),
156168
verbosity: bool = typer.Option(
157169
False,
158170
"-v",
@@ -194,6 +206,7 @@ def main(
194206
nonwear_algorithm=nonwear_algorithms, # type: ignore[arg-type] # Covered by NonwearAlgorithm Enum class
195207
verbosity=log_level,
196208
output_filetype=output_filetype.value,
209+
allow_duplicates=allow_duplicates,
197210
)
198211
except exceptions.EmptyDirectoryError as e:
199212
typer.echo(f"Error: {e}", err=True)

src/wristpy/core/models.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,11 @@ def validate_time(cls, v: pl.Series) -> pl.Series:
9696
if not isinstance(v.dtype, pl.datatypes.Datetime):
9797
raise ValueError("Time must be a datetime series")
9898
if not v.is_unique().all():
99+
logger.error(
100+
"Duplicate timestamps found in time series. "
101+
"See the `allow_duplicates` parameter if you "
102+
"would want to process this data regardless."
103+
)
99104
raise ValueError("Time series must contain unique entries")
100105
if not v.is_sorted():
101106
raise ValueError("Time series must be sorted")

src/wristpy/core/orchestrator.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ def run(
4141
epoch_length: float = 5,
4242
activity_metric: Sequence[Literal["enmo", "mad", "ag_count", "mims"]] = ["enmo"],
4343
nonwear_algorithm: Sequence[Literal["ggir", "cta", "detach"]] = ["ggir"],
44+
allow_duplicates: bool = False,
4445
verbosity: int = logging.WARNING,
4546
output_filetype: Literal[".csv", ".parquet"] = ".csv",
4647
) -> Union[writers.OrchestratorResults, Dict[str, writers.OrchestratorResults]]:
@@ -70,6 +71,7 @@ def run(
7071
activity_metric: The metric(s) to be used for physical activity categorization.
7172
Multiple metrics can be specified as a sequence.
7273
nonwear_algorithm: The algorithm to be used for nonwear detection.
74+
allow_duplicates: Whether to allow duplicate timestamps in the sensor data.
7375
verbosity: The logging level for the logger.
7476
output_filetype: Specifies the data format for the save files. Only used when
7577
processing directories.
@@ -133,6 +135,7 @@ def run(
133135
activity_metric=activity_metric,
134136
verbosity=verbosity,
135137
nonwear_algorithm=nonwear_algorithm,
138+
allow_duplicates=allow_duplicates,
136139
)
137140

138141
return _run_directory(
@@ -145,6 +148,7 @@ def run(
145148
verbosity=verbosity,
146149
output_filetype=output_filetype,
147150
nonwear_algorithm=nonwear_algorithm,
151+
allow_duplicates=allow_duplicates,
148152
)
149153

150154

@@ -161,6 +165,7 @@ def _run_directory(
161165
verbosity: int = logging.WARNING,
162166
output_filetype: Literal[".csv", ".parquet"] = ".csv",
163167
activity_metric: Sequence[Literal["enmo", "mad", "ag_count", "mims"]] = ["enmo"],
168+
allow_duplicates: bool = False,
164169
) -> Dict[str, writers.OrchestratorResults]:
165170
"""Runs main processing steps for wristpy on directories.
166171
@@ -186,6 +191,7 @@ def _run_directory(
186191
output_filetype: Specifies the data format for the save files.
187192
activity_metric: The metric(s) to be used for physical activity categorization.
188193
Multiple metrics can be specified as a sequence.
194+
allow_duplicates: Whether to allow duplicate timestamps in the sensor data.
189195
190196
Returns:
191197
All calculated data in a save ready format as a dictionary of
@@ -256,6 +262,7 @@ def _run_directory(
256262
verbosity=verbosity,
257263
nonwear_algorithm=nonwear_algorithm,
258264
activity_metric=activity_metric,
265+
allow_duplicates=allow_duplicates,
259266
)
260267
except Exception as e:
261268
logger.error("Did not run file: %s, Error: %s", file, e)
@@ -275,6 +282,7 @@ def _run_file(
275282
epoch_length: float = 5.0,
276283
activity_metric: Sequence[Literal["enmo", "mad", "ag_count", "mims"]] = ["enmo"],
277284
nonwear_algorithm: Sequence[Literal["ggir", "cta", "detach"]] = ["ggir"],
285+
allow_duplicates: bool = False,
278286
verbosity: int = logging.WARNING,
279287
) -> writers.OrchestratorResults:
280288
"""Runs main processing steps for wristpy and returns data for analysis.
@@ -303,6 +311,10 @@ def _run_file(
303311
Multiple metrics can be specified as a sequence.
304312
nonwear_algorithm: The algorithm to be used for nonwear detection. A sequence of
305313
algorithms can be provided. If so, a majority vote will be taken.
314+
allow_duplicates: Whether to allow duplicate timestamps in the sensor data.
315+
If set to True, no error will be raised and we will keep only the unique
316+
timestamps and their associated sensor values. The first occurrence of each
317+
timestamp is kept.
306318
verbosity: The logging level for the logger.
307319
308320
Returns:
@@ -346,7 +358,7 @@ def _run_file(
346358
logger.error(msg)
347359
raise ValueError(msg)
348360

349-
watch_data = readers.read_watch_data(input)
361+
watch_data = readers.read_watch_data(input, allow_duplicates=allow_duplicates)
350362

351363
if calibrator is None:
352364
logger.debug("Running without calibration")

0 commit comments

Comments
 (0)