-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathmodels.py
More file actions
146 lines (114 loc) · 4.44 KB
/
models.py
File metadata and controls
146 lines (114 loc) · 4.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""Internal data model."""
from typing import Optional

import numpy as np
import polars as pl
from pydantic import BaseModel, ConfigDict, field_validator

from wristpy.core import config

# File extensions (including the leading dot) declared as valid file types.
# NOTE(review): not referenced in the visible part of this module; presumably
# consumed by file-writing code elsewhere in the package — confirm.
VALID_FILE_TYPES = (".csv", ".parquet")
# Module-level logger obtained from the project's config helper; used by the
# time validator below to warn about duplicate timestamps.
logger = config.get_logger()
class Measurement(BaseModel):
    """A single measurement of a sensor and its corresponding time.

    Attributes:
        name: Optional label describing the type of measurement.
        measurements: The sensor samples as a NumPy array. Must not be empty.
        time: The timestamps as a Polars datetime series. Must be non-empty,
            contain only unique entries, and be sorted.
    """

    # np.ndarray and pl.Series have no Pydantic schema, so arbitrary types
    # must be enabled explicitly. ConfigDict replaces the class-based `Config`
    # that is deprecated in Pydantic v2.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str | None = None
    measurements: np.ndarray
    time: pl.Series

    @classmethod
    def from_data_frame(
        cls, data_frame: pl.DataFrame, name: str | None = None
    ) -> "Measurement":
        """Creates a measurement from a Polars DataFrame.

        Args:
            data_frame: The Polars DataFrame, must have a time column. All
                non-time columns will be used as the 'measurements' input.
            name: Optional name describing the type of measurement.

        Returns:
            A new instance of the class this method is called on. A single
            non-time column yields 1D measurements; several non-time columns
            yield a 2D array with one column per input column.
        """
        values = data_frame.drop("time").to_numpy()
        # Collapse only the column axis. A bare squeeze() would also drop the
        # row axis of a single-row frame, leaving the measurements out of sync
        # with the `time` series.
        if values.ndim == 2 and values.shape[1] == 1:
            values = values.squeeze(axis=1)
        # Use `cls` rather than the hard-coded class so subclasses get
        # instances of their own type back.
        return cls(name=name, measurements=values, time=data_frame["time"])

    def lazy_frame(self) -> pl.LazyFrame:
        """Converts the measurement to a LazyFrame.

        Returns:
            The Measurement as a LazyFrame. The time property will have column name
            'time'. Other column names should not be relied upon.
        """
        return pl.concat(
            [
                pl.LazyFrame(self.measurements),
                # Validation guarantees sorted timestamps, so flag the column
                # as sorted for Polars.
                pl.LazyFrame({"time": self.time}).set_sorted("time"),
            ],
            how="horizontal",
        )

    @field_validator("measurements")
    @classmethod
    def validate_measurements_not_empty(cls, v: np.ndarray) -> np.ndarray:
        """Validate that the measurements array is not empty.

        Args:
            cls: The class.
            v: The measurements array to validate.

        Returns:
            v: The measurements array if it is not empty.

        Raises:
            ValueError: If the measurements array is empty.
        """
        if v.size == 0:
            raise ValueError("measurements array must not be empty")
        return v

    @field_validator("time")
    @classmethod
    def validate_time(cls, v: pl.Series) -> pl.Series:
        """Validate the time series.

        Check that the time series is a datetime series, is not empty, contains
        only unique entries, and is sorted.

        Args:
            cls: The class.
            v: The time series to validate.

        Returns:
            v: The time series if it is valid.

        Raises:
            ValueError: If the time series is not a datetime series, is empty,
                contains duplicate entries, or is not sorted.
        """
        if not isinstance(v.dtype, pl.datatypes.Datetime):
            raise ValueError("Time must be a datetime series")
        # Cheap check first: an empty series trivially passes the uniqueness
        # and sortedness checks, so the outcome for empty input is unchanged.
        if v.is_empty():
            raise ValueError("Time series cannot be empty")
        if not v.is_unique().all():
            logger.warning(
                "Duplicate timestamps found in time series. "
                "See the `allow_duplicates` parameter if you "
                "would want to process this data regardless."
            )
            raise ValueError("Time series must contain unique entries")
        if not v.is_sorted():
            raise ValueError("Time series must be sorted")
        return v
class WatchData(BaseModel):
    """Container for the raw data read off the watch.

    All raw input data is exposed through this model. Instances are meant to
    be treated as read-only: processing must not mutate them.
    """

    acceleration: Measurement
    lux: Measurement | None = None
    battery: Measurement | None = None
    capsense: Measurement | None = None
    temperature: Measurement | None = None
    idle_sleep_mode_flag: bool | None = None
    dynamic_range: tuple[float, float] | None = None
    time_zone: str | None = None

    @field_validator("acceleration")
    def validate_acceleration(cls, v: Measurement) -> Measurement:
        """Check that the acceleration samples form an (N, 3) array.

        Args:
            cls: The class.
            v: The acceleration measurement to validate.

        Returns:
            v: The unchanged acceleration measurement when its shape is valid.

        Raises:
            ValueError: If the acceleration data is not a 2D array with 3 columns.
        """
        shape = v.measurements.shape
        # Rank is checked first so shape[1] is only read on 2D arrays.
        is_n_by_three = len(shape) == 2 and shape[1] == 3
        if not is_n_by_three:
            raise ValueError("acceleration must be a 2D array with 3 columns")
        return v