Skip to content

Commit 4a37a23

Browse files
committed
refacto function with test
1 parent dc033a2 commit 4a37a23

File tree

2 files changed

+301
-171
lines changed

2 files changed

+301
-171
lines changed

lidar_for_fuel/pretreatment/filter_deviation_day.py

Lines changed: 68 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,66 @@
1212
logger = logging.getLogger(__name__)
1313

1414
_SECONDS_PER_DAY = 86_400.0
15+
_EPSILON = 1e-3 # 1 ms — smaller than any realistic GpsTime resolution
16+
17+
18+
def gpstime_to_day_index(gpstime: np.ndarray, gpstime_ref_unix: float) -> np.ndarray:
19+
"""Convert a GpsTime array (relative seconds) to integer UTC day indices.
20+
21+
Each value is floored to midnight UTC, expressed as *days since the
22+
UNIX epoch* (1970-01-01).
23+
24+
Args:
25+
gpstime (np.ndarray): 1-D array of relative GPS timestamps (seconds).
26+
gpstime_ref_unix (float): UNIX timestamp of the GPS reference epoch.
27+
28+
Returns:
29+
np.ndarray: Integer array of shape ``(n,)`` with UNIX day indices.
30+
31+
"""
32+
unix_time = gpstime + gpstime_ref_unix
33+
return np.floor(unix_time / _SECONDS_PER_DAY).astype(np.int64)
34+
35+
36+
def modal_day(day_index: np.ndarray) -> int:
37+
"""Return the UNIX day index of the most densely sampled calendar day.
38+
39+
Args:
40+
day_index (np.ndarray): Integer array of UNIX day indices (one per point).
41+
42+
Returns:
43+
int: UNIX day index of the mode.
44+
45+
"""
46+
unique_days, counts = np.unique(day_index, return_counts=True)
47+
return int(unique_days[counts.argmax()])
48+
49+
50+
def compute_gpstime_window(
51+
main_day: int,
52+
deviation_day: int | float,
53+
gpstime_ref_unix: float,
54+
) -> tuple[float, float]:
55+
"""Compute the GpsTime [t_min, t_max] filter window.
56+
57+
The window covers all calendar days in
58+
``[main_day - deviation_day, main_day + deviation_day]``.
59+
``t_max`` is nudged inward by ``_EPSILON`` so that a point landing
60+
exactly on midnight of ``day_hi + 1`` is excluded.
61+
62+
Args:
63+
main_day (int): UNIX day index of the modal acquisition day.
64+
deviation_day (int | float): Half-width of the window in days.
65+
gpstime_ref_unix (float): UNIX timestamp of the GPS reference epoch.
66+
67+
Returns:
68+
tuple[float, float]: ``(t_min, t_max)`` in GpsTime space
69+
"""
70+
day_lo = main_day - int(deviation_day)
71+
day_hi = main_day + int(deviation_day)
72+
t_min = max(day_lo, 0) * _SECONDS_PER_DAY - gpstime_ref_unix
73+
t_max = (day_hi + 1) * _SECONDS_PER_DAY - gpstime_ref_unix - _EPSILON
74+
return t_min, t_max
1575

1676

1777
def filter_points_by_date(
@@ -56,29 +116,13 @@ def filter_points_by_date(
56116

57117
n_total = len(points)
58118

59-
# Convert relative GPS time to absolute UNIX timestamps
60-
unix_time = points["GpsTime"] + gpstime_ref_unix
61-
62-
# Floor to calendar day (midnight UTC)
63-
day_index = np.floor(unix_time / _SECONDS_PER_DAY).astype(np.int64) # days since epoch
64-
65-
# Find the most densely sampled day (mode)
66-
unique_days, counts = np.unique(day_index, return_counts=True)
67-
id_max_count = counts.argmax() # index of the modal day
68-
main_day = unique_days[id_max_count] # day index of the modal day
69-
70-
# Build the window of valid day indices
71-
day_lo = main_day - int(deviation_day)
72-
day_hi = main_day + int(deviation_day)
73-
74-
# Borne haute inclusive: include the *entire* last day
75-
# PDAL filters.range uses a closed interval [t_min, t_max], so we subtract
76-
# a small epsilon to exclude any point falling exactly on midnight of day_hi+1.
77-
_EPSILON = 1e-3 # 1 ms — smaller than any realistic GpsTime resolution
78-
t_min = max(day_lo, 0) * _SECONDS_PER_DAY - gpstime_ref_unix # back to GpsTime space
79-
t_max = (day_hi + 1) * _SECONDS_PER_DAY - gpstime_ref_unix - _EPSILON # strictly excludes day_hi+1
119+
day_index = gpstime_to_day_index(points["GpsTime"], gpstime_ref_unix)
120+
main_day_idx = modal_day(day_index)
121+
t_min, t_max = compute_gpstime_window(main_day_idx, deviation_day, gpstime_ref_unix)
80122

81-
# Warning: percentage of points removed
123+
unix_time = points["GpsTime"] + gpstime_ref_unix
124+
day_lo = main_day_idx - int(deviation_day)
125+
day_hi = main_day_idx + int(deviation_day)
82126
n_retained = int(
83127
np.sum((unix_time >= max(day_lo, 0) * _SECONDS_PER_DAY) & (unix_time < (day_hi + 1) * _SECONDS_PER_DAY))
84128
)
@@ -92,18 +136,16 @@ def filter_points_by_date(
92136
stacklevel=2,
93137
)
94138

139+
unique_days = np.unique(day_index)
95140
logger.debug(
96141
"Main day index: %d | GpsTime window [%.1f, %.1f) | %d/%d days retained | %.1f %% points removed",
97-
main_day,
142+
main_day_idx,
98143
t_min,
99144
t_max,
100145
int(np.sum((unique_days >= day_lo) & (unique_days <= day_hi))),
101146
len(unique_days),
102147
pct_removed,
103148
)
104149

105-
# Create PDAL pipeline using range filter for timestamps
106150
filter_pipeline_json = {"pipeline": [{"type": "filters.range", "limits": f"GpsTime[{t_min}:{t_max}]"}]}
107-
108-
# Return a PDAL Pipeline object
109151
return pdal.Pipeline(json.dumps(filter_pipeline_json), arrays=[points])

0 commit comments

Comments
 (0)