Skip to content

Commit a866eaa

Browse files
committed
refactor filter_deviation_day and update config
1 parent 4a37a23 commit a866eaa

File tree

5 files changed

+93
-260
lines changed

5 files changed

+93
-260
lines changed

.flake8

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
[flake8]
2-
max-line-length = 119
2+
max-line-length = 119
3+
extend-ignore = E203

configs/config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ pretreatment:
1818
dimension: Classification
1919
keep_values: [1, 2, 3, 4, 5, 9, 66] # Classes to keep
2020
filter_deviation:
21-
deviation_day: 14
22-
# UNIX timestamp (seconds since 1970-01-01 UTC) corresponding to gpstime == 0.
23-
# Default matches the R reference: as.POSIXct("2011-09-14 01:46:40") = 1315964800.0
21+
deviation_days: 14
22+
# ISO-8601 UTC string corresponding to gpstime == 0.
23+
# Default matches the R reference: as.POSIXct("2011-09-14 01:46:40")
2424
# Override this value if your LiDAR data uses a different GPS time origin.
25-
gpstime_ref_unix: 1315964800.0
25+
gpstime_ref: "2011-09-14 01:46:40"
2626
filter_normalize:
2727
height: 60
2828

lidar_for_fuel/main_pretreatment.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import hydra
1111
from omegaconf import DictConfig
1212

13-
from lidar_for_fuel.pretreatment.filter_deviation_day import filter_points_by_date
13+
from lidar_for_fuel.pretreatment.filter_deviation_day import filter_deviation_day
1414
from lidar_for_fuel.pretreatment.normalize_height_in_pointcloud import normalize_height
1515
from lidar_for_fuel.pretreatment.validate_lidar_file import check_lidar_file
1616

@@ -59,9 +59,9 @@ def main_on_one_tile(filename):
5959
pipeline_check_lidar = check_lidar_file(input_filename, srid)
6060

6161
logging.info(f"\nFilter deviation day of 1 for tile : {tilename}")
62-
deviation_days = config.pretreatment.filter_deviation.deviation_day
63-
gpstime_ref_unix = config.pretreatment.filter_deviation.gpstime_ref_unix
64-
pipeline_filter_deviation_day = filter_points_by_date(pipeline_check_lidar, deviation_days, gpstime_ref_unix)
62+
deviation_days = config.pretreatment.filter_deviation.deviation_days
63+
gpstime_ref = config.pretreatment.filter_deviation.gpstime_ref
64+
pipeline_filter_deviation_day = filter_deviation_day(pipeline_check_lidar, deviation_days, gpstime_ref)
6565

6666
logging.info(f"\nCalculate normalize of 1 for tile : {tilename}")
6767
# Generate output filenames
Lines changed: 50 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
"""
2-
Keep points within a ±deviation_day window around the most densely sampled acquisition day.
3-
"""
1+
"""Keep points within a ±deviation_days window around the most densely sampled acquisition day."""
2+
43
import json
5-
import logging
6-
import math
7-
import warnings
4+
import logging
5+
import math
6+
import warnings
7+
from datetime import datetime, timezone
88

99
import numpy as np
1010
import pdal
@@ -15,93 +15,36 @@
1515
_EPSILON = 1e-3 # 1 ms — smaller than any realistic GpsTime resolution
1616

1717

18-
def gpstime_to_day_index(gpstime: np.ndarray, gpstime_ref_unix: float) -> np.ndarray:
19-
"""Convert a GpsTime array (relative seconds) to integer UTC day indices.
20-
21-
Each value is floored to midnight UTC, expressed as *days since the
22-
UNIX epoch* (1970-01-01).
23-
24-
Args:
25-
gpstime (np.ndarray): 1-D array of relative GPS timestamps (seconds).
26-
gpstime_ref_unix (float): UNIX timestamp of the GPS reference epoch.
27-
28-
Returns:
29-
np.ndarray: Integer array of shape ``(n,)`` with UNIX day indices.
30-
31-
"""
32-
unix_time = gpstime + gpstime_ref_unix
33-
return np.floor(unix_time / _SECONDS_PER_DAY).astype(np.int64)
34-
35-
36-
def modal_day(day_index: np.ndarray) -> int:
37-
"""Return the UNIX day index of the most densely sampled calendar day.
38-
39-
Args:
40-
day_index (np.ndarray): Integer array of UNIX day indices (one per point).
41-
42-
Returns:
43-
int: UNIX day index of the mode.
44-
45-
"""
46-
unique_days, counts = np.unique(day_index, return_counts=True)
47-
return int(unique_days[counts.argmax()])
48-
49-
50-
def compute_gpstime_window(
51-
main_day: int,
52-
deviation_day: int | float,
53-
gpstime_ref_unix: float,
54-
) -> tuple[float, float]:
55-
"""Compute the GpsTime [t_min, t_max] filter window.
56-
57-
The window covers all calendar days in
58-
``[main_day - deviation_day, main_day + deviation_day]``.
59-
``t_max`` is nudged inward by ``_EPSILON`` so that a point landing
60-
exactly on midnight of ``day_hi + 1`` is excluded.
61-
62-
Args:
63-
main_day (int): UNIX day index of the modal acquisition day.
64-
deviation_day (int | float): Half-width of the window in days.
65-
gpstime_ref_unix (float): UNIX timestamp of the GPS reference epoch.
66-
67-
Returns:
68-
tuple[float, float]: ``(t_min, t_max)`` in GpsTime space
69-
"""
70-
day_lo = main_day - int(deviation_day)
71-
day_hi = main_day + int(deviation_day)
72-
t_min = max(day_lo, 0) * _SECONDS_PER_DAY - gpstime_ref_unix
73-
t_max = (day_hi + 1) * _SECONDS_PER_DAY - gpstime_ref_unix - _EPSILON
74-
return t_min, t_max
75-
76-
77-
def filter_points_by_date(
78-
input_pipeline: pdal.Pipeline,
79-
deviation_day: int | float,
80-
gpstime_ref_unix: float,
18+
def filter_deviation_day(
19+
pipeline: pdal.Pipeline,
20+
deviation_days: int | float = 14,
21+
gpstime_ref: str = "2011-09-14 01:46:40",
8122
) -> pdal.Pipeline:
82-
"""
83-
Filter a LiDAR point cloud, keeping only points acquired within
84-
[main_day - deviation_day, main_day + deviation_day], where *main_day*
85-
is the calendar day that contains the most points.
23+
"""Filter a LiDAR point cloud keeping only points acquired within ±deviation_days
24+
around the most densely sampled calendar day.
25+
26+
Equivalent to the R function ``filter_date_mode()`` from lidR.
8627
8728
Args:
88-
input_pipeline (pdal.Pipeline): Executed PDAL Pipeline object.
89-
deviation_day (int | float): Number of days around the main acquisition day.
90-
Pass ``math.inf`` to skip filtering entirely.
91-
gpstime_ref_unix (float): UNIX timestamp (seconds since 1970-01-01 UTC)
29+
pipeline (pdal.Pipeline): Executed PDAL Pipeline object.
30+
deviation_days (int | float): Half-width of the retention window in days.
31+
Pass ``math.inf`` to skip filtering entirely. Default: 14.
32+
gpstime_ref (str): ISO-8601 UTC string of the GPS time reference epoch.
33+
Default: "2011-09-14 01:46:40".
9234
9335
Returns:
94-
pdal.Pipeline: A new, configured-but-not-yet-executed PDAL Pipeline restricted to
95-
the selected time window.
36+
pdal.Pipeline: A new, configured-but-not-yet-executed PDAL Pipeline restricted
37+
to the selected time window, or the original pipeline unchanged if
38+
``deviation_days`` is infinite.
9639
9740
Raises:
98-
ValueError: If the pipeline produced no arrays, if the point cloud
99-
lacks a ``GpsTime`` dimension, or if ``deviation_day`` is negative.
41+
ValueError: If the pipeline has no arrays, lacks a ``GpsTime`` dimension,
42+
or if ``deviation_days`` is negative.
10043
"""
101-
if not math.isinf(deviation_day) and deviation_day < 0:
102-
raise ValueError(f"deviation_day must be >= 0 or math.inf, got {deviation_day!r}")
44+
if not math.isinf(deviation_days) and deviation_days < 0:
45+
raise ValueError(f"deviation_days must be >= 0 or math.inf, got {deviation_days!r}")
10346

104-
arrays = input_pipeline.arrays
47+
arrays = pipeline.arrays
10548
if not arrays:
10649
raise ValueError("No arrays produced by the pipeline.")
10750

@@ -110,19 +53,28 @@ def filter_points_by_date(
11053
if "GpsTime" not in points.dtype.names:
11154
raise ValueError("Point cloud does not contain a 'GpsTime' dimension.")
11255

113-
if math.isinf(deviation_day):
114-
logger.debug("deviation_day is Inf — no filtering applied.")
115-
return input_pipeline
56+
if math.isinf(deviation_days):
57+
logger.debug("deviation_days is Inf — no filtering applied.")
58+
return pipeline
11659

60+
gpstime_ref_unix = datetime.fromisoformat(gpstime_ref).replace(tzinfo=timezone.utc).timestamp()
11761
n_total = len(points)
11862

119-
day_index = gpstime_to_day_index(points["GpsTime"], gpstime_ref_unix)
120-
main_day_idx = modal_day(day_index)
121-
t_min, t_max = compute_gpstime_window(main_day_idx, deviation_day, gpstime_ref_unix)
122-
63+
# Convert GpsTime to absolute UNIX time and floor to calendar day
12364
unix_time = points["GpsTime"] + gpstime_ref_unix
124-
day_lo = main_day_idx - int(deviation_day)
125-
day_hi = main_day_idx + int(deviation_day)
65+
day_index = np.floor(unix_time / _SECONDS_PER_DAY).astype(np.int64)
66+
67+
# Find the modal day (most abundantly sampled)
68+
unique_days, counts = np.unique(day_index, return_counts=True)
69+
modal_day = int(unique_days[counts.argmax()])
70+
71+
# Compute the GpsTime filter window [t_min, t_max]
72+
day_lo = modal_day - int(deviation_days)
73+
day_hi = modal_day + int(deviation_days)
74+
t_min = max(day_lo, 0) * _SECONDS_PER_DAY - gpstime_ref_unix
75+
t_max = (day_hi + 1) * _SECONDS_PER_DAY - gpstime_ref_unix - _EPSILON
76+
77+
# Warn about removed points
12678
n_retained = int(
12779
np.sum((unix_time >= max(day_lo, 0) * _SECONDS_PER_DAY) & (unix_time < (day_hi + 1) * _SECONDS_PER_DAY))
12880
)
@@ -131,21 +83,18 @@ def filter_points_by_date(
13183
warnings.warn(
13284
f"Careful {round(pct_removed)} % of the returns were removed because they had a "
13385
f"deviation of days around the most abundant date greater than your threshold "
134-
f"({deviation_day} days).",
86+
f"({deviation_days} days).",
13587
UserWarning,
13688
stacklevel=2,
13789
)
13890

139-
unique_days = np.unique(day_index)
14091
logger.debug(
141-
"Main day index: %d | GpsTime window [%.1f, %.1f) | %d/%d days retained | %.1f %% points removed",
142-
main_day_idx,
92+
"Modal day: %d | GpsTime window [%.1f, %.1f] | %.1f%% points removed",
93+
modal_day,
14394
t_min,
14495
t_max,
145-
int(np.sum((unique_days >= day_lo) & (unique_days <= day_hi))),
146-
len(unique_days),
14796
pct_removed,
14897
)
14998

150-
filter_pipeline_json = {"pipeline": [{"type": "filters.range", "limits": f"GpsTime[{t_min}:{t_max}]"}]}
151-
return pdal.Pipeline(json.dumps(filter_pipeline_json), arrays=[points])
99+
filter_json = {"pipeline": [{"type": "filters.range", "limits": f"GpsTime[{t_min}:{t_max}]"}]}
100+
return pdal.Pipeline(json.dumps(filter_json), arrays=[points])

0 commit comments

Comments
 (0)