1- """
2- Keep points within a ±deviation_day window around the most densely sampled acquisition day.
3- """
1+ """Keep points within a ±deviation_days window around the most densely sampled acquisition day."""
2+
43import json
5- import logging
6- import math
7- import warnings
4+ import logging
5+ import math
6+ import warnings
7+ from datetime import datetime , timezone
88
99import numpy as np
1010import pdal
1515_EPSILON = 1e-3 # 1 ms — smaller than any realistic GpsTime resolution
1616
1717
18- def gpstime_to_day_index (gpstime : np .ndarray , gpstime_ref_unix : float ) -> np .ndarray :
19- """Convert a GpsTime array (relative seconds) to integer UTC day indices.
20-
21- Each value is floored to midnight UTC, expressed as *days since the
22- UNIX epoch* (1970-01-01).
23-
24- Args:
25- gpstime (np.ndarray): 1-D array of relative GPS timestamps (seconds).
26- gpstime_ref_unix (float): UNIX timestamp of the GPS reference epoch.
27-
28- Returns:
29- np.ndarray: Integer array of shape ``(n,)`` with UNIX day indices.
30-
31- """
32- unix_time = gpstime + gpstime_ref_unix
33- return np .floor (unix_time / _SECONDS_PER_DAY ).astype (np .int64 )
34-
35-
36- def modal_day (day_index : np .ndarray ) -> int :
37- """Return the UNIX day index of the most densely sampled calendar day.
38-
39- Args:
40- day_index (np.ndarray): Integer array of UNIX day indices (one per point).
41-
42- Returns:
43- int: UNIX day index of the mode.
44-
45- """
46- unique_days , counts = np .unique (day_index , return_counts = True )
47- return int (unique_days [counts .argmax ()])
48-
49-
50- def compute_gpstime_window (
51- main_day : int ,
52- deviation_day : int | float ,
53- gpstime_ref_unix : float ,
54- ) -> tuple [float , float ]:
55- """Compute the GpsTime [t_min, t_max] filter window.
56-
57- The window covers all calendar days in
58- ``[main_day - deviation_day, main_day + deviation_day]``.
59- ``t_max`` is nudged inward by ``_EPSILON`` so that a point landing
60- exactly on midnight of ``day_hi + 1`` is excluded.
61-
62- Args:
63- main_day (int): UNIX day index of the modal acquisition day.
64- deviation_day (int | float): Half-width of the window in days.
65- gpstime_ref_unix (float): UNIX timestamp of the GPS reference epoch.
66-
67- Returns:
68- tuple[float, float]: ``(t_min, t_max)`` in GpsTime space
69- """
70- day_lo = main_day - int (deviation_day )
71- day_hi = main_day + int (deviation_day )
72- t_min = max (day_lo , 0 ) * _SECONDS_PER_DAY - gpstime_ref_unix
73- t_max = (day_hi + 1 ) * _SECONDS_PER_DAY - gpstime_ref_unix - _EPSILON
74- return t_min , t_max
75-
76-
77- def filter_points_by_date (
78- input_pipeline : pdal .Pipeline ,
79- deviation_day : int | float ,
80- gpstime_ref_unix : float ,
18+ def filter_deviation_day (
19+ pipeline : pdal .Pipeline ,
20+ deviation_days : int | float = 14 ,
21+ gpstime_ref : str = "2011-09-14 01:46:40" ,
8122) -> pdal .Pipeline :
82- """
83- Filter a LiDAR point cloud, keeping only points acquired within
84- [main_day - deviation_day, main_day + deviation_day], where *main_day*
85- is the calendar day that contains the most points .
23+ """Filter a LiDAR point cloud keeping only points acquired within ±deviation_days
24+ around the most densely sampled calendar day.
25+
26+ Equivalent to the R function ``filter_date_mode()`` from lidR .
8627
8728 Args:
88- input_pipeline (pdal.Pipeline): Executed PDAL Pipeline object.
89- deviation_day (int | float): Number of days around the main acquisition day.
90- Pass ``math.inf`` to skip filtering entirely.
91- gpstime_ref_unix (float): UNIX timestamp (seconds since 1970-01-01 UTC)
29+ pipeline (pdal.Pipeline): Executed PDAL Pipeline object.
30+ deviation_days (int | float): Half-width of the retention window in days.
31+ Pass ``math.inf`` to skip filtering entirely. Default: 14.
32+ gpstime_ref (str): ISO-8601 UTC string of the GPS time reference epoch.
33+ Default: "2011-09-14 01:46:40".
9234
9335 Returns:
94- pdal.Pipeline: A new, configured-but-not-yet-executed PDAL Pipeline restricted to
95- the selected time window.
36+ pdal.Pipeline: A new, configured-but-not-yet-executed PDAL Pipeline restricted
37+ to the selected time window, or the original pipeline unchanged if
38+ ``deviation_days`` is infinite.
9639
9740 Raises:
98- ValueError: If the pipeline produced no arrays, if the point cloud
99- lacks a ``GpsTime`` dimension, or if ``deviation_day `` is negative.
41+ ValueError: If the pipeline has no arrays, lacks a ``GpsTime`` dimension,
42+ or if ``deviation_days `` is negative.
10043 """
101- if not math .isinf (deviation_day ) and deviation_day < 0 :
102- raise ValueError (f"deviation_day must be >= 0 or math.inf, got { deviation_day !r} " )
44+ if not math .isinf (deviation_days ) and deviation_days < 0 :
45+ raise ValueError (f"deviation_days must be >= 0 or math.inf, got { deviation_days !r} " )
10346
104- arrays = input_pipeline .arrays
47+ arrays = pipeline .arrays
10548 if not arrays :
10649 raise ValueError ("No arrays produced by the pipeline." )
10750
@@ -110,19 +53,28 @@ def filter_points_by_date(
11053 if "GpsTime" not in points .dtype .names :
11154 raise ValueError ("Point cloud does not contain a 'GpsTime' dimension." )
11255
113- if math .isinf (deviation_day ):
114- logger .debug ("deviation_day is Inf — no filtering applied." )
115- return input_pipeline
56+ if math .isinf (deviation_days ):
57+ logger .debug ("deviation_days is Inf — no filtering applied." )
58+ return pipeline
11659
60+ gpstime_ref_unix = datetime .fromisoformat (gpstime_ref ).replace (tzinfo = timezone .utc ).timestamp ()
11761 n_total = len (points )
11862
119- day_index = gpstime_to_day_index (points ["GpsTime" ], gpstime_ref_unix )
120- main_day_idx = modal_day (day_index )
121- t_min , t_max = compute_gpstime_window (main_day_idx , deviation_day , gpstime_ref_unix )
122-
63+ # Convert GpsTime to absolute UNIX time and floor to calendar day
12364 unix_time = points ["GpsTime" ] + gpstime_ref_unix
124- day_lo = main_day_idx - int (deviation_day )
125- day_hi = main_day_idx + int (deviation_day )
65+ day_index = np .floor (unix_time / _SECONDS_PER_DAY ).astype (np .int64 )
66+
67+ # Find the modal day (most abundantly sampled)
68+ unique_days , counts = np .unique (day_index , return_counts = True )
69+ modal_day = int (unique_days [counts .argmax ()])
70+
71+ # Compute the GpsTime filter window [t_min, t_max]
72+ day_lo = modal_day - int (deviation_days )
73+ day_hi = modal_day + int (deviation_days )
74+ t_min = max (day_lo , 0 ) * _SECONDS_PER_DAY - gpstime_ref_unix
75+ t_max = (day_hi + 1 ) * _SECONDS_PER_DAY - gpstime_ref_unix - _EPSILON
76+
77+ # Warn about removed points
12678 n_retained = int (
12779 np .sum ((unix_time >= max (day_lo , 0 ) * _SECONDS_PER_DAY ) & (unix_time < (day_hi + 1 ) * _SECONDS_PER_DAY ))
12880 )
@@ -131,21 +83,18 @@ def filter_points_by_date(
13183 warnings .warn (
13284 f"Careful { round (pct_removed )} % of the returns were removed because they had a "
13385 f"deviation of days around the most abundant date greater than your threshold "
134- f"({ deviation_day } days)." ,
86+ f"({ deviation_days } days)." ,
13587 UserWarning ,
13688 stacklevel = 2 ,
13789 )
13890
139- unique_days = np .unique (day_index )
14091 logger .debug (
141- "Main day index : %d | GpsTime window [%.1f, %.1f) | %d/%d days retained | % .1f %% points removed" ,
142- main_day_idx ,
92+ "Modal day: %d | GpsTime window [%.1f, %.1f] | %.1f%% points removed" ,
93+ modal_day ,
14394 t_min ,
14495 t_max ,
145- int (np .sum ((unique_days >= day_lo ) & (unique_days <= day_hi ))),
146- len (unique_days ),
14796 pct_removed ,
14897 )
14998
150- filter_pipeline_json = {"pipeline" : [{"type" : "filters.range" , "limits" : f"GpsTime[{ t_min } :{ t_max } ]" }]}
151- return pdal .Pipeline (json .dumps (filter_pipeline_json ), arrays = [points ])
99+ filter_json = {"pipeline" : [{"type" : "filters.range" , "limits" : f"GpsTime[{ t_min } :{ t_max } ]" }]}
100+ return pdal .Pipeline (json .dumps (filter_json ), arrays = [points ])
0 commit comments