Skip to content

Commit 2158a23

Browse files
committed
stop compare
1 parent 053098f commit 2158a23

4 files changed

Lines changed: 648 additions & 0 deletions

File tree

cmd/transitland/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
tl "github.com/interline-io/transitland-lib"
1010
"github.com/interline-io/transitland-lib/cmds"
1111
"github.com/interline-io/transitland-lib/diff"
12+
"github.com/interline-io/transitland-lib/stopcompare"
1213
"github.com/interline-io/transitland-lib/tlcli"
1314
"github.com/interline-io/transitland-lib/tlxy"
1415

@@ -55,6 +56,7 @@ func init() {
5556
tlcli.CobraHelper(&cmds.ValidatorCommand{}, pc, "validate"),
5657
tlcli.CobraHelper(&cmds.RTConvertCommand{}, pc, "rt-convert"),
5758
tlcli.CobraHelper(&diff.Command{}, pc, "diff"),
59+
tlcli.CobraHelper(&stopcompare.Command{}, pc, "stop-compare"),
5860
tlcli.CobraHelper(&tlxy.PolylinesCommand{}, pc, "polylines-create"),
5961
tlcli.CobraHelper(&cmds.ServerCommand{}, pc, "server"),
6062
tlcli.CobraHelper(&versionCommand{}, pc, "version"),

stopcompare/stopcompare.go

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
// Package stopcompare provides geometric comparison of GTFS feeds based on stop point clouds.
2+
package stopcompare
3+
4+
import (
5+
"math"
6+
"sort"
7+
8+
"github.com/golang/geo/s2"
9+
"github.com/interline-io/transitland-lib/adapters"
10+
"github.com/interline-io/transitland-lib/tlxy"
11+
"github.com/tidwall/rtree"
12+
)
13+
14+
// Relationship describes the spatial relationship between two feeds' stop sets.
15+
type Relationship int
16+
17+
const (
18+
RelationshipUnknown Relationship = iota
19+
RelationshipSame // A and B cover the same stops
20+
RelationshipSubset // A ⊆ B: A's stops are geographically contained in B
21+
RelationshipSuperset // A ⊇ B: B's stops are contained in A
22+
RelationshipOverlapping // Partial geographic overlap
23+
RelationshipDisjoint // No meaningful geographic overlap
24+
)
25+
26+
func (r Relationship) MarshalJSON() ([]byte, error) {
27+
return []byte(`"` + r.String() + `"`), nil
28+
}
29+
30+
func (r Relationship) String() string {
31+
switch r {
32+
case RelationshipSame:
33+
return "same"
34+
case RelationshipSubset:
35+
return "subset"
36+
case RelationshipSuperset:
37+
return "superset"
38+
case RelationshipOverlapping:
39+
return "overlapping"
40+
case RelationshipDisjoint:
41+
return "disjoint"
42+
default:
43+
return "unknown"
44+
}
45+
}
46+
47+
// Options controls the thresholds used for classification.
48+
type Options struct {
49+
// Normalized ANND at or below which stops are "well matched". Default: 0.02
50+
ANNDRatioThreshold float64
51+
// Bounding box IoU at or above which feeds cover the "same" region. Default: 0.75
52+
BboxIoUThreshold float64
53+
// Bounding box overlap coefficient at or above which one feed is a geographic subset. Default: 0.90
54+
BboxOverlapThreshold float64
55+
// If true, only consider stops with LocationType==0 (boarding stops). Default: false
56+
BoardingStopsOnly bool
57+
}
58+
59+
// DefaultOptions returns Options with recommended defaults.
60+
func DefaultOptions() Options {
61+
return Options{
62+
ANNDRatioThreshold: 0.02,
63+
BboxIoUThreshold: 0.75,
64+
BboxOverlapThreshold: 0.90,
65+
}
66+
}
67+
68+
// DirectionStats holds nearest-neighbor statistics from one feed to another.
69+
type DirectionStats struct {
70+
Direction string `json:"direction"`
71+
TotalStops int `json:"total_stops"`
72+
MeanDistMeters float64 `json:"mean_dist_meters"`
73+
MedianDistMeters float64 `json:"median_dist_meters"`
74+
P90DistMeters float64 `json:"p90_dist_meters"`
75+
MaxDistMeters float64 `json:"max_dist_meters"`
76+
// NormalizedANND is the mean nearest-neighbor distance divided by the bounding
77+
// box diagonal of the larger feed. ~0 means tight match; ~1 means far apart.
78+
NormalizedANND float64 `json:"normalized_annd"`
79+
}
80+
81+
// BboxMetrics holds bounding-box overlap statistics.
82+
type BboxMetrics struct {
83+
IoU float64 `json:"iou"`
84+
OverlapCoefficient float64 `json:"overlap_coefficient"`
85+
}
86+
87+
// Result is the full output of comparing two feeds.
88+
type Result struct {
89+
FeedA string `json:"feed_a"`
90+
FeedB string `json:"feed_b"`
91+
StopCountA int `json:"stop_count_a"`
92+
StopCountB int `json:"stop_count_b"`
93+
AtoB DirectionStats `json:"a_to_b"`
94+
BtoA DirectionStats `json:"b_to_a"`
95+
Bbox BboxMetrics `json:"bbox"`
96+
Relationship Relationship `json:"relationship"`
97+
}
98+
99+
// stopEntry is stored in the R-tree.
100+
type stopEntry struct {
101+
pt tlxy.Point
102+
}
103+
104+
// pointIndex is a spatial index over a set of stop points.
105+
type pointIndex struct {
106+
idx rtree.Generic[stopEntry]
107+
bbox s2.Rect
108+
count int
109+
}
110+
111+
func newPointIndex(pts []tlxy.Point) *pointIndex {
112+
pi := &pointIndex{
113+
bbox: s2.EmptyRect(),
114+
}
115+
for _, p := range pts {
116+
pi.insert(p)
117+
}
118+
return pi
119+
}
120+
121+
func (pi *pointIndex) insert(p tlxy.Point) {
122+
xy := [2]float64{p.Lon, p.Lat}
123+
pi.idx.Insert(xy, xy, stopEntry{pt: p})
124+
ll := s2.LatLngFromDegrees(p.Lat, p.Lon)
125+
pi.bbox = pi.bbox.AddPoint(ll)
126+
pi.count++
127+
}
128+
129+
// nearestDist returns the distance in meters from p to the nearest point in the index.
130+
func (pi *pointIndex) nearestDist(p tlxy.Point) float64 {
131+
// Start with a small search radius and expand until a candidate is found.
132+
approx := tlxy.NewApprox(p)
133+
radiusMeters := 1000.0
134+
for i := 0; i < 20; i++ {
135+
dLon := radiusMeters / approx.LonMeters()
136+
dLat := radiusMeters / approx.LatMeters()
137+
minPt := [2]float64{p.Lon - dLon, p.Lat - dLat}
138+
maxPt := [2]float64{p.Lon + dLon, p.Lat + dLat}
139+
best := -1.0
140+
pi.idx.Search(minPt, maxPt, func(min, max [2]float64, entry stopEntry) bool {
141+
d := tlxy.DistanceHaversine(p, entry.pt)
142+
if best < 0 || d < best {
143+
best = d
144+
}
145+
return true
146+
})
147+
if best >= 0 && best <= radiusMeters {
148+
return best
149+
}
150+
radiusMeters *= 4
151+
}
152+
return math.MaxFloat64
153+
}
154+
155+
// bboxDiagonalMeters returns the length of the bounding box diagonal in meters.
156+
func bboxDiagonalMeters(r s2.Rect) float64 {
157+
if r.IsEmpty() {
158+
return 0
159+
}
160+
lo := r.Lo()
161+
hi := r.Hi()
162+
a := tlxy.Point{Lon: lo.Lng.Degrees(), Lat: lo.Lat.Degrees()}
163+
b := tlxy.Point{Lon: hi.Lng.Degrees(), Lat: hi.Lat.Degrees()}
164+
return tlxy.DistanceHaversine(a, b)
165+
}
166+
167+
// computeBboxMetrics computes IoU and overlap coefficient for two bounding boxes.
168+
func computeBboxMetrics(rectA, rectB s2.Rect) BboxMetrics {
169+
if rectA.IsEmpty() || rectB.IsEmpty() {
170+
return BboxMetrics{}
171+
}
172+
intersect := rectA.Intersection(rectB)
173+
areaA := rectA.Area()
174+
areaB := rectB.Area()
175+
areaI := intersect.Area()
176+
areaUnion := areaA + areaB - areaI
177+
iou := 0.0
178+
if areaUnion > 0 {
179+
iou = areaI / areaUnion
180+
}
181+
minArea := math.Min(areaA, areaB)
182+
overlap := 0.0
183+
if minArea > 0 {
184+
overlap = areaI / minArea
185+
}
186+
return BboxMetrics{
187+
IoU: iou,
188+
OverlapCoefficient: overlap,
189+
}
190+
}
191+
192+
// computeDirectionStats computes nearest-neighbor statistics from each point in
193+
// srcs to the nearest point in the dstIdx spatial index.
194+
func computeDirectionStats(direction string, srcs []tlxy.Point, dstIdx *pointIndex, largerDiagonal float64) DirectionStats {
195+
if len(srcs) == 0 {
196+
return DirectionStats{Direction: direction}
197+
}
198+
dists := make([]float64, 0, len(srcs))
199+
for _, p := range srcs {
200+
d := dstIdx.nearestDist(p)
201+
if d < math.MaxFloat64 {
202+
dists = append(dists, d)
203+
}
204+
}
205+
if len(dists) == 0 {
206+
return DirectionStats{Direction: direction, TotalStops: len(srcs)}
207+
}
208+
sort.Float64s(dists)
209+
sum := 0.0
210+
for _, d := range dists {
211+
sum += d
212+
}
213+
mean := sum / float64(len(dists))
214+
median := dists[len(dists)/2]
215+
p90 := dists[int(math.Min(float64(len(dists)-1), math.Ceil(float64(len(dists))*0.90)))]
216+
maxD := dists[len(dists)-1]
217+
normalizedANND := 0.0
218+
if largerDiagonal > 0 {
219+
normalizedANND = mean / largerDiagonal
220+
}
221+
return DirectionStats{
222+
Direction: direction,
223+
TotalStops: len(srcs),
224+
MeanDistMeters: mean,
225+
MedianDistMeters: median,
226+
P90DistMeters: p90,
227+
MaxDistMeters: maxD,
228+
NormalizedANND: normalizedANND,
229+
}
230+
}
231+
232+
// classifyRelationship determines the relationship from the computed metrics.
233+
func classifyRelationship(result *Result, opts Options) Relationship {
234+
atob := result.AtoB.NormalizedANND
235+
btoa := result.BtoA.NormalizedANND
236+
iou := result.Bbox.IoU
237+
overlap := result.Bbox.OverlapCoefficient
238+
239+
aWellMatched := atob <= opts.ANNDRatioThreshold
240+
bWellMatched := btoa <= opts.ANNDRatioThreshold
241+
242+
if aWellMatched && bWellMatched && iou >= opts.BboxIoUThreshold {
243+
return RelationshipSame
244+
}
245+
if aWellMatched && !bWellMatched && overlap >= opts.BboxOverlapThreshold && result.StopCountA < result.StopCountB {
246+
return RelationshipSubset
247+
}
248+
if bWellMatched && !aWellMatched && overlap >= opts.BboxOverlapThreshold && result.StopCountA > result.StopCountB {
249+
return RelationshipSuperset
250+
}
251+
if iou > 0.10 {
252+
return RelationshipOverlapping
253+
}
254+
return RelationshipDisjoint
255+
}
256+
257+
// collectStops reads stops from a reader, filtering null-island and (optionally) non-boarding stops.
258+
func collectStops(reader adapters.Reader, boardingOnly bool) ([]tlxy.Point, error) {
259+
var pts []tlxy.Point
260+
for stop := range reader.Stops() {
261+
if boardingOnly && stop.LocationType.Val != 0 {
262+
continue
263+
}
264+
p := stop.ToPoint()
265+
if p.Lon == 0 && p.Lat == 0 {
266+
continue
267+
}
268+
pts = append(pts, p)
269+
}
270+
return pts, nil
271+
}
272+
273+
// CompareReaders computes geometric comparison metrics between two open readers.
274+
func CompareReaders(nameA string, readerA adapters.Reader, nameB string, readerB adapters.Reader, opts Options) (*Result, error) {
275+
stopsA, err := collectStops(readerA, opts.BoardingStopsOnly)
276+
if err != nil {
277+
return nil, err
278+
}
279+
stopsB, err := collectStops(readerB, opts.BoardingStopsOnly)
280+
if err != nil {
281+
return nil, err
282+
}
283+
284+
idxA := newPointIndex(stopsA)
285+
idxB := newPointIndex(stopsB)
286+
287+
bboxMetrics := computeBboxMetrics(idxA.bbox, idxB.bbox)
288+
289+
diagA := bboxDiagonalMeters(idxA.bbox)
290+
diagB := bboxDiagonalMeters(idxB.bbox)
291+
largerDiag := math.Max(diagA, diagB)
292+
293+
atob := computeDirectionStats(nameA+"→"+nameB, stopsA, idxB, largerDiag)
294+
btoa := computeDirectionStats(nameB+"→"+nameA, stopsB, idxA, largerDiag)
295+
296+
result := &Result{
297+
FeedA: nameA,
298+
FeedB: nameB,
299+
StopCountA: len(stopsA),
300+
StopCountB: len(stopsB),
301+
AtoB: atob,
302+
BtoA: btoa,
303+
Bbox: bboxMetrics,
304+
}
305+
result.Relationship = classifyRelationship(result, opts)
306+
return result, nil
307+
}

0 commit comments

Comments
 (0)