|
| 1 | +// Package stopcompare provides geometric comparison of GTFS feeds based on stop point clouds. |
| 2 | +package stopcompare |
| 3 | + |
| 4 | +import ( |
| 5 | + "math" |
| 6 | + "sort" |
| 7 | + |
| 8 | + "github.com/golang/geo/s2" |
| 9 | + "github.com/interline-io/transitland-lib/adapters" |
| 10 | + "github.com/interline-io/transitland-lib/tlxy" |
| 11 | + "github.com/tidwall/rtree" |
| 12 | +) |
| 13 | + |
| 14 | +// Relationship describes the spatial relationship between two feeds' stop sets. |
| 15 | +type Relationship int |
| 16 | + |
| 17 | +const ( |
| 18 | + RelationshipUnknown Relationship = iota |
| 19 | + RelationshipSame // A and B cover the same stops |
| 20 | + RelationshipSubset // A ⊆ B: A's stops are geographically contained in B |
| 21 | + RelationshipSuperset // A ⊇ B: B's stops are contained in A |
| 22 | + RelationshipOverlapping // Partial geographic overlap |
| 23 | + RelationshipDisjoint // No meaningful geographic overlap |
| 24 | +) |
| 25 | + |
| 26 | +func (r Relationship) MarshalJSON() ([]byte, error) { |
| 27 | + return []byte(`"` + r.String() + `"`), nil |
| 28 | +} |
| 29 | + |
| 30 | +func (r Relationship) String() string { |
| 31 | + switch r { |
| 32 | + case RelationshipSame: |
| 33 | + return "same" |
| 34 | + case RelationshipSubset: |
| 35 | + return "subset" |
| 36 | + case RelationshipSuperset: |
| 37 | + return "superset" |
| 38 | + case RelationshipOverlapping: |
| 39 | + return "overlapping" |
| 40 | + case RelationshipDisjoint: |
| 41 | + return "disjoint" |
| 42 | + default: |
| 43 | + return "unknown" |
| 44 | + } |
| 45 | +} |
| 46 | + |
| 47 | +// Options controls the thresholds used for classification. |
| 48 | +type Options struct { |
| 49 | + // Normalized ANND at or below which stops are "well matched". Default: 0.02 |
| 50 | + ANNDRatioThreshold float64 |
| 51 | + // Bounding box IoU at or above which feeds cover the "same" region. Default: 0.75 |
| 52 | + BboxIoUThreshold float64 |
| 53 | + // Bounding box overlap coefficient at or above which one feed is a geographic subset. Default: 0.90 |
| 54 | + BboxOverlapThreshold float64 |
| 55 | + // If true, only consider stops with LocationType==0 (boarding stops). Default: false |
| 56 | + BoardingStopsOnly bool |
| 57 | +} |
| 58 | + |
| 59 | +// DefaultOptions returns Options with recommended defaults. |
| 60 | +func DefaultOptions() Options { |
| 61 | + return Options{ |
| 62 | + ANNDRatioThreshold: 0.02, |
| 63 | + BboxIoUThreshold: 0.75, |
| 64 | + BboxOverlapThreshold: 0.90, |
| 65 | + } |
| 66 | +} |
| 67 | + |
| 68 | +// DirectionStats holds nearest-neighbor statistics from one feed to another. |
| 69 | +type DirectionStats struct { |
| 70 | + Direction string `json:"direction"` |
| 71 | + TotalStops int `json:"total_stops"` |
| 72 | + MeanDistMeters float64 `json:"mean_dist_meters"` |
| 73 | + MedianDistMeters float64 `json:"median_dist_meters"` |
| 74 | + P90DistMeters float64 `json:"p90_dist_meters"` |
| 75 | + MaxDistMeters float64 `json:"max_dist_meters"` |
| 76 | + // NormalizedANND is the mean nearest-neighbor distance divided by the bounding |
| 77 | + // box diagonal of the larger feed. ~0 means tight match; ~1 means far apart. |
| 78 | + NormalizedANND float64 `json:"normalized_annd"` |
| 79 | +} |
| 80 | + |
| 81 | +// BboxMetrics holds bounding-box overlap statistics. |
| 82 | +type BboxMetrics struct { |
| 83 | + IoU float64 `json:"iou"` |
| 84 | + OverlapCoefficient float64 `json:"overlap_coefficient"` |
| 85 | +} |
| 86 | + |
| 87 | +// Result is the full output of comparing two feeds. |
| 88 | +type Result struct { |
| 89 | + FeedA string `json:"feed_a"` |
| 90 | + FeedB string `json:"feed_b"` |
| 91 | + StopCountA int `json:"stop_count_a"` |
| 92 | + StopCountB int `json:"stop_count_b"` |
| 93 | + AtoB DirectionStats `json:"a_to_b"` |
| 94 | + BtoA DirectionStats `json:"b_to_a"` |
| 95 | + Bbox BboxMetrics `json:"bbox"` |
| 96 | + Relationship Relationship `json:"relationship"` |
| 97 | +} |
| 98 | + |
| 99 | +// stopEntry is stored in the R-tree. |
| 100 | +type stopEntry struct { |
| 101 | + pt tlxy.Point |
| 102 | +} |
| 103 | + |
| 104 | +// pointIndex is a spatial index over a set of stop points. |
| 105 | +type pointIndex struct { |
| 106 | + idx rtree.Generic[stopEntry] |
| 107 | + bbox s2.Rect |
| 108 | + count int |
| 109 | +} |
| 110 | + |
| 111 | +func newPointIndex(pts []tlxy.Point) *pointIndex { |
| 112 | + pi := &pointIndex{ |
| 113 | + bbox: s2.EmptyRect(), |
| 114 | + } |
| 115 | + for _, p := range pts { |
| 116 | + pi.insert(p) |
| 117 | + } |
| 118 | + return pi |
| 119 | +} |
| 120 | + |
| 121 | +func (pi *pointIndex) insert(p tlxy.Point) { |
| 122 | + xy := [2]float64{p.Lon, p.Lat} |
| 123 | + pi.idx.Insert(xy, xy, stopEntry{pt: p}) |
| 124 | + ll := s2.LatLngFromDegrees(p.Lat, p.Lon) |
| 125 | + pi.bbox = pi.bbox.AddPoint(ll) |
| 126 | + pi.count++ |
| 127 | +} |
| 128 | + |
| 129 | +// nearestDist returns the distance in meters from p to the nearest point in the index. |
| 130 | +func (pi *pointIndex) nearestDist(p tlxy.Point) float64 { |
| 131 | + // Start with a small search radius and expand until a candidate is found. |
| 132 | + approx := tlxy.NewApprox(p) |
| 133 | + radiusMeters := 1000.0 |
| 134 | + for i := 0; i < 20; i++ { |
| 135 | + dLon := radiusMeters / approx.LonMeters() |
| 136 | + dLat := radiusMeters / approx.LatMeters() |
| 137 | + minPt := [2]float64{p.Lon - dLon, p.Lat - dLat} |
| 138 | + maxPt := [2]float64{p.Lon + dLon, p.Lat + dLat} |
| 139 | + best := -1.0 |
| 140 | + pi.idx.Search(minPt, maxPt, func(min, max [2]float64, entry stopEntry) bool { |
| 141 | + d := tlxy.DistanceHaversine(p, entry.pt) |
| 142 | + if best < 0 || d < best { |
| 143 | + best = d |
| 144 | + } |
| 145 | + return true |
| 146 | + }) |
| 147 | + if best >= 0 && best <= radiusMeters { |
| 148 | + return best |
| 149 | + } |
| 150 | + radiusMeters *= 4 |
| 151 | + } |
| 152 | + return math.MaxFloat64 |
| 153 | +} |
| 154 | + |
| 155 | +// bboxDiagonalMeters returns the length of the bounding box diagonal in meters. |
| 156 | +func bboxDiagonalMeters(r s2.Rect) float64 { |
| 157 | + if r.IsEmpty() { |
| 158 | + return 0 |
| 159 | + } |
| 160 | + lo := r.Lo() |
| 161 | + hi := r.Hi() |
| 162 | + a := tlxy.Point{Lon: lo.Lng.Degrees(), Lat: lo.Lat.Degrees()} |
| 163 | + b := tlxy.Point{Lon: hi.Lng.Degrees(), Lat: hi.Lat.Degrees()} |
| 164 | + return tlxy.DistanceHaversine(a, b) |
| 165 | +} |
| 166 | + |
| 167 | +// computeBboxMetrics computes IoU and overlap coefficient for two bounding boxes. |
| 168 | +func computeBboxMetrics(rectA, rectB s2.Rect) BboxMetrics { |
| 169 | + if rectA.IsEmpty() || rectB.IsEmpty() { |
| 170 | + return BboxMetrics{} |
| 171 | + } |
| 172 | + intersect := rectA.Intersection(rectB) |
| 173 | + areaA := rectA.Area() |
| 174 | + areaB := rectB.Area() |
| 175 | + areaI := intersect.Area() |
| 176 | + areaUnion := areaA + areaB - areaI |
| 177 | + iou := 0.0 |
| 178 | + if areaUnion > 0 { |
| 179 | + iou = areaI / areaUnion |
| 180 | + } |
| 181 | + minArea := math.Min(areaA, areaB) |
| 182 | + overlap := 0.0 |
| 183 | + if minArea > 0 { |
| 184 | + overlap = areaI / minArea |
| 185 | + } |
| 186 | + return BboxMetrics{ |
| 187 | + IoU: iou, |
| 188 | + OverlapCoefficient: overlap, |
| 189 | + } |
| 190 | +} |
| 191 | + |
| 192 | +// computeDirectionStats computes nearest-neighbor statistics from each point in |
| 193 | +// srcs to the nearest point in the dstIdx spatial index. |
| 194 | +func computeDirectionStats(direction string, srcs []tlxy.Point, dstIdx *pointIndex, largerDiagonal float64) DirectionStats { |
| 195 | + if len(srcs) == 0 { |
| 196 | + return DirectionStats{Direction: direction} |
| 197 | + } |
| 198 | + dists := make([]float64, 0, len(srcs)) |
| 199 | + for _, p := range srcs { |
| 200 | + d := dstIdx.nearestDist(p) |
| 201 | + if d < math.MaxFloat64 { |
| 202 | + dists = append(dists, d) |
| 203 | + } |
| 204 | + } |
| 205 | + if len(dists) == 0 { |
| 206 | + return DirectionStats{Direction: direction, TotalStops: len(srcs)} |
| 207 | + } |
| 208 | + sort.Float64s(dists) |
| 209 | + sum := 0.0 |
| 210 | + for _, d := range dists { |
| 211 | + sum += d |
| 212 | + } |
| 213 | + mean := sum / float64(len(dists)) |
| 214 | + median := dists[len(dists)/2] |
| 215 | + p90 := dists[int(math.Min(float64(len(dists)-1), math.Ceil(float64(len(dists))*0.90)))] |
| 216 | + maxD := dists[len(dists)-1] |
| 217 | + normalizedANND := 0.0 |
| 218 | + if largerDiagonal > 0 { |
| 219 | + normalizedANND = mean / largerDiagonal |
| 220 | + } |
| 221 | + return DirectionStats{ |
| 222 | + Direction: direction, |
| 223 | + TotalStops: len(srcs), |
| 224 | + MeanDistMeters: mean, |
| 225 | + MedianDistMeters: median, |
| 226 | + P90DistMeters: p90, |
| 227 | + MaxDistMeters: maxD, |
| 228 | + NormalizedANND: normalizedANND, |
| 229 | + } |
| 230 | +} |
| 231 | + |
| 232 | +// classifyRelationship determines the relationship from the computed metrics. |
| 233 | +func classifyRelationship(result *Result, opts Options) Relationship { |
| 234 | + atob := result.AtoB.NormalizedANND |
| 235 | + btoa := result.BtoA.NormalizedANND |
| 236 | + iou := result.Bbox.IoU |
| 237 | + overlap := result.Bbox.OverlapCoefficient |
| 238 | + |
| 239 | + aWellMatched := atob <= opts.ANNDRatioThreshold |
| 240 | + bWellMatched := btoa <= opts.ANNDRatioThreshold |
| 241 | + |
| 242 | + if aWellMatched && bWellMatched && iou >= opts.BboxIoUThreshold { |
| 243 | + return RelationshipSame |
| 244 | + } |
| 245 | + if aWellMatched && !bWellMatched && overlap >= opts.BboxOverlapThreshold && result.StopCountA < result.StopCountB { |
| 246 | + return RelationshipSubset |
| 247 | + } |
| 248 | + if bWellMatched && !aWellMatched && overlap >= opts.BboxOverlapThreshold && result.StopCountA > result.StopCountB { |
| 249 | + return RelationshipSuperset |
| 250 | + } |
| 251 | + if iou > 0.10 { |
| 252 | + return RelationshipOverlapping |
| 253 | + } |
| 254 | + return RelationshipDisjoint |
| 255 | +} |
| 256 | + |
| 257 | +// collectStops reads stops from a reader, filtering null-island and (optionally) non-boarding stops. |
| 258 | +func collectStops(reader adapters.Reader, boardingOnly bool) ([]tlxy.Point, error) { |
| 259 | + var pts []tlxy.Point |
| 260 | + for stop := range reader.Stops() { |
| 261 | + if boardingOnly && stop.LocationType.Val != 0 { |
| 262 | + continue |
| 263 | + } |
| 264 | + p := stop.ToPoint() |
| 265 | + if p.Lon == 0 && p.Lat == 0 { |
| 266 | + continue |
| 267 | + } |
| 268 | + pts = append(pts, p) |
| 269 | + } |
| 270 | + return pts, nil |
| 271 | +} |
| 272 | + |
| 273 | +// CompareReaders computes geometric comparison metrics between two open readers. |
| 274 | +func CompareReaders(nameA string, readerA adapters.Reader, nameB string, readerB adapters.Reader, opts Options) (*Result, error) { |
| 275 | + stopsA, err := collectStops(readerA, opts.BoardingStopsOnly) |
| 276 | + if err != nil { |
| 277 | + return nil, err |
| 278 | + } |
| 279 | + stopsB, err := collectStops(readerB, opts.BoardingStopsOnly) |
| 280 | + if err != nil { |
| 281 | + return nil, err |
| 282 | + } |
| 283 | + |
| 284 | + idxA := newPointIndex(stopsA) |
| 285 | + idxB := newPointIndex(stopsB) |
| 286 | + |
| 287 | + bboxMetrics := computeBboxMetrics(idxA.bbox, idxB.bbox) |
| 288 | + |
| 289 | + diagA := bboxDiagonalMeters(idxA.bbox) |
| 290 | + diagB := bboxDiagonalMeters(idxB.bbox) |
| 291 | + largerDiag := math.Max(diagA, diagB) |
| 292 | + |
| 293 | + atob := computeDirectionStats(nameA+"→"+nameB, stopsA, idxB, largerDiag) |
| 294 | + btoa := computeDirectionStats(nameB+"→"+nameA, stopsB, idxA, largerDiag) |
| 295 | + |
| 296 | + result := &Result{ |
| 297 | + FeedA: nameA, |
| 298 | + FeedB: nameB, |
| 299 | + StopCountA: len(stopsA), |
| 300 | + StopCountB: len(stopsB), |
| 301 | + AtoB: atob, |
| 302 | + BtoA: btoa, |
| 303 | + Bbox: bboxMetrics, |
| 304 | + } |
| 305 | + result.Relationship = classifyRelationship(result, opts) |
| 306 | + return result, nil |
| 307 | +} |
0 commit comments