-
Notifications
You must be signed in to change notification settings - Fork 16
Open
Description
I was comparing how sf::st_distance handled NAs in coordinates depending on the underlying distance method used when I found a strange result from {s2}.
First, the comparison for context:
library(sf)
#> Linking to GEOS 3.13.1, GDAL 3.11.3, PROJ 9.6.0; sf_use_s2() is TRUE
# if st_point is used, and either coordinate columns are NA, returns POINT EMPTY
p1 <- st_point(c(1, 1))
p2 <- st_point(c(2, NA_real_))
p3 <- st_point(c(NA_real_, NA_real_))
print(p1)
#> POINT (1 1)
print(p2)
#> POINT EMPTY
print(p3)
#> POINT EMPTY
# But despite returning POINT EMPTY, st_coordinates shows there are still coords
st_coordinates(p1)
#> X Y
#> [1,] 1 1
st_coordinates(p2)
#> X Y
#> [1,] 2 NA
st_coordinates(p3)
#> X Y
#> [1,] NA NA
# Depending on the method, NAs are variably returned for just the point with NAs for X and Y
# or also the point with only NA for Y
st_distance(st_sfc(p1, p2, p3), which = 'Hausdorff')
#> [,1] [,2] [,3]
#> [1,] 0 NA NA
#> [2,] NA NA NA
#> [3,] NA NA NA
st_distance(st_sfc(p1, p2, p3), which = 'Euclidean')
#> 1 2 3
#> 1 0.000000 1.414214 NA
#> 2 1.414214 0.000000 NA
#> 3 NA NA 0
st_distance(st_sfc(p1, p2, p3), which = 'Frechet')
#> [,1] [,2] [,3]
#> [1,] 0 NA NA
#> [2,] NA NA NA
#> [3,] NA NA NA
sf_use_s2(FALSE)
#> Spherical geometry (s2) switched off
st_distance(st_sfc(p1, p2, p3, crs = 4326))
#> Units: [m]
#> [,1] [,2] [,3]
#> [1,] 0 NA NA
#> [2,] NA NA NA
#> [3,] NA NA NA
sf_use_s2(TRUE)
#> Spherical geometry (s2) switched on
st_distance(st_sfc(p1, p2, p3, crs = 4326))
#> Units: [m]
#> [,1] [,2] [,3]
#> [1,] 0 20015118 NA
#> [2,] 20015118 20015118 NA
#> [3,] NA NA NAFocusing on {s2}, we see s2::as_s2_geography returns: POINT (1 1), POINT (nan nan) and POINT EMPTY
library(sf)
#> Linking to GEOS 3.13.1, GDAL 3.11.3, PROJ 9.6.0; sf_use_s2() is TRUE
library(s2)
p1 <- st_point(c(1, 1))
p2 <- st_point(c(2, NA_real_))
p3 <- st_point(c(NA_real_, NA_real_))
# Results for s2_distance_matrix
s2_distance_matrix(st_sfc(p1, p2, p3), st_sfc(p1, p2, p3))
#> [,1] [,2] [,3]
#> [1,] 0 20015118 NA
#> [2,] 20015118 20015118 NA
#> [3,] NA NA NA
# And same results for s2_distance
s2_distance(p1, p1)
#> [1] 0
s2_distance(p1, p2)
#> [1] 20015118
s2_distance(p1, p3)
#> [1] NA
s2_distance(p2, p2)
#> [1] 20015118
s2_distance(p2, p3)
#> [1] NA
# s2_distance_* use as_s2_geography to setup x, y
# we see a mix of valid points, POINT (nan nan), and POINT EMPTY
as_s2_geography(p1)
#> <geodesic s2_geography[1] with CRS=OGC:CRS84>
#> [1] POINT (1 1)
as_s2_geography(p2)
#> <geodesic s2_geography[1] with CRS=OGC:CRS84>
#> [1] POINT (nan nan)
as_s2_geography(p3)
#> <geodesic s2_geography[1] with CRS=OGC:CRS84>
#> [1] POINT EMPTY
# And results are the same when we work directly in characters (passed to as_s2_geography)
p1 <- 'POINT (1 1)'
p2 <- 'POINT (2 nan)'
p3 <- 'POINT (nan nan)'
s2_distance_matrix(c(p1, p2, p3), c(p1, p2, p3))
#> [,1] [,2] [,3]
#> [1,] 0 20015118 NA
#> [2,] 20015118 20015118 NA
#> [3,] NA NA NAThe distance being returned in the case of a POINT with nan is equal to pi multiplied by s2::s2_earth_radius_meters().
library(s2)
identical(s2_distance('POINT (1 1)', 'POINT (1 nan)'), pi * s2_earth_radius_meters())
[1] TRUE
identical(s2_distance('POINT (9999 9999)', 'POINT (1 nan)'), pi * s2_earth_radius_meters())
[1] TRUEMy expectation as a user is that NA would be returned and not 20015118. I'm not sure where this value is coming from.
I have added tests for NAs being returned instead to match the other tests in test-s2-accessors (#290)
Let me know what you think, thanks!
Metadata
Metadata
Assignees
Labels
No labels