Skip to content

Calculation of distance with NA in X and/or Y coordinates #289

@robitalec

Description

@robitalec

I was comparing how sf::st_distance handled NAs in coordinates depending on the underlying distance method used when I found a strange result from {s2}.

First, the comparison for context:

library(sf)
#> Linking to GEOS 3.13.1, GDAL 3.11.3, PROJ 9.6.0; sf_use_s2() is TRUE

# if st_point is used, and either coordinate columns are NA, returns POINT EMPTY
p1 <- st_point(c(1, 1))
p2 <- st_point(c(2, NA_real_))
p3 <- st_point(c(NA_real_, NA_real_))
print(p1)
#> POINT (1 1)
print(p2)
#> POINT EMPTY
print(p3)
#> POINT EMPTY

# But despite returning POINT EMPTY, st_coordinates shows there are still coords
st_coordinates(p1)
#>      X Y
#> [1,] 1 1
st_coordinates(p2)
#>      X  Y
#> [1,] 2 NA
st_coordinates(p3)
#>       X  Y
#> [1,] NA NA

# Depending on the method, NAs are variably returned for just the point with NAs for X and Y 
#  or also the point with only NA for Y
st_distance(st_sfc(p1, p2, p3), which = 'Hausdorff')
#>      [,1] [,2] [,3]
#> [1,]    0   NA   NA
#> [2,]   NA   NA   NA
#> [3,]   NA   NA   NA
st_distance(st_sfc(p1, p2, p3), which = 'Euclidean')
#>          1        2  3
#> 1 0.000000 1.414214 NA
#> 2 1.414214 0.000000 NA
#> 3       NA       NA  0
st_distance(st_sfc(p1, p2, p3), which = 'Frechet')
#>      [,1] [,2] [,3]
#> [1,]    0   NA   NA
#> [2,]   NA   NA   NA
#> [3,]   NA   NA   NA
sf_use_s2(FALSE)
#> Spherical geometry (s2) switched off
st_distance(st_sfc(p1, p2, p3, crs = 4326))
#> Units: [m]
#>      [,1] [,2] [,3]
#> [1,]    0   NA   NA
#> [2,]   NA   NA   NA
#> [3,]   NA   NA   NA
sf_use_s2(TRUE)
#> Spherical geometry (s2) switched on
st_distance(st_sfc(p1, p2, p3, crs = 4326))
#> Units: [m]
#>          [,1]     [,2] [,3]
#> [1,]        0 20015118   NA
#> [2,] 20015118 20015118   NA
#> [3,]       NA       NA   NA

Focusing on {s2}, we see s2::as_s2_geography returns: POINT (1 1), POINT (nan nan) and POINT EMPTY

library(sf)
#> Linking to GEOS 3.13.1, GDAL 3.11.3, PROJ 9.6.0; sf_use_s2() is TRUE
library(s2)

p1 <- st_point(c(1, 1))
p2 <- st_point(c(2, NA_real_))
p3 <- st_point(c(NA_real_, NA_real_))

# Results for s2_distance_matrix
s2_distance_matrix(st_sfc(p1, p2, p3), st_sfc(p1, p2, p3))
#>          [,1]     [,2] [,3]
#> [1,]        0 20015118   NA
#> [2,] 20015118 20015118   NA
#> [3,]       NA       NA   NA

# And same results for s2_distance
s2_distance(p1, p1)
#> [1] 0
s2_distance(p1, p2)
#> [1] 20015118
s2_distance(p1, p3)
#> [1] NA
s2_distance(p2, p2)
#> [1] 20015118
s2_distance(p2, p3)
#> [1] NA

# s2_distance_* use as_s2_geography to setup x, y
# we see a mix of valid points, POINT (nan nan), and POINT EMPTY
as_s2_geography(p1)
#> <geodesic s2_geography[1] with CRS=OGC:CRS84>
#> [1] POINT (1 1)
as_s2_geography(p2)
#> <geodesic s2_geography[1] with CRS=OGC:CRS84>
#> [1] POINT (nan nan)
as_s2_geography(p3)
#> <geodesic s2_geography[1] with CRS=OGC:CRS84>
#> [1] POINT EMPTY

# And results are the same when we work directly in characters (passed to as_s2_geography)
p1 <- 'POINT (1 1)'
p2 <- 'POINT (2 nan)'
p3 <- 'POINT (nan nan)'

s2_distance_matrix(c(p1, p2, p3), c(p1, p2, p3))
#>          [,1]     [,2] [,3]
#> [1,]        0 20015118   NA
#> [2,] 20015118 20015118   NA
#> [3,]       NA       NA   NA

The distance being returned in the case of a POINT with nan is equal to pi multiplied by s2::s2_earth_radius_meters().

library(s2)
identical(s2_distance('POINT (1 1)', 'POINT (1 nan)'), pi * s2_earth_radius_meters())
[1] TRUE

identical(s2_distance('POINT (9999 9999)', 'POINT (1 nan)'), pi * s2_earth_radius_meters())
[1] TRUE

My expectation as a user is that NA would be returned and not 20015118. I'm not sure where this value is coming from.
I have added tests for NAs being returned instead to match the other tests in test-s2-accessors (#290)

Let me know what you think, thanks!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions