Skip to content

Commit 9958406

Browse files
authored
fix: remove wk package dependency and implement internal WKT validation (#827) (#828)
1 parent 8736b7c commit 9958406

File tree

4 files changed

+193
-7
lines changed

4 files changed

+193
-7
lines changed

DESCRIPTION

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Description: A programmatic interface to the Web Service methods
88
retrieving information on data providers, getting species occurrence
99
records, getting counts of occurrence records, and using the GBIF
1010
tile map service to make rasters summarizing huge amounts of data.
11-
Version: 3.8.4.2
11+
Version: 3.8.4.3
1212
License: MIT + file LICENSE
1313
Authors@R: c(
1414
person("Scott", "Chamberlain", role = "aut", comment = c("0000-0003-1444-9135")),
@@ -41,8 +41,7 @@ Imports:
4141
tibble,
4242
lazyeval,
4343
R6,
44-
stats,
45-
wk
44+
stats
4645
Suggests:
4746
testthat,
4847
png,

R/bbox.R

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,52 @@ gbif_bbox2wkt <- function(minx=NA, miny=NA, maxx=NA, maxy=NA, bbox=NULL){
4444
#' @export
4545
#' @rdname gbif_bbox2wkt
4646
gbif_wkt2bbox <- function(wkt = NULL){
47-
stopifnot(!is.null(wkt))
48-
as.numeric(wk::wk_bbox(wk::wkt(wkt)))
47+
# legacy code using wk package
48+
# stopifnot(!is.null(wkt))
49+
# as.numeric(wk::wk_bbox(wk::wkt(wkt)))
50+
stopifnot(is.character(wkt))
51+
52+
one <- function(s) {
53+
if (is.na(s)) return(rep(NA_real_, 4))
54+
s <- trimws(s)
55+
56+
# Handle EMPTY
57+
if (grepl("\\bEMPTY\\b", s, ignore.case = TRUE)) return(rep(NA_real_, 4))
58+
59+
# Remove optional SRID=...; prefix
60+
s <- sub("^\\s*SRID\\s*=\\s*\\d+\\s*;\\s*", "", s, ignore.case = TRUE)
61+
62+
# Extract all numbers (incl. scientific notation)
63+
nums <- regmatches(
64+
s,
65+
gregexpr("[-+]?(?:\\d+\\.?\\d*|\\.\\d+)(?:[eE][-+]?\\d+)?", s, perl = TRUE)
66+
)[[1]]
67+
68+
if (length(nums) < 2) return(rep(NA_real_, 4))
69+
70+
vals <- as.numeric(nums)
71+
72+
# WKT coordinates are grouped like (x y [z [m]]), repeated.
73+
# We take the first two of each group; assume constant dimension across tuples.
74+
# Try to detect dimension from tokens like "POINT Z", "LINESTRING ZM", etc.
75+
dim_guess <- 2L
76+
if (grepl("\\bZM\\b", s, ignore.case = TRUE)) dim_guess <- 4L
77+
else if (grepl("\\bZ\\b", s, ignore.case = TRUE) || grepl("\\bM\\b", s, ignore.case = TRUE)) dim_guess <- 3L
78+
79+
if (length(vals) %% dim_guess != 0L) {
80+
# Fallback if guess doesn't divide cleanly: assume XY pairs
81+
dim_guess <- 2L
82+
}
83+
84+
mat <- matrix(vals, ncol = dim_guess, byrow = TRUE)
85+
x <- mat[, 1]
86+
y <- mat[, 2]
87+
c(min(x, na.rm = TRUE), min(y, na.rm = TRUE),
88+
max(x, na.rm = TRUE), max(y, na.rm = TRUE))
89+
}
90+
91+
res <- t(vapply(wkt, one, numeric(4)))
92+
if (nrow(res) == 1) as.numeric(res[1, ]) else res
4993
}
94+
95+

R/check_wkt.r

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ check_wkt <- function(wkt = NULL, skip_validate = FALSE){
3737

3838
for (i in seq_along(wkt)) {
3939
if (!extracted_wkts[i] %in% accepted_wkts) stop(paste0("WKT must be one of the types: ",paste0(accepted_wkts, collapse = ", ")))
40-
if (!skip_validate) { res <- wk::wk_problems(wk::new_wk_wkt(wkt[i]))
40+
if (!skip_validate) { res <- wk_problems(wkt[i])
4141
if (!is.na(res)) stop(res) # print error
4242
}
4343
}
@@ -46,3 +46,79 @@ check_wkt <- function(wkt = NULL, skip_validate = FALSE){
4646
NULL
4747
}
4848
}
49+
50+
wk_problems <- function(wkt) {
51+
if (!is.character(wkt) || length(wkt) != 1L) {
52+
return("not_character_scalar")
53+
}
54+
55+
wkt <- trimws(wkt)
56+
57+
if (is.na(wkt) || wkt == "") {
58+
return("missing_or_empty")
59+
}
60+
61+
problems <- character()
62+
63+
# 1. EMPTY keyword misuse
64+
if (grepl("\\bEMPTY\\b", wkt, ignore.case = TRUE) &&
65+
!grepl("\\b(POINT|LINESTRING|POLYGON|MULTI|GEOMETRYCOLLECTION)\\s+EMPTY\\b",
66+
wkt, ignore.case = TRUE)) {
67+
problems <- c(problems, "invalid_EMPTY_usage")
68+
}
69+
70+
# 2. Unbalanced parentheses
71+
n_open <- lengths(regmatches(wkt, gregexpr("\\(", wkt)))
72+
n_close <- lengths(regmatches(wkt, gregexpr("\\)", wkt)))
73+
if (n_open != n_close) {
74+
problems <- c(problems, "unbalanced_parentheses")
75+
}
76+
77+
# 3. Geometry type missing or invalid
78+
if (!grepl(
79+
"^\\s*(SRID\\s*=\\s*\\d+\\s*;\\s*)?(POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)\\b",
80+
wkt, ignore.case = TRUE
81+
)) {
82+
problems <- c(problems, "invalid_or_missing_geometry_type")
83+
}
84+
85+
# 4. Non-numeric coordinates
86+
coord_text <- gsub(
87+
"^[^\\(]*\\(|\\)[^\\)]*$", "", wkt
88+
)
89+
bad_nums <- grepl("[A-Za-z]", coord_text) &&
90+
!grepl("\\b(Z|M|ZM)\\b", wkt, ignore.case = TRUE)
91+
if (bad_nums) {
92+
problems <- c(problems, "non_numeric_coordinates")
93+
}
94+
95+
# 5. Comma / coordinate separator issues
96+
if (grepl(",\\s*,", wkt)) {
97+
problems <- c(problems, "double_comma")
98+
}
99+
if (grepl("\\(\\s*,|,\\s*\\)", wkt)) {
100+
problems <- c(problems, "dangling_comma")
101+
}
102+
103+
# 6. Odd number of coordinate values (XY expected)
104+
nums <- regmatches(
105+
wkt,
106+
gregexpr("[-+]?(?:\\d+\\.?\\d*|\\.\\d+)(?:[eE][-+]?\\d+)?", wkt, perl = TRUE)
107+
)[[1]]
108+
109+
if (length(nums) > 0 && length(nums) %% 2 != 0) {
110+
problems <- c(problems, "odd_number_of_coordinates")
111+
}
112+
113+
# 7. Empty coordinate lists
114+
if (grepl("\\(\\s*\\)", wkt)) {
115+
problems <- c(problems, "empty_coordinate_list")
116+
}
117+
118+
if (!all(is.na(problems))) {
119+
stop(paste(problems, collapse = "; "))
120+
}
121+
122+
if (length(problems) == 0L) NA_character_ else problems
123+
}
124+

tests/testthat/test-check_wkt.r

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ test_that("regular wkt works", {
44
aa <- check_wkt('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1))')
55
bb <- check_wkt('POINT(30.1 10.1)')
66
cc <- check_wkt('LINESTRING(3 4,10 50,20 25)')
7+
mm <- check_wkt('MULTIPOLYGON (((-86.1328125 59.459488841393735, -56.953125 59.459488841393735, -27.7734375 59.459488841393735, -27.7734375 34.38154915808676, -27.7734375 9.303609474779796, -56.953125 9.303609474779796, -86.1328125 9.303609474779796, -86.1328125 34.38154915808676, -86.1328125 59.459488841393735)), ((78.3984375 54.690225821818856, 56.42578125 45.86648167444278, 34.453125 37.04273752706671, 15.8203125 6.671845441515563, -2.8125 -23.699046644035587, 41.484375 -27.589246771779514, 85.78125 -31.47944689952344, 101.953125 -8.508652190421842, 118.125 14.462142518679757, 112.32421875 36.59973091797452, 106.5234375 58.737319317269275, 106.5234375 58.737319317269275, 106.5234375 58.737319317269275, 92.4609375 56.71377256954406, 78.3984375 54.690225821818856)))')
8+
9+
expect_is(mm, "character")
10+
expect_equal(mm, 'MULTIPOLYGON (((-86.1328125 59.459488841393735, -56.953125 59.459488841393735, -27.7734375 59.459488841393735, -27.7734375 34.38154915808676, -27.7734375 9.303609474779796, -56.953125 9.303609474779796, -86.1328125 9.303609474779796, -86.1328125 34.38154915808676, -86.1328125 59.459488841393735)), ((78.3984375 54.690225821818856, 56.42578125 45.86648167444278, 34.453125 37.04273752706671, 15.8203125 6.671845441515563, -2.8125 -23.699046644035587, 41.484375 -27.589246771779514, 85.78125 -31.47944689952344, 101.953125 -8.508652190421842, 118.125 14.462142518679757, 112.32421875 36.59973091797452, 106.5234375 58.737319317269275, 106.5234375 58.737319317269275, 106.5234375 58.737319317269275, 92.4609375 56.71377256954406, 78.3984375 54.690225821818856)))')
711

812
expect_is(aa, "character")
913
expect_equal(aa, 'POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1))')
@@ -40,10 +44,71 @@ test_that("many wkt's, semi-colon separated, for many repeated geometry args", {
4044

4145

4246
test_that("bad WKT fails well", {
47+
# Non-numeric coordinates
4348
expect_error(
4449
check_wkt('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 a))'),
45-
"Expected a number but found 'a' at byte 46"
50+
"non_numeric_coordinates; odd_number_of_coordinates"
51+
)
52+
53+
# Unbalanced parentheses - missing closing
54+
expect_error(
55+
check_wkt('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1)'),
56+
"unbalanced_parentheses"
57+
)
58+
59+
# Unbalanced parentheses - extra closing
60+
expect_error(
61+
check_wkt('POLYGON((30.1 10.1, 10 20, 20 60, 60 60, 30.1 10.1)))'),
62+
"unbalanced_parentheses"
63+
)
64+
65+
# Odd number of coordinates
66+
expect_error(
67+
check_wkt('LINESTRING(10 20, 30 40, 50)'),
68+
"odd_number_of_coordinates"
69+
)
70+
71+
# Empty coordinate list
72+
expect_error(
73+
check_wkt('POLYGON(())'),
74+
"empty_coordinate_list"
75+
)
76+
77+
# Double comma
78+
expect_error(
79+
check_wkt('LINESTRING(10 20,, 30 40)'),
80+
"double_comma"
81+
)
82+
83+
# Dangling comma
84+
expect_error(
85+
check_wkt('POLYGON((30.1 10.1, 10 20, 20 60,))'),
86+
"dangling_comma"
87+
)
88+
89+
# Invalid geometry type
90+
expect_error(
91+
check_wkt('INVALIDTYPE((30.1 10.1))'),
92+
"WKT must be one of the types"
4693
)
4794

95+
# Multiple issues - non-numeric and unbalanced
96+
expect_error(
97+
check_wkt('POLYGON((30.1 10.1, abc def, 20 60)'),
98+
"unbalanced_parentheses; non_numeric_coordinates"
99+
)
100+
101+
# Point with multiple coordinates (should be single coordinate pair)
102+
expect_error(
103+
check_wkt('POINT(30.1 10.1, 20 40, )'),
104+
"dangling_comma"
105+
)
106+
107+
# Coordinates with letters mixed in
108+
expect_error(
109+
check_wkt('POLYGON((30.1 10.1, 10x 20, 20 60, 60 60, 30.1 10.1))'),
110+
"non_numeric_coordinates"
111+
)
112+
48113
})
49114

0 commit comments

Comments
 (0)