Skip to content

Commit e8cc08a

Browse files
committed
adding support for verbatim extension downloads
1 parent 9958406 commit e8cc08a

File tree

8 files changed

+126
-22
lines changed

8 files changed

+126
-22
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Description: A programmatic interface to the Web Service methods
88
retrieving information on data providers, getting species occurrence
99
records, getting counts of occurrence records, and using the GBIF
1010
tile map service to make rasters summarizing huge amounts of data.
11-
Version: 3.8.4.3
11+
Version: 3.8.4.4
1212
License: MIT + file LICENSE
1313
Authors@R: c(
1414
person("Scott", "Chamberlain", role = "aut", comment = c("0000-0003-1444-9135")),

R/download_predicate_dsl.R

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ sub_str <- function(str, max = 100) {
581581
if (nchar(str) < max) return(str)
582582
paste0(substring(str, 1, max), " ... ", sprintf("(N chars: %s)", nchar(str)))
583583
}
584-
parse_predicates <- function(user, email, type, format, ...) {
584+
parse_predicates <- function(user, email, type, format, verbatim_extensions, ...) {
585585
tmp <- list(...)
586586
if(length(tmp) == 0) {
587587
stop("You are requesting a full download. Please use a predicate to filter the data. For example, pred_default().")
@@ -593,12 +593,25 @@ clzzs <- vapply(tmp,
593593
if (!all(clzzs))
594594
stop("all inputs must be class occ_predicate/occ_predicate_list; ?occ_download",
595595
call. = FALSE)
596-
payload <- list(
597-
creator = unbox(user),
598-
notification_address = email,
599-
format = unbox(format),
600-
predicate = list()
601-
)
596+
if (!is.null(verbatim_extensions)) {
597+
if(format != "DWCA") {
598+
warning("verbatim_extensions can only be used with format = 'DWCA'. Ignoring verbatim_extensions.")
599+
}
600+
payload <- list(
601+
creator = unbox(user),
602+
notification_address = email,
603+
format = unbox(format),
604+
verbatimExtensions = verbatim_extensions,
605+
predicate = list()
606+
)
607+
} else {
608+
payload <- list(
609+
creator = unbox(user),
610+
notification_address = email,
611+
format = unbox(format),
612+
predicate = list()
613+
)
614+
}
602615
if (any(vapply(tmp, function(w) "predicates" %in% names(w), logical(1)))) {
603616
payload$predicate <- list(unclass(tmp[[1]]))
604617
} else {

R/occ_download.R

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#' within, not (!), like, isNotNull
1818
#' @param format (character) The download format. One of 'DWCA' (default),
1919
#' 'SIMPLE_CSV', or 'SPECIES_LIST'
20+
#' @param verbatim_extensions (character vector) A character vector of verbatim
21+
#' extensions to include in the download. Only supported when `format = "DWCA"`.
2022
#' @param user (character) User name within GBIF's website. Required. See
2123
#' "Authentication" below
2224
#' @param pwd (character) User password within GBIF's website. Required. See
@@ -177,12 +179,19 @@
177179
#' # unique(df$countryCode)
178180
#' # sort(unique(df$year))
179181
#' }
180-
occ_download <- function(..., body = NULL, type = "and", format = "DWCA",
181-
user = NULL, pwd = NULL, email = NULL,
182+
occ_download <- function(...,
183+
body = NULL,
184+
type = "and",
185+
format = "DWCA",
186+
verbatim_extensions = NULL,
187+
user = NULL,
188+
pwd = NULL,
189+
email = NULL,
182190
curlopts = list(http_version = 2)) {
183191

184-
z <- occ_download_prep(..., body = body, type = type, format = format,
185-
user = user, pwd = pwd, email = email, curlopts = curlopts)
192+
z <- occ_download_prep(..., body = body, type = type, format = format,
193+
verbatim_extensions = verbatim_extensions, user = user,
194+
pwd = pwd, email = email, curlopts = curlopts)
186195
out <- rg_POST(z$url, req = z$request, user = z$user, pwd = z$pwd, curlopts)
187196
md <- occ_download_meta(out) # get meta_data for printing
188197
citation <- gbif_citation(md)$download # get citation
@@ -204,8 +213,15 @@ download_formats <- c("DWCA", "SIMPLE_CSV", "SPECIES_LIST", "SIMPLE_PARQUET")
204213

205214
#' @export
206215
#' @rdname occ_download
207-
occ_download_prep <- function(..., body = NULL, type = "and", format = "DWCA",
208-
user = NULL, pwd = NULL, email = NULL, curlopts = list(http_version = 2)) {
216+
occ_download_prep <- function(...,
217+
body = NULL,
218+
type = "and",
219+
format = "DWCA",
220+
verbatim_extensions = NULL,
221+
user = NULL,
222+
pwd = NULL,
223+
email = NULL,
224+
curlopts = list(http_version = 2)) {
209225

210226
url <- paste0(gbif_base(), '/occurrence/download/request')
211227
user <- check_user(user)
@@ -220,7 +236,7 @@ occ_download_prep <- function(..., body = NULL, type = "and", format = "DWCA",
220236
if (!is.null(body)) {
221237
req <- body
222238
} else {
223-
req <- parse_predicates(user, email, type, format, ...)
239+
req <- parse_predicates(user, email, type, format, verbatim_extensions, ...)
224240
}
225241
structure(list(
226242
url = url,

man/occ_download.Rd

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/fixtures/occ_download_9.yml

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-download_parsing.R

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ email <- '[email protected]'
66
type <- 'and'
77

88
test_that("occ_download input parsing", {
9-
aa <- parse_predicates(user, email, type, "DWCA", pred("taxonKey", 7228682))
9+
aa <- parse_predicates(user, email, type, "DWCA", NULL, pred("taxonKey", 7228682))
1010
expect_is(aa, "list")
1111
expect_named(aa, c("creator", "notification_address", "format", "predicate"))
1212
expect_is(aa$predicate$type, "character")
@@ -17,7 +17,7 @@ test_that("occ_download input parsing", {
1717
expect_equal(unclass(aa$predicate$value), "7228682")
1818
expect_null(aa$predicate$predicates)
1919

20-
bb <- parse_predicates(user, email, type, "DWCA", pred("hasCoordinate", TRUE))
20+
bb <- parse_predicates(user, email, type, "DWCA", NULL, pred("hasCoordinate", TRUE))
2121
expect_is(bb, "list")
2222
expect_is(bb$predicate$type, "character")
2323
expect_is(bb$predicate$type, "scalar")
@@ -27,7 +27,7 @@ test_that("occ_download input parsing", {
2727
expect_equal(unclass(bb$predicate$value), "true")
2828
expect_null(bb$predicate$predicates)
2929

30-
cc <- parse_predicates(user, email, type, "DWCA",
30+
cc <- parse_predicates(user, email, type, "DWCA", NULL,
3131
pred_within("POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))"))
3232
expect_is(cc, "list")
3333
expect_is(cc$predicate$type, "character")
@@ -39,7 +39,7 @@ test_that("occ_download input parsing", {
3939
"POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))")
4040
expect_null(cc$predicate$predicates)
4141

42-
aa <- parse_predicates(user, email, type, "DWCA",
42+
aa <- parse_predicates(user, email, type, "DWCA", NULL,
4343
pred('taxonKey', 7228682),
4444
pred('hasCoordinate', TRUE),
4545
pred('hasGeospatialIssue', FALSE),
@@ -55,7 +55,7 @@ test_that("occ_download input parsing", {
5555
expect_is(aa$predicate$predicates[[4]]$geometry[1], "character")
5656

5757
# format=SIMPLE_CSV
58-
aa <- parse_predicates(user, email, type, "SIMPLE_CSV",
58+
aa <- parse_predicates(user, email, type, "SIMPLE_CSV", NULL,
5959
pred_gte('decimalLatitude', 82))
6060
expect_is(aa, "list")
6161
expect_named(aa, c("creator", "notification_address", "format", "predicate"))
@@ -68,7 +68,7 @@ test_that("occ_download input parsing", {
6868
expect_null(aa$predicate$predicates)
6969

7070
# format=SPECIES_LIST
71-
aa <- parse_predicates(user, email, "not", "SPECIES_LIST",
71+
aa <- parse_predicates(user, email, "not", "SPECIES_LIST", NULL,
7272
pred_lt('decimalLatitude', 2000))
7373
expect_is(aa, "list")
7474
expect_named(aa, c("creator", "notification_address", "format", "predicate"))
@@ -83,7 +83,18 @@ test_that("occ_download input parsing", {
8383

8484
test_that("parse_predicates fails well", {
8585
expect_error(
86-
parse_predicates(user, email, type, "DWCA", 'hasCoordinate = TRUE'),
86+
parse_predicates(user, email, type, "DWCA", NULL, 'hasCoordinate = TRUE'),
8787
"all inputs must be"
8888
)
8989
})
90+
91+
test_that("parse_predicates verbatim_extensions works", {
92+
ve <- c("http://rs.tdwg.org/dwc/terms/MeasurementOrFact",
93+
"http://rs.gbif.org/terms/1.0/Multimedia")
94+
95+
aa <- parse_predicates("john", "email", "and", "DWCA", ve, pred("taxonKey", 22))
96+
97+
expect_is(aa, "list")
98+
expect_named(aa, c("creator", "notification_address", "format", "verbatimExtensions", "predicate"))
99+
expect_equal(unclass(aa$verbatimExtensions), ve)
100+
})

tests/testthat/test-occ_download.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,24 @@ test_that("occ_download: real requests work", {
185185
# check that full downloads fail well
186186
expect_error(occ_download(), "You are requesting a full download. Please use a predicate to filter the data. For example, pred_default().")
187187

188+
vcr::use_cassette("occ_download_9", {
189+
bbb <- occ_download(
190+
pred("taxonKey",22),
191+
format = "DWCA",
192+
verbatim_extensions=
193+
c("http://rs.gbif.org/terms/1.0/DNADerivedData",
194+
"http://rs.tdwg.org/dwc/terms/MeasurementOrFact")
195+
)
196+
}, match_requests_on = c("method", "uri", "body"))
197+
expect_is(unclass(bbb), "character")
198+
expect_match(unclass(bbb)[1], "^[0-9]{7}-[0-9]{15}$")
199+
expect_equal(attr(bbb, "user"), Sys.getenv("GBIF_USER"))
200+
expect_equal(attr(bbb, "email"), Sys.getenv("GBIF_EMAIL"))
201+
expect_equal(attr(bbb, "format"), "DWCA")
202+
expect_is(attr(bbb,"citation"),"character")
203+
expect_is(attr(bbb,"downloadLink"),"character")
204+
expect_output(print.occ_download(bbb),"<<gbif download>>")
205+
expect_equal(length(capture.output(print(bbb))),22)
188206
})
189207

190208

vignettes/getting_occurrence_data.Rmd

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,20 @@ wkt %>%
274274

275275
When generating polygons from public data sources, check the WKT is what you want using a site like [WKT Geometry Plotter](https://www.geometrymapper.com/). A common mistake is requesting a polygon for the United Kingdom, but finding it includes the UK's territories of Bermuda, Pitcairn and so on. (`pred("country", "GB")` or `pred_in("country", c("GB", "IM", "GG", "JE"))` is much faster anyway.)
276276

277+
## Downloading verbatim DWCA extensions
278+
279+
GBIF now allows users to download DWCA extensions. These extension tables are combined and returned to the user as-is ("verbatim"). The `verbatim_extensions` argument only works when `format = "DWCA"`; with any other format, you will get a warning.
280+
281+
```r
282+
occ_download(
283+
pred("taxonKey",22),
284+
format = "DWCA",
285+
verbatim_extensions = "http://rs.tdwg.org/dwc/terms/MeasurementOrFact"
286+
)
287+
```
288+
289+
To view all of the available extensions, use `rgbif::occ_download_describe("dwca")$verbatimExtensions`.
290+
277291
## Get just the EEZ waters of a country/area
278292

279293
Sometimes GBIF users just want the **EEZ** (Exclusive Economic Zone) waters of a country. This is possible using the `gadm` filter and `pred_not`.

0 commit comments

Comments
 (0)