Skip to content

Commit 3f61adb

Browse files
committed
add mapineq.skip_filter_check option for debugging
1 parent 13575c8 commit 3f61adb

File tree

2 files changed

+145
-85
lines changed

2 files changed

+145
-85
lines changed

R/data.R

Lines changed: 143 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#' @param limit An `integer` specifying the maximum number of results to return. Default is 2500. This default should be enough for most uses, as it is well above the number of NUTS 3 regions in the EU. The maximum limited by the API is 10000.
1212
#'
1313
#' @return A `tibble` with the following columns:
14-
#'
14+
#'
1515
#' * `geo`: code for the (NUTS) region at the requested level.
1616
#' * `geo_name`: name of the (NUTS) region at the requested level.
1717
#' * `geo_source`: source (type) of the spatial units at the requested level.
@@ -20,7 +20,7 @@
2020
#' * `y_year` (optional): The year of the outcome variable (Y), included in bivariate requests (only included when `y_source` is provided).
2121
#' * `x`: the value of the univariate variable.
2222
#' * `y` (optional): the value of the y variable (only included when `y_source` is provided).
23-
#'
23+
#'
2424
#' @importFrom rlang .data
2525
#' @export
2626
#'
@@ -58,15 +58,19 @@ mi_data <- function(
5858
checkmate::assert_character(level, len = 1)
5959
checkmate::assert_list(x_filters, types = c("character", "NULL"))
6060
checkmate::assert_integerish(year, null.ok = TRUE, max.len = 1)
61-
checkmate::assert_list(y_filters, types = c("character", "NULL"), null.ok = TRUE)
61+
checkmate::assert_list(
62+
y_filters,
63+
types = c("character", "NULL"),
64+
null.ok = TRUE
65+
)
6266
checkmate::assert_number(limit, lower = 1, upper = 10000)
6367
if (!is.null(y_source)) checkmate::assert_string(y_source)
64-
68+
6569
# Build filter JSONs for X and Y
6670
x_conditions <- lapply(names(x_filters), function(name) {
6771
list(field = name, value = x_filters[[name]])
6872
})
69-
73+
7074
x_json <- list(
7175
source = x_source,
7276
conditions = x_conditions
@@ -89,15 +93,15 @@ mi_data <- function(
8993
jsonlite::toJSON(y_json, auto_unbox = TRUE)
9094
)
9195
}
92-
96+
9397
# Build API endpoint
9498
base_api_endpoint <- getOption("mapineqr.base_api_endpoint")
9599
url_endpoint <- if (is.null(y_source)) {
96100
paste0(base_api_endpoint, "get_x_data/items.json")
97101
} else {
98102
paste0(base_api_endpoint, "get_xy_data/items.json")
99103
}
100-
104+
101105
# Prepare query parameters
102106
query_params <- list(
103107
`_level` = level,
@@ -116,7 +120,7 @@ mi_data <- function(
116120
if (!is.null(y_source) && !is.null(y_filters)) {
117121
query_params$`Y_JSON` <- y_json_string
118122
}
119-
123+
120124
# Perform API request
121125
request <- httr2::request(url_endpoint) |>
122126
httr2::req_headers(
@@ -127,102 +131,157 @@ mi_data <- function(
127131
httr2::req_method("GET")
128132

129133
response <- request |> httr2::req_perform()
130-
134+
131135
# Parse response
132-
response_data <- httr2::resp_body_json(response, simplifyVector = TRUE) |>
136+
response_data <- httr2::resp_body_json(response, simplifyVector = TRUE) |>
133137
tibble::as_tibble()
134-
135-
# Check for duplicate values within each geo for x and (if applicable) y.
136-
duplicate_issues <- response_data |>
137-
dplyr::group_by(.data$geo) |>
138-
dplyr::summarise(
139-
distinct_x = dplyr::n_distinct(.data$x),
140-
distinct_y = if ("y" %in% names(response_data)) dplyr::n_distinct(.data$y) else NA_integer_,
141-
.groups = "drop"
142-
)
143-
144-
x_issue <- any(duplicate_issues$distinct_x > 1)
145-
y_issue <- if ("y" %in% names(response_data)) any(duplicate_issues$distinct_y > 1) else FALSE
146-
147-
# Only perform additional filter checking if duplicate geos exist
148-
if (x_issue || y_issue) {
149-
# --- For the x variable ---
150-
missing_x_filters <- character(0)
151-
if (x_issue) {
152-
available_filters <- mi_source_filters(source_name = x_source, year = year, level = level)
153-
# Determine which filter fields have more than one option
154-
multi_option_fields <- available_filters |>
155-
dplyr::group_by(.data$field) |>
156-
dplyr::summarise(n_options = dplyr::n_distinct(.data$value), .groups = "drop") |>
157-
dplyr::filter(.data$n_options > 1) |>
158-
dplyr::pull(.data$field)
159-
# Only require filters for those fields with multiple options.
160-
missing_x_filters <- setdiff(multi_option_fields, names(x_filters))
161-
}
162-
163-
# --- For the y variable (if applicable) ---
164-
missing_y_filters <- character(0)
165-
if (y_issue) {
166-
available_y_filters <- mi_source_filters(source_name = y_source, year = year, level = level)
167-
multi_option_y_fields <- available_y_filters |>
168-
dplyr::group_by(.data$field) |>
169-
dplyr::summarise(n_options = dplyr::n_distinct(.data$value), .groups = "drop") |>
170-
dplyr::filter(.data$n_options > 1) |>
171-
dplyr::pull(.data$field)
172-
missing_y_filters <- setdiff(multi_option_y_fields, names(y_filters))
173-
}
174-
175-
# Only raise an error if any missing filter is found among fields with multiple options.
176-
if (length(missing_x_filters) > 0 || length(missing_y_filters) > 0) {
177-
msg <- "The API returned duplicate values for some geographic regions. This may indicate that not all necessary filters were specified."
178-
if (length(missing_x_filters) > 0) {
179-
msg <- paste0(
180-
msg,
181-
"\n\nFor the 'x' variable (source: '", x_source, "'):",
182-
"\n The following filter fields (with multiple available options) were not specified: ",
183-
paste(missing_x_filters, collapse = ", "),
184-
"\nYou can review available filters by running:\n mi_source_filters(source_name = '", x_source, "', year = ", year, ", level = '", level, "')"
138+
139+
if (getOption("mapineq.skip_filter_check") == FALSE) {
140+
# Check for duplicate values within each geo for x and (if applicable) y.
141+
duplicate_issues <- response_data |>
142+
dplyr::group_by(.data$geo) |>
143+
dplyr::summarise(
144+
distinct_x = dplyr::n_distinct(.data$x),
145+
distinct_y = if ("y" %in% names(response_data))
146+
dplyr::n_distinct(.data$y) else NA_integer_,
147+
.groups = "drop"
148+
)
149+
150+
x_issue <- any(duplicate_issues$distinct_x > 1)
151+
y_issue <- if ("y" %in% names(response_data))
152+
any(duplicate_issues$distinct_y > 1) else FALSE
153+
154+
# Only perform additional filter checking if duplicate geos exist
155+
if (x_issue || y_issue) {
156+
# --- For the x variable ---
157+
missing_x_filters <- character(0)
158+
if (x_issue) {
159+
available_filters <- mi_source_filters(
160+
source_name = x_source,
161+
year = year,
162+
level = level
185163
)
164+
# Determine which filter fields have more than one option
165+
multi_option_fields <- available_filters |>
166+
dplyr::group_by(.data$field) |>
167+
dplyr::summarise(
168+
n_options = dplyr::n_distinct(.data$value),
169+
.groups = "drop"
170+
) |>
171+
dplyr::filter(.data$n_options > 1) |>
172+
dplyr::pull(.data$field)
173+
# Only require filters for those fields with multiple options.
174+
missing_x_filters <- setdiff(multi_option_fields, names(x_filters))
186175
}
187-
if (length(missing_y_filters) > 0) {
188-
msg <- paste0(
189-
msg,
190-
"\n\nFor the 'y' variable (source: '", y_source, "'):",
191-
"\n The following filter fields (with multiple available options) were not specified: ",
192-
paste(missing_y_filters, collapse = ", "),
193-
"\nYou can review available filters by running:\n mi_source_filters(source_name = '", y_source, "', year = ", year, ", level = '", level, "')"
176+
177+
# --- For the y variable (if applicable) ---
178+
missing_y_filters <- character(0)
179+
if (y_issue) {
180+
available_y_filters <- mi_source_filters(
181+
source_name = y_source,
182+
year = year,
183+
level = level
194184
)
185+
multi_option_y_fields <- available_y_filters |>
186+
dplyr::group_by(.data$field) |>
187+
dplyr::summarise(
188+
n_options = dplyr::n_distinct(.data$value),
189+
.groups = "drop"
190+
) |>
191+
dplyr::filter(.data$n_options > 1) |>
192+
dplyr::pull(.data$field)
193+
missing_y_filters <- setdiff(multi_option_y_fields, names(y_filters))
194+
}
195+
196+
# Only raise an error if any missing filter is found among fields with multiple options.
197+
if (length(missing_x_filters) > 0 || length(missing_y_filters) > 0) {
198+
msg <- "The API returned duplicate values for some geographic regions. This may indicate that not all necessary filters were specified."
199+
if (length(missing_x_filters) > 0) {
200+
msg <- paste0(
201+
msg,
202+
"\n\nFor the 'x' variable (source: '",
203+
x_source,
204+
"'):",
205+
"\n The following filter fields (with multiple available options) were not specified: ",
206+
paste(missing_x_filters, collapse = ", "),
207+
"\nYou can review available filters by running:\n mi_source_filters(source_name = '",
208+
x_source,
209+
"', year = ",
210+
year,
211+
", level = '",
212+
level,
213+
"')"
214+
)
215+
}
216+
if (length(missing_y_filters) > 0) {
217+
msg <- paste0(
218+
msg,
219+
"\n\nFor the 'y' variable (source: '",
220+
y_source,
221+
"'):",
222+
"\n The following filter fields (with multiple available options) were not specified: ",
223+
paste(missing_y_filters, collapse = ", "),
224+
"\nYou can review available filters by running:\n mi_source_filters(source_name = '",
225+
y_source,
226+
"', year = ",
227+
year,
228+
", level = '",
229+
level,
230+
"')"
231+
)
232+
}
233+
stop(msg)
195234
}
196-
stop(msg)
197235
}
198236
}
199237

200-
201-
202238
# Define expected columns based on whether y_source is specified
203239
if (is.null(y_source)) {
204-
expected_columns <- c("geo", "geo_name", "geo_source", "geo_year", "data_year", "x")
240+
expected_columns <- c(
241+
"geo",
242+
"geo_name",
243+
"geo_source",
244+
"geo_year",
245+
"data_year",
246+
"x"
247+
)
205248
} else {
206-
expected_columns <- c("geo", "geo_name", "geo_source", "geo_year",
207-
"predictor_year", "outcome_year", "x", "y")
249+
expected_columns <- c(
250+
"geo",
251+
"geo_name",
252+
"geo_source",
253+
"geo_year",
254+
"predictor_year",
255+
"outcome_year",
256+
"x",
257+
"y"
258+
)
208259
}
209260

210261
# Check for missing expected columns
211262
missing_columns <- setdiff(expected_columns, colnames(response_data))
212263

213264
if (length(missing_columns) > 0) {
214-
stop("The following expected columns are missing from the response: ", paste(missing_columns, collapse = ", "), ". The API may be down or might have changed. Please try again later. If the error persists, please open an issue on GitHub at <https://github.com/e-kotov/mapineqr/issues>.")
265+
stop(
266+
"The following expected columns are missing from the response: ",
267+
paste(missing_columns, collapse = ", "),
268+
". The API may be down or might have changed. Please try again later. If the error persists, please open an issue on GitHub at <https://github.com/e-kotov/mapineqr/issues>."
269+
)
215270
}
216271

217272
# Select and reorder columns using dplyr
218-
response_data <- response_data |>
219-
dplyr::select(dplyr::all_of(expected_columns)) |>
220-
dplyr::rename_with(~ dplyr::case_when(
221-
.x == "predictor_year" ~ "x_year",
222-
.x == "data_year" & !"predictor_year" %in% colnames(response_data) ~ "x_year",
223-
.x == "outcome_year" ~ "y_year",
224-
TRUE ~ .x
225-
), .cols = dplyr::any_of(c("predictor_year", "outcome_year", "data_year")))
226-
273+
response_data <- response_data |>
274+
dplyr::select(dplyr::all_of(expected_columns)) |>
275+
dplyr::rename_with(
276+
~ dplyr::case_when(
277+
.x == "predictor_year" ~ "x_year",
278+
.x == "data_year" & !"predictor_year" %in% colnames(response_data) ~
279+
"x_year",
280+
.x == "outcome_year" ~ "y_year",
281+
TRUE ~ .x
282+
),
283+
.cols = dplyr::any_of(c("predictor_year", "outcome_year", "data_year"))
284+
)
285+
227286
return(response_data)
228287
}

R/onLoad.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
op.mapineqr <- list(
44
mapineqr.api_spec_json = "https://mapineqfeatures.web.rug.nl/api.json",
55
mapineqr.base_api_endpoint = "https://mapineqfeatures.web.rug.nl/functions/postgisftw.",
6-
mapineq.user_agent = "mapineqr R package https://github.com/e-kotov/mapineqr"
6+
mapineq.user_agent = "mapineqr R package https://github.com/e-kotov/mapineqr",
7+
mapineq.skip_filter_check = FALSE
78
)
89
toset <- !(names(op.mapineqr) %in% names(op))
910
if (any(toset)) options(op.mapineqr[toset])

0 commit comments

Comments
 (0)