1111# ' @param limit An `integer` specifying the maximum number of results to return. Default is 2500. This default should be enough for most uses, as it is well above the number of NUTS 3 regions in the EU. The maximum limited by the API is 10000.
1212# '
1313# ' @return A `tibble` with the following columns:
14- # '
14+ # '
1515# ' * `geo`: code for the (NUTS) region at the requested level.
1616# ' * `geo_name`: name of the (NUTS) region at the requested level.
1717# ' * `geo_source`: source (type) of the spatial units at the requested level.
2020# ' * `y_year` (optional): The year of the outcome variable (Y), included in bivariate requests (only included when `y_source` is provided).
2121# ' * `x`: the value of the univariate variable.
2222# ' * `y` (optional): the value of the y variable (only included when `y_source` is provided).
23- # '
23+ # '
2424# ' @importFrom rlang .data
2525# ' @export
2626# '
@@ -58,15 +58,19 @@ mi_data <- function(
5858 checkmate :: assert_character(level , len = 1 )
5959 checkmate :: assert_list(x_filters , types = c(" character" , " NULL" ))
6060 checkmate :: assert_integerish(year , null.ok = TRUE , max.len = 1 )
61- checkmate :: assert_list(y_filters , types = c(" character" , " NULL" ), null.ok = TRUE )
61+ checkmate :: assert_list(
62+ y_filters ,
63+ types = c(" character" , " NULL" ),
64+ null.ok = TRUE
65+ )
6266 checkmate :: assert_number(limit , lower = 1 , upper = 10000 )
6367 if (! is.null(y_source )) checkmate :: assert_string(y_source )
64-
68+
6569 # Build filter JSONs for X and Y
6670 x_conditions <- lapply(names(x_filters ), function (name ) {
6771 list (field = name , value = x_filters [[name ]])
6872 })
69-
73+
7074 x_json <- list (
7175 source = x_source ,
7276 conditions = x_conditions
@@ -89,15 +93,15 @@ mi_data <- function(
8993 jsonlite :: toJSON(y_json , auto_unbox = TRUE )
9094 )
9195 }
92-
96+
9397 # Build API endpoint
9498 base_api_endpoint <- getOption(" mapineqr.base_api_endpoint" )
9599 url_endpoint <- if (is.null(y_source )) {
96100 paste0(base_api_endpoint , " get_x_data/items.json" )
97101 } else {
98102 paste0(base_api_endpoint , " get_xy_data/items.json" )
99103 }
100-
104+
101105 # Prepare query parameters
102106 query_params <- list (
103107 `_level` = level ,
@@ -116,7 +120,7 @@ mi_data <- function(
116120 if (! is.null(y_source ) && ! is.null(y_filters )) {
117121 query_params $ `Y_JSON` <- y_json_string
118122 }
119-
123+
120124 # Perform API request
121125 request <- httr2 :: request(url_endpoint ) | >
122126 httr2 :: req_headers(
@@ -127,102 +131,157 @@ mi_data <- function(
127131 httr2 :: req_method(" GET" )
128132
129133 response <- request | > httr2 :: req_perform()
130-
134+
131135 # Parse response
132- response_data <- httr2 :: resp_body_json(response , simplifyVector = TRUE ) | >
136+ response_data <- httr2 :: resp_body_json(response , simplifyVector = TRUE ) | >
133137 tibble :: as_tibble()
134-
135- # Check for duplicate values within each geo for x and (if applicable) y.
136- duplicate_issues <- response_data | >
137- dplyr :: group_by(.data $ geo ) | >
138- dplyr :: summarise(
139- distinct_x = dplyr :: n_distinct(.data $ x ),
140- distinct_y = if (" y" %in% names(response_data )) dplyr :: n_distinct(.data $ y ) else NA_integer_ ,
141- .groups = " drop"
142- )
143-
144- x_issue <- any(duplicate_issues $ distinct_x > 1 )
145- y_issue <- if (" y" %in% names(response_data )) any(duplicate_issues $ distinct_y > 1 ) else FALSE
146-
147- # Only perform additional filter checking if duplicate geos exist
148- if (x_issue || y_issue ) {
149- # --- For the x variable ---
150- missing_x_filters <- character (0 )
151- if (x_issue ) {
152- available_filters <- mi_source_filters(source_name = x_source , year = year , level = level )
153- # Determine which filter fields have more than one option
154- multi_option_fields <- available_filters | >
155- dplyr :: group_by(.data $ field ) | >
156- dplyr :: summarise(n_options = dplyr :: n_distinct(.data $ value ), .groups = " drop" ) | >
157- dplyr :: filter(.data $ n_options > 1 ) | >
158- dplyr :: pull(.data $ field )
159- # Only require filters for those fields with multiple options.
160- missing_x_filters <- setdiff(multi_option_fields , names(x_filters ))
161- }
162-
163- # --- For the y variable (if applicable) ---
164- missing_y_filters <- character (0 )
165- if (y_issue ) {
166- available_y_filters <- mi_source_filters(source_name = y_source , year = year , level = level )
167- multi_option_y_fields <- available_y_filters | >
168- dplyr :: group_by(.data $ field ) | >
169- dplyr :: summarise(n_options = dplyr :: n_distinct(.data $ value ), .groups = " drop" ) | >
170- dplyr :: filter(.data $ n_options > 1 ) | >
171- dplyr :: pull(.data $ field )
172- missing_y_filters <- setdiff(multi_option_y_fields , names(y_filters ))
173- }
174-
175- # Only raise an error if any missing filter is found among fields with multiple options.
176- if (length(missing_x_filters ) > 0 || length(missing_y_filters ) > 0 ) {
177- msg <- " The API returned duplicate values for some geographic regions. This may indicate that not all necessary filters were specified."
178- if (length(missing_x_filters ) > 0 ) {
179- msg <- paste0(
180- msg ,
181- " \n\n For the 'x' variable (source: '" , x_source , " '):" ,
182- " \n The following filter fields (with multiple available options) were not specified: " ,
183- paste(missing_x_filters , collapse = " , " ),
184- " \n You can review available filters by running:\n mi_source_filters(source_name = '" , x_source , " ', year = " , year , " , level = '" , level , " ')"
138+
139+ if (getOption(" mapineq.skip_filter_check" ) == FALSE ) {
140+ # Check for duplicate values within each geo for x and (if applicable) y.
141+ duplicate_issues <- response_data | >
142+ dplyr :: group_by(.data $ geo ) | >
143+ dplyr :: summarise(
144+ distinct_x = dplyr :: n_distinct(.data $ x ),
145+ distinct_y = if (" y" %in% names(response_data ))
146+ dplyr :: n_distinct(.data $ y ) else NA_integer_ ,
147+ .groups = " drop"
148+ )
149+
150+ x_issue <- any(duplicate_issues $ distinct_x > 1 )
151+ y_issue <- if (" y" %in% names(response_data ))
152+ any(duplicate_issues $ distinct_y > 1 ) else FALSE
153+
154+ # Only perform additional filter checking if duplicate geos exist
155+ if (x_issue || y_issue ) {
156+ # --- For the x variable ---
157+ missing_x_filters <- character (0 )
158+ if (x_issue ) {
159+ available_filters <- mi_source_filters(
160+ source_name = x_source ,
161+ year = year ,
162+ level = level
185163 )
164+ # Determine which filter fields have more than one option
165+ multi_option_fields <- available_filters | >
166+ dplyr :: group_by(.data $ field ) | >
167+ dplyr :: summarise(
168+ n_options = dplyr :: n_distinct(.data $ value ),
169+ .groups = " drop"
170+ ) | >
171+ dplyr :: filter(.data $ n_options > 1 ) | >
172+ dplyr :: pull(.data $ field )
173+ # Only require filters for those fields with multiple options.
174+ missing_x_filters <- setdiff(multi_option_fields , names(x_filters ))
186175 }
187- if (length(missing_y_filters ) > 0 ) {
188- msg <- paste0(
189- msg ,
190- " \n\n For the 'y' variable (source: '" , y_source , " '):" ,
191- " \n The following filter fields (with multiple available options) were not specified: " ,
192- paste(missing_y_filters , collapse = " , " ),
193- " \n You can review available filters by running:\n mi_source_filters(source_name = '" , y_source , " ', year = " , year , " , level = '" , level , " ')"
176+
177+ # --- For the y variable (if applicable) ---
178+ missing_y_filters <- character (0 )
179+ if (y_issue ) {
180+ available_y_filters <- mi_source_filters(
181+ source_name = y_source ,
182+ year = year ,
183+ level = level
194184 )
185+ multi_option_y_fields <- available_y_filters | >
186+ dplyr :: group_by(.data $ field ) | >
187+ dplyr :: summarise(
188+ n_options = dplyr :: n_distinct(.data $ value ),
189+ .groups = " drop"
190+ ) | >
191+ dplyr :: filter(.data $ n_options > 1 ) | >
192+ dplyr :: pull(.data $ field )
193+ missing_y_filters <- setdiff(multi_option_y_fields , names(y_filters ))
194+ }
195+
196+ # Only raise an error if any missing filter is found among fields with multiple options.
197+ if (length(missing_x_filters ) > 0 || length(missing_y_filters ) > 0 ) {
198+ msg <- " The API returned duplicate values for some geographic regions. This may indicate that not all necessary filters were specified."
199+ if (length(missing_x_filters ) > 0 ) {
200+ msg <- paste0(
201+ msg ,
202+ " \n\n For the 'x' variable (source: '" ,
203+ x_source ,
204+ " '):" ,
205+ " \n The following filter fields (with multiple available options) were not specified: " ,
206+ paste(missing_x_filters , collapse = " , " ),
207+ " \n You can review available filters by running:\n mi_source_filters(source_name = '" ,
208+ x_source ,
209+ " ', year = " ,
210+ year ,
211+ " , level = '" ,
212+ level ,
213+ " ')"
214+ )
215+ }
216+ if (length(missing_y_filters ) > 0 ) {
217+ msg <- paste0(
218+ msg ,
219+ " \n\n For the 'y' variable (source: '" ,
220+ y_source ,
221+ " '):" ,
222+ " \n The following filter fields (with multiple available options) were not specified: " ,
223+ paste(missing_y_filters , collapse = " , " ),
224+ " \n You can review available filters by running:\n mi_source_filters(source_name = '" ,
225+ y_source ,
226+ " ', year = " ,
227+ year ,
228+ " , level = '" ,
229+ level ,
230+ " ')"
231+ )
232+ }
233+ stop(msg )
195234 }
196- stop(msg )
197235 }
198236 }
199237
200-
201-
202238 # Define expected columns based on whether y_source is specified
203239 if (is.null(y_source )) {
204- expected_columns <- c(" geo" , " geo_name" , " geo_source" , " geo_year" , " data_year" , " x" )
240+ expected_columns <- c(
241+ " geo" ,
242+ " geo_name" ,
243+ " geo_source" ,
244+ " geo_year" ,
245+ " data_year" ,
246+ " x"
247+ )
205248 } else {
206- expected_columns <- c(" geo" , " geo_name" , " geo_source" , " geo_year" ,
207- " predictor_year" , " outcome_year" , " x" , " y" )
249+ expected_columns <- c(
250+ " geo" ,
251+ " geo_name" ,
252+ " geo_source" ,
253+ " geo_year" ,
254+ " predictor_year" ,
255+ " outcome_year" ,
256+ " x" ,
257+ " y"
258+ )
208259 }
209260
210261 # Check for missing expected columns
211262 missing_columns <- setdiff(expected_columns , colnames(response_data ))
212263
213264 if (length(missing_columns ) > 0 ) {
214- stop(" The following expected columns are missing from the response: " , paste(missing_columns , collapse = " , " ), " . The API may be down or might have changed. Please try again later. If the error persists, please open an issue on GitHub at <https://github.com/e-kotov/mapineqr/issues>." )
265+ stop(
266+ " The following expected columns are missing from the response: " ,
267+ paste(missing_columns , collapse = " , " ),
268+ " . The API may be down or might have changed. Please try again later. If the error persists, please open an issue on GitHub at <https://github.com/e-kotov/mapineqr/issues>."
269+ )
215270 }
216271
217272 # Select and reorder columns using dplyr
218- response_data <- response_data | >
219- dplyr :: select(dplyr :: all_of(expected_columns )) | >
220- dplyr :: rename_with(~ dplyr :: case_when(
221- .x == " predictor_year" ~ " x_year" ,
222- .x == " data_year" & ! " predictor_year" %in% colnames(response_data ) ~ " x_year" ,
223- .x == " outcome_year" ~ " y_year" ,
224- TRUE ~ .x
225- ), .cols = dplyr :: any_of(c(" predictor_year" , " outcome_year" , " data_year" )))
226-
273+ response_data <- response_data | >
274+ dplyr :: select(dplyr :: all_of(expected_columns )) | >
275+ dplyr :: rename_with(
276+ ~ dplyr :: case_when(
277+ .x == " predictor_year" ~ " x_year" ,
278+ .x == " data_year" & ! " predictor_year" %in% colnames(response_data ) ~
279+ " x_year" ,
280+ .x == " outcome_year" ~ " y_year" ,
281+ TRUE ~ .x
282+ ),
283+ .cols = dplyr :: any_of(c(" predictor_year" , " outcome_year" , " data_year" ))
284+ )
285+
227286 return (response_data )
228287}
0 commit comments