Skip to content

Commit 56d1b1b

Browse files
committed
add duplicate checks to mi_data and suggested solutions in messages
1 parent fe71088 commit 56d1b1b

File tree

1 file changed

+49
-8
lines changed

1 file changed

+49
-8
lines changed

R/data.R

Lines changed: 49 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,10 @@ mi_data <- function(
7070
source = x_source,
7171
conditions = x_conditions
7272
)
73-
x_json_string <- jsonlite::toJSON(x_json, auto_unbox = TRUE)
73+
# Minify JSON to remove extra whitespace/newlines
74+
x_json_string <- jsonlite::minify(
75+
jsonlite::toJSON(x_json, auto_unbox = TRUE)
76+
)
7477

7578
# Check if it's bivariate (Y filters are provided)
7679
if (!is.null(y_source) && !is.null(y_filters)) {
@@ -81,7 +84,9 @@ mi_data <- function(
8184
source = y_source,
8285
conditions = y_conditions
8386
)
84-
y_json_string <- jsonlite::toJSON(y_json, auto_unbox = TRUE)
87+
y_json_string <- jsonlite::minify(
88+
jsonlite::toJSON(y_json, auto_unbox = TRUE)
89+
)
8590
}
8691

8792
# Build API endpoint
@@ -105,26 +110,62 @@ mi_data <- function(
105110
query_params$`_outcome_year` <- as.character(year)
106111
}
107112

108-
# Add JSON parameters as proper strings without URL encoding issues
109-
query_params$`X_JSON` <- I(x_json_string)
113+
# Add JSON parameters as proper strings so that httr2 can URL encode them automatically
114+
query_params$`X_JSON` <- x_json_string
110115
if (!is.null(y_source) && !is.null(y_filters)) {
111-
query_params$`Y_JSON` <- I(y_json_string)
116+
query_params$`Y_JSON` <- y_json_string
112117
}
113118

114119
# Perform API request
115-
response <- httr2::request(url_endpoint) |>
120+
request <- httr2::request(url_endpoint) |>
116121
httr2::req_headers(
117122
"Content-Type" = "application/json",
118123
"User-Agent" = getOption("mapineqr.user_agent")
119124
) |>
120125
httr2::req_url_query(!!!query_params) |>
121-
httr2::req_method("GET") |>
122-
httr2::req_perform()
126+
httr2::req_method("GET")
127+
128+
response <- request |> httr2::req_perform()
123129

124130
# Parse response
125131
response_data <- httr2::resp_body_json(response, simplifyVector = TRUE) |>
126132
tibble::as_tibble()
127133

134+
# Check for duplicate values within each geo for x and (if applicable) y.
135+
duplicate_issues <- response_data %>%
136+
dplyr::group_by(geo) %>%
137+
dplyr::summarise(
138+
distinct_x = dplyr::n_distinct(x),
139+
distinct_y = if ("y" %in% names(response_data)) dplyr::n_distinct(y) else NA_integer_,
140+
.groups = "drop"
141+
)
142+
143+
# Determine if any geo has multiple distinct values
144+
x_issue <- any(duplicate_issues$distinct_x > 1)
145+
y_issue <- if ("y" %in% names(response_data)) any(duplicate_issues$distinct_y > 1) else FALSE
146+
147+
if (x_issue || y_issue) {
148+
msg <- "The API returned duplicate values for some geographic regions. This likely indicates that not all necessary filters were specified for the data source(s)."
149+
if (x_issue) {
150+
msg <- paste0(
151+
msg,
152+
"\n\nFor the 'x' variable: please check the 'x_filters' argument provided to mi_data() for the data source '", x_source, "'.",
153+
"\nYou can review the available filters by running:\n",
154+
" mi_source_filters(source_name = '", x_source, "', year = ", year, ", level = '", level, "')\n"
155+
)
156+
}
157+
if (y_issue) {
158+
msg <- paste0(
159+
msg,
160+
"\n\nFor the 'y' variable: please check the 'y_filters' argument provided to mi_data() for the data source '", y_source, "'.",
161+
"\nYou can review the available filters by running:\n",
162+
" mi_source_filters(source_name = '", y_source, "', year = ", year, ", level = '", level, "')\n"
163+
)
164+
}
165+
stop(msg)
166+
}
167+
168+
128169
# Define expected columns based on whether y_source is specified
129170
if (is.null(y_source)) {
130171
expected_columns <- c("geo", "geo_name", "geo_source", "geo_year", "data_year", "x")

0 commit comments

Comments
 (0)