Skip to content

Commit 5bc2cc8

Browse files
committed
add smart duplicate checking
1 parent 56d1b1b commit 5bc2cc8

File tree

1 file changed

+48
-15
lines changed

1 file changed

+48
-15
lines changed

R/data.R

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#' * `x`: the value of the univariate variable.
2222
#' * `y` (optional): the value of the y variable (only included when `y_source` is provided).
2323
#'
24+
#' @importFrom rlang .data
2425
#' @export
2526
#'
2627
#' @examples
@@ -140,31 +141,63 @@ mi_data <- function(
140141
.groups = "drop"
141142
)
142143

143-
# Determine if any geo has multiple distinct values
144144
x_issue <- any(duplicate_issues$distinct_x > 1)
145145
y_issue <- if ("y" %in% names(response_data)) any(duplicate_issues$distinct_y > 1) else FALSE
146146

147+
# Only run the extra filter checks when duplicate_issues reports more than one distinct x or y value
147148
if (x_issue || y_issue) {
148-
msg <- "The API returned duplicate values for some geographic regions. This likely indicates that not all necessary filters were specified for the data source(s)."
149+
# --- For the x variable ---
150+
missing_x_filters <- character(0)
149151
if (x_issue) {
150-
msg <- paste0(
151-
msg,
152-
"\n\nFor the 'x' variable: please check the 'x_filters' argument provided to mi_data() for the data source '", x_source, "'.",
153-
"\nYou can review the available filters by running:\n",
154-
" mi_source_filters(source_name = '", x_source, "', year = ", year, ", level = '", level, "')\n"
155-
)
152+
available_filters <- mi_source_filters(source_name = x_source, year = year, level = level)
153+
# Determine which filter fields have more than one option
154+
multi_option_fields <- available_filters %>%
155+
dplyr::group_by(field) %>%
156+
dplyr::summarise(n_options = dplyr::n_distinct(value), .groups = "drop") %>%
157+
dplyr::filter(n_options > 1) %>%
158+
dplyr::pull(field)
159+
# Only require filters for those fields with multiple options.
160+
missing_x_filters <- setdiff(multi_option_fields, names(x_filters))
156161
}
162+
163+
# --- For the y variable (if applicable) ---
164+
missing_y_filters <- character(0)
157165
if (y_issue) {
158-
msg <- paste0(
159-
msg,
160-
"\n\nFor the 'y' variable: please check the 'y_filters' argument provided to mi_data() for the data source '", y_source, "'.",
161-
"\nYou can review the available filters by running:\n",
162-
" mi_source_filters(source_name = '", y_source, "', year = ", year, ", level = '", level, "')\n"
163-
)
166+
available_y_filters <- mi_source_filters(source_name = y_source, year = year, level = level)
167+
multi_option_y_fields <- available_y_filters %>%
168+
dplyr::group_by(field) %>%
169+
dplyr::summarise(n_options = dplyr::n_distinct(value), .groups = "drop") %>%
170+
dplyr::filter(n_options > 1) %>%
171+
dplyr::pull(field)
172+
missing_y_filters <- setdiff(multi_option_y_fields, names(y_filters))
173+
}
174+
175+
# Raise an error only when at least one multi-option filter field was left unspecified; if none is missing, the duplicates detected above do not trigger an error here.
176+
if (length(missing_x_filters) > 0 || length(missing_y_filters) > 0) {
177+
msg <- "The API returned duplicate values for some geographic regions. This may indicate that not all necessary filters were specified."
178+
if (length(missing_x_filters) > 0) {
179+
msg <- paste0(
180+
msg,
181+
"\n\nFor the 'x' variable (source: '", x_source, "'):",
182+
"\n The following filter fields (with multiple available options) were not specified: ",
183+
paste(missing_x_filters, collapse = ", "),
184+
"\nYou can review available filters by running:\n mi_source_filters(source_name = '", x_source, "', year = ", year, ", level = '", level, "')"
185+
)
186+
}
187+
if (length(missing_y_filters) > 0) {
188+
msg <- paste0(
189+
msg,
190+
"\n\nFor the 'y' variable (source: '", y_source, "'):",
191+
"\n The following filter fields (with multiple available options) were not specified: ",
192+
paste(missing_y_filters, collapse = ", "),
193+
"\nYou can review available filters by running:\n mi_source_filters(source_name = '", y_source, "', year = ", year, ", level = '", level, "')"
194+
)
195+
}
196+
stop(msg)
164197
}
165-
stop(msg)
166198
}
167199

200+
168201

169202
# Define expected columns based on whether y_source is specified
170203
if (is.null(y_source)) {

0 commit comments

Comments
 (0)