Skip to content

Commit 629a33c

Browse files
Issue 1641 survey::olr bugs (#1642)
* Fix survey comparisons NA filtering and bump version * survey matching intercept names for ord
1 parent 915178f commit 629a33c

File tree

6 files changed

+45
-6
lines changed

6 files changed

+45
-6
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: marginaleffects
22
Title: Predictions, Comparisons, Slopes, Marginal Means, and Hypothesis Tests
3-
Version: 0.31.0.3
3+
Version: 0.31.0.4
44
Authors@R:
55
c(person(given = "Vincent",
66
family = "Arel-Bundock",

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ Bugs:
88

99
* Error when merging the original data back into `comparisons()` when the data includes some list columns. The problem is that `data.table` does not support that column type. We now return the original data.table error as a warning, and do not merge the data back. Thanks to @raffaem for report #1638.
1010
* Improve warning message for hypothesis string order. Thanks to @zakarydraper for report #1640.
11+
* `comparisons()` with survey-weighted ordinal regressions failed because `stats::na.omit()` discarded every row when auxiliary columns were all `NA`, and the remaining objects fell out of alignment. We now filter using a shared index so hi/lo predictions, weights, and posterior draws stay synchronized.
12+
* `set_coef.svyolr()` did not recognize thresholds named like `"Intercept: 1|2"`, so delta-method perturbations replaced all cutpoints with `NA` and SEs vanished for `comparisons()`/`avg_*()` on survey ordinal models. Threshold names are now matched with or without the `"Intercept:"` prefix.
1113

1214
## 0.31.0
1315

R/get_comparisons.R

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,9 @@ compare_hi_lo_bayesian <- function(out, draws, draws_hi, draws_lo, draws_or, by,
298298
# drop missing otherwise get_averages() fails when trying to take a
299299
# simple mean
300300
idx_na <- !is.na(out$predicted_lo)
301-
out <- stats::na.omit(out, cols = "predicted_lo")
301+
# Build a single index and reuse it so `out` and the posterior draws remain aligned
302+
# while removing rows with missing `predicted_lo`.
303+
out <- out[idx_na]
302304

303305
# TODO: performance is probably terrrrrible here, but splitting is
304306
# tricky because grouping rows are not always contiguous, and the order
@@ -366,7 +368,11 @@ compare_hi_lo_bayesian <- function(out, draws, draws_hi, draws_lo, draws_or, by,
366368

367369

368370
compare_hi_lo_frequentist <- function(out, idx, cross, variables, fun_list, elasticities, newdata) {
369-
out <- stats::na.omit(out, cols = "predicted_lo")
371+
# When survey weights add all-NA aux columns, stats::na.omit() (which ignores
372+
# `cols`) was zeroing out `out`. Build one index and reuse it so downstream
373+
# operations rely on the same filtered rows.
374+
idx_pred <- !is.na(out$predicted_lo)
375+
out <- out[idx_pred]
370376
# We want to write the "estimate" column in-place because it safer
371377
# than group-merge; there were several bugs related to this in the past.
372378
# safefun() returns 1 value and NAs when the function retunrs a
@@ -402,13 +408,16 @@ compare_hi_lo_frequentist <- function(out, idx, cross, variables, fun_list, elas
402408
out <- subset(out, select = idx)
403409
}
404410
}
405-
out <- stats::na.omit(out, cols = "estimate")
411+
out <- out[!is.na(estimate)]
406412

407413
return(out)
408414
}
409415

410416

411417
compare_hi_lo <- function(hi, lo, y, n, term, cross, wts, tmp_idx, newdata, variables, fun_list, elasticities) {
418+
if (n == 0 || length(hi) == 0) {
419+
return(numeric(0))
420+
}
412421
tn <- term[1]
413422
eps <- variables[[tn]]$eps
414423
# when cross=TRUE, sanitize_comparison enforces a single function

R/methods_survey.R

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,15 @@ set_coef.svyolr <- function(model, coefs, ...) {
1616
# in basic model classes coefficients are named vector
1717
idx <- match(names(model$coefficients), names(coefs))
1818
model[["coefficients"]] <- coefs[idx]
19-
idx <- match(names(model$zeta), names(coefs))
19+
20+
# thresholds can be named either "1|2" or "Intercept: 1|2"
21+
zeta_names <- names(model$zeta)
22+
idx <- match(zeta_names, names(coefs))
23+
if (anyNA(idx)) {
24+
zeta_alt <- paste("Intercept:", zeta_names)
25+
idx_alt <- match(zeta_alt, names(coefs))
26+
idx <- ifelse(is.na(idx), idx_alt, idx)
27+
}
2028
model[["zeta"]] <- coefs[idx]
2129
model
2230
}

R/sanitize_newdata.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ add_wts_column <- function(wts, newdata, model) {
184184
flag2 <- isTRUE(checkmate::check_numeric(wts, len = nrow(newdata)))
185185
if (!flag1 && !flag2) {
186186
msg <- sprintf(
187-
"The `wts` argument must be a numeric vector of length %s, or a string which matches a column name in `newdata`. If you did not supply a `newdata` explicitly, `marginaleffects` extracted it automatically from the model object, and the `wts` variable may not have been available. The easiest strategy is often to supply a data frame such as the original data to `newdata` explicitly, and to make sure that it includes an appropriate column of weights, identified by the `wts` argument.",
187+
"The `wts` argument must be a numeric vector of length %s, or a string which matches one of the `colnames()` in the data frame that you supplied to the `newdata`, or in the `marginaleffects` objects.",
188188
nrow(newdata)
189189
)
190190
stop(msg, call. = FALSE)

inst/tinytest/test-pkg-survey.R

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,23 @@ m <- suppressWarnings(svyglm(
5858
))
5959
cmp <- avg_comparisons(m, variables = "education", by = c("ban", "gender"), wts = "weights", hypothesis = ~reference)
6060
expect_false(anyNA(cmp$estimate))
61+
62+
# svyolr delta-method standard errors
63+
set.seed(1234)
64+
n <- 400
65+
z <- rbinom(n, 1, 0.5)
66+
x <- factor(sample(c("1", "2", "3"), n, replace = TRUE))
67+
beta_x <- c("1" = 0, "2" = 0.4, "3" = -0.3)
68+
beta_z <- 0.6
69+
eta <- beta_x[x] + beta_z * z + rlogis(n)
70+
cuts <- c(-1.5, -0.5, 0.5, 1.5)
71+
y <- cut(eta, breaks = c(-Inf, cuts, Inf), labels = 1:5, ordered_result = TRUE)
72+
weights <- rlnorm(n, meanlog = log(50000), sdlog = 1)
73+
design <- svydesign(ids = ~1, weights = ~weights, data = data.frame(y, x, z, weights))
74+
ord_svy <- svyolr(y ~ x + z, method = "logistic", design = design)
75+
cmp <- comparisons(ord_svy, wts = "(weights)")
76+
expect_true(any(!is.na(cmp$std.error)))
77+
avg_cmp <- avg_comparisons(ord_svy, wts = "(weights)")
78+
expect_true(any(!is.na(avg_cmp$std.error)))
79+
avg_pred <- avg_predictions(ord_svy, wts = "(weights)")
80+
expect_true(any(!is.na(avg_pred$std.error)))

0 commit comments

Comments
 (0)