# Generated by `rjournal_pdf_article()` using `knitr::purl()`: do not edit by hand
# Please edit RJ-2025-032.Rmd to modify this file

## ----setup, include=FALSE-----------------------------------------------------
# Global knitr chunk defaults for the article.
knitr::opts_chunk$set(
  echo = TRUE,          # show code by default
  warning = FALSE,      # suppress warnings
  message = FALSE,      # suppress messages
  fig.align = "center", # center figures
  fig.width = 6,        # default figure width in inches
  fig.height = 4,       # default figure height in inches
  dpi = 300,            # high-res figures for PDF
  out.width = "100%",
  cache = TRUE
)
options(csurvey.multicore = FALSE)
library(Matrix)
library(data.table)
library(coneproj)
#library(foreign)
library(tidyverse)
library(csurvey)
# MASS is attached after the tidyverse, so dplyr verbs used further down are
# called with an explicit `dplyr::` prefix.
library(MASS)
library(survey)


## -----------------------------------------------------------------------------
library(csurvey)
data(nhdat2, package = "csurvey")
# `weights` is written out in full; the abbreviated `weight` only works through
# partial argument matching.
dstrat <- svydesign(ids = ~id, strata = ~str, data = nhdat2, weights = ~wt)


## -----------------------------------------------------------------------------
# Seed the RNG before fitting, exactly as every other csvy() call in this
# script does, so that repeated runs give the same output.
set.seed(1)
ans <- csvy(chol ~ incr(age), design = dstrat, n.mix = 100)


## -----------------------------------------------------------------------------
cat("CIC (constrained):", ans$CIC, "\n")


## -----------------------------------------------------------------------------
cat("CIC (unconstrained):", ans$CIC.un, "\n")


## -----------------------------------------------------------------------------
# Contrast: mean of the last 12 domains minus mean of the first 13 domains.
cat(svycontrast(ans, list(avg = c(rep(-1, 13) / 13, rep(1, 12) / 12))), "\n")


## ----nh1big, fig.cap="Estimates of average cholesterol level for 25 ages, with 95% confidence intervals, for a stratified sample in the R dataset `nhdat2`, $n=1933$.", fig.align='center', echo=FALSE----
knitr::include_graphics("figures/nhanes1.png")


## -----------------------------------------------------------------------------
set.seed(1)
ans <- csvy(chol ~ incr(age)*incr(wcat)*icat, design = dstrat)


## -----------------------------------------------------------------------------
# Two domains of interest, one per row: (age, wcat, icat) = (24, 2, 2), (35, 4, 3).
domains <- data.frame(age = c(24, 35), wcat = c(2, 4), icat = c(2, 3))
pans <- predict(ans, newdata = domains, se.fit = TRUE)
cat("Predicted values, confidence intervals and standard errors for specified domains:\n")
print(pans)


## ----nh2, fig.cap="Constrained estimates of population domain means for 400 domains in a 25x4x4 grid. The increasing population domain estimates for the 25 ages are shown within the waist size and income categories. The blue bands indicate 95% confidence intervals for the population domain means, with two specific domains, namely, (age, waist, income) = (24, 2, 2) and (35, 4, 3) marked in red. Empty domains are marked with a red 'x' sign.", fig.align='center', echo=FALSE----
knitr::include_graphics("figures/nhanes_grid3.png")


## ----nh2un, fig.cap="Unconstrained estimates of population domain means for 400 domains in a 25x4x4 grid. The population domain estimates for the 25 ages are shown within the waist size and income categories. The green bands indicate 95% confidence intervals for the population domain means. Empty domains are marked with a red 'x' sign.", fig.align='center', echo=FALSE----
knitr::include_graphics("figures/nhanes_grid3_un.png")


## ----surface9, fig.cap="Estimates of average log(salary) by field of study and year of degree, for observations where highest degree is a Bachelor's, for each of the nine regions.", fig.align='center', echo=FALSE, out.width="60%"----
knitr::include_graphics("figures/new_surfaces9.png")


## ----NEreg, fig.cap="Estimates of average log(salary) for the 75 domains in each of three regions. The blue dots represent the constrained domain mean estimates, while the grey dots represent the unconstrained domain mean estimates. The blue band is the 95% confidence interval for the domains, using the constraints; the grey band is the 95% unconstrained domain mean confidence interval.", fig.align='center', echo=FALSE, out.width="100%"----
knitr::include_graphics("figures/newplot4.png")


## ----test, fig.cap="Estimates of average log(salary) by father's education level, for each of five regions and four fields, for subjects whose degree was attained in 2016-2017. The solid blue lines connect the estimates where the average salary is constrained to be increasing in father's education, and the solid red lines connect unconstrained estimates of average salary.", fig.align='center', echo=FALSE, out.width="100%"----
knitr::include_graphics("figures/daded.png")


## ----comppv, echo=FALSE, results='asis'---------------------------------------
library(knitr)
library(kableExtra)

# One table row of pre-formatted p-values: a (one-sided, two-sided) pair for
# each two-year period in `years`.
years <- c("2008-09","2010-11","2012-13","2014-15","2016-17","2018-19")
vals <- matrix(
  c(".008", "n/a",
    "<.001", ".018",
    "<.001", "<.001",
    "<.001", "n/a",
    ".003", ".417",
    "<.001", "n/a"),
  nrow = 1, byrow = TRUE
)
df <- as.data.frame(vals, stringsAsFactors = FALSE)
colnames(df) <- rep(c("one", "two"), length(years))

# Each period label spans its two ("one", "two") columns in the header row.
kable(df, booktabs = TRUE,
      caption = "One-sided and two-sided $p$-values for the test of the null hypothesis that salary is constant in father's education level. The two-sided test results in n/a when the grid has at least one empty domain.",
      escape = TRUE) %>%
  add_header_above(setNames(rep(2, length(years)), years)) %>%
  kable_styling(latex_options = c("hold_position"))


## -----------------------------------------------------------------------------
# NOTE(review): load() is expected to create `nscg2` in the workspace; confirm
# against the contents of nscg19_2.rda.
load("./nscg19_2.rda")
# Named `nscg_0809` rather than `data` so that it does not shadow utils::data(),
# which is called again further down.
nscg_0809 <- nscg2 |>
  dplyr::filter(hd_year %in% c(2008, 2009))

# Replicate-weight design built from columns RW0001 through RW0320.
rds <- svrepdesign(
  data = nscg_0809,
  repweights = dplyr::select(nscg_0809, "RW0001":"RW0320"),
  weights = ~w,
  combined.weights = TRUE,
  mse = TRUE,
  type = "other",
  scale = 1,
  rscale = 0.05
)

set.seed(1)
ans <- csvy(logSalary ~ incr(daded) * field * region, design = rds, test = TRUE)


## ----eval=T-------------------------------------------------------------------
summary(ans)


## -----------------------------------------------------------------------------
data(nhdat, package = "csurvey")
# `weights` is written out in full; the abbreviated `weight` only works through
# partial argument matching.
dstrat <- svydesign(ids = ~ id, strata = ~ str, data = nhdat, weights = ~ wt)
set.seed(1)
ans <- csvy(chol ~ incr(age) * incr(wcat) * gender, design = dstrat,
            family = binomial(link = "logit"), test = TRUE)


## -----------------------------------------------------------------------------
summary(ans)


## ----eval=FALSE, echo=FALSE---------------------------------------------------
# ctl <- list(angle = 0, x1size = 2, x2size = 2, x1lab = "waist", x2_labels = c("male", "female"),
#             subtitle.size=6)
# plot(ans, x1 = "wcat", x2 = "gender", type="both", control = ctl)


## ----nhanesbin, fig.cap="Estimates of probability of high cholesterol level for each combination of age, waist and gender. The blue dots represent the constrained domain mean estimates, while the green dots represent the unconstrained domain mean estimates. The blue band is the 95% confidence interval for the domains, using the constraints; the green band is the 95% unconstrained domain mean confidence interval.", fig.align='center', echo=FALSE----
knitr::include_graphics("figures/nhanes_bin.png")