Skip to content

Commit c1a6017

Browse files
authored
Variable population size update (#78)
* Variable pop size for simulate functions * Update GeoTox.print * Message format consistency * Ran devtools::document() * Added set_population function * Updated paper doi * Increment version number to 0.2.0.9001 * Updated NEWS * Added set_population to _pkgdown.yml * Updated documentation * Fix for devtools::check() note
1 parent f2b0809 commit c1a6017

26 files changed

+567
-155
lines changed

DESCRIPTION

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: GeoTox
22
Title: Spatiotemporal Mixture Risk Assessment
3-
Version: 0.2.0.9000
3+
Version: 0.2.0.9001
44
Authors@R: c(
55
person("Skylar", "Marvel", , "[email protected]", role = c("aut"),
66
comment = c(ORCID = "0000-0002-2971-9743")),
@@ -12,8 +12,8 @@ Authors@R: c(
1212
)
1313
Description: Connecting spatiotemporal exposure to individual and
1414
population-level risk via source-to-outcome continuum modeling. The package,
15-
methods, and case-studies are described in Messier, Reif, and Marvel (2024)
16-
<doi:10.1101/2024.09.23.24314096> and Eccles et al. (2023)
15+
methods, and case-studies are described in Messier, Reif, and Marvel (2025)
16+
<doi:10.1186/s40246-024-00711-8> and Eccles et al. (2023)
1717
<doi:10.1016/j.scitotenv.2022.158905>.
1818
License: MIT + file LICENSE
1919
URL: https://niehs.github.io/GeoTox/, https://github.com/NIEHS/GeoTox
@@ -23,6 +23,7 @@ Imports:
2323
dplyr,
2424
ggplot2,
2525
ggridges,
26+
purrr,
2627
rlang,
2728
sf,
2829
stats,
@@ -37,7 +38,6 @@ Suggests:
3738
httk,
3839
httr2,
3940
knitr,
40-
purrr,
4141
readr,
4242
readxl,
4343
rmarkdown,

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ export(sample_Css)
2626
export(sensitivity_analysis)
2727
export(set_boundaries)
2828
export(set_hill_params)
29+
export(set_population)
2930
export(simulate_age)
3031
export(simulate_exposure)
3132
export(simulate_inhalation_rate)

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# GeoTox (development version)
22

3+
* Added set_population() function.
4+
5+
* The simulate\_\* functions can now handle population sizes that vary
6+
across regions.
7+
38
# GeoTox 0.2.0
49

510
* Initial CRAN submission.

R/GeoTox.R

Lines changed: 88 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@
4141
#' # Plot exposure data
4242
#' plot(geoTox, type = "exposure", ncol = 5)
4343
#' # Plot response data
44-
#' plot(geoTox)
4544
#' plot(geoTox, assays = "TOX21_H2AX_HTRF_CHO_Agonist_ratio")
4645
#' # Plot sensitivity data
47-
#' plot(geoTox, type = "sensitivity")
48-
#' plot(geoTox, type = "sensitivity", assay = "TOX21_H2AX_HTRF_CHO_Agonist_ratio")
46+
#' plot(geoTox,
47+
#' type = "sensitivity",
48+
#' assay = "TOX21_H2AX_HTRF_CHO_Agonist_ratio")
4949
GeoTox <- function() {
5050
structure(
5151
list(
@@ -70,78 +70,104 @@ GeoTox <- function() {
7070
#' @export
7171
print.GeoTox <- function(x, ...) {
7272

73-
names_simulated <- c("age", "IR", "obesity", "C_ext", "C_ss")
74-
names_computed <- c("D_int", "C_invitro", "resp", "sensitivity")
75-
names_other <- setdiff(names(x),
76-
c(names_simulated, names_computed))
77-
78-
get_info <- function(names) {
79-
info <- lapply(names, \(name) {
80-
class <- dim <- ""
81-
if (is.null(x[[name]])) {
82-
return(data.frame(Name = name, Class = "", Dim = ""))
83-
}
84-
is_list <- inherits(x[[name]], "list")
85-
if (is_list && length(x[[name]]) > 0) {
86-
item <- x[[name]][[1]]
87-
} else if (!is_list) {
88-
item <- x[[name]]
89-
} else {
90-
item <- NULL
91-
}
92-
class <- class(item)
93-
if (any(c("matrix", "data.frame") %in% class)) {
94-
dim <- paste(dim(item), collapse = " x ")
95-
} else {
96-
dim <- length(item)
97-
}
98-
if (is_list) {
99-
dim <- paste0(length(x[[name]]), " x (", dim, ")")
100-
class <- paste0("list(", class[[1]], ")")
101-
} else {
102-
class <- paste(class, collapse = ", ")
103-
}
104-
data.frame(Name = name, Class = class, Dim = dim)
105-
})
106-
do.call(rbind, info)
107-
}
108-
109-
info_simulated <- get_info(names_simulated)
110-
info_simulated <- info_simulated[info_simulated$Class != "", , drop = FALSE]
111-
info_computed <- get_info(names_computed)
112-
info_computed <- info_computed[info_computed$Class != "", , drop = FALSE]
113-
114-
cat("GeoTox object\n")
73+
# Get n_assay and n_chem from GeoTox()$hill_params
11574
if (is.null(x$hill_params)) {
116-
n_assays <- 0
117-
n_chems <- 0
75+
n_assay <- 0
76+
n_chem <- 0
11877
} else {
11978
if ("assay" %in% names(x$hill_params)) {
120-
n_assays <- length(unique(x$hill_params$assay))
79+
n_assay <- length(unique(x$hill_params$assay))
12180
} else {
122-
n_assays <- 1
81+
n_assay <- 1
12382
}
12483
if ("chem" %in% names(x$hill_params)) {
125-
n_chems <- length(unique(x$hill_params$chem))
84+
n_chem <- length(unique(x$hill_params$chem))
12685
} else {
127-
n_chems <- 1
86+
n_chem <- 1
87+
}
88+
}
89+
90+
# Categorize different GeoTox() fields
91+
names_data_vec <- c("age", "IR", "obesity")
92+
names_data_mat <- c("C_ext", "C_ss")
93+
names_computed_mat <- c("D_int", "C_invitro")
94+
names_computed_df <- c("resp")
95+
names_computed_list <- c("sensitivity")
96+
names_other <- setdiff(names(x),
97+
c(names_data_vec, names_data_mat,
98+
names_computed_mat, names_computed_df,
99+
names_computed_list))
100+
101+
# Functions to get size info for each type of field
102+
# m = number of regions
103+
# n = population size
104+
get_info_vec <- function(name) {
105+
size <- ifelse(is.null(x[[name]]), "", "m * (n)")
106+
data.frame(Name = name, Size = size)
107+
}
108+
get_info_mat <- function(name) {
109+
size <- ""
110+
if (!is.null(x[[name]])) {
111+
dim <- dim(x[[name]][[1]])
112+
size <- paste0("m * (n x ", dim[2], ")")
113+
}
114+
data.frame(Name = name, Size = size)
115+
}
116+
get_info_df <- function(name) {
117+
size <- ""
118+
if (!is.null(x[[name]])) {
119+
dim <- dim(x[[name]][[1]])
120+
size <- paste0("m * (", n_assay, " * n x ", dim[2], ")")
128121
}
122+
data.frame(Name = name, Size = size)
129123
}
130-
cat("Assays: ", n_assays, "\n", sep = "")
131-
cat("Chemicals: ", n_chems, "\n", sep = "")
132-
if (nrow(info_simulated) > 0) {
133-
n_regions <- length(x[[info_simulated$Name[1]]])
134-
} else if (nrow(info_computed) > 0) {
135-
n_regions <- length(x[[info_computed$Name[1]]])
124+
get_info_list <- function(name) {
125+
size <- ""
126+
if (!is.null(x[[name]])) {
127+
n_list <- length(x[[name]])
128+
dim <- dim(x[[name]][[1]][[1]])
129+
size <- paste0(n_list, " * (m * (", n_assay, " * n x ", dim[2], "))")
130+
}
131+
data.frame(Name = name, Size = size)
132+
}
133+
134+
# Get size info for each type of field
135+
info_data <- dplyr::bind_rows(
136+
purrr::map(names_data_vec, \(name) get_info_vec(name)),
137+
purrr::map(names_data_mat, \(name) get_info_mat(name))) |>
138+
dplyr::filter(.data$Size != "")
139+
140+
info_computed <- dplyr::bind_rows(
141+
purrr::map(names_computed_mat, \(name) get_info_mat(name)),
142+
purrr::map(names_computed_df, \(name) get_info_df(name)),
143+
purrr::map(names_computed_list, \(name) get_info_list(name))) |>
144+
dplyr::filter(.data$Size != "")
145+
146+
# Get population size from GeoTox()$par$n
147+
if (is.null(x$par$n)) {
148+
n_pop <- 0
149+
} else if (length(unique(x$par$n)) == 1) {
150+
n_pop <- x$par$n[[1]]
136151
} else {
137-
n_regions <- 0
152+
n_pop <- paste0("[", paste(range(x$par$n), collapse = ", "), "]")
138153
}
139-
cat("Regions: ", n_regions, "\n", sep = "")
140-
cat("Population: ", x$par$n, "\n", sep = "")
154+
155+
# Get number of regions from potential data fields
156+
n_region <- purrr::map_int(c(names_data_vec, names_data_mat,
157+
names_computed_mat, names_computed_df),
158+
\(name) length(x[[name]])) |>
159+
max()
160+
161+
# Output info
162+
cat("GeoTox object\n")
163+
cat("Assays: ", n_assay, "\n", sep = "")
164+
cat("Chemicals: ", n_chem, "\n", sep = "")
165+
cat("Regions: m = ", n_region, "\n", sep = "")
166+
cat("Population: n = ", n_pop, "\n", sep = "")
141167
cat("Data Fields:")
142-
if (nrow(info_simulated) > 0) {
168+
if (nrow(info_data) > 0) {
143169
cat("\n")
144-
print(info_simulated, row.names = FALSE, print.gap = 2)
170+
print(info_data, row.names = FALSE, print.gap = 2)
145171
} else {
146172
cat(" None\n")
147173
}

R/set_population.R

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#' Set population data
2+
#'
3+
#' @param x GeoTox object.
4+
#' @param age numeric vector or list of numeric vectors of age values.
5+
#' @param obesity character vector or list of character vectors of obesity
6+
#' status.
7+
#'
8+
#' @return The same object with simulated fields added.
9+
#' @export
10+
#'
11+
#' @examples
12+
#' # Single region
13+
#' age <- round(runif(10, 1, 100))
14+
#' obesity <- sample(c("Normal", "Obese"), 10, replace = TRUE)
15+
#' geoTox <- set_population(GeoTox(), age = age, obesity = obesity)
16+
#'
17+
#' # Multiple regions
18+
#' age <- list("37001" = round(runif(10, 1, 100)),
19+
#' "37007" = round(runif(8, 1, 100)))
20+
#' obesity <- list("37001" = sample(c("Normal", "Obese"), 10, replace = TRUE),
21+
#' "37007" = sample(c("Normal", "Obese"), 8, replace = TRUE))
22+
#' geoTox <- set_population(GeoTox(), age = age, obesity = obesity)
23+
set_population <- function(x, age = NULL, obesity = NULL) {
24+
25+
set_age <- !is.null(age)
26+
set_obesity <- !is.null(obesity)
27+
28+
if (set_age) {
29+
age <- .check_types(age,
30+
c("numeric", "integer"),
31+
paste0("`age` must be a numeric vector or list of ",
32+
"numeric vectors"))
33+
}
34+
35+
if (set_obesity) {
36+
obesity <- .check_types(obesity,
37+
"character",
38+
paste0("`obesity` must be a character vector or ",
39+
"list of character vectors"))
40+
if (any(purrr::map_lgl(obesity,
41+
\(x) !all(x %in% c("Normal", "Obese"))))) {
42+
stop("`obesity` values must be 'Normal' or 'Obese'", call. = FALSE)
43+
}
44+
}
45+
46+
# Update population size
47+
if (set_age) n_age <- purrr::map_int(age, length)
48+
if (set_obesity) n_obesity <- purrr::map_int(obesity, length)
49+
if (set_age & set_obesity) {
50+
if (!identical(n_age, n_obesity)) {
51+
stop("Population sizes for `age` and `obesity` do not match",
52+
call. = FALSE)
53+
}
54+
x$par$n <- n_age
55+
} else if (set_age) {
56+
x$par$n <- n_age
57+
} else if (set_obesity) {
58+
x$par$n <- n_obesity
59+
}
60+
61+
# Set fields
62+
if (set_age) x$age <- age
63+
if (set_obesity) x$obesity <- obesity
64+
65+
# Clear downstream fields
66+
if (set_age & !is.null(x$IR)) {
67+
warning("Clearing `IR` field", call. = FALSE)
68+
x$IR <- NULL
69+
}
70+
if ((set_age | set_obesity) &
71+
!(is.null(x$C_ss) & is.null(x$css_sensitivity))) {
72+
warning("Clearing `C_ss` and `css_sensitivity` fields", call. = FALSE)
73+
x$C_ss <- NULL
74+
x$css_sensitivity <- NULL
75+
}
76+
77+
x
78+
}

R/simulate_age.R

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,18 @@
22
#'
33
#' @param x data frame or list of data frames containing population data for age
44
#' groups. Each data frame must contain columns "AGEGRP" and "TOT_POP".
5-
#' @param n simulated sample size.
5+
#' @param n simulated sample size(s).
66
#'
77
#' @details
88
#' Each data frame must contain 19 rows. The first row represents the total
99
#' population of all age groups while the next 18 rows represent age groups
1010
#' from 0 to 89 in increments of 5 years.
1111
#'
12+
#' The sample size can be either a single value or a vector the same length as
13+
#' the number of data frames in x. If a single value is provided, the same
14+
#' sample size is used for all data frames. If a vector is provided, each
15+
#' element corresponds to the sample size for each data frame in x.
16+
#'
1217
#' @return List of arrays containing simulated ages.
1318
#'
1419
#' @examples
@@ -25,25 +30,30 @@
2530
#' # set population total for all age groups
2631
#' y$TOT_POP[1] <- sum(y$TOT_POP)
2732
#' simulate_age(list(x = x, y = y), 15)
33+
#' # different sample sizes
34+
#' simulate_age(list(x = x, y = y), c(15, 10))
2835
#'
2936
#' @export
3037
simulate_age <- function(x, n = 1e3) {
3138

32-
if (!any(c("data.frame", "list") %in% class(x))) {
33-
stop("x must be a data.frame or list")
34-
}
35-
36-
if (is.data.frame(x)) x <- list(x)
39+
x <- .check_types(x,
40+
"data.frame",
41+
"`x` must be a data frame or list of data frames")
3742

3843
if (.check_names(x, c("AGEGRP", "TOT_POP"))) {
39-
stop("x data frames must contain columns 'AGEGRP' and 'TOT_POP'")
44+
stop("`x` data frames must contain columns 'AGEGRP' and 'TOT_POP'")
45+
}
46+
47+
if (any(unlist(lapply(x, \(y) nrow(y) != 19)))) {
48+
stop("`x` data frames must contain 19 rows")
4049
}
4150

42-
if (any(unlist(lapply(x, function(y) nrow(y) != 19)))) {
43-
stop("x data frames must contain 19 rows")
51+
if (!(length(n) == 1 | length(n) == length(x))) {
52+
stop("`n` must be a single value or a vector with values for ",
53+
"each data frame in `x`")
4454
}
4555

46-
lapply(x, function(df) .simulate_age(df, n))
56+
purrr::pmap(list(x, n), \(df, n) .simulate_age(df, n))
4757

4858
}
4959

0 commit comments

Comments
 (0)