Skip to content

Commit d17999b

Browse files
authored
Merge pull request #334 from USEPA/326-add-zscore-back-into-tcplVarMat
326 add zscore matrix back to tcplVarMat
2 parents a9f92ed + 2c153a3 commit d17999b

File tree

2 files changed

+60
-22
lines changed

2 files changed

+60
-22
lines changed

Diff for: R/tcplVarMat.R

+36-15
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,14 @@
99
#'
1010
#' @param dsstox_substance_id Integer, chemical ID values to subset on
1111
#' @param aeid Integer, assay endpoint ID values to subset on
12+
#' @param std.vars Character, standard set of matrices; use this parameter to
13+
#' subset this list
1214
#' @param add.vars Character, mc4 or mc5 field(s) not included in the standard
1315
#' list to add additional matrices
1416
#' @param flag Integer or Logical of length 1, passed to
1517
#' \code{\link{tcplSubsetChid}}
18+
#' @param cyto.pars List, named list of arguments passed to
19+
#' \code{\link{tcplCytoPt}} for z-score matrix
1620
#'
1721
#' @details
1822
#' The \code{tcplVarMat} function is used to create chemical by assay matrices
@@ -27,9 +31,12 @@
2731
#' winning model.
2832
#' \item "acc_verbose" -- The ACC for the winning model, with text describing
2933
#' some situations.
30-
#' \item "mc_hitc" -- The hit-call for the winning model in
34+
#' \item "hitc_mc" -- The hit-call for the winning model in
3135
#' multiple-concentration (mc) screening.
32-
#' \item "sc_hitc" -- The hit-call in single concentration (sc) screening.
36+
#' \item "hitc_sc" -- The hit-call in single concentration (sc) screening.
37+
#' \item "zscore" -- The z-score based on the output from \code{tcplCytoPt}.
38+
#' The formula used for calculating the z-score is
39+
#' \eqn{-(\log_{10}(\mathit{ac50}) - \mathit{cyto\_pt})/\mathit{global\_mad}}
3340
#' }
3441
#'
3542
#' \code{tcplVarMat} produces matrices of combined sc-mc output. For the ac50
@@ -57,10 +64,14 @@
5764
#'
5865
#' When more than one sample is included for a chemical/assay pair,
5966
#' \code{tcplVarMat} aggregates multiple samples to a chemical level call
60-
#' utilizing \code{\link{tcplSubsetChid}}. The input
61-
#' for the \code{tcplVarMat} 'flag' parameter is passed to the
62-
#' \code{tcplSubsetChid} call and used to parse down the data to create the
63-
#' matrices.
67+
#' utilizing \code{\link{tcplSubsetChid}}. The \code{tcplVarMat} function calls both
68+
#' \code{tcplSubsetChid} and \code{tcplCytoPt} (which separately calls
69+
#' \code{tcplSubsetChid}). The input for the \code{tcplVarMat} 'flag' parameter
70+
#' is passed to the \code{tcplSubsetChid} call and used to parse down the data
71+
#' to create the matrices. The \code{tcplSubsetChid} called within \code{tcplCytoPt}
72+
#' (to parse down the cytotoxicity data used to define the "zscore" matrix) can
73+
#' be modified by passing a separate 'flag' element in the list defined by the
74+
#' 'cyto.pars' parameter.
6475
#'
6576
#' @return A list of chemical by assay matrices (data.tables) where the
6677
#' rows are given by the dsstox_substance_id and corresponding chnm (chemical
@@ -76,6 +87,7 @@
7687
#' dtxsid <- c("DTXSID4034653", "DTXSID2032683", "DTXSID6032358",
7788
#' "DTXSID0032651", "DTXSID8034401")
7889
#' varmat <- tcplVarMat(aeid = aeids, dsstox_substance_id = dtxsid)
90+
#' varmat <- tcplVarMat(aeid = aeids, std.vars = c("ac50", "zscore"))
7991
#' varmat <- tcplVarMat(aeid = aeids, add.vars = c("m4id", "resp_max", "max_med"))
8092
#'
8193
#' ## To save output to file
@@ -92,10 +104,12 @@
92104

93105
tcplVarMat <- function(dsstox_substance_id = NULL,
94106
aeid = NULL,
107+
std.vars = c("ac50", "ac50_verbose", "acc", "acc_verbose", "hitc_mc", "hitc_sc", "zscore"),
95108
add.vars = NULL,
96-
flag = TRUE) {
109+
flag = TRUE,
110+
cyto.pars = list()) {
97111
#variable binding
98-
hitc <- aenm <- chnm <- NULL
112+
hitc <- aenm <- chnm <- zscore <- chid <- cyto_pt <- global_mad <- actc <- ac50 <- NULL
99113

100114
# check input
101115
if (!is.null(aeid) & !is.vector(aeid)) stop("'aeid' must be a vector.")
@@ -107,9 +121,7 @@ tcplVarMat <- function(dsstox_substance_id = NULL,
107121

108122
if (!all(add.vars %in% valid_var)) stop("Invald add.vars value(s).")
109123

110-
ac50str = ifelse(check_tcpl_db_schema(),"ac50","modl_ga")
111-
112-
std.vars <- c(ac50str, paste0(ac50str, "_verbose"), "acc", "acc_verbose", "hitc", "hitc.y")
124+
std.vars[std.vars == "ac50"] = ifelse(check_tcpl_db_schema(),"ac50","modl_ga")
113125
vars <- c(std.vars, add.vars)
114126

115127
## Load all possibilities to create matrix dimensions
@@ -157,8 +169,15 @@ tcplVarMat <- function(dsstox_substance_id = NULL,
157169
# subset to one sample per chemical
158170
mc5 <- tcplSubsetChid(dat = mc5, flag = flag)
159171

172+
# run tcplCytoPt
173+
if (is.null(cyto.pars)) cyto.pars <- list()
174+
zdst <- do.call(what = tcplCytoPt, args = cyto.pars)
175+
mc5 <- merge(zdst[, list(chid,cyto_pt,global_mad)], mc5, by = "chid")
176+
mc5[actc == TRUE, zscore := -(log10(ac50) - cyto_pt)/global_mad]
177+
mc5[actc == FALSE, zscore := NA]
178+
160179
# build matrices
161-
mc5 <- mc5[hitc %in% c(0,-1), c("ac50", "acc") := 1e6]
180+
mc5 <- mc5[actc == FALSE, c("ac50", "acc") := 1e6]
162181
long_sc2 <- sc2 |> group_by(dsstox_substance_id,aenm,chnm)
163182
if (nrow(long_sc2) > 0) {
164183
long_sc2 <- long_sc2 |> summarise(hitc = max(hitc)) |> filter(!is.na(dsstox_substance_id))
@@ -169,9 +188,11 @@ tcplVarMat <- function(dsstox_substance_id = NULL,
169188
var <- sub("_verbose", "", var)
170189
verbose = TRUE
171190
}
191+
mc_var <- sub("\\_mc|\\_sc", "", var)
192+
if (!mc_var %in% colnames(mc5)) stop(paste(mc_var, "is not a valid column in mc4 or mc5."))
172193
long_mc5 <- mc5 |> group_by(dsstox_substance_id,aenm,chnm) |>
173-
summarise(across(all_of(sub("\\.y", "", var)), mean)) |> filter(!is.na(dsstox_substance_id))
174-
long_all <- long_mc5 |> full_join(long_sc2, by = c("dsstox_substance_id","aenm", "chnm"))
194+
summarise(across(all_of(mc_var), mean)) |> filter(!is.na(dsstox_substance_id))
195+
long_all <- long_mc5 |> full_join(long_sc2, suffix = c("_mc", "_sc"), by = c("dsstox_substance_id","aenm", "chnm"))
175196
long_res <- if (substr(var, 1, 2) == "ac") long_all |>
176197
mutate("{var}" := case_when(is.na(get(var)) && hitc == 0 ~ 1e8,
177198
is.na(get(var)) && hitc == 1 ~ 1e7,
@@ -188,7 +209,7 @@ tcplVarMat <- function(dsstox_substance_id = NULL,
188209

189210
mat_list <- lapply(vars, build_matrix)
190211

191-
names(mat_list) = c("ac50", "ac50_verbose", "acc", "acc_verbose", "mc_hitc", "sc_hitc", add.vars)
212+
names(mat_list) <- vars
192213

193214
mat_list
194215

Diff for: man/tcplVarMat.Rd

+24-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)