Skip to content

Commit a09fd5e

Browse files
authored
Merge pull request #325 from USEPA/311-update-tcplPlot-for-loec
Refactored tcplPlot and auxiliary functions to support any number of curves on a comparison plot
2 parents 0aad3da + 6b4364f commit a09fd5e

35 files changed

+3012
-1204
lines changed

Diff for: DESCRIPTION

+6-4
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ Imports:
4242
gridExtra,
4343
stringr,
4444
rlang,
45-
ctxR (>= 1.1.0)
45+
ctxR (>= 1.1.0),
46+
viridis,
47+
gt
4648
Suggests:
4749
roxygen2,
4850
knitr,
@@ -51,16 +53,16 @@ Suggests:
5153
htmlTable,
5254
testthat (>= 3.0.0),
5355
reshape2,
54-
viridis,
5556
kableExtra,
5657
colorspace,
5758
magrittr,
5859
vdiffr,
5960
httptest,
60-
rmdformats
61+
rmdformats,
62+
gtable
6163
License: MIT + file LICENSE
6264
LazyData: true
63-
RoxygenNote: 7.3.1
65+
RoxygenNote: 7.3.2
6466
VignetteBuilder: knitr
6567
Encoding: UTF-8
6668
Config/testthat/edition: 3

Diff for: NAMESPACE

+7
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ export(tcpldbStats)
6161
import(DBI)
6262
import(data.table)
6363
import(gridExtra)
64+
import(gt)
6465
import(stringr)
6566
importFrom(RColorBrewer,brewer.pal)
6667
importFrom(RMariaDB,MariaDB)
@@ -85,10 +86,12 @@ importFrom(dplyr,left_join)
8586
importFrom(dplyr,matches)
8687
importFrom(dplyr,mutate)
8788
importFrom(dplyr,mutate_if)
89+
importFrom(dplyr,n)
8890
importFrom(dplyr,pull)
8991
importFrom(dplyr,rowwise)
9092
importFrom(dplyr,select)
9193
importFrom(dplyr,summarise)
94+
importFrom(dplyr,summarize)
9295
importFrom(dplyr,tibble)
9396
importFrom(dplyr,ungroup)
9497
importFrom(ggplot2,aes)
@@ -103,6 +106,7 @@ importFrom(ggplot2,geom_vline)
103106
importFrom(ggplot2,ggplot)
104107
importFrom(ggplot2,ggsave)
105108
importFrom(ggplot2,guide_legend)
109+
importFrom(ggplot2,is.ggplot)
106110
importFrom(ggplot2,labs)
107111
importFrom(ggplot2,margin)
108112
importFrom(ggplot2,scale_color_manual)
@@ -157,6 +161,8 @@ importFrom(stats,median)
157161
importFrom(stats,optim)
158162
importFrom(stats,quantile)
159163
importFrom(stats,setNames)
164+
importFrom(stringr,str_count)
165+
importFrom(stringr,str_trunc)
160166
importFrom(tcplfit2,tcplfit2_core)
161167
importFrom(tcplfit2,tcplhit2_core)
162168
importFrom(tidyr,pivot_longer)
@@ -169,3 +175,4 @@ importFrom(utils,read.csv)
169175
importFrom(utils,read.table)
170176
importFrom(utils,tail)
171177
importFrom(utils,write.table)
178+
importFrom(viridis,viridis)

Diff for: R/mc5.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ mc5 <- function(ae, wr = FALSE) {
3333
modl_prob <- cnst_prob <- hill_er <- modl_tp <- modl_ga <- modl_gw <- NULL
3434
hill_rmse <- hill_prob <- modl_acb <- modl_acc <- gnls_er <- modl_la <- NULL
3535
gnls_la <- modl_lw <- gnls_lw <- gnls_rmse <- gnls_prob <- actp <- NULL
36-
modl_ac10 <- model_type <- NULL
36+
modl_ac10 <- model_type <- conc <- loec_hitc <- NULL
3737

3838
overwrite_osd <- FALSE
3939

Diff for: R/tcplLoadData.R

-3
Original file line numberDiff line numberDiff line change
@@ -218,14 +218,11 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
218218

219219
if (length(colnames(dat))) {
220220
if (lvl == 3 | lvl == "agg") {
221-
dat$resp <- lapply(dat$resp, unlist)
222-
dat$logc <- lapply(dat$logc, unlist)
223221
if (lvl == 3) dat <- unnest_longer(dat, c(conc, logc, resp)) %>% as.data.table()
224222
else dat <- unnest_longer(dat, c(logc, resp)) %>% as.data.table()
225223
}
226224

227225
if (lvl == 6) {
228-
dat$flag <- lapply(dat$flag, unlist)
229226
dat <- unnest_longer(dat, flag) %>% filter(flag != "NULL") %>% as.data.table()
230227
}
231228
}

Diff for: R/tcplPlot.R

+276-772
Large diffs are not rendered by default.

Diff for: R/tcplPlotLoadData.R

+45-10
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,14 @@
5959
#' @seealso \code{\link{tcplPlot}}
6060
#'
6161
#' @import data.table
62+
#' @importFrom dplyr mutate group_by summarize rowwise
63+
#' @importFrom stringr str_count
6264
#' @export
6365
tcplPlotLoadData <- function(type = "mc", fld = "m4id", val, flags = FALSE){
64-
#variable binding
65-
lvl <- m4id <- conc <- resp <- conc_unit <- NULL
66+
67+
# variable binding for R CMD check
68+
lvl <- m4id <- s2id <- conc <- resp <- conc_unit <- flag_count <- NULL
69+
bmd <- top <- bmr <- model_type <- coff <- max_med <- hitc <- NULL
6670

6771
# Validate vars based on some assumed properties
6872
validated_vars <- tcplPlotValidate(type = type,flags = flags)
@@ -73,13 +77,6 @@ tcplPlotLoadData <- function(type = "mc", fld = "m4id", val, flags = FALSE){
7377
dat <- tcplLoadData(lvl = lvl, fld = fld, val = val, type = type)
7478
if (nrow(dat) == 0) stop("No data for fld/val provided")
7579

76-
# set order to given order
77-
dat <- dat[order(match(get(fld[1]), if(is.list(val)) val[[1]] else val))]
78-
if (getOption("TCPL_DRVR") == "API" && tolower(fld) == "aeid") {
79-
dat <- dat %>% arrange(m4id)
80-
}
81-
dat$order <- 1:nrow(dat)
82-
8380
mcLoadDat <- function(m4id = NULL,flags) {
8481
l4 <- tcplLoadData(lvl = 4, fld = "m4id", val = m4id, add.fld = T)
8582
dat <- l4[dat, on = "m4id"]
@@ -103,8 +100,10 @@ tcplPlotLoadData <- function(type = "mc", fld = "m4id", val, flags = FALSE){
103100
if (getOption("TCPL_DRVR") != "API") {
104101
if (type == "mc") {
105102
dat <- mcLoadDat(dat$m4id,flags = flags)
103+
setorder(dat, m4id)
106104
agg <- tcplLoadData(lvl = "agg", fld = "m4id", val = dat$m4id)
107105
} else { # type == 'sc'
106+
setorder(dat, s2id)
108107
agg <- tcplLoadData(lvl = "agg", fld = "s2id", val = dat$s2id, type = "sc")
109108
}
110109

@@ -113,11 +112,15 @@ tcplPlotLoadData <- function(type = "mc", fld = "m4id", val, flags = FALSE){
113112

114113
#determine if we're single conc or multiconc based on dat
115114
join_condition <- c("m4id","s2id")[c("m4id","s2id") %in% colnames(dat)]
116-
conc_resp_table <- agg %>% group_by(.data[[join_condition]]) %>% summarise(conc = list(conc), resp = list(resp)) %>% as.data.table()
115+
conc_resp_table <- agg %>% group_by(.data[[join_condition]]) %>% summarize(conc = list(conc), resp = list(resp)) %>% as.data.table()
117116
dat <- dat[conc_resp_table, on = join_condition]
118117

118+
# get chemical and sample information
119119
dat <- tcplPrepOtpt(dat)
120120

121+
# determine missing chem info and replace with string description of well type(s)
122+
dat <- tcplPlotLoadWllt(dat, type)
123+
121124
} else {
122125
# fix flags from API for plotting
123126
if (flags == TRUE) {
@@ -129,6 +132,12 @@ tcplPlotLoadData <- function(type = "mc", fld = "m4id", val, flags = FALSE){
129132
dat$conc_unit <- dat$tested_conc_unit
130133
}
131134

135+
# add flag_count
136+
if (flags == TRUE) {
137+
dat$flag_count <- 0
138+
dat[flag != "None", flag_count := stringr::str_count(flag, "\n") + 1]
139+
}
140+
132141
# add normalized data type for y axis
133142
ndt <- tcplLoadAeid(fld = "aeid", val = dat$aeid, add.fld = "normalized_data_type")
134143
dat <- dat[ndt, on = "aeid"]
@@ -138,5 +147,31 @@ tcplPlotLoadData <- function(type = "mc", fld = "m4id", val, flags = FALSE){
138147
dat <- dat[conc_unit=="uM", conc_unit:="\u03BCM"]
139148
dat <- dat[conc_unit=="mg/l", conc_unit:="mg/L"]
140149

150+
# remove zero width space if exists
151+
dat$chnm <- gsub("\u200b", "", dat$chnm)
152+
153+
# replace null bmd in dat table
154+
dat <- dat[is.null(dat$bmd), bmd:=NA]
155+
156+
#replace null top with 0
157+
dat[is.null(dat$top), top := 0]
158+
dat[is.na(top), top := 0]
159+
160+
# correct bmr and coff direction
161+
if (type == "mc") {
162+
# if top if less than 0, flip bmr no matter what
163+
dat[top < 0, bmr := bmr * -1]
164+
# if model type is loss, flip cut off
165+
dat[model_type == 4, coff := coff * -1]
166+
# if model type is bidirectional, flip cut off if top is less than 0
167+
dat[model_type == 2 & top < 0, coff := coff * -1]
168+
} else { # sc
169+
# if max median is less than 0, flip cut off to align with it
170+
dat[max_med < 0, coff := coff * -1]
171+
# if hitc is less than 0, max median is in the opposite of intended direction,
172+
# so flip cut off (possibly again)
173+
dat[hitc < 0, coff := coff * -1]
174+
}
175+
141176
dat
142177
}

Diff for: R/tcplPlotUtils.R

+115-9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,46 @@
1+
#' tcplPlotLoadWllt
2+
#' Replaces NA dtxsid and chnm with a string description of the sample's well type(s)
3+
#'
4+
#' @param dat dataset
5+
#' @param type mc or sc
6+
#'
7+
#' @return dat with updated dtxsid/chnm if they are NA
8+
#' @import data.table
9+
#' @importFrom dplyr group_by summarize select left_join case_when
10+
tcplPlotLoadWllt <- function(dat = NULL, type = "mc") {
11+
12+
# variable binding for R CMD check
13+
dsstox_substance_id <- chnm <- aeid <- acid <- wllt <- wllt_desc <- NULL
14+
15+
# determine missing chemical info
16+
missing <- dat[is.na(dsstox_substance_id) | is.na(chnm), .(spid, aeid)]
17+
if (nrow(missing) > 0) {
18+
acid_map <- tcplLoadAcid(fld = "aeid", val = missing$aeid)[,.(aeid,acid)]
19+
missing <- missing[acid_map, on="aeid"]
20+
l0_dat <- tcplLoadData(type = type, lvl = 0, fld = list("spid","acid"),
21+
list(missing$spid, missing$acid))
22+
23+
# replace with string describing the well type(s)
24+
wllts <- l0_dat |> select(acid, spid, wllt) |> group_by(acid, spid) |>
25+
summarize(wllt_desc = dplyr::case_when(
26+
all(unique(wllt) %in% c('c', 'p')) ~ "Gain-of-signal control",
27+
all(unique(wllt) %in% c('m', 'o')) ~ "Loss-of-signal control",
28+
all(unique(wllt) == 'n') ~ "Neutral/negative control",
29+
all(unique(wllt) == 'b') ~ "Blank",
30+
all(unique(wllt) == 'v') ~ "Viability control",
31+
all(!is.na(unique(wllt))) && all(!is.null(unique(wllt))) ~ paste0("Well type: ", paste(unique(wllt), collapse = ", ")),
32+
.default = NA
33+
), .groups="drop") |> left_join(acid_map, by = "acid") |> as.data.table()
34+
no_wllt <- wllts[is.na(wllt_desc), spid]
35+
if (length(no_wllt)) warning(paste0("wllt for SPID(s): ", paste(no_wllt, collapse = ", "), " missing. Leaving dsstox_substance_id and chnm as NA."))
36+
dat <- left_join(dat, wllts, by = c("aeid", "spid"))
37+
dat[is.na(dsstox_substance_id) | is.na(chnm), c("dsstox_substance_id", "chnm") := wllt_desc]
38+
}
39+
dat
40+
41+
}
42+
43+
144
#' tcplPlotSetYRange
245
#'
346
#' @param dat dataset
@@ -47,17 +90,69 @@ tcplPlotSetYRange <- function(dat, yuniform, yrange, type) {
4790
yrange
4891
}
4992

93+
#' tcplPlotCalcAspectRatio
94+
#'
95+
#' @param type string of mc or sc indicating if it is single or multi conc
96+
#' @param verbose should the plot return a table with parameters
97+
#' @param multi Boolean, by default TRUE for "pdf" if the number of plots exceeds
98+
#' one. Prints variable number of plots per page depending on 'verbose' and 'type'
99+
#' settings.
100+
#' @param nrows Integer, number of rows each compare plot uses
101+
#' @param output How should the plot be presented. To work with the plot in
102+
#' environment, use "ggplot"; to interact with the plot in application, use
103+
#' "console"; or to save as a file type, use "pdf", "jpg", "png", "svg", or "tiff".
104+
#' @param group.threshold Integer of length 1, minimum number of samples in a
105+
#' given plot where comparison plots, instead of coloring models by sample, should
106+
#' delineate curve color by a given group.fld. By default 9.
107+
#' @param nrow Integer, number of rows in multiplot. By default 2.
108+
#' @param ncol Integer, number of columns in multiplot. By default 3, 2 if verbose,
109+
#' 1 for compare plots.
110+
#' @param flags Boolean, by default FALSE. If TRUE, level 6 flags are displayed
111+
#' within output.
112+
#'
113+
#' @return a list of validated parameters for plotting
114+
tcplPlotCalcAspectRatio <- function(type = "mc", verbose = FALSE, multi = FALSE, nrows = 0,
115+
output = c("ggplot", "console", "pdf", "png", "jpg", "svg", "tiff"),
116+
group.threshold = 9, nrow = NULL, ncol = NULL, flags = FALSE) {
117+
118+
# assign nrow = ncol = 1 for output="pdf" and multi=FALSE to plot one plot per page
119+
if(output == "pdf" && multi == FALSE)
120+
nrow <- ncol <- 1
121+
w <- 7
122+
h <- 5
123+
if (verbose && any(nrows > 1 & nrows < group.threshold))
124+
w <- 12
125+
if (type == "sc" && any(!verbose, all(nrows == 1 | nrows >= group.threshold)))
126+
w <- 5
127+
if (type == "sc")
128+
h <- 6
129+
if (verbose && any(nrows > 1 & nrows < group.threshold))
130+
h <- max(2 + max(nrows[nrows > 1 & nrows < group.threshold]), 6)
131+
if (any(nrows >= group.threshold) && flags)
132+
h <- max(h, 8)
133+
if(is.null(nrow))
134+
nrow <- max(round(10/h), 1)
135+
if(is.null(ncol))
136+
ncol <- max(round(14/w), 1)
137+
list(w=w,h=h,nrow=nrow,ncol=ncol)
138+
}
50139

51140
#' tcplPlotValidate
52141
#'
142+
#' @param dat data.table containing plot-prepared data
53143
#' @param type string of mc or sc indicating if it is single or multi conc
144+
#' @param compare Character vector, the field(s) to join samples on to create comparison
145+
#' plots
146+
#' @param by Parameter to divide files into e.g. "aeid".
54147
#' @param flags bool - should we return flags
55148
#' @param output how should the plot be formatted
56149
#' @param multi are there multiple plots
57150
#' @param verbose should the plot return a table with parameters
58151
#'
59152
#' @return a list of validated parameters for plotting
60-
tcplPlotValidate <- function(type = "mc", flags = NULL, output = "none", multi = NULL, verbose = FALSE) {
153+
tcplPlotValidate <- function(dat = NULL, type = "mc", compare = "m4id", by = NULL,
154+
flags = NULL, output = c("ggplot", "console", "pdf", "png", "jpg", "svg", "tiff"),
155+
multi = NULL, verbose = FALSE) {
61156
# set lvl based on type
62157
lvl <- 5
63158
if (type == "sc") {
@@ -66,28 +161,39 @@ tcplPlotValidate <- function(type = "mc", flags = NULL, output = "none", multi =
66161
warning("'flags' was set to TRUE - no flags exist for plotting single concentration")
67162
flags <- FALSE
68163
}
164+
if (compare == "m4id") compare <- "s2id"
69165
}
70-
71-
# default assign multi=TRUE for output="pdf"
72-
if (output == "pdf" && is.null(multi)) {
73-
multi <- TRUE
166+
167+
if (!is.null(by) && length(by) > 1) stop("'by' must be of length 1.")
168+
if (length(output) > 1) output <- output[1] else if (length(output) == 0) stop("'output' cannot be NULL")
169+
170+
if (!is.null(dat) && !is.data.table(dat)) {
171+
if (!is.list(dat) || !is.data.table(dat[[1]])) {
172+
stop("'dat' must be a data.table or a list of data.tables.")
173+
}
174+
if (!compare %in% c("m4id", "s2id")) {
175+
warning("'dat' provided as list of list of data tables, meaning compare plots are already subdivided. 'compare' field will be ignored and have no effect.")
176+
}
177+
if (!is.null(by)) {
178+
warning("Using 'by' can have unintended consequences when 'dat' is provided as a list of data.tables. Instead, consider adding a custom field to group comparison plots, and specify using the 'compare' parameter. Then, use 'by' to split plots into files.")
179+
}
74180
}
181+
75182
# forced assign multi=FALSE for output = c("console","png","jpg","svg","tiff"), verbose=FALSE for output="console"
76183
if (output != "pdf") {
77184
multi <- FALSE
78-
if (output == "console") {
79-
verbose <- FALSE
80-
}
185+
if (output == "console") verbose <- FALSE
81186
}
82187

83-
list(lvl = lvl, type = type, flags = flags, output = output, multi = multi, verbose = verbose)
188+
list(lvl = lvl, compare = compare, flags = flags, output = output, multi = multi, verbose = verbose)
84189
}
85190

86191

87192
#' tcplLegacyPlot
88193
#'
89194
#' @return a ggplot based on old plotting methodology
90195
tcplLegacyPlot <- function() {
196+
stop("Legacy plotting is currently unsupported.")
91197
# VARIABLE BINDING
92198
fld <- val <- lvl <- multi <- fileprefix <- NULL
93199
if (length(output) > 1) output <- output[1]

Diff for: R/tcplQueryAPI.R

+8-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,14 @@ tcplQueryAPI <- function(resource = "data", fld = NULL, val = NULL, return_flds
4444

4545
dat$dsstox_substance_id <- dat$dtxsid
4646

47-
# unlist logc to conc
48-
dat <- dat %>% rowwise() %>% mutate(conc = list(10^unlist(logc))) %>% as.data.table()
47+
# unlist logc, resp, flag, mc6mthdid
48+
unlist_cols <- c("logc", "resp", "flag", "mc6MthdId")
49+
for (c in unlist_cols) {
50+
dat[[c]] <- lapply(dat[[c]], unlist)
51+
}
52+
53+
# unlog logc to conc
54+
dat <- dat %>% rowwise() %>% mutate(conc = list(10^logc)) %>% as.data.table()
4955

5056
} else if (resource == "assay") {
5157

Diff for: R/tcplSubsetChid.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
tcplSubsetChid <- function(dat, flag = TRUE, type = "mc", export_ready = FALSE) {
5959
## Variable-binding to pass R CMD Check
6060
chit <- hitc <- aeid <- casn <- fitc <- fitc.ordr <- m4id <- nflg <- NULL
61-
chid <- conc <- minc <- NULL
61+
chid <- conc <- minc <- actc <- NULL
6262

6363
if (!type %in% c("mc", "sc")) {
6464
stop("type must be sc (single concentration) or mc (multi-concentration)")

0 commit comments

Comments
 (0)