Skip to content

Commit dd327d0

Browse files
95_waterfall_plot_bugs (#100)
* 95_waterfall_plot_bugs gestion des NA dans target_sum_diff_first + création de test-waterfall-plot.R * select instead of rename, with all variables * fct_reorder2(.na_rm=FALSE) solves the bug * warn in case of NA * remove useless group aesthetic feels wrong in plotly * more versatile y label (not always target_sum_diff_first) * use .get_fill_scale() on the right column (not always named best_response) * dont check for missing values on shape * doc: improve help * test: add snapshots * tests: add tests on NA and on renaming * fix for chk --------- Co-authored-by: Dan Chaltiel <15105152+DanChaltiel@users.noreply.github.com>
1 parent 1eda0f0 commit dd327d0

File tree

9 files changed

+858
-80
lines changed

9 files changed

+858
-80
lines changed

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ importFrom(dplyr,desc)
5656
importFrom(dplyr,distinct)
5757
importFrom(dplyr,ends_with)
5858
importFrom(dplyr,filter)
59+
importFrom(dplyr,if_any)
5960
importFrom(dplyr,if_else)
6061
importFrom(dplyr,intersect)
6162
importFrom(dplyr,lag)
@@ -78,6 +79,7 @@ importFrom(dplyr,setdiff)
7879
importFrom(dplyr,slice_min)
7980
importFrom(dplyr,summarise)
8081
importFrom(dplyr,transmute)
82+
importFrom(dplyr,where)
8183
importFrom(flextable,align)
8284
importFrom(flextable,as_chunk)
8385
importFrom(flextable,as_flextable)
@@ -106,6 +108,7 @@ importFrom(forcats,fct_drop)
106108
importFrom(forcats,fct_infreq)
107109
importFrom(forcats,fct_relevel)
108110
importFrom(forcats,fct_reorder)
111+
importFrom(forcats,fct_reorder2)
109112
importFrom(forcats,fct_rev)
110113
importFrom(fs,dir_create)
111114
importFrom(fs,dir_ls)

R/waterfall_plot.R

Lines changed: 55 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,57 @@
11

2-
#' Waterfall plot for RECIST data
2+
#' Waterfall plot for RECIST best response data
33
#'
4-
#' Creates a waterfall plot showing the change from baseline in target lesion size
5-
#' for individual patients, optionally grouped by treatment arm.
4+
#' Creates a waterfall plot showing the best percent change from baseline in
5+
#' target lesion size for each subject.
66
#'
7-
#' @param data A dataset containing RECIST best response data. Use [calc_best_response] to format your raw data.
8-
#' @param ... Not used. Ensures that only named arguments are passed.
9-
#' @param y The column representing the numeric outcome (typically change in tumor size). Default is `"target_sum_diff_first"`.
10-
#' @param fill The column indicating the filling color. Default is `"best_response"`, the best response category.
11-
#' @param shape The column to use for an shape layer (e.g., indicating mutation status).
12-
#' @param arm The column indicating treatment arms for faceting.
13-
#' @param subjid The column identifying subjects. Default is `"SUBJID"`.
14-
#' @param resp_colors Colors assigned to response categories.
15-
#' @param warnings Whether to display warnings.
7+
#' The input data must contain **one row per subject**, use [calc_best_response()]
8+
#' to convert from long-format RECIST data.
9+
#'
10+
#' Bars are drawn for individual subjects, optionally faceted by treatment arm.
11+
#' Horizontal dashed reference lines are added at -30\% and +20\%, corresponding
12+
#' to common RECIST response thresholds.
1613
#'
17-
#' @return A `ggplot` object representing a waterfall plot of tumor size change by patient.
14+
#' @param data A data frame with one row per subject, typically produced by
15+
#' [calc_best_response()].
16+
#' @param ... Not used. Ensures that only named arguments are passed.
17+
#' @param y Name of the numeric column used for the bar height. Defaults to
18+
#' `"target_sum_diff_first"`.
19+
#' @param fill Name of the categorical column used for bar fill color. Defaults
20+
#' to `"best_response"`.
21+
#' @param shape Optional name of a categorical column used to add a symbol
22+
#' above or below each bar.
23+
#' @param arm Optional name of a column used to facet the plot by treatment arm.
24+
#' @param subjid Name of the subject identifier column. Defaults to `"SUBJID"`.
25+
#' @param resp_colors Named vector of colors used for RECIST response categories.
26+
#' @param warnings Logical. If `TRUE`, warnings are emitted when missing values
27+
#' are detected in plotted variables.
1828
#'
1929
#' @export
20-
#' @importFrom dplyr all_of mutate rename
30+
#' @importFrom dplyr all_of if_any mutate rename where
31+
#' @importFrom forcats fct_reorder2
2132
#' @importFrom ggplot2 aes facet_wrap geom_col geom_hline ggplot labs scale_fill_manual scale_x_discrete scale_y_continuous theme_minimal
2233
#' @importFrom rlang check_dots_empty
2334
#' @importFrom scales breaks_width label_percent
35+
#' @importFrom cli cli_warn
36+
#'
37+
#' @return A `ggplot` object.
38+
#'
39+
#' @seealso [calc_best_response()]
2440
#'
2541
#' @examples
2642
#' db = grstat_example(N=50)
2743
#' data_best_resp = calc_best_response(db$recist)
2844
#'
29-
#' #simple example
45+
#' # Basic waterfall plot
3046
#' waterfall_plot(data_best_resp)
3147
#'
32-
#' #facet by arm
48+
#' # Facet by arm
3349
#' data_best_resp %>%
3450
#' dplyr::left_join(db$enrolres, by="subjid") %>%
3551
#' waterfall_plot(arm="ARM")
3652
#'
3753
#'
38-
#' #add symbols
39-
#' #use the NA level to not show the case
54+
#' # Add symbols
4055
#' set.seed(0)
4156
#' data_symbols = db$recist %>%
4257
#' dplyr::summarise(
@@ -53,7 +68,6 @@
5368
#' dplyr::left_join(data_symbols, by="subjid") %>%
5469
#' waterfall_plot(shape="example_event") +
5570
#' ggplot2::labs(shape="Event")
56-
#'
5771
waterfall_plot = function(data, ...,
5872
y="target_sum_diff_first", fill="best_response",
5973
shape=NULL, arm=NULL, subjid="SUBJID",
@@ -70,25 +84,38 @@ waterfall_plot = function(data, ...,
7084
check_dots_empty()
7185
assert_names_exists(data, c(y, fill, subjid))
7286

87+
y_lab = "Target lesions reduction from baseline"
88+
if(y!="target_sum_diff_first") y_lab=y
7389
fill_lab = "Best Global Response \n(RECIST v1.1)"
74-
fill_scale = .get_fill_scale(data, resp_colors)
75-
7690

7791
db_wf = data %>%
78-
rename(shape=any_of2(shape), resp=all_of(fill), y=all_of(y)) %>%
79-
mutate(subjid = forcats::fct_reorder2(as.character(subjid),
80-
as.numeric(resp), y))
92+
select(subjid=any_of2(subjid), shape=any_of2(shape), arm=any_of2(arm),
93+
resp=all_of(fill), y=all_of(y)) %>%
94+
mutate(subjid = fct_reorder2(as.character(subjid), as.numeric(resp),
95+
y, .na_rm=FALSE))
96+
fill_scale = .get_fill_scale(db_wf, resp_colors)
8197

98+
db_wf_missing = db_wf %>%
99+
filter(if_any(-any_of("shape"), ~is.na(.x) & !is.nan(.x))) %>%
100+
select(subjid, where(~any(is.na(.x) & !is.nan(.x))), -any_of("shape"))
101+
if(nrow(db_wf_missing) > 0 && warnings){
102+
cli_warn(c("!" = "Missing values detected in {.fun waterfall_plot}.",
103+
"i" = "Subjects with missing values: {.val {db_wf_missing$subjid}}.",
104+
"i" = "Columns with missing values: {.val {colnames(db_wf_missing)[-1]}}."),
105+
class="waterfall_plot_missing_warning")
106+
}
107+
82108
p =
83109
db_wf %>%
84-
ggplot(aes(x=subjid, y=y, group=subjid, fill=resp)) +
110+
ggplot() +
111+
aes(x=subjid, y=y, fill=resp) +
85112
geom_hline(yintercept=c(-.3, .2), linetype="dashed") +
86113
geom_col(color='black') +
87114
.get_shape_layer(shape, shape_nudge=0.05) +
88115
scale_x_discrete(labels = NULL, breaks = NULL) +
89116
scale_y_continuous(labels=label_percent(), breaks=breaks_width(0.2)) +
90117
scale_fill_manual(values=fill_scale) +
91-
labs(x = "", y="Target lesions reduction from baseline", fill=fill_lab) +
118+
labs(x = "", y=y_lab, fill=fill_lab) +
92119
theme_minimal() +
93120
guides(
94121
color = guide_legend(order = 1),
@@ -135,8 +162,8 @@ waterfall_plot = function(data, ...,
135162
resp_colors = c("CR"="#42b540", "PR"="#006dd8", "SD"="#925e9f", "PD"="#ed0000", "NA"="white")
136163
resp_colors = resp_colors[c("CR", "PR", "SD", "PD", "NA")]
137164
fill_scale = data %>%
138-
distinct(best_response, resp_num = .recist_to_num(best_response)) %>%
165+
distinct(resp, resp_num = .recist_to_num(resp)) %>%
139166
mutate(color=resp_colors[resp_num]) %>%
140-
select(best_response, color) %>%
167+
select(resp, color) %>%
141168
deframe()
142169
}

man/waterfall_plot.Rd

Lines changed: 31 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)