dv.edish/R/helper_functions.R at 83ae9105b77b2460482aa722a4ea1a6507a53770 · Boehringer-Ingelheim/dv.edish · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
utils::globalVariables(".")

#' Prepare initial data
#'
#' `prepare_initial_data()` prepares data initially by restructuring
#' and joining DM and LB dataset into one.
#'
#' @param dataset_list `[list(data.frame))]`
#'
#' A list of datasets, containing a Demographics and a Lab Value dataset.
#' @param subjectid_var `[character(1)]`
#'
#' Name of the variable containing the unique subject IDs.
#' @param arm_var `[character(1)]`
#'
#' Name of the variable containing the arm/treatment information.
#' @param visit_var `[character(1)]`
#'
#' Name of the variable containing the visit information.
#' @param baseline_visit_val `[character(1)]`
#'
#' Character indicating which visit should be used as baseline visit.
#' @param lb_test_var `[character(1)]`
#'
#' Name of the variable containing the laboratory test information.
#' @param lb_test_choices `[character(1+)]`
#'
#' Character vector specifying the possible choices of the laboratory test.
#' @param lb_result_var `[character(1)]`
#'
#' Name of the variable containing results of the laboratory test.
#' @param ref_range_upper_lim_var `[character(1)]`
#'
#' Name of the variable containing the reference range upper limits.
#'
#' @return A single dataframe including columns defined by `subjectid_var`,
#' `arm_var`, `visit_var`, `lb_test_var`, `lb_result_var`, and `ref_range_upper_lim_var`,
#' as well as the column "BASE" containing the corresponding baseline values.
#' In case of multiple values in `lb_result_var` per `subjectid_var`, `visit_var`, and
#' `lb_test_var`, only the maximum value will be used. Note that a NA value in the considered values
#' will cause a value of NA to be returned as maximum value.
#'
#' @importFrom rlang .data
#' @keywords internal
prepare_initial_data <- function(
    dataset_list,
    subjectid_var,
    arm_var,
    visit_var,
    baseline_visit_val,
    lb_test_var,
    lb_test_choices,
    lb_result_var,
    ref_range_upper_lim_var) {
  sel_dataset_list <- lapply(dataset_list, function(x) {
    x %>%
      dplyr::select(
        dplyr::any_of(
          c(
            subjectid_var,
            arm_var,
            visit_var,
            lb_test_var,
            lb_result_var,
            ref_range_upper_lim_var
          )
        )
      )
  })

  dataset <- Reduce(dplyr::full_join, sel_dataset_list) %>%
    dplyr::filter(.data[[lb_test_var]] %in% lb_test_choices) %>%
    dplyr::group_by(.data[[subjectid_var]], .data[[arm_var]], .data[[lb_test_var]], .data[[visit_var]]) %>%
    dplyr::filter(.data[[lb_result_var]] == max(.data[[lb_result_var]], na.rm = TRUE)) %>%
    dplyr::distinct() %>%
    dplyr::ungroup()

  base_data <- dataset %>%
    dplyr::filter(.data[[visit_var]] == baseline_visit_val) %>%
    dplyr::mutate(BASE = .data[[lb_result_var]]) %>%
    dplyr::select(dplyr::all_of(c(subjectid_var, lb_test_var, arm_var, "BASE")))

  dataset <- dataset %>%
    dplyr::left_join(base_data, by = c(subjectid_var, lb_test_var, arm_var))

  return(dataset)
}


#' Filter data
#'
#' `filter_data()` filters `dataset` to only contain the values of `sel_lb_test`
#' in the `lb_test_var` column and the values of `sel_arm` in the `arm_var` column.
#'
#' @param dataset `[data.frame]`
#'
#' A dataframe containing the columns specified by `lb_test_var` and `arm_var`.
#' @param arm_var `[character(1)]`
#'
#' Name of the variable containing the arm/treatment information.
#' @param sel_arm `[character(1+)]`
#'
#' Character vector specifying a selection of arms/treatments.
#' @param lb_test_var `[character(1)]`
#'
#' Name of the variable containing the laboratory test information.
#' @param sel_lb_test `[character(1+)]`
#'
#' Character vector specifying a selection of laboratory tests.
#'
#' @return The filtered dataset.
#'
#' @keywords internal
filter_data <- function(dataset, arm_var, sel_arm, lb_test_var, sel_lb_test) {
  dataset <- dataset %>%
    dplyr::filter(
      .data[[lb_test_var]] %in% sel_lb_test,
      .data[[arm_var]] %in% sel_arm
    )

  return(dataset)
}


#' Derive required variables
#'
#' `derive_req_vars()` restructures the stated dataset to include variables containing
#' the ratio of a lab result divided by ULN or the baseline value. The corresponding variable
#' names are shaped as follows: "r_<ULN or Baseline>_<selected lab test>.
#'
#' @param dataset `[data.frame]`
#'
#' A data frame containing the variables listed below as columns.
#' @param subjectid_var `[character(1)]`
#'
#' Name of the variable containing the unique subject IDs.
#' @param arm_var `[character(1)]`
#'
#' Name of the variable containing the arm/treatment information.
#' @param visit_var `[character(1)]`
#'
#' Name of the variable containing the visit information.
#' @param lb_test_var `[character(1)]`
#'
#' Name of the variable containing the laboratory test information.
#' @param lb_result_var `[character(1)]`
#'
#' Name of the variable containing results of the laboratory test.
#' @param ref_range_upper_lim_var `[character(1)]`
#'
#' Name of the variable containing the reference range upper limits.
#' @param sel_x `[character(1)]`
#'
#' Character specifying the laboratory test selected for the x-axis.
#' @param sel_y `[character(1)]`
#'
#' Character specifying the laboratory test selected for the y-axis.
#'
#' @return The restructured dataset.
#'
#' @keywords internal
derive_req_vars <- function(
    dataset,
    subjectid_var,
    arm_var,
    visit_var,
    lb_test_var,
    lb_result_var,
    ref_range_upper_lim_var,
    sel_x,
    sel_y) {
  if (nrow(dataset) == 0) {
    return(NULL)
  }

  # Get the data frame in required structure (Pivot wider grouped by certain variables)
  dataset <- dataset %>%
    dplyr::filter(.data[[lb_test_var]] %in% c(sel_x, sel_y)) %>%
    dplyr::mutate(
      r_ULN = .data[[lb_result_var]] / .data[[ref_range_upper_lim_var]],
      r_Baseline = .data[[lb_result_var]] / .data[["BASE"]]
    ) %>%
    dplyr::select(dplyr::all_of(c(subjectid_var, arm_var, lb_test_var, visit_var, "r_ULN", "r_Baseline"))) %>%
    dplyr::group_by(.data[[subjectid_var]], .data[[arm_var]], .data[[lb_test_var]], .data[[visit_var]]) %>%
    dplyr::mutate(row = dplyr::row_number()) %>%
    tidyr::pivot_wider(names_from = tidyr::all_of(lb_test_var), values_from = c("r_ULN", "r_Baseline")) %>%
    dplyr::select(-dplyr::all_of("row")) %>%
    dplyr::mutate(
      "r_ULN_{{sel_x}}" = as.numeric(.data[[paste0("r_ULN_", sel_x)]]),
      "r_ULN_{{sel_y}}" = as.numeric(.data[[paste0("r_ULN_", sel_y)]]),
      "r_Baseline_{{sel_x}}" = as.numeric(.data[[paste0("r_Baseline_", sel_x)]]),
      "r_Baseline_{{sel_y}}" = as.numeric(.data[[paste0("r_Baseline_", sel_y)]])
    )

  return(dataset)
}


#' Generate plot
#'
#' `generate_plot()` generates an eDISH plot by means of the \pkg{plotly} package.
#'
#' @param dataset `[data.frame]`
#'
#' A data frame containing the variables listed below as columns.
#' @param subjectid_var `[character(1)]`
#'
#' Name of the variable containing the unique subject IDs.
#' @param arm_var `[character(1)]`
#'
#' Name of the variable containing the arm/treatment information.
#' @param visit_var `[character(1)]`
#'
#' Name of the variable containing the visit information.
#' @param sel_x `[character(1)]`
#'
#' Character specifying the laboratory test to be displayed on the x-axis.
#' @param sel_y `[character(1)]`
#'
#' Character specifying the laboratory test to be displayed on the y-axis.
#' @param x_plot_type `[character(1)]`
#'
#' Character specifying the plot type for the x-axis. This leads to
#' using the `dataset`'s column "r_<x_plot_type>_<x_sel>" for the x-values.
#' @param y_plot_type `[character(1)]`
#'
#' Character specifying the plot type for the y-axis. This leads to
#' using the `dataset`'s column "r_<y_plot_type>_<y_sel>" for the y-values.
#' @param x_ref_line_num `[numeric(1)]`
#'
#' Numeric specifying the reference line for the x-axis.
#' @param y_ref_line_num `[numeric(1)]`
#'
#' Numeric specifying the reference line for the y-axis.
#' @param x_rng_lower `[numeric(1)]`
#'
#' Numeric specifying the lower limit in the x-axis range.
#' @param x_rng_upper `[numeric(1)]`
#'
#' Numeric specifying the upper limit in the x-axis range.
#' @param y_rng_lower `[numeric(1)]`
#'
#' Numeric specifying the lower limit in the y-axis range.
#' @param y_rng_upper `[numeric(1)]`
#'
#' Numeric specifying the upper limit in the y-axis range.
#'
#' @return A plotly object specifying the generated eDISH plot.
#'
#' @keywords internal
generate_plot <- function(
    dataset,
    subjectid_var,
    arm_var,
    visit_var,
    sel_x,
    sel_y,
    x_plot_type,
    y_plot_type,
    x_ref_line_num,
    y_ref_line_num,
    x_rng_lower,
    x_rng_upper,
    y_rng_lower,
    y_rng_upper) {
  if (is.null(dataset)) {
    return(dataset)
  }

  # Prepare x-axis layout based on whether range has been specified
  layout_xaxis <- list(title = paste0(sel_x, "/", x_plot_type))
  if (!is.null(x_rng_lower) && !is.null(x_rng_upper)) {
    layout_xaxis <- c(layout_xaxis,
                      list(range = c(x_rng_lower, x_rng_upper)))
  }

  # Prepare y-axis layout based on whether range has been specified
  layout_yaxis <- list(title = paste0(sel_y, "/", y_plot_type))
  if (!is.null(y_rng_lower) && !is.null(y_rng_upper)) {
    layout_yaxis <- c(layout_yaxis,
                      list(range = c(y_rng_lower, y_rng_upper)))
  }

  plt_obj <- dataset %>%
    plotly::plot_ly(type = "scatter", mode = "markers", color = .[[arm_var]]) %>%
    plotly::add_trace(
      x = ~ .data[[paste0("r_", x_plot_type, "_", sel_x)]],
      y = ~ .data[[paste0("r_", y_plot_type, "_", sel_y)]],
      hovertext = ~ paste0(
        "Subject: ", .data[[subjectid_var]],
        "<br>Arm: ", .data[[arm_var]],
        "<br>Visit: ", .data[[visit_var]],
        "<br>x-axis: ", round(.data[[paste0("r_", x_plot_type, "_", sel_x)]], digits = 3),
        "<br>y-axis: ", round(.data[[paste0("r_", y_plot_type, "_", sel_y)]], digits = 3)
      ),
      hoverinfo = "text"
    ) %>%
    plotly::layout(
      xaxis = layout_xaxis,
      yaxis = layout_yaxis,
      shapes = list(
        list( # vline
          type = "line",
          y0 = 0,
          y1 = 1,
          yref = "paper",
          x0 = x_ref_line_num,
          x1 = x_ref_line_num,
          line = list(color = "gray", dash = "dot")
        ),
        list( # hline
          type = "line",
          x0 = 0,
          x1 = 1,
          xref = "paper",
          y0 = y_ref_line_num,
          y1 = y_ref_line_num,
          line = list(color = "gray", dash = "dot")
        )
      )
    )

  return(plt_obj)
}