|
| 1 | +#' Age distribution |
| 2 | +#' |
| 3 | +#' @param dl The data in long format |
| 4 | +#' @param type The type of plot to produce. Either "histogram", "hist", "h" or "ridge", "density", "r", "d". |
| 5 | +#' @param grouping_variable A string specifying the variable to group by (allowed values: "condition_id", "study_id", "paper_cond_id", or "paper_study_id"). |
| 6 | +#' |
| 7 | +#' @return A ggplot object. |
| 8 | +#' @export |
| 9 | +age <- function( |
| 10 | + dl, |
| 11 | + type = "histogram", |
| 12 | + grouping_variable = "study_id" |
| 13 | +) { |
| 14 | + # Process Data |
| 15 | + data_age <- dl |> |
| 16 | + filter(measure == "age") |> |
| 17 | + select(study_id, participant_id, value, measure) |> |
| 18 | + mutate( |
| 19 | + age = as.numeric(value), |
| 20 | + across(study_id, as.factor) |
| 21 | + ) |> |
| 22 | + filter(!is.na(age)) |
| 23 | + |
| 24 | + # Plot |
| 25 | + study_order <- data_age |> |
| 26 | + group_by(.data[[grouping_variable]]) |> |
| 27 | + summarise(mean_age = median(age)) |> |
| 28 | + arrange(desc(mean_age)) |> |
| 29 | + pull(.data[[grouping_variable]]) |
| 30 | + |
| 31 | + if (tolower(type) %in% c("histogram", "hist", "h")) { |
| 32 | + data_age <- data_age |> |
| 33 | + group_by(age, .data[[grouping_variable]]) |> |
| 34 | + summarise(n = n()) |
| 35 | + |
| 36 | + data_age[[grouping_variable]] <- factor( |
| 37 | + data_age[[grouping_variable]], |
| 38 | + levels = study_order |
| 39 | + ) |
| 40 | + |
| 41 | + graph <- data_age |> |
| 42 | + ggplot(aes(x = age, y = n)) + |
| 43 | + geom_bar( |
| 44 | + aes(fill = .data[[grouping_variable]]), |
| 45 | + stat = "identity", |
| 46 | + color = "white", |
| 47 | + linewidth = .2 |
| 48 | + ) + |
| 49 | + labs(x = "Age", y = "Number of Participants", fill = "Study ID") |
| 50 | + } else if (tolower(type) %in% c("ridge", "density", "r", "d")) { |
| 51 | + data_age[[grouping_variable]] <- factor( |
| 52 | + data_age[[grouping_variable]], |
| 53 | + levels = study_order |
| 54 | + ) |
| 55 | + |
| 56 | + graph <- data_age |> |
| 57 | + ggplot(aes( |
| 58 | + x = age, |
| 59 | + y = .data[[grouping_variable]], |
| 60 | + group = .data[[grouping_variable]], |
| 61 | + fill = .data[[grouping_variable]] |
| 62 | + )) + |
| 63 | + ggridges::geom_density_ridges() + |
| 64 | + labs(x = "Age", y = "Study ID", fill = "Study ID") + |
| 65 | + theme(legend.position = "none") |
| 66 | + } else { |
| 67 | + stop("unknown argument type") |
| 68 | + } |
| 69 | + return(graph) |
| 70 | +} |
| 71 | + |
| 72 | +#' Age descriptives |
| 73 | +#' |
| 74 | +#' @param dl The data in long format. |
| 75 | +#' |
| 76 | +#' @return A data frame with mean, sd, min and max age. |
| 77 | +#' @export |
| 78 | +ageDescriptives <- function(dl, grouping_variable = "study_id") { |
| 79 | + age <- dl |> |
| 80 | + filter(measure == "age") |> |
| 81 | + select(.data[[grouping_variable]], participant_id, value, measure) |> |
| 82 | + mutate( |
| 83 | + age = as.numeric(value), |
| 84 | + !!grouping_variable := as.factor(.data[[grouping_variable]]) |
| 85 | + ) |> |
| 86 | + filter(!is.na(age)) |> |
| 87 | + summarise( |
| 88 | + mean_age = mean(age), |
| 89 | + sd = sd(age), |
| 90 | + min = min(age), |
| 91 | + max = max(age) |
| 92 | + ) |
| 93 | + |
| 94 | + return(age) |
| 95 | +} |
0 commit comments