|
1 | 1 | #' Get population estimates |
2 | 2 | #' |
3 | | -#' @param level one of "datazone", "intzone", "hscp", "ca" or "hb" |
4 | | -#' @param version default is "latest" |
5 | | -#' @param min_year,max_year (optional) filter years |
6 | | -#' @param age_groups should age groups be used |
7 | | -#' @param ... arguments passed to [phsmethods::create_age_groups()] |
| 3 | +#' This function retrieves population estimates based on various parameters. |
| 4 | +#' It reads population data from a specified file and filters it based on the |
| 5 | +#' input parameters. The function also allows for grouping by age and pivoting |
| 6 | +#' the data for wider format. |
| 7 | +#' @param level The geographic level for which to retrieve population estimates. |
| 8 | +#' One of "datazone", "intzone", "hscp", "ca", or "hb". |
| 9 | +#' @param version The version of the population estimates to use (default: "latest"). |
| 10 | +#' @param min_year,max_year (optional) The minimum and maximum years to include in the results. |
| 11 | +#' @param age_groups Logical, indicating whether to aggregate population estimates by age groups. |
| 12 | +#' If `TRUE`, the `phsmethods::create_age_groups` function is used. |
| 13 | +#' @param pivot_wider Optionally reshape the data into a wider format, summarising population counts by the specified columns. |
| 14 | +#' Allowed values: |
| 15 | +#' * `FALSE` (default): Do not pivot. |
| 16 | +#' * `TRUE` or `"all"`: Pivot by both sex and age/age group. |
| 17 | +#' * `"age"`: Pivot by age/age group only. |
| 18 | +#' * `"age-only"`: Pivot by age/age group and aggregate to remove sex. |
| 19 | +#' * `"sex"`: Pivot by sex only. |
| 20 | +#' * `"sex-only"`: Pivot by sex group and aggregate to remove age/age group |
| 21 | +#' @param ... Additional arguments passed to [phsmethods::create_age_groups()]. |
| 22 | +#' |
| 23 | +#' @return A tibble containing the filtered and possibly transformed population data. |
| 24 | +#' |
| 25 | +#' @note |
| 26 | +#' Depending on the values for `age_groups` and `pivot_wider`, the resulting |
| 27 | +#' columns in the returned tibble will vary. Refer to the examples below for |
| 28 | +#' illustration. |
8 | 29 | #' |
9 | | -#' @return the pop data as a tibble |
10 | 30 | #' @export |
11 | 31 | #' |
12 | 32 | #' @examples |
| 33 | +#' # Basic Usage: Datazone Population Estimates (no filtering) |
13 | 34 | #' get_pop_est("datazone") |
14 | | -#' get_pop_est("hb", min_year = 1995, max_year = 2020) |
15 | | -#' get_pop_est("ca", age_groups = TRUE, by = 10) |
| 35 | +#' |
| 36 | +#' # Filter by Year: |
| 37 | +#' get_pop_est("ca", min_year = 1995, max_year = 2020) |
| 38 | +#' |
| 39 | +#' # Age Groups: Health Board (HB) Population Estimates by Age Group |
| 40 | +#' get_pop_est("hb", age_groups = TRUE) |
| 41 | +#' |
| 42 | +#' # Age Groups with Custom Settings: |
| 43 | +#' # Aggregate into 5-year age groups, with an open-ended final group "85+" |
| 44 | +#' get_pop_est("hb", age_groups = TRUE, by = 5, to = "85+") |
| 45 | +#' |
| 46 | +#' # Pivot Wider (All): CA Population Estimates, Reshaped by Sex and Age Group |
| 47 | +#' # The result will have columns for each combination of sex and age group, |
| 48 | +#' # e.g., "pop_f_0_4", "pop_m_5_9", etc. |
| 49 | +#' get_pop_est("ca", age_groups = TRUE, pivot_wider = "all") |
| 50 | +#' |
| 51 | +#' # Pivot Wider (Age Only): CA Population Estimates, Reshaped by Age Group Only |
| 52 | +#' # This is useful if you only need the total population for each age group, regardless of sex. |
| 53 | +#' get_pop_est("ca", age_groups = TRUE, pivot_wider = "age-only") |
| 54 | +#' |
| 55 | +#' # Combined Filtering, Age Groups, and Pivoting: |
| 56 | +#' # CA population from 2015-2020, aggregated by 10-year age groups, and pivoted by sex |
| 57 | +#' # The result will have columns for each sex ("pop_f", "pop_m") and a row per age group. |
| 58 | +#' get_pop_est("ca", min_year = 2015, max_year = 2020, age_groups = TRUE, by = 10, pivot_wider = "sex") |
16 | 59 | get_pop_est <- function( |
17 | 60 | level = c("datazone", "intzone", "hscp", "ca", "hb"), |
18 | 61 | version = "latest", |
19 | 62 | min_year = NULL, |
20 | 63 | max_year = NULL, |
21 | 64 | age_groups = FALSE, |
| 65 | + pivot_wider = FALSE, |
22 | 66 | ...) { |
23 | 67 | level <- rlang::arg_match(level) |
| 68 | + if (!inherits(pivot_wider, "logical")) { |
| 69 | + pivot_wider <- rlang::arg_match( |
| 70 | + pivot_wider, |
| 71 | + values = c("all", "age", "age-only", "sex", "sex-only") |
| 72 | + ) |
| 73 | + } |
| 74 | + |
24 | 75 | ext <- "rds" |
25 | 76 | pop_dir <- fs::path(get_lookups_dir(), "Populations", "Estimates") |
26 | 77 |
|
@@ -64,14 +115,66 @@ get_pop_est <- function( |
64 | 115 | } |
65 | 116 |
|
66 | 117 | if (age_groups) { |
67 | | - pop_est <- pop_est %>% |
| 118 | + pop_est <- pop_est |> |
68 | 119 | dplyr::mutate( |
69 | 120 | age_group = phsmethods::create_age_groups(x = age, ...), |
70 | 121 | .keep = "unused" |
71 | | - ) %>% |
72 | | - dplyr::group_by(dplyr::across(!pop)) %>% |
| 122 | + ) |> |
| 123 | + dplyr::group_by(dplyr::across(!pop)) |> |
73 | 124 | dplyr::summarise(pop = sum(pop), .groups = "drop") |
74 | 125 | } |
75 | 126 |
|
| 127 | + if (pivot_wider %in% list(TRUE, "all")) { |
| 128 | + pop_est <- pop_est |> |
| 129 | + tidyr::pivot_wider( |
| 130 | + id_cols = -"sex", |
| 131 | + names_from = c( |
| 132 | + "sex_name", |
| 133 | + dplyr::if_else(age_groups, "age_group", "age") |
| 134 | + ), |
| 135 | + values_from = "pop", |
| 136 | + names_prefix = "pop_", |
| 137 | + names_repair = janitor::make_clean_names |
| 138 | + ) |
| 139 | + } else if (pivot_wider == "sex") { |
| 140 | + pop_est <- pop_est |> |
| 141 | + tidyr::pivot_wider( |
| 142 | + id_cols = c(-"sex", dplyr::if_else(age_groups, "age_group", "age")), |
| 143 | + names_from = "sex_name", |
| 144 | + values_from = "pop", |
| 145 | + names_prefix = "pop_", |
| 146 | + names_repair = janitor::make_clean_names |
| 147 | + ) |
| 148 | + } else if (pivot_wider == "sex-only") { |
| 149 | + pop_est <- pop_est |> |
| 150 | + tidyr::pivot_wider( |
| 151 | + id_cols = c(-"sex", -dplyr::if_else(age_groups, "age_group", "age")), |
| 152 | + names_from = "sex_name", |
| 153 | + values_from = "pop", |
| 154 | + values_fn = sum, |
| 155 | + names_prefix = "pop_", |
| 156 | + names_repair = janitor::make_clean_names |
| 157 | + ) |
| 158 | + } else if (pivot_wider == "age") { |
| 159 | + pop_est <- pop_est |> |
| 160 | + tidyr::pivot_wider( |
| 161 | + id_cols = c(-"sex", "sex_name"), |
| 162 | + names_from = dplyr::if_else(age_groups, "age_group", "age"), |
| 163 | + values_from = "pop", |
| 164 | + names_prefix = "pop_", |
| 165 | + names_repair = janitor::make_clean_names |
| 166 | + ) |
| 167 | + } else if (pivot_wider == "age-only") { |
| 168 | + pop_est <- pop_est |> |
| 169 | + tidyr::pivot_wider( |
| 170 | + id_cols = c(-"sex", -"sex_name"), |
| 171 | + names_from = dplyr::if_else(age_groups, "age_group", "age"), |
| 172 | + values_from = "pop", |
| 173 | + values_fn = sum, |
| 174 | + names_prefix = "pop_", |
| 175 | + names_repair = janitor::make_clean_names |
| 176 | + ) |
| 177 | + } |
| 178 | + |
76 | 179 | return(pop_est) |
77 | 180 | } |
0 commit comments