Skip to content

Commit fa38231

Browse files
committed
Add pivoting of the pop data
1 parent d0ec9e9 commit fa38231

3 files changed

Lines changed: 171 additions & 21 deletions

File tree

DESCRIPTION

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@ Imports:
1919
dplyr,
2020
fs,
2121
glue,
22+
janitor,
2223
phsmethods,
2324
readr,
2425
rlang,
25-
tibble
26+
tibble,
27+
tidyr
2628
Suggests:
2729
testthat (>= 3.0.0)
2830
Config/testthat/edition: 3

R/get_pop_est.R

Lines changed: 114 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,77 @@
11
#' Get population estimates
22
#'
3-
#' @param level one of "datazone", "intzone", "hscp", "ca" or "hb"
4-
#' @param version default is "latest"
5-
#' @param min_year,max_year (optional) filter years
6-
#' @param age_groups should age groups be used
7-
#' @param ... arguments passed to [phsmethods::create_age_groups()]
3+
#' This function retrieves population estimates based on various parameters.
4+
#' It reads population data from a specified file and filters it based on the
5+
#' input parameters. The function also allows for grouping by age and pivoting
6+
#' the data for wider format.
7+
#' @param level The geographic level for which to retrieve population estimates.
8+
#' One of "datazone", "intzone", "hscp", "ca", or "hb".
9+
#' @param version The version of the population estimates to use (default: "latest").
10+
#' @param min_year,max_year (optional) The minimum and maximum years to include in the results.
11+
#' @param age_groups Logical, indicating whether to aggregate population estimates by age groups.
12+
#' If `TRUE`, the `phsmethods::create_age_groups` function is used.
13+
#' @param pivot_wider Optionally reshape the data into a wider format, summarising population counts by the specified columns.
14+
#' Allowed values:
15+
#' * `FALSE` (default): Do not pivot.
16+
#' * `TRUE` or `"all"`: Pivot by both sex and age/age group.
17+
#' * `"age"`: Pivot by age/age group only.
18+
#' * `"age-only"`: Pivot by age/age group and aggregate to remove sex.
19+
#' * `"sex"`: Pivot by sex only.
20+
#' * `"sex-only"`: Pivot by sex group and aggregate to remove age/age group
21+
#' @param ... Additional arguments passed to [phsmethods::create_age_groups()].
22+
#'
23+
#' @return A tibble containing the filtered and possibly transformed population data.
24+
#'
25+
#' @note
26+
#' Depending on the values for `age_groups` and `pivot_wider`, the resulting
27+
#' columns in the returned tibble will vary. Refer to the examples below for
28+
#' illustration.
829
#'
9-
#' @return the pop data as a tibble
1030
#' @export
1131
#'
1232
#' @examples
33+
#' # Basic Usage: Datazone Population Estimates (no filtering)
1334
#' get_pop_est("datazone")
14-
#' get_pop_est("hb", min_year = 1995, max_year = 2020)
15-
#' get_pop_est("ca", age_groups = TRUE, by = 10)
35+
#'
36+
#' # Filter by Year:
37+
#' get_pop_est("ca", min_year = 1995, max_year = 2020)
38+
#'
39+
#' # Age Groups: Health Board (HB) Population Estimates by Age Group
40+
#' get_pop_est("hb", age_groups = TRUE)
41+
#'
42+
#' # Age Groups with Custom Settings:
43+
#' # Aggregate into 5-year age groups, with an open-ended final group "85+"
44+
#' get_pop_est("hb", age_groups = TRUE, by = 5, to = "85+")
45+
#'
46+
#' # Pivot Wider (All): CA Population Estimates, Reshaped by Sex and Age Group
47+
#' # The result will have columns for each combination of sex and age group,
48+
#' # e.g., "pop_f_0_4", "pop_m_5_9", etc.
49+
#' get_pop_est("ca", age_groups = TRUE, pivot_wider = "all")
50+
#'
51+
#' # Pivot Wider (Age Only): CA Population Estimates, Reshaped by Age Group Only
52+
#' # This is useful if you only need the total population for each age group, regardless of sex.
53+
#' get_pop_est("ca", age_groups = TRUE, pivot_wider = "age-only")
54+
#'
55+
#' # Combined Filtering, Age Groups, and Pivoting:
56+
#' # CA population from 2015-2020, aggregated by 10-year age groups, and pivoted by sex
57+
#' # The result will have columns for each sex ("pop_f", "pop_m") and a row per age group.
58+
#' get_pop_est("ca", min_year = 2015, max_year = 2020, age_groups = TRUE, by = 10, pivot_wider = "sex")
1659
get_pop_est <- function(
1760
level = c("datazone", "intzone", "hscp", "ca", "hb"),
1861
version = "latest",
1962
min_year = NULL,
2063
max_year = NULL,
2164
age_groups = FALSE,
65+
pivot_wider = FALSE,
2266
...) {
2367
level <- rlang::arg_match(level)
68+
if (!inherits(pivot_wider, "logical")) {
69+
pivot_wider <- rlang::arg_match(
70+
pivot_wider,
71+
values = c("all", "age", "age-only", "sex", "sex-only")
72+
)
73+
}
74+
2475
ext <- "rds"
2576
pop_dir <- fs::path(get_lookups_dir(), "Populations", "Estimates")
2677

@@ -64,14 +115,66 @@ get_pop_est <- function(
64115
}
65116

66117
if (age_groups) {
67-
pop_est <- pop_est %>%
118+
pop_est <- pop_est |>
68119
dplyr::mutate(
69120
age_group = phsmethods::create_age_groups(x = age, ...),
70121
.keep = "unused"
71-
) %>%
72-
dplyr::group_by(dplyr::across(!pop)) %>%
122+
) |>
123+
dplyr::group_by(dplyr::across(!pop)) |>
73124
dplyr::summarise(pop = sum(pop), .groups = "drop")
74125
}
75126

127+
if (pivot_wider %in% list(TRUE, "all")) {
128+
pop_est <- pop_est |>
129+
tidyr::pivot_wider(
130+
id_cols = -"sex",
131+
names_from = c(
132+
"sex_name",
133+
dplyr::if_else(age_groups, "age_group", "age")
134+
),
135+
values_from = "pop",
136+
names_prefix = "pop_",
137+
names_repair = janitor::make_clean_names
138+
)
139+
} else if (pivot_wider == "sex") {
140+
pop_est <- pop_est |>
141+
tidyr::pivot_wider(
142+
id_cols = c(-"sex", dplyr::if_else(age_groups, "age_group", "age")),
143+
names_from = "sex_name",
144+
values_from = "pop",
145+
names_prefix = "pop_",
146+
names_repair = janitor::make_clean_names
147+
)
148+
} else if (pivot_wider == "sex-only") {
149+
pop_est <- pop_est |>
150+
tidyr::pivot_wider(
151+
id_cols = c(-"sex", -dplyr::if_else(age_groups, "age_group", "age")),
152+
names_from = "sex_name",
153+
values_from = "pop",
154+
values_fn = sum,
155+
names_prefix = "pop_",
156+
names_repair = janitor::make_clean_names
157+
)
158+
} else if (pivot_wider == "age") {
159+
pop_est <- pop_est |>
160+
tidyr::pivot_wider(
161+
id_cols = c(-"sex", "sex_name"),
162+
names_from = dplyr::if_else(age_groups, "age_group", "age"),
163+
values_from = "pop",
164+
names_prefix = "pop_",
165+
names_repair = janitor::make_clean_names
166+
)
167+
} else if (pivot_wider == "age-only") {
168+
pop_est <- pop_est |>
169+
tidyr::pivot_wider(
170+
id_cols = c(-"sex", -"sex_name"),
171+
names_from = dplyr::if_else(age_groups, "age_group", "age"),
172+
values_from = "pop",
173+
values_fn = sum,
174+
names_prefix = "pop_",
175+
names_repair = janitor::make_clean_names
176+
)
177+
}
178+
76179
return(pop_est)
77180
}

man/get_pop_est.Rd

Lines changed: 54 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)