Skip to content
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export(get_boxi_extract_path)
export(get_byoc_output_files)
export(get_ch_costs_path)
export(get_combined_slf_deaths_lookup_path)
export(get_datazone_pop_data)
export(get_dd_path)
export(get_dd_period)
export(get_demographic_cohorts_path)
Expand All @@ -60,6 +61,7 @@ export(get_it_deaths_path)
export(get_it_ltc_path)
export(get_it_prescribing_path)
export(get_la_code_opendata_lookup)
export(get_locality_data)
export(get_locality_path)
export(get_lookups_dir)
export(get_ltcs_path)
Expand All @@ -76,6 +78,7 @@ export(get_sc_hc_episodes_path)
export(get_sc_sds_episodes_path)
export(get_service_use_cohorts_path)
export(get_sg_homelessness_pub_path)
export(get_simd_data)
export(get_simd_path)
export(get_slf_ch_name_lookup_path)
export(get_slf_chi_deaths_path)
Expand Down
34 changes: 19 additions & 15 deletions R/add_keep_population_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,28 @@
#'
#' @return A data frame with keep_population flags
#' @family individual_file
add_keep_population_flag <- function(individual_file, year) {
calendar_year <- paste0("20", substr(year, 1, 2)) %>% as.integer()
add_keep_population_flag <- function(individual_file,
year,
pop_estimates = get_datazone_pop_data(
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
file_path = get_pop_path(type = "datazone"),
BYOC_MODE
),
locality_data = get_locality_data(
denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
file_path = get_locality_path(),
BYOC_MODE
),
BYOC_MODE) {
# TODO: Check arguments - do get_datazone_pop_data and get_locality_data just need BYOC_MODE?

@LucyEmma22 LucyEmma22 Jun 2, 2026

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do get_datazone_pop_data and get_locality_data need all arguments to be written or is BYOC_MODE sufficient? I.e.

add_keep_population_flag <- function(individual_file,
                                     year,
                                     pop_estimates = get_datazone_pop_data(BYOC_MODE = BYOC_MODE),
                                     locality_data = get_locality_data(BYOC_MODE = BYOC_MODE),
                                     BYOC_MODE) {

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

process_lookup_sc_demographics() in #1313 just uses the BYOC_MODE argument for spd_data, uk_postcode_data and ch_name_lookup.

calendar_year <- paste0("20", substr(year, 1, 2)) %>%
as.integer()

if (!check_year_valid(year, "nsu")) {
individual_file <- individual_file %>%
dplyr::mutate(keep_population = 1L)
} else {
## Obtain the population estimates for Locality AgeGroup and Gender.
pop_estimates <-
readr::read_rds(get_pop_path(type = "datazone")) %>%
dplyr::select(
.data$year,
.data$datazone2011,
.data$sex,
.data$age0:.data$age90plus
)

# Step 1: Obtain the population estimates for Locality, AgeGroup, and Gender
# Select out the estimates for the year of interest.
# if we don't have estimates for this year (and so have to use previous year).
Expand Down Expand Up @@ -55,8 +60,7 @@ add_keep_population_flag <- function(individual_file, year) {
dplyr::mutate(age = as.integer(.data$age)) %>%
add_age_group(.data$age) %>%
dplyr::left_join(
readr::read_rds(get_locality_path()) %>%
dplyr::select("locality" = "hscp_locality", .data$datazone2011),
locality_data,
by = "datazone2011"
) %>%
dplyr::group_by(.data$locality, .data$age_group, .data$gender) %>%
Expand Down Expand Up @@ -139,7 +143,7 @@ add_keep_population_flag <- function(individual_file, year) {
)
}

cli::cli_alert_info("Add keep population function finished at {Sys.time()}")
cli::cli_alert_info("Add keep population function finished at {Sys.time()}") # TODO: Is this being kept or changed with a logger_utils function?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the log message being kept, or replaced with a logger_utils function?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy for us to change it to a logger message. When I made the change of our logs to logger, I changed some of the cli:: alerts to logger since some of them sometimes don't even print in the console so I'm happy for us to change it to logger::log_info instead.

return(individual_file)
}
Expand Down
22 changes: 9 additions & 13 deletions R/fill_geographies.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@
#' Postcode and GP Practice details.
fill_geographies <- function(
data,
slf_pc_lookup = read_file(get_slf_postcode_path()),
slf_pc_lookup = read_file(
get_slf_postcode_path(BYOC_MODE = BYOC_MODE)
),
slf_gpprac_lookup = read_file(
get_slf_gpprac_path(),
get_slf_gpprac_path(BYOC_MODE = BYOC_MODE),

@OluwatobiOni OluwatobiOni Jun 5, 2026

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Lucy! No need for me to do this on my branch then.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merge order: spd -> simd_locality -> gpprac_lookup

col_select = c("gpprac", "cluster", "hbpraccode")
)
),
BYOC_MODE = BYOC_MODE
) {
check_variables_exist(data, c(
"anon_chi",
Expand All @@ -29,17 +32,10 @@ fill_geographies <- function(
))

data <- data %>%
fill_postcode_geogs(
slf_pc_lookup = read_file(get_slf_postcode_path())
) %>%
fill_gpprac_geographies(
slf_gpprac_lookup = read_file(
get_slf_gpprac_path(),
col_select = c("gpprac", "cluster", "hbpraccode")
)
)
fill_postcode_geogs(slf_pc_lookup) %>%
fill_gpprac_geographies(slf_gpprac_lookup)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Lucy! No need for me to do this on my branch then.


cli::cli_alert_info("Fill geographies function finished at {Sys.time()}")
cli::cli_alert_info("Fill geographies function finished at {Sys.time()}") # TODO: Is this being kept or changed with a logger_utils function?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy for us to change it to a logger message. When I made the change of our logs to logger, I changed some of the cli:: alerts to logger since some of them sometimes don't even print in the console so I'm happy for us to change it to logger::log_info instead.

return(data)
}
Expand Down
151 changes: 150 additions & 1 deletion R/get_lookup_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ get_lookups_dir <- function() {
#'
#' @inheritParams get_file_path
#'
#' @return An [fs::path()] to the Scottish Postcode Directory
#' @return An [fs::path()] to the HSCP Localities file
#' @export
#'
#' @family lookup file paths
Expand All @@ -37,6 +37,50 @@ get_locality_path <- function(file_name = NULL, ext = "rds") {
}


#' Locality data
#'
#' @description Return the data for centrally held HSCP Localities file.
#'
#' @param denodo_connect Connection to denodo
#' @param file_path Path to local HSCP Localities file
#' @param BYOC_MODE BYOC MODE
#'
#' @return a [tibble][tibble::tibble-package].
#' @export
#'
#' @family lookup files
get_locality_data <- function(denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
file_path = get_locality_path(),
BYOC_MODE) {

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need file_path = get_locality_path() and denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE) as arguments? denodo_connect is only used if BYOC_MODE = TRUE and file_path is only used if BYOC_MODE = FALSE so we could just have get_locality_data <- function(BYOC_MODE) then define file_path and denodo_connect within the ifelse statement? They always take the same value so is there a benefit to having them as arguments? There may be other reasons to keep them that I have not thought about.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment also applies to get_simd_data() and get_datazone_pop_data().

if (isTRUE(BYOC_MODE)) {
log_slf_event(stage = "read", status = "start", type = "HSCP Localities Lookup", year = "all") # TODO: Check whether to add hscp_locality to log_slf_event mapping list

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add hscp_locality / simd / datazone_pop to the log_slf_event mapping list?

"hscp_locality" ~ "HSCP Localities Lookup",
"simd" ~ "SIMD Lookup",
"datazone_pop" ~ "DataZone Population Lookup"

on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE)

locality_data <- dplyr::tbl(
denodo_connect,
dbplyr::in_schema("sdl", "sdl_hscp_locality_source") # TODO: Check table name
) %>%
dplyr::select(
locality = "hscp_locality",
tidyselect::matches("datazone\\d{4}$")
) %>% # TODO: Check whether we need to select columns

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to select columns when data is read in (either from Denodo or disk)? Columns could be selected when the data is read in or when it is used.

NOTE: For get_simd_data() the column names within the file will change when a new version of the SIMD is released.

@LucyEmma22 LucyEmma22 Jun 2, 2026

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment also applies to get_simd_data() and get_datazone_pop_data().

collect()

log_slf_event(stage = "read", status = "complete", type = "HSCP Localities Lookup", year = "all") # TODO: Check whether to add hscp_locality to log_slf_event mapping list
} else { # TODO: Check logic - are we reading the local file when BYOC_MODE = FALSE or are we still reading from Denodo?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we reading the local file when BYOC_MODE = FALSE or are we still reading from Denodo? read_extract_XXX reads from denodo when BYOC_MODE = FALSE but get_uk_postcode_data and get_spd_data read from disk when BYOC_MODE = FALSE.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment also applies to get_simd_data() and get_datazone_pop_data().

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

locality_data <- read_file(file_path) %>%
dplyr::select(
locality = "hscp_locality",
tidyselect::matches("datazone\\d{4}$")
) # TODO: Check whether we need to select columns
}

return(locality_data)
}


#' Scottish Postcode Directory File Path
#'
#' @description Get the path to the centrally held Scottish Postcode Directory
Expand Down Expand Up @@ -95,6 +139,63 @@ get_simd_path <- function(file_name = NULL, ext = "parquet") {
}


#' SIMD data
#'
#' @description Return the data for centrally held Scottish Index of Multiple
#' Deprivation (SIMD) file. When `BYOC_MODE` is `TRUE` the data is read from
#' the Denodo table `sdl.sdl_simd_source` (table name still to be confirmed);
#' otherwise it is read from `file_path`. The same columns are selected from
#' either source.
#'
#' @param denodo_connect Connection to denodo. Only used when `BYOC_MODE` is
#' `TRUE`; a disconnect is attempted when the function exits.
#' @param file_path Path to local SIMD file. Only used when `BYOC_MODE` is
#' not `TRUE`.
#' @param BYOC_MODE BYOC MODE
#'
#' @return a [tibble][tibble::tibble-package].
#' @export
#'
#' @family lookup files
get_simd_data <- function(denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
                          file_path = get_simd_path(),
                          BYOC_MODE) {
  # Columns kept from either source. Defined once so the Denodo and on-disk
  # branches cannot drift apart.
  # TODO: Check whether we need to select columns. When a new version of the
  # SIMD is released, the column names within the file will change.
  simd_cols <- c(
    "pc7",
    "simd2020v2_rank",
    "simd2020v2_sc_decile",
    "simd2020v2_sc_quintile",
    "simd2020v2_hb2019_decile",
    "simd2020v2_hb2019_quintile",
    "simd2020v2_hscp2019_decile",
    "simd2020v2_hscp2019_quintile"
  )

  if (isTRUE(BYOC_MODE)) {
    # TODO: Check whether to add simd to log_slf_event mapping list
    log_slf_event(
      stage = "read",
      status = "start",
      type = "SIMD Lookup",
      year = "all"
    )

    # Best-effort disconnect: a failure to close must not mask the result.
    on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE)

    simd_data <- dplyr::tbl(
      denodo_connect,
      dbplyr::in_schema("sdl", "sdl_simd_source") # TODO: Check table name
    ) %>%
      dplyr::select(dplyr::all_of(simd_cols)) %>%
      # Namespace-qualified: a bare `collect()` is only found if dplyr is
      # attached or the generic is imported by the package.
      dplyr::collect()

    # TODO: Check whether to add simd to log_slf_event mapping list
    log_slf_event(
      stage = "read",
      status = "complete",
      type = "SIMD Lookup",
      year = "all"
    )
  } else {
    # TODO: Check logic - are we reading the local file when
    # BYOC_MODE = FALSE or are we still reading from Denodo?
    simd_data <- read_file(file_path) %>%
      dplyr::select(dplyr::all_of(simd_cols))
  }

  return(simd_data)
}


#' Populations File Path for different types
#'
#' @description Get the path to the populations estimates
Expand Down Expand Up @@ -138,6 +239,54 @@ get_pop_path <- function(file_name = NULL,
}


#' DataZone population data
#'
#' @description Return the data for DataZone population estimates. When
#' `BYOC_MODE` is `TRUE` the data is read from the Denodo table
#' `sdl.sdl_datazone_population_source` (table name still to be confirmed);
#' otherwise it is read from `file_path`. The same columns (`year`,
#' `datazone2011`, `sex` and every column whose name starts with `age`) are
#' selected from either source.
#'
#' @param denodo_connect Connection to denodo. Only used when `BYOC_MODE` is
#' `TRUE`; a disconnect is attempted when the function exits.
#' @param file_path Path to local DataZone population file. Only used when
#' `BYOC_MODE` is not `TRUE`.
#' @param BYOC_MODE BYOC MODE
#'
#' @return a [tibble][tibble::tibble-package].
#' @export
#'
#' @family lookup files
get_datazone_pop_data <- function(denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE),
                                  file_path = get_pop_path(type = "datazone"),
                                  BYOC_MODE) {
  # Single definition of the column selection, shared by both sources so the
  # Denodo and on-disk branches always return the same columns.
  # TODO: Check whether we need to select columns
  select_pop_cols <- function(pop_data) {
    dplyr::select(
      pop_data,
      "year",
      "datazone2011",
      "sex",
      dplyr::starts_with("age")
    )
  }

  if (isTRUE(BYOC_MODE)) {
    # TODO: Check whether to add datazone_pop to log_slf_event mapping list
    log_slf_event(
      stage = "read",
      status = "start",
      type = "DataZone Population Lookup",
      year = "all"
    )

    # Best-effort disconnect: a failure to close must not mask the result.
    on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE)

    datazone_pop_data <- dplyr::tbl(
      denodo_connect,
      # TODO: Check table name
      dbplyr::in_schema("sdl", "sdl_datazone_population_source")
    ) %>%
      select_pop_cols() %>%
      # Namespace-qualified: a bare `collect()` is only found if dplyr is
      # attached or the generic is imported by the package.
      dplyr::collect()

    # TODO: Check whether to add datazone_pop to log_slf_event mapping list
    log_slf_event(
      stage = "read",
      status = "complete",
      type = "DataZone Population Lookup",
      year = "all"
    )
  } else {
    # TODO: Check logic - are we reading the local file when
    # BYOC_MODE = FALSE or are we still reading from Denodo?
    datazone_pop_data <- read_file(file_path) %>%
      select_pop_cols()
  }

  return(datazone_pop_data)
}


#' GP Practice Reference File Path (gpprac)
#'
#' @description Get the path for the centrally held reference file `gpprac`
Expand Down
24 changes: 17 additions & 7 deletions R/get_slf_lookup_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,23 @@
#' @export
#' @family slf lookup file path
#' @seealso [get_file_path()] for the generic function.
get_slf_postcode_path <- function(update = latest_update(), ...) {
get_file_path(
directory = fs::path(get_slf_dir(), "Lookups"),
file_name = stringr::str_glue("source_postcode_lookup_{update}"),
ext = "parquet",
...
)
get_slf_postcode_path <- function(update = latest_update(), BYOC_MODE, ...) { # TODO: Check whether to keep the update argument

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the update = latest_update() argument be kept? I seem to remember it being removed from somewhere else.

file_name <- stringr::str_glue("source_postcode_lookup_{update}.parquet")

if (BYOC_MODE) {
slf_postcode_path <- file.path(
directory = denodo_output_path(),
file_name = file_name
)
} else {
slf_postcode_path <- get_file_path(
directory = fs::path(get_slf_dir(), "Lookups"),
file_name = file_name,
...
)
}
return(slf_postcode_path)
}

#' get uk postcode list file path
Expand Down
Loading