diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R index d1bcaec9f..fa3efc6b0 100644 --- a/R/process_extract_ae.R +++ b/R/process_extract_ae.R @@ -12,7 +12,13 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_extract_ae <- function(data, year, write_to_disk = TRUE) { +process_extract_ae <- function(data, + year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA) { log_slf_event(stage = "process", status = "start", type = "ae", year = year) # Only run for a single year @@ -200,35 +206,35 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { # Read in data--------------------------------------- - ae_cup_file <- read_file( - path = get_boxi_extract_path(year, "ae_cup"), - col_type = readr::cols( - "ED Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"), - "ED Arrival Time" = readr::col_time(""), - "ED Discharge Date" = readr::col_date(format = "%Y/%m/%d %T"), - "ED Discharge Time" = readr::col_time(""), - "ED Case Reference Number [C]" = readr::col_character(), - "CUP Marker" = readr::col_double(), - "CUP Pathway Name" = readr::col_character() - ) - ) %>% - # rename variables - dplyr::rename( - record_keydate1 = "ED Arrival Date", - keytime1 = "ED Arrival Time", - record_keydate2 = "ED Discharge Date", - keytime2 = "ED Discharge Time", - case_ref_number = "ED Case Reference Number [C]", - cup_marker = "CUP Marker", - cup_pathway = "CUP Pathway Name" - ) + c_year_cup <- convert_fyyear_to_year(check_year_format(year)) + on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE) + + ae_cup_file <- dplyr::tbl( + denodo_connect, + dbplyr::in_schema("sdl", "sdl_ae_ucd_cup_source") + ) %>% + dplyr::filter( + ed_arrival_financial_year == c_year_cup, + (significant_facility_code == "32" | is.na(significant_facility_code)) + ) %>% + dplyr::select( + record_keydate1 = "ed_arrival_date", + keytime1 = "ed_arrival_time", + record_keydate2 = "ed_discharge_date", + keytime2 = "ed_discharge_time", + case_ref_number = "ed_case_reference_number", + cup_marker = "cup_marker", + cup_pathway = "cup_pathway_name" + ) %>% + dplyr::collect() # Data Cleaning--------------------------------------- ae_cup_clean <- ae_cup_file %>% # Remove any duplicates - dplyr::distinct(.data$record_keydate1, + dplyr::distinct( + .data$record_keydate1, .data$keytime1, .data$record_keydate2, .data$keytime2, @@ -236,7 +242,6 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { .keep_all = TRUE ) - # Join data-------------------------------------------- matched_ae_data <- outfile %>% @@ -256,7 +261,13 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { ) ae_processed <- matched_ae_data %>% + dplyr::mutate( + run_id = run_id, + run_date_time = run_date_time + ) %>% dplyr::select( + "run_id", + "run_date_time", "year", "recid", "smrtype", @@ -312,7 +323,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { if (write_to_disk) { write_file( ae_processed, - get_source_extract_path(year, "ae", check_mode = "write"), + get_source_extract_path(year, "ae", check_mode = "write", BYOC_MODE = BYOC_MODE), + BYOC_MODE = BYOC_MODE, group_id = 3356 # sourcedev owner ) } diff --git a/R/read_extract_ae.R b/R/read_extract_ae.R index 708d743ab..d4d91fcab 100644 --- a/R/read_extract_ae.R +++ b/R/read_extract_ae.R @@ -6,89 +6,71 @@ #' read_extract_ae <- function( year, - file_path = get_boxi_extract_path(year = year, type = "ae") + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE), + BYOC_MODE ) { log_slf_event(stage = "read", status = "start", type = "ae", year = year) - extract_ae <- read_file(file_path, - col_type = readr::cols( - "Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"), - "DAT Date" = readr::col_date(format = "%Y/%m/%d %T"), - "anon_chi" = readr::col_character(), - "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"), - "Pat Gender Code" = readr::col_double(), - "NHS Board of Residence Code - current" = readr::col_character(), - "Treatment NHS Board Code - current" = readr::col_character(), - "Treatment Location Code" = readr::col_character(), - "GP Practice Code" = readr::col_character(), - "Council Area Code" = readr::col_character(), - "Postcode (epi) [C]" = readr::col_character(), - "Postcode (CHI) [C]" = readr::col_character(), - "HSCP of Residence Code - current" = readr::col_character(), - "Arrival Time" = readr::col_time(""), - "DAT Time" = readr::col_time(""), - "Arrival Mode Code" = readr::col_character(), - "Referral Source Code" = readr::col_character(), - "Attendance Category Code" = readr::col_character(), - "Discharge Destination Code" = readr::col_character(), - "Patient Flow Code" = readr::col_double(), - "Place of Incident Code" = readr::col_character(), - "Reason for Wait Code" = readr::col_character(), - "Disease 1 Code" = readr::col_character(), - "Disease 2 Code" = readr::col_character(), - "Disease 3 Code" = readr::col_character(), - "Bodily Location Of Injury Code" = readr::col_character(), - "Alcohol Involved Code" = readr::col_character(), - "Alcohol Related Admission" = readr::col_character(), - "Substance Misuse Related Admission" = readr::col_character(), - "Falls Related Admission" = readr::col_character(), - "Self Harm Related Admission" = readr::col_character(), - "Total Net Costs" = readr::col_double(), - "Age at Midpoint of Financial Year" = readr::col_double(), - "Case Reference Number" = readr::col_character(), - "Significant Facility Code" = readr::col_character(), - "Community Hospital Flag" = readr::col_character(), - ) + year <- check_year_format(year, format = "fyyear") + c_year <- convert_fyyear_to_year(year) + + # Specify years available for running + if (file_path == get_dummy_boxi_extract_path()) { + return(tibble::tibble()) + } + + on.exit(try(DBI::dbDisconnect(denodo_connect), silent = TRUE), add = TRUE) + + # Read Extract + extract_ae <- dplyr::tbl( + denodo_connect, + dbplyr::in_schema("sdl", "sdl_ae2_episode_level_source") ) %>% - # rename variables - dplyr::rename( - record_keydate1 = "Arrival Date", - record_keydate2 = "DAT Date", - dob = "Pat Date Of Birth [C]", - postcode_epi = "Postcode (epi) [C]", - postcode_chi = "Postcode (CHI) [C]", - age = "Age at Midpoint of Financial Year", - ae_alcohol = "Alcohol Involved Code", - alcohol_adm = "Alcohol Related Admission", - ae_arrivalmode = "Arrival Mode Code", - keytime1 = "Arrival Time", - ae_attendcat = "Attendance Category Code", - ae_bodyloc = "Bodily Location Of Injury Code", - lca = "Council Area Code", - ae_disdest = "Discharge Destination Code", - keytime2 = "DAT Time", - diag1 = "Disease 1 Code", - diag2 = "Disease 2 Code", - diag3 = "Disease 3 Code", - falls_adm = "Falls Related Admission", - gpprac = "GP Practice Code", - hscp = "HSCP of Residence Code - current", - hbrescode = "NHS Board of Residence Code - current", - hbtreatcode = "Treatment NHS Board Code - current", - anon_chi = "anon_chi", - gender = "Pat Gender Code", - ae_patflow = "Patient Flow Code", - ae_placeinc = "Place of Incident Code", - ae_reasonwait = "Reason for Wait Code", - refsource = "Referral Source Code", - selfharm_adm = "Self Harm Related Admission", - submis_adm = "Substance Misuse Related Admission", - sigfac = "Significant Facility Code", - cost_total_net = "Total Net Costs", - location = "Treatment Location Code", - case_ref_number = "Case Reference Number", - commhosp = "Community Hospital Flag" - ) + dplyr::filter( + arrival_financial_year == c_year, + significant_facility_code == "32" | is.na(significant_facility_code) + ) %>% + dplyr::select( + record_keydate1 = "arrival_date", + record_keydate2 = "dat_date", + keytime1 = "arrival_time", + keytime2 = "dat_time", + chi = "patient_chi", + gender = "patient_sex", + dob = "patient_dob", + gpprac = "gp_practice_code", + lca = "council_area_code", + hscp = "hscp_of_residence_code_curr", + location = "treatment_location_code", + hbrescode = "nhs_board_of_residence_code_curr", + hbtreatcode = "treatment_nhs_board_code_curr", + diag1 = "disease_1_code", + diag2 = "disease_2_code", + diag3 = "disease_3_code", + ae_arrivalmode = "arrival_mode_code", + refsource = "referral_source_code", + sigfac = "significant_facility_code", + ae_attendcat = "attendance_category_code", + ae_disdest = "discharge_destination_code", + ae_patflow = "patient_flow_code", + ae_placeinc = "place_of_incident_code", + ae_reasonwait = "reason_for_wait_code", + ae_bodyloc = "bodily_location_of_injury_code", + ae_alcohol = "alcohol_involved_code", + alcohol_adm = "alcohol_related_admission", + submis_adm = "substance_misuse_related_admission", + falls_adm = "falls_related_admission", + selfharm_adm = "self_harm_related_admission", + cost_total_net = "total_net_cost", + age = "age_at_midpoint_of_financial_year", + case_ref_number = "case_reference_number", + postcode_epi = "postcode_epi", + postcode_chi = "postcode_chi", + commhosp = "community_hospital_flag" + ) %>% + dplyr::collect() %>% + slfhelper::get_anon_chi("chi") log_slf_event(stage = "read", status = "complete", type = "ae", year = year) diff --git a/SDL_process/dummy_targets.R b/SDL_process/dummy_targets.R index 24549bbb8..632a40c06 100644 --- a/SDL_process/dummy_targets.R +++ b/SDL_process/dummy_targets.R @@ -82,30 +82,30 @@ list( ## Stage 2.1 non-specific targets ---- - ### IT CHI deaths Activity ---- - # READ - IT CHI deaths - tar_target( - # Target name - it_chi_deaths_extract, - read_it_chi_deaths( - denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), - file_path = get_it_deaths_path(BYOC_MODE = BYOC_MODE), - BYOC_MODE = BYOC_MODE - ) - ), - # PROCESS - IT CHI deaths - tar_target( - # Target name - it_chi_deaths_data, - # Function - process_it_chi_deaths( - data = it_chi_deaths_extract, - write_to_disk = write_to_disk, - BYOC_MODE = BYOC_MODE, - run_id = run_id, - run_date_time = run_date_time - ) - ), + # ### IT CHI deaths Activity ---- + # # READ - IT CHI deaths + # tar_target( + # # Target name + # it_chi_deaths_extract, + # read_it_chi_deaths( + # denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + # file_path = get_it_deaths_path(BYOC_MODE = BYOC_MODE), + # BYOC_MODE = BYOC_MODE + # ) + # ), + # # PROCESS - IT CHI deaths + # tar_target( + # # Target name + # it_chi_deaths_data, + # # Function + # process_it_chi_deaths( + # data = it_chi_deaths_extract, + # write_to_disk = write_to_disk, + # BYOC_MODE = BYOC_MODE, + # run_id = run_id, + # run_date_time = run_date_time + # ) + # ), ### Long-Term Conditions (LTCs) Activity ---- # # READ - LTCs @@ -118,23 +118,52 @@ list( # ), # ### NRS BOXI Deaths ---- - # PROCESS - Refined deaths - combine all NRS death data into a lookup - tar_target( - refined_death_data, - process_refined_death( - it_chi_deaths = it_chi_deaths_data, - write_to_disk = write_to_disk, - BYOC_MODE = BYOC_MODE, - run_id = run_id, - run_date_time = run_date_time - ) - ), + # # PROCESS - Refined deaths - combine all NRS death data into a lookup + # tar_target( + # refined_death_data, + # process_refined_death( + # it_chi_deaths = it_chi_deaths_data, + # write_to_disk = write_to_disk, + # BYOC_MODE = BYOC_MODE, + # run_id = run_id, + # run_date_time = run_date_time + # ) + # ), ## Stage 2.2 year specific targets ---- tar_map( list(year = years_to_run), + # Accident & Emergency (AE2) activity -------------------------------------- + # READ - A&E + tar_target( + # Target name + ae_data, + # Function + read_extract_ae( + year = year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE), + BYOC_MODE = BYOC_MODE + ) + ), + # PROCESS - A&E + tar_target( + # Target name + source_ae_extract, + # Function + process_extract_ae( + data = ae_data, + year = year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = write_to_disk, + BYOC_MODE = BYOC_MODE, + run_id = run_id, + run_date_time = run_date_time + ) + ) + ### Maternity (SMR02) Acitivity---- # # READ - Maternity # tar_target( @@ -164,46 +193,46 @@ list( ### Mental Health (SMR02) Activity ---- # READ - Mental Health - tar_target( - mental_health_data, - read_extract_mental_health( - year = year, - denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), - file_path = get_boxi_extract_path( - year = year, - type = "mh", - BYOC_MODE = BYOC_MODE - ), - BYOC_MODE = BYOC_MODE - ) - ), - # PROCESS - Mental Health - tar_target( - # Target name - source_mental_health_extract, - process_extract_mental_health( - mental_health_data, - year = year, - write_to_disk = write_to_disk, - BYOC_MODE = BYOC_MODE, - run_id = run_id, - run_date_time = run_date_time - ) - ), + # tar_target( + # mental_health_data, + # read_extract_mental_health( + # year = year, + # denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + # file_path = get_boxi_extract_path( + # year = year, + # type = "mh", + # BYOC_MODE = BYOC_MODE + # ), + # BYOC_MODE = BYOC_MODE + # ) + # ), + # # PROCESS - Mental Health + # tar_target( + # # Target name + # source_mental_health_extract, + # process_extract_mental_health( + # mental_health_data, + # year = year, + # write_to_disk = write_to_disk, + # BYOC_MODE = BYOC_MODE, + # run_id = run_id, + # run_date_time = run_date_time + # ) + # ), ### Death Activity ---- # PROCESS - Deaths - tar_target( - # Target name - source_nrs_deaths_extract, - # use this anonymous function with redundant but necessary refined_death - # to make sure reading year-specific NRS deaths extracts after it is produced - (\(year, refined_death_data) { - arrow::read_parquet(get_source_extract_path(year, "nrs_deaths", BYOC_MODE = BYOC_MODE)) %>% - as.data.frame() - })(year, refined_death_data) - ) + # tar_target( + # # Target name + # source_nrs_deaths_extract, + # # use this anonymous function with redundant but necessary refined_death + # # to make sure reading year-specific NRS deaths extracts after it is produced + # (\(year, refined_death_data) { + # createslf::read_file(get_source_extract_path(year, "nrs_deaths", BYOC_MODE = BYOC_MODE)) %>% + # as.data.frame() + # })(year, refined_death_data) + # ) # # TESTS - Deaths # tar_target( diff --git a/SDL_process/run_sdl.r b/SDL_process/run_sdl.r index f0f516231..7a9ba4783 100644 --- a/SDL_process/run_sdl.r +++ b/SDL_process/run_sdl.r @@ -92,10 +92,7 @@ year <- "1920" byoc_output_files <- get_byoc_output_files( year, types = c( - "mh", - "nrs_deaths", - "combined_deaths", - "chi_deaths" + "ae" ) ) # using homelessness for test purpose. When development is complete, @@ -118,8 +115,4 @@ tryCatch( ) logger::log_info("Targets finished.") -# Episode file ---- - -# Individual file ---- - logger::log_info("Run SDL ended.") diff --git a/_targets.R b/_targets.R index a3aafa1b1..b83ed4e15 100644 --- a/_targets.R +++ b/_targets.R @@ -496,12 +496,16 @@ list( ), # Accident & Emergency (AE2) activity -------------------------------------- # READ - A&E - tar_file_read( + tar_target( # Target name ae_data, - get_boxi_extract_path(year, type = "ae"), # Function - read_extract_ae(year, !!.x) + read_extract_ae( + year = year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE), + BYOC_MODE = BYOC_MODE + ) ), # PROCESS - A&E tar_target( @@ -509,9 +513,13 @@ list( source_ae_extract, # Function process_extract_ae( - ae_data, - year, - write_to_disk = write_to_disk + data = ae_data, + year = year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = write_to_disk, + BYOC_MODE = BYOC_MODE, + run_id = run_id, + run_date_time = run_date_time ) ), # TESTS - A&E diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd index dfcaf4121..c2c56f3e5 100644 --- a/man/process_extract_ae.Rd +++ b/man/process_extract_ae.Rd @@ -4,7 +4,15 @@ \alias{process_extract_ae} \title{Process the A&E extract} \usage{ -process_extract_ae(data, year, write_to_disk = TRUE) +process_extract_ae( + data, + year, + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + write_to_disk = TRUE, + BYOC_MODE = FALSE, + run_id = NA, + run_date_time = NA +) } \arguments{ \item{data}{The extract to process} diff --git a/man/read_extract_ae.Rd b/man/read_extract_ae.Rd index 1a15efbc1..390842e54 100644 --- a/man/read_extract_ae.Rd +++ b/man/read_extract_ae.Rd @@ -6,7 +6,9 @@ \usage{ read_extract_ae( year, - file_path = get_boxi_extract_path(year = year, type = "ae") + denodo_connect = get_denodo_connection(BYOC_MODE = BYOC_MODE), + file_path = get_boxi_extract_path(year = year, type = "ae", BYOC_MODE), + BYOC_MODE ) } \arguments{