Skip to content

Open interval #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 71 additions & 25 deletions R/life-tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,69 +74,115 @@ lt |>
# Period life table for females in Quebec in 2015
# Data from the Canadian Human Mortality Database
# Read in data from the website, and filter out what we need:
Mx <- read_table(
"http://www.prdh.umontreal.ca/BDLC/data/que/Mx_5x5.txt",
skip = 2, col_types = 'ccddd')

d <- Mx |>
Deaths <- read_table(
"https://www.prdh.umontreal.ca/BDLC/data/ont/Deaths_5x1.txt",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How sustainable is this and all other links to prhd.umontreal.ca ? Could it happen that the data will be gone in, say, 5 years? Or if the university decides to change the link structure? So perhaps we would like to keep the data in the repository?

skip = 2, col_types = 'ccddd'
)
Pop <- read_table(
"https://www.prdh.umontreal.ca/BDLC/data/ont/Population5.txt",
skip = 2, col_types = 'ccddd'
)

Deaths <- Deaths |>
mutate(year = as.numeric(substr(Year, 1, 4))) |>
select(year, Age, Total) |>
clean_names() |>
rename(Mx = total)
head(d)

# `age` is a character
# let's make an age `x` and interval length `n` column:
d <- d |>
mutate(x = as.numeric(str_remove(age, "-.*|\\+")),
n = lead(x, default = Inf) - x) |>
filter(x<105) |> # remove older ages that have varying data availability
select(year, age, x, n, Mx)
head(d)
rename(D = total)
Pop <- Pop |>
mutate(year = as.numeric(substr(Year, 1, 4))) |>
select(year, Age, Total) |>
clean_names() |>
rename(N = total)

# join population and deaths
d <- Deaths |>
left_join(Pop, by = c("year", "age"))

# convert `age` from character to numeric and
# aggregate ages 95+ because older ages have varying data quality
d <- d |>
mutate(x = as.numeric(str_remove(age, "-.*|\\+"))) |>
mutate(x = ifelse(x >= 95, 95, x)) |>
group_by(year, x) |>
summarise(
D = sum(D),
N = sum(N),
.groups = "drop"
) |>
mutate(
age = case_when(
x==0 ~ "0",
x==1 ~ "1-4",
x==95 ~ "95+",
TRUE ~ paste0(x, "-", x+4)
)
) |>
select(year, age, x, N, D)

# Use `tidyverse` to calculate the columns in the life table,
# based on the equations presented in previous sections
# Set radix l0 = 1 and filter to just include the year 2015
lt_2015 <- d |>
filter(year==2015) |>
mutate(
n = lead(x, default = Inf) - x,
Mx = D/N,
ax = case_when(
x==0 ~ 0.07 + 1.7*Mx,
x==1 ~ 1.5,
x==110 ~ 1/Mx,
x==95 ~ 1/Mx,
TRUE ~ 2.5
),
qx = n * Mx / (1 + (n - ax)* Mx),
qx = case_when(
x==95 ~ 1, # set to 1 for open interval
TRUE ~ n * Mx / (1 + (n - ax)* Mx)
),
px = 1 - qx,
lx = lag(cumprod(px), default = 1),
dx = lx - lead(lx, default = 0),
Lx = n * lead(lx, default = 0) + (ax* dx),
Lx = case_when(
x==95 ~ lx/Mx,
TRUE ~ n * lead(lx, default = 0) + (ax* dx)
),
Tx = rev(cumsum(rev(Lx))),
ex = Tx / lx
)
)

head(lt_2015)
# Create Table 2
library(kableExtra)
lt_2015 |>
select(Age = age, x, n, Mx, ax, qx, px, lx, dx, Lx, Tx, ex) |>
kable(format = "latex", booktabs = T,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why only this specific table is with format='latex? All other tables in this R script are printed with default output.

Also, perhaps, since we are saving figures to plots folder, we should also save tables as PDF or HTML files that can be (1) recreated and (2) viewed when we run the repo in Binder and run this script.

digits = c(NA, 0, 0, 6, 3, 6, 6, 6, 6, 3, 3, 3))


# Extend this to calculate life tables for every year
# using the `group_by` function:
lt_all_years <- d |>
group_by(year) |>
mutate(
n = lead(x, default = Inf) - x,
Mx = D/N,
ax = case_when(
x==0 ~ 0.07 + 1.7*Mx,
x==1 ~ 1.5,
x==110 ~ 1/Mx,
x==95 ~ 1/Mx,
TRUE ~ 2.5
),
qx = n * Mx / (1 + (n - ax)* Mx),
qx = case_when(
x==95 ~ 1, # set to 1 for open interval
TRUE ~ n * Mx / (1 + (n - ax)* Mx)
),
px = 1 - qx,
lx = lag(cumprod(px), default = 1),
dx = lx - lead(lx, default = 0),
Lx = n * lead(lx, default = 0) + (ax* dx),
Lx = case_when(
x==95 ~ lx/Mx,
TRUE ~ n * lead(lx, default = 0) + (ax* dx)
),
Tx = rev(cumsum(rev(Lx))),
ex = Tx / lx
)
)

head(lt_all_years)

Expand Down