Skip to content

Commit fb7aa62

Browse files
committed
Update documentation links and modify GBD data preparation scripts
- Changed documentation and source code links in README.md to reflect new repository location. - Updated R script for GBD data preparation to use the 2023 age metadata and adjusted output directory structure. - Enhanced data merging processes for malaria and dengue, including new calculations for age-specific results. - Added new Jupyter notebook for raking processes and created a new script for raking A2 to GBD data.
1 parent a0189c5 commit fb7aa62

5 files changed

Lines changed: 529 additions & 54 deletions

File tree

README.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,9 @@
22

33
---
44

5-
**Documentation**: [https://bcreiner.github.io/idd-forecast-mbp](https://bcreiner.github.io/idd-forecast-mbp)
5+
**Documentation**: [https://ihmeuw.github.io/idd-forecast-mbp](https://ihmeuw.github.io/idd-forecast-mbp)
66

7-
**Source Code**: [https://github.com/bcreiner/idd-forecast-mbp](https://github.com/bcreiner/idd-forecast-mbp)
8-
9-
**PyPI**: [https://pypi.org/project/idd-forecast-mbp/](https://pypi.org/project/idd-forecast-mbp/)
7+
**Source Code**: [https://github.com/ihmeuw/idd-forecast-mbp](https://github.com/ihmeuw/idd-forecast-mbp)
108

119
---
1210

Untitled-1.ipynb

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "53fa159f",
6+
"metadata": {},
7+
"source": [
8+
"## Raking "
9+
]
10+
},
11+
{
12+
"cell_type": "markdown",
13+
"id": "33f07fac",
14+
"metadata": {},
15+
"source": [
16+
"### Get GBD Values\n",
17+
"\n",
18+
"Malaria\n",
19+
" - pf_mort\n",
20+
" - pf_inc\n",
21+
" - pfpr? It isn't 2-10... and no guarantee that if I got 2-10 it would match... Not sure I can rake malaria\n",
22+
"\n",
23+
"DENV\n",
24+
" - incidence\n",
25+
" - mortality\n",
26+
"\n",
27+
"Need\n",
28+
"- all-age for all location/years\n",
29+
"- age-specific for (?) global (?) SR/ years (for age distribution)..."
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"id": "c85a8dc6",
35+
"metadata": {},
36+
"source": [
37+
"### Malaria"
38+
]
39+
}
40+
],
41+
"metadata": {
42+
"language_info": {
43+
"name": "python"
44+
}
45+
},
46+
"nbformat": 4,
47+
"nbformat_minor": 5
48+
}

src/idd_forecast_mbp/data_prep/get_gbd_data.r

Lines changed: 72 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,16 @@ source("/ihme/cc_resources/libraries/current/r/get_location_metadata.R")
77
source("/ihme/cc_resources/libraries/current/r/get_demographics.R")
88
source("/ihme/cc_resources/libraries/current/r/get_ids.R")
99
source("/ihme/cc_resources/libraries/current/r/get_sequela_metadata.R")
10-
source("/ihme/cc_resources/libraries/current/r/get_age_metadata.R")
10+
source("/ihme/cc_resources/libraries/current/r/get_gbd_2023_age_metadata.R")
1111
source("/ihme/cc_resources/libraries/current/r/get_covariate_estimates.R")
1212
source("/ihme/cc_resources/libraries/current/r/get_population.R")
1313

1414
# Output
15-
output_dir <- "/mnt/team/idd/pub/forecast-mbp/01-raw_data"
15+
output_dir <- "/mnt/team/idd/pub/forecast-mbp/01-raw_data/gbd"
16+
# Make sure the directory exists and create it if it doesn't
17+
if (!dir.exists(output_dir)) {
18+
dir.create(output_dir, recursive = TRUE)
19+
}
1620

1721
release_id <- 16
1822
como_v <- 1591
@@ -26,7 +30,7 @@ past_years = 1970:2023
2630

2731
# GBD Constants
2832
release_id_2021 = 9
29-
release_id_2023 = 16
33+
gbd_2023_release_id = 16
3034

3135
gbd_location_set_id = 35
3236
fhs_location_set_id = 39
@@ -43,32 +47,41 @@ sexes = 3
4347
dengue_id = 357
4448
malaria_id = 345
4549

50+
# Age-group lookup table.
# Pull the GBD 2023 age metadata, keep only the id / bounds / name columns,
# append two hand-built aggregate groups (id 1 = "Under 5", id 22 = "All age"),
# and save the result next to the other raw GBD pulls.
gbd_2023_age_metadata <- as.data.frame(
  get_gbd_2023_age_metadata(release_id = gbd_2023_release_id)
)
gbd_2023_age_metadata <- gbd_2023_age_metadata[, c(
  "age_group_id",
  "age_group_years_start",
  "age_group_years_end",
  "age_group_name"
)]

# Aggregate age groups added manually so they sit in the same table as the
# groups returned by the metadata call.
new_rows <- data.frame(
  age_group_id = c(1, 22),
  age_group_years_start = c(0, 0),
  age_group_years_end = c(5, 125),
  age_group_name = c("Under 5", "All age")
)
gbd_2023_age_metadata <- rbind(gbd_2023_age_metadata, new_rows)

write.csv(
  gbd_2023_age_metadata,
  glue("{output_dir}/gbd_2023_age_metadata.csv"),
  row.names = FALSE
)
4659

4760
# Get hierarchy
48-
gbd_modeling_hierarchy_2023 <- as.data.frame(get_location_metadata(location_set_id = gbd_location_set_id, release_id=release_id_2023))
49-
fhs_modeling_hierarchy_2023 <- as.data.frame(get_location_metadata(location_set_id = fhs_location_set_id, release_id=release_id_2023))
61+
gbd_2023_modeling_hierarchy <- as.data.frame(get_location_metadata(location_set_id = gbd_location_set_id, release_id=gbd_2023_release_id))
62+
fhs_2023_modeling_hierarchy <- as.data.frame(get_location_metadata(location_set_id = fhs_location_set_id, release_id=gbd_2023_release_id))
5063

5164
col_names_to_delete <- c("start_date", "end_date", "date_inserted", "last_updated", "last_updated_by", "last_updated_action")
52-
toss_col_locs <- which(names(gbd_modeling_hierarchy_2023) %in% col_names_to_delete)
65+
toss_col_locs <- which(names(gbd_2023_modeling_hierarchy) %in% col_names_to_delete)
5366

54-
gbd_modeling_hierarchy_2023 <- gbd_modeling_hierarchy_2023[,-toss_col_locs]
55-
fhs_modeling_hierarchy_2023 <- fhs_modeling_hierarchy_2023[,-toss_col_locs]
67+
gbd_2023_modeling_hierarchy <- gbd_2023_modeling_hierarchy[,-toss_col_locs]
68+
fhs_2023_modeling_hierarchy <- fhs_2023_modeling_hierarchy[,-toss_col_locs]
5669

5770

58-
write.csv(gbd_modeling_hierarchy_2023, glue("{output_dir}/gbd_modeling_hierarchy_2023.csv"), row.names = FALSE)
59-
write.csv(fhs_modeling_hierarchy_2023, glue("{output_dir}/fhs_modeling_hierarchy_2023.csv"), row.names = FALSE)
71+
write.csv(gbd_2023_modeling_hierarchy, glue("{output_dir}/gbd_2023_modeling_hierarchy.csv"), row.names = FALSE)
72+
write.csv(fhs_2023_modeling_hierarchy, glue("{output_dir}/fhs_2023_modeling_hierarchy.csv"), row.names = FALSE)
6073

6174

6275
# Get population
63-
gbd_population_2023 = as.data.frame(get_population(age_group_id = ages,
64-
release_id = release_id_2023,
76+
gbd_2023_population = as.data.frame(get_population(age_group_id = ages,
77+
release_id = gbd_2023_release_id,
6578
year_id = past_years,
66-
location_id = gbd_modeling_hierarchy_2023$location_id,
79+
location_id = gbd_2023_modeling_hierarchy$location_id,
6780
sex_id = sexes))
6881

69-
gbd_population_2023 <- gbd_population_2023[ ,c("age_group_id", "location_id", "year_id", "sex_id", "population")]
82+
gbd_2023_population <- gbd_2023_population[ ,c("age_group_id", "location_id", "year_id", "sex_id", "population")]
7083

71-
write.csv(gbd_population_2023, glue("{output_dir}/gbd_population_2023.csv"), row.names = FALSE)
84+
write.csv(gbd_2023_population, glue("{output_dir}/gbd_2023_population.csv"), row.names = FALSE)
7285

7386
####
7487
## Dengue
@@ -78,18 +91,35 @@ write.csv(gbd_population_2023, glue("{output_dir}/gbd_population_2023.csv"), row
7891
cause_df <- as.data.frame(get_outputs("cause", cause_id = dengue_id,
7992
measure_id = 1:6, #prev =5 , inc =6 , deaths =1 , dalys =2 , ylds = 3, ylls = 4
8093
year_id = past_years,
81-
location_id = gbd_modeling_hierarchy_2023$location_id,
82-
age_group_id = ages,
83-
release_id = release_id_2023,
94+
location_id = gbd_2023_modeling_hierarchy$location_id,
95+
age_group_id = 22,
96+
release_id = gbd_2023_release_id,
8497
metric_id = c(1,3), #rate =3, counts =1
8598
sex_id = sexes, # males =1, females =2, both =3
8699
compare_version_id = compare_v_2023))
87100

88101
# Merge location hierarchy and population
89-
cause_df <- merge(cause_df, gbd_modeling_hierarchy_2023, all.x = TRUE, sort = FALSE)
90-
cause_df <- merge(cause_df, gbd_population_2023, all.x = TRUE, sort = FALSE)
102+
cause_df <- merge(cause_df, gbd_2023_modeling_hierarchy, all.x = TRUE, sort = FALSE)
103+
cause_df <- merge(cause_df, gbd_2023_population, all.x = TRUE, sort = FALSE)
91104

92-
write.csv(cause_df, glue("{output_dir}/gbd_dengue_aa_2023.csv"), row.names = FALSE)
105+
write.csv(cause_df, glue("{output_dir}/gbd_2023_dengue_aa.csv"), row.names = FALSE)
106+
107+
# Age-specific dengue results.
# Requests deaths (1) and incidence (6) only, as counts and rates, for every
# age_group_id listed in gbd_2023_age_metadata, then attaches the location
# hierarchy and population columns and writes the table to disk.
cause_df <- as.data.frame(get_outputs(
  "cause",
  cause_id = dengue_id,
  # Fix: the comma after c(1, 6) was missing, so the next argument ran into
  # this one and the whole script failed to parse.
  measure_id = c(1, 6), # prev = 5, inc = 6, deaths = 1, dalys = 2, ylds = 3, ylls = 4
  year_id = past_years,
  location_id = gbd_2023_modeling_hierarchy$location_id,
  age_group_id = gbd_2023_age_metadata$age_group_id,
  release_id = gbd_2023_release_id,
  metric_id = c(1, 3), # rate = 3, counts = 1
  sex_id = sexes, # males = 1, females = 2, both = 3
  compare_version_id = compare_v_2023
))

# Merge location hierarchy and population. Both are left joins on whatever
# columns the tables share, so every row returned by get_outputs is kept.
# NOTE(review): gbd_2023_population was pulled with age_group_id = ages; if
# `ages` does not cover every id in gbd_2023_age_metadata, population will be
# NA for the missing age groups -- confirm against the definition of `ages`.
cause_df <- merge(cause_df, gbd_2023_modeling_hierarchy, all.x = TRUE, sort = FALSE)
cause_df <- merge(cause_df, gbd_2023_population, all.x = TRUE, sort = FALSE)

write.csv(cause_df, glue("{output_dir}/gbd_2023_dengue_as.csv"), row.names = FALSE)
93123

94124
####
95125
## Malaria
@@ -99,18 +129,33 @@ write.csv(cause_df, glue("{output_dir}/gbd_dengue_aa_2023.csv"), row.names = FAL
99129
cause_df <- as.data.frame(get_outputs("cause", cause_id = malaria_id,
100130
measure_id = 1:6, #prev =5 , inc =6 , deaths =1 , dalys =2 , ylds = 3, ylls = 4
101131
year_id = past_years,
102-
location_id = gbd_modeling_hierarchy_2023$location_id,
103-
age_group_id = ages,
104-
release_id = release_id_2023,
132+
location_id = gbd_2023_modeling_hierarchy$location_id,
133+
age_group_id = 22,
134+
release_id = gbd_2023_release_id,
105135
metric_id = c(1,3), #rate =3, counts =1
106136
sex_id = sexes, # males =1, females =2, both =3
107137
compare_version_id = compare_v_2023))
108138

109139
# Merge location hierarchy and population
110-
cause_df <- merge(cause_df, gbd_modeling_hierarchy_2023, all.x = TRUE, sort = FALSE)
111-
cause_df <- merge(cause_df, gbd_population_2023, all.x = TRUE, sort = FALSE)
140+
cause_df <- merge(cause_df, gbd_2023_modeling_hierarchy, all.x = TRUE, sort = FALSE)
141+
cause_df <- merge(cause_df, gbd_2023_population, all.x = TRUE, sort = FALSE)
142+
143+
write.csv(cause_df, glue("{output_dir}/gbd_2023_malaria_aa.csv"), row.names = FALSE)
112144

113-
write.csv(cause_df, glue("{output_dir}/gbd_malaria_aa_2023.csv"), row.names = FALSE)
145+
# Age-specific malaria results.
# Requests deaths (1) and incidence (6) only, as counts and rates, for every
# age_group_id listed in gbd_2023_age_metadata.
cause_df <- as.data.frame(get_outputs(
  "cause",
  cause_id = malaria_id,
  measure_id = c(1, 6), # prev = 5, inc = 6, deaths = 1, dalys = 2, ylds = 3, ylls = 4
  year_id = past_years,
  location_id = gbd_2023_modeling_hierarchy$location_id,
  age_group_id = gbd_2023_age_metadata$age_group_id,
  release_id = gbd_2023_release_id,
  metric_id = c(1, 3), # rate = 3, counts = 1
  sex_id = sexes, # males = 1, females = 2, both = 3
  compare_version_id = compare_v_2023
))

# Attach the location hierarchy, then population; both are left joins that keep
# every row returned by get_outputs and preserve its row order.
cause_df <- merge(cause_df, gbd_2023_modeling_hierarchy, all.x = TRUE, sort = FALSE)
cause_df <- merge(cause_df, gbd_2023_population, all.x = TRUE, sort = FALSE)

write.csv(
  cause_df,
  glue("{output_dir}/gbd_2023_malaria_as.csv"),
  row.names = FALSE
)
116161

0 commit comments

Comments
 (0)