@@ -12,148 +12,57 @@ require(data.table)
1212
1313
1414data_path <- " /mnt/team/idd/pub/forecast-mbp/03-modeling_data"
15- file_path <- glue(" {data_path}/malaria_df .parquet" )
15+ file_path <- glue(" {data_path}/malaria_stage_2_modeling_df .parquet" )
1616lsae_hierarchy_path <- " /mnt/team/rapidresponse/pub/population-model/admin-inputs/raking/gbd-inputs/hierarchy_lsae_1209.parquet"
1717gbd_hierarchy_path <- " /mnt/team/rapidresponse/pub/population-model/admin-inputs/raking/gbd-inputs/hierarchy_gbd_2023.parquet"
1818
1919
2020source(" /mnt/share/homes/bcreiner/repos/idd-forecast-mbp/src/idd_forecast_mbp/modeling/helper_functions.r" )
2121# Read in a parquet file
2222# Read in a parquet file
23-
24- malaria_mortality_threshold <- 1e-5
25-
2623malaria_df <- arrow :: read_parquet(file_path )
27- lsae_hierarchy_df <- arrow :: read_parquet(lsae_hierarchy_path )
28- gbd_hierarchy_df <- arrow :: read_parquet(gbd_hierarchy_path )
29-
30- malaria_df $ malaria_pfpr <- malaria_df $ malaria_pfpr / malaria_df $ population
31- malaria_df $ malaria_pf_mort_rate <- malaria_df $ malaria_pf_mort_rate / malaria_df $ population
32- malaria_df $ malaria_pf_inc_rate <- malaria_df $ malaria_pf_inc_rate / malaria_df $ population
33-
34-
35-
36- A0_rows <- which(malaria_df $ location_id == malaria_df $ A0_location_id )
37-
38- malaria_A0_df <- malaria_df [A0_rows ,]
39- malaria_A0_df <- malaria_A0_df [which(malaria_A0_df $ malaria_pf_mort_rate > malaria_mortality_threshold ),]
40- keep_A0 <- unique(malaria_A0_df $ location_id )
41-
42- malaria_df <- malaria_df [which(malaria_df $ A0_location_id %in% keep_A0 ),]
43-
44- malaria_df <- malaria_df [which(malaria_df $ level == 3 ),]
4524
4625malaria_df $ A0_af <- as.factor(as.character(paste(" A0" , malaria_df $ A0_location_id , sep = " _" )))
4726
48-
49- toss <- which(is.na(malaria_df $ malaria_pfpr ))
50- malaria_df <- malaria_df [- toss ,]
51-
52- malaria_df $ malaria_pfpr <- 0.99 * malaria_df $ malaria_pfpr
53-
54- malaria_df $ log_gdppc_mean <- log(malaria_df $ gdppc_mean )
55- malaria_df $ log_ldipc_mean <- log(malaria_df $ ldipc_mean )
56-
57- malaria_df $ mal_DAH_23_per_capita <- malaria_df $ mal_DAH_23 / malaria_df $ population
58- malaria_df $ log_mal_DAH_23_per_capita <- log(malaria_df $ mal_DAH_23_per_capita + 1e-5 )
59- malaria_df $ log_mal_DAH_23 <- log(malaria_df $ mal_DAH_23 + 1e-5 )
60-
61-
62- malaria_df $ elevation <- malaria_df $ elevation / malaria_df $ population
63-
64- malaria_df <- center_by_location_df(malaria_df , " log_mal_DAH_23_per_capita" )
65- malaria_df <- center_by_location_df(malaria_df , " log_mal_DAH_23" )
66- malaria_df <- center_by_location_df(malaria_df , " malaria_suitability" )
67- malaria_df <- center_by_location_df(malaria_df , " log_gdppc_mean" )
68- malaria_df <- center_by_location_df(malaria_df , " log_ldipc_mean" )
69- malaria_df <- center_by_location_df(malaria_df , " log_ldipc_mean_A0_centered" )
70- malaria_df <- center_by_location_df(malaria_df , " elevation" )
71- malaria_df <- center_by_location_df(malaria_df , " total_precipitation" )
72- malaria_df <- center_by_location_df(malaria_df , " people_flood_days_per_capita" )
73- malaria_df <- center_by_location_df(malaria_df , " relative_humidity" )
74- malaria_df <- center_by_location_df(malaria_df , " precipitation_days" )
75-
76- malaria_pfpr_df <- malaria_df [which(malaria_df $ malaria_pfpr > 0 ),]
77-
78- malaria_pfpr_df $ logit_malaria_pfpr <- log(malaria_pfpr_df $ malaria_pfpr / (1 - malaria_pfpr_df $ malaria_pfpr ))
79-
8027# ##
8128#
8229# Without A0_af, with A0_af, centered with A0_af
8330#
8431# ##
8532
86- mod1 <- lm(logit_malaria_pfpr ~ malaria_suitability_A0_centered * log_ldipc_mean_A0_centered + log_mal_DAH_23_per_capita_A0_centered + total_precipitation_A0_centered +
87- people_flood_days_per_capita_A0_centered + A0_af , data = malaria_pfpr_df )
33+ mod1 <- lm(logit_malaria_pfpr ~ malaria_suitability_A0_centered * log_ldipc_mean_A0_centered + log_mal_DAH_total_per_capita + total_precipitation_A0_centered +
34+ people_flood_days_per_capita_A0_centered + A0_af , data = malaria_df )
8835
89- out <- summary(mod1 )[[4 ]]
90- cbind(out [which(rownames(out ) %nlike % " A0_af" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0_af" ), 4 ], 3 ))
91- summary(mod1 )$ r.squared
9236
9337
9438
95- # Run the three models
96- results <- compare_three_models(
97- df = malaria_pfpr_df ,
98- y_var = " logit_malaria_pfpr" ,
99- covariate_names = Covariate_options [[10 ]],
100- interaction_pairs = list (Covariate_options [[10 ]][1 : 2 ])
101- )
102-
103- results $ summary_table [,c(" Covariate" , " Model2_Centered_Coef" )]
104-
105- # # Create the grid plot
106- # plot <- plot_models_grid_base(
107- # model_results = results,
108- # interaction_pair = c("log_ldipc_mean_A0_centered", "malaria_suitability"),
109- # df = malaria_pfpr_df,
110- # y_var = "logit_malaria_pfpr",
111- # gbd_hierarchy_path = gbd_hierarchy_path
112- # )
113-
114- # Create the grid plot
115- plot <- plot_models_grid_base(
116- model_results = results ,
117- interaction_pair = Covariate_options [[4 ]][1 : 2 ],
118- df = malaria_pfpr_df ,
119- y_var = " logit_malaria_pfpr" ,
120- transform_type = " invlogit" ,
121- pretty_labels = list (
122- original = " Logit pfpr" ,
123- transformed = " pfpr"
124- ),
125- color_ranges = list (
126- original = c(- 20 , 5 ),
127- transformed = c(0 , 0.1 )
128- ),
129- gbd_hierarchy_path = gbd_hierarchy_path
130- )
131-
13239
40+ out <- summary(mod1 )[[4 ]]
41+ cbind(out [which(rownames(out ) %nlike % " A0_af" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0_af" ), 4 ], 3 ))
42+ summary(mod1 )$ r.squared
13343
134- #
13544
13645
13746Covariate_options <- list (
138- c(" malaria_suitability" ," log_gdppc_mean" ," elevation" ," log_mal_DAH_23_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
139- c(" malaria_suitability" ," log_gdppc_mean" ," elevation" ," log_mal_DAH_23_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
140- c(" malaria_suitability" ," log_gdppc_mean" ," elevation" ," log_mal_DAH_23_per_capita " , " precipitation_days" , " people_flood_days_per_capita" ),
141- c(" malaria_suitability" ," log_ldipc_mean" ," elevation" ," log_mal_DAH_23_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
142- c(" malaria_suitability" ," log_ldipc_mean" ," elevation" ," log_mal_DAH_23_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
143- c(" malaria_suitability" ," log_ldipc_mean" ," elevation" ," log_mal_DAH_23_per_capita " , " precipitation_days" , " people_flood_days_per_capita" ),
144- c(" malaria_suitability" ," log_gdppc_mean" ," log_mal_DAH_23_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
145- c(" malaria_suitability" ," log_gdppc_mean" ," log_mal_DAH_23_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
146- c(" malaria_suitability" ," log_gdppc_mean" ," log_mal_DAH_23_per_capita " , " precipitation_days" , " people_flood_days_per_capita" ),
147- c(" malaria_suitability" ," log_ldipc_mean" ," log_mal_DAH_23_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
148- c(" malaria_suitability" ," log_ldipc_mean" ," log_mal_DAH_23_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
149- c(" malaria_suitability" ," log_ldipc_mean" ," log_mal_DAH_23_per_capita " , " precipitation_days" , " people_flood_days_per_capita" )
47+ c(" malaria_suitability" ," log_gdppc_mean" ," elevation" ," log_mal_DAH_total_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
48+ c(" malaria_suitability" ," log_gdppc_mean" ," elevation" ," log_mal_DAH_total_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
49+ c(" malaria_suitability" ," log_gdppc_mean" ," elevation" ," log_mal_DAH_total_per_capita " , " precipitation_days" , " people_flood_days_per_capita" ),
50+ c(" malaria_suitability" ," log_ldipc_mean" ," elevation" ," log_mal_DAH_total_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
51+ c(" malaria_suitability" ," log_ldipc_mean" ," elevation" ," log_mal_DAH_total_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
52+ c(" malaria_suitability" ," log_ldipc_mean" ," elevation" ," log_mal_DAH_total_per_capita " , " precipitation_days" , " people_flood_days_per_capita" ),
53+ c(" malaria_suitability" ," log_gdppc_mean" ," log_mal_DAH_total_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
54+ c(" malaria_suitability" ," log_gdppc_mean" ," log_mal_DAH_total_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
55+ c(" malaria_suitability" ," log_gdppc_mean" ," log_mal_DAH_total_per_capita " , " precipitation_days" , " people_flood_days_per_capita" ),
56+ c(" malaria_suitability" ," log_ldipc_mean" ," log_mal_DAH_total_per_capita " , " total_precipitation" , " people_flood_days_per_capita" ),
57+ c(" malaria_suitability" ," log_ldipc_mean" ," log_mal_DAH_total_per_capita " , " relative_humidity" , " people_flood_days_per_capita" ),
58+ c(" malaria_suitability" ," log_ldipc_mean" ," log_mal_DAH_total_per_capita " , " precipitation_days" , " people_flood_days_per_capita" )
15059)
15160
15261All_Results <- vector(" list" , length = length(Covariate_options ))
15362for (c_num in seq_along(Covariate_options )){
15463 message(" Running model " , c_num , " of " , length(Covariate_options ))
15564 results <- compare_three_models(
156- df = malaria_pfpr_df ,
65+ df = malaria_df ,
15766 y_var = " logit_malaria_pfpr" ,
15867 covariate_names = Covariate_options [[c_num ]],
15968 interaction_pairs = list (Covariate_options [[c_num ]][1 : 2 ])
@@ -176,7 +85,7 @@ Expectation_table <- data.frame(variable = c("malaria_suitability",
17685 " log_gdppc_mean" ,
17786 " log_ldipc_mean" ,
17887 " elevation" ,
179- " log_mal_DAH_23_per_capita " ,
88+ " log_mal_DAH_total_per_capita " ,
18089 " total_precipitation" ,
18190 " relative_humidity" ,
18291 " precipitation_days" ,
@@ -219,6 +128,13 @@ parse_model <- function(results_table){
219128 tmp_direction <- summary_df $ value_3 / abs(summary_df $ value_3 )
220129 summary_df $ issues_3 <- ifelse(tmp_direction * summary_df $ expectation < 0 , " ISSUE" , " " )
221130 return (summary_df )
131+ # # if (any(summary_df$issues_3 == "ISSUE")){
132+ # #
133+ # # } else {
134+ # return(summary_df[,c("covariate", "value_3", "p_value_3", "issues_3")])
135+ # # }
136+
137+
222138}
223139
224140for (c_num in seq_along(All_Results )){
@@ -236,7 +152,7 @@ get_pchs <- function(vec){
236152UCov <- setdiff(Expectation_table $ variable , " R-squared" )
237153require(RColorBrewer )
238154require(beeswarm , lib = " ~/packages" )
239- COLS <- brewer.pal(6 , " Set1 " )
155+ COLS <- brewer.pal(length( All_Results ) , " Paired " )
240156# COLS <- rep(1,12)
241157par(mfrow = c(3 ,4 ))
242158for (c_num in seq_along(UCov )){
@@ -271,7 +187,7 @@ for (c_num in seq_along(UCov)){
271187 }
272188 YLIM <- c(min(tmp_df $ Model1_Coef , tmp_df $ Model2_Coef , tmp_df $ Model2_Centered_Coef ), max(tmp_df $ Model1_Coef , tmp_df $ Model2_Coef , tmp_df $ Model2_Centered_Coef ))
273189
274- # Pad YLIM a little
190+
275191 if (YLIM [1 ] < 0 ){
276192 YLIM [1 ] <- YLIM [1 ] - 0.05 * diff(YLIM )
277193 } else {
@@ -283,6 +199,14 @@ for (c_num in seq_along(UCov)){
283199 YLIM [2 ] <- YLIM [2 ] + 0.05 * diff(YLIM )
284200 }
285201
202+ if (diff(sign(YLIM )) == 0 ){
203+ if (sign(YLIM [1 ]) == - 1 ){
204+ YLIM [2 ] <- abs(YLIM [1 ])* 0.1
205+ } else {
206+ YLIM [1 ] <- - abs(YLIM [2 ])* 0.1
207+ }
208+ }
209+
286210 plot(1 , type = " n" , xlim = c(0.5 , 3.5 ),xaxs = " i" , yaxs = " i" , ylim = YLIM , ann = FALSE , axes = FALSE )
287211 box()
288212 axis(2 , pretty(YLIM ))
@@ -342,175 +266,7 @@ Centered", line = 1.1, tick = FALSE)
342266
343267
344268
345- mod <- lm(logit_malaria_pfpr ~ log_mal_DAH_23_per_capita + total_precipitation +
346- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
347-
348- out <- summary(mod )[[4 ]]
349- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
350- summary(mod )$ r.squared
351-
352-
353-
354- mod <- lm(logit_malaria_pfpr ~ log_mal_DAH_23_per_capita + total_precipitation +
355- log_gdppc_mean + malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
356-
357-
358- out <- summary(mod )[[4 ]]
359- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
360- summary(mod )$ r.squared
361-
362-
363-
364- mod <- lm(logit_malaria_pfpr ~ log_mal_DAH_23_per_capita + relative_humidity +
365- log_ldipc_mean + malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
366-
367-
368- out <- summary(mod )[[4 ]]
369- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
370- summary(mod )$ r.squared
371-
372-
373- require(scam )
374-
375-
376- mod <- scam(logit_malaria_pfpr ~ s(log_mal_DAH_23_per_capita , k = 4 , bs = " mpi" ) + relative_humidity +
377- log_ldipc_mean * malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
378- summary(mod )
379- plot(mod )
380-
381-
382-
383-
384-
385-
386-
387-
388- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + total_precipitation +
389- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
390-
391- out <- summary(mod )[[4 ]]
392- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
393- summary(mod )$ r.squared
394-
395- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + precipitation_days +
396- log_gdppc_mean + malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
397-
398- out <- summary(mod )[[4 ]]
399- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
400- summary(mod )$ r.squared
401-
402- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + total_precipitation +
403- log_gdppc_mean + malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
404-
405- out <- summary(mod )[[4 ]]
406- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
407- summary(mod )$ r.squared
408-
409-
410- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + precipitation_days +
411- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita , data = malaria_pfpr_df )
412-
413- out <- summary(mod )[[4 ]]
414- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
415- summary(mod )$ r.squared
416-
417- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + total_precipitation +
418- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita , data = malaria_pfpr_df )
419-
420- out <- summary(mod )[[4 ]]
421- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
422- summary(mod )$ r.squared
423-
424- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + precipitation_days +
425- log_gdppc_mean + malaria_suitability + people_flood_days_per_capita , data = malaria_pfpr_df )
426-
427- out <- summary(mod )[[4 ]]
428- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
429- summary(mod )$ r.squared
430-
431- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + total_precipitation +
432- log_gdppc_mean + malaria_suitability + people_flood_days_per_capita , data = malaria_pfpr_df )
433-
434- out <- summary(mod )[[4 ]]
435- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
436- summary(mod )$ r.squared
437-
438-
439-
440-
441-
442-
443-
444-
445-
446-
447-
448-
449-
450-
451-
452- mod <- gam(logit_malaria_pfpr ~ elevation + s(mal_DAH_23 , k = 4 ) + precipitation_days +
453- log_ldipc_mean + malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
454- summary(mod )
455- plot(mod )
456-
457-
458-
459-
460-
461-
462-
463-
464- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + malaria_itn +
465- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_int_df )
466- out <- summary(mod )[[4 ]]
467- cbind(out [which(rownames(out ) %nlike % " A0" ), 1 : 3 ], round(out [which(rownames(out ) %nlike % " A0" ), 4 ], 3 ))
468-
469-
470-
471- mod <- gam(logit_malaria_pfpr ~ elevation + s(mal_DAH_23 , k = 4 ) +
472- log_ldipc_mean * malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_int_df )
473- summary(mod )
474- plot(mod )
475-
476-
477-
478-
479- mod <- gam(logit_malaria_pfpr ~ s(malaria_suitability ), data = malaria_pfpr_df )
480-
481- malaria_pfpr_int_df <- malaria_pfpr_df [which(malaria_pfpr_df $ malaria_itn > 0 | malaria_pfpr_df $ malaria_irs ),]
482-
483- summary(mod )
484-
485- mod <- lm(logit_malaria_pfpr ~ elevation + malaria_itn + malaria_irs + mal_DAH_23 +
486- log_gdppc_mean * malaria_suitability + relative_humidity + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
487-
488- summary(mod )
489-
490- mod <- lm(logit_malaria_pfpr ~ elevation + malaria_irs + mal_DAH_23 + total_precipitation +
491- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita , data = malaria_pfpr_int_df )
492- summary(mod )
493-
494-
495-
496-
497- mod <- gam(logit_malaria_pfpr ~ elevation + s(mal_DAH_23 , k = 4 ) + total_precipitation +
498- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita , data = malaria_pfpr_int_df )
499- summary(mod )
500-
501-
502- mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + total_precipitation
503- log_gdppc_mean * malaria_suitability + people_flood_days_per_capita + A0_af , data = malaria_pfpr_df )
504- plot(mod )
505-
506- # Summarize the model except for the A0 location
507- # mod <- lm(logit_malaria_pfpr ~ elevation + mal_DAH_23 + total_precipitation
508- # log_gdppc_mean * malaria_suitability + people_flood_days_per_capita, data = malaria_pfpr_df)
509- # summary(mod)
510-
511269
512- summary(mod )
513270
514- cor(malaria_pfpr_df $ total_precipitation , malaria_pfpr_df $ people_flood_days_per_capita )
515271
516272
0 commit comments