forked from khakieconomics/covid_data
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_munging.R
More file actions
80 lines (68 loc) · 4.15 KB
/
data_munging.R
File metadata and controls
80 lines (68 loc) · 4.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
library(tidyverse); library(tidycensus); library(stringr)
# FIPS lookup table shipped with tidycensus, as a tibble, with an extra
# lower-cased copy of the county name (`locality`) used as a join key later on.
all_fips <- as_tibble(
  mutate(tidycensus::fips_codes, locality = tolower(county))
)
# Intervention type codes retained in the final panel. A codebook exists for
# these codes -- will ask Delaney to add others.
NPIs_to_keep <- c(
  "SDO", "SD",
  "GS_10", "GS_100", "GS_1000", "GS_25", "GS_250", "GS_50", "GS_500",
  "CPV", "CPV_50",
  "PC", "NESC", "LD"
)
# Parse the free-text date strings used in the raw NPI file.
#
# Recognised layouts, tried in this order:
#   * "d-Mon-yy"  (e.g. "16-Mar-20")
#   * "m/d/yyyy"  (e.g. "3/16/2020")
#   * "d-Mon"     (no year given; 2020 is appended)
# Anything else, including NA input, yields NA.
# NOTE(review): "%b" month-name matching depends on the session's LC_TIME
# locale -- confirm the script is always run under an English locale.
parse_npi_date <- function(x) {
  case_when(
    grepl("[0-9]{1,2}-[A-Za-z]{3}-[0-9]{2}", x) ~ as.Date(x, format = "%d-%b-%y"),
    grepl("[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}", x) ~ as.Date(x, format = "%m/%d/%Y"),
    grepl("[0-9]{1,2}-[A-Za-z]{3}", x) ~ as.Date(paste0(x, "-2020"), format = "%d-%b-%Y"),
    TRUE ~ as.Date(NA_character_)
  )
}

# Long table of interventions: one row per locality / state / intervention,
# with parsed start and end dates and a label (`intervention`) that is unique
# within each locality.
intervention_data <- read_csv("npis_raw_03-24-2020.csv") %>%
  mutate(
    # The start-date column is spelled `data_start` in the code's input.
    date_start = parse_npi_date(data_start),
    date_end_date = parse_npi_date(date_end)
  ) %>%
  # Rows whose start date is missing or unparseable cannot be placed in time.
  filter(!is.na(date_start)) %>%
  select(locality, type_of_intervention, state, date_start, date_end_date) %>%
  rename(locality_original = locality) %>%
  # Drop the "CI" type, then remove exact duplicate rows.
  filter(!type_of_intervention %in% c("CI")) %>%
  distinct() %>%
  arrange(state, locality_original, date_start) %>%
  group_by(state, locality_original, type_of_intervention) %>%
  # When a locality has the same type more than once, suffix the 2nd, 3rd, ...
  # occurrence (in start-date order) with its position, e.g. "SD_2".
  mutate(
    intervention = ifelse(
      row_number() > 1,
      paste0(type_of_intervention, "_", row_number()),
      type_of_intervention
    )
  ) %>%
  ungroup() %>%
  # Numbering happens before this filter, but it is done within type, so the
  # labels of the kept types are unaffected by the types removed here.
  filter(type_of_intervention %in% NPIs_to_keep)
# Wide table of intervention start dates: the end date and raw type are
# dropped, then each `intervention` label becomes its own column holding the
# corresponding `date_start`.
intervention_starts <- spread(
  select(intervention_data, -c(date_end_date, type_of_intervention)),
  key = intervention,
  value = date_start
)
# Daily county-level series read from "us-counties.csv", augmented with
# `county_code` (characters 3-5 of the FIPS code) and the `state` code looked
# up from `all_fips` via the full state name.
county_data <- read_csv(
  "us-counties.csv",
  # Pin `fips` to character so leading zeros survive and substr() below always
  # sees the full code, independent of readr's column-type guessing.
  col_types = cols(fips = col_character())
) %>%
  mutate(county_code = substr(fips, 3, 5)) %>%
  # The file's `state` column holds full names; free up `state` for the code.
  rename(state_name = state) %>%
  left_join(
    all_fips %>%
      select(state, state_name) %>%
      group_by(state) %>%
      summarise(state_name = first(state_name)) %>%
      ungroup(),
    # Explicit key: `state_name` is the only column the two tables share.
    by = "state_name"
  ) %>%
  # "New York City" rows get the sentinel code "-1", the same value assigned
  # to "new york city" when `localities` is built, so the two can be joined.
  mutate(county_code = ifelse(county == "New York City", "-1", county_code))
# Counties_in_nyt_data <- county_data %>%
# group_by(fips) %>%
# select(county) %>%
# summarize(county = first(county)) %>%
# ungroup %>%
# mutate(locality = tolower(county))
# Lookup from each intervention locality to a county code.
#
# One row per distinct (locality, state) pair found in `intervention_starts`.
# Pairs are kept distinct on BOTH columns so that localities sharing a name
# across states are not collapsed onto a single state.
# Column order (locality, state, locality_original, county_code) is relied on
# downstream by the positional range `-date:-locality_original`.
localities <- intervention_starts %>%
  transmute(locality = locality_original, state) %>%
  distinct() %>%
  arrange(locality, state) %>%
  # Keep the raw spelling for joining back to `intervention_starts`; the
  # cleaned `locality` (underscores -> spaces) is matched against county names.
  mutate(locality_original = locality,
         locality = gsub("_", " ", locality)) %>%
  # Hand-written fixes for names that do not match a county name as-is;
  # "busan" and "london" are blanked out and dropped by the filter below.
  mutate(locality = case_when(locality == "bexar county, san antonio" ~ "bexar county",
                              locality == "busan" ~ NA_character_,
                              locality == "miami dade county" ~ "miami-dade county",
                              locality == "london" ~ NA_character_,
                              locality == "las vegas county" & state == "NV" ~ "clark county",
                              TRUE ~ locality)) %>%
  left_join(all_fips %>% select(locality, county_code, state),
            by = c("locality", "state")) %>%
  # Keep only rows whose state code appears in the FIPS table.
  filter(state %in% all_fips$state) %>%
  # Drops the literal "nan" placeholder and, because the comparison is NA for
  # them, the rows whose locality was set to NA above.
  filter(locality != "nan") %>%
  # Localities with no county-name match get a hand-assigned code.
  mutate(county_code = case_when(locality == "new york city" ~ "-1",
                                 locality == "washington dc" ~ "001",
                                 TRUE ~ county_code))
# County series restricted to counties that correspond to an intervention
# locality, with that locality's intervention start dates attached.
joined_data <- county_data %>%
  # `na_matches = "never"`: by default dplyr treats NA keys as equal, which
  # would attach localities with no county code (NA) to county rows that have
  # no FIPS code (NA `county_code`) in the same state.
  left_join(localities, by = c("county_code", "state"), na_matches = "never") %>%
  # Rows that matched no locality carry no intervention information.
  filter(!is.na(locality)) %>%
  left_join(intervention_starts, by = c("locality_original", "state"))
# Panel of 0/1 intervention indicators: for every row of `joined_data` and
# every intervention column, 1 when the row's date is on or after that
# intervention's start date, otherwise 0 (including when no start date exists).
panel_attempt <- joined_data %>%
  # Stack every column that lies outside the date..locality_original range.
  gather(key = intervention, value = date_of_intervention,
         -date:-locality_original) %>%
  arrange(state, locality_original, intervention, date) %>%
  mutate(
    # A missing comparison result becomes 0.
    intervention_on = coalesce(as.numeric(date >= date_of_intervention), 0),
    # The start date itself is no longer needed once the flag is computed.
    date_of_intervention = NULL
  ) %>%
  spread(key = intervention, value = intervention_on)
# Write the final panel to "interventions_and_deaths.csv" (relative path, so
# it is resolved against the current working directory).
write_csv(panel_attempt, "interventions_and_deaths.csv")