Skip to content

Commit b09e6db

Browse files
authored
remove state col from time-series parquet (#862)
1 parent 4f5280e commit b09e6db

File tree

3 files changed

+22
-20
lines changed

3 files changed

+22
-20
lines changed

src/get_baseline.R

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,14 @@ make_baseline_forecast <- function(
5757
dplyr::filter(
5858
as_of == max(as_of)
5959
) |>
60+
dplyr::mutate(
61+
geo_value = forecasttools::us_location_recode(
62+
.data$location,
63+
"code",
64+
"abbr"
65+
)
66+
) |>
6067
dplyr::rename(
61-
geo_value = state,
6268
time_value = date
6369
) |>
6470
dplyr::select(-c("as_of", "location", "target")) |>

src/get_target_data.R

Lines changed: 15 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ get_truth_data <- function(
2222
"US"
2323
)
2424
) |>
25-
dplyr::filter(!stringr::str_detect(.data$state, "Region")) |>
2625
dplyr::mutate(
2726
location = forecasttools::us_location_recode(.data$state, "abbr", "code"),
2827
location_name = forecasttools::us_location_recode(
@@ -78,35 +77,37 @@ get_target_data <- function(
7877
output_dirpath <- fs::path(base_hub_path, "target-data")
7978
fs::dir_create(output_dirpath)
8079

81-
raw_nhsn_data <- forecasttools::pull_nhsn(
80+
nhsn_data <- forecasttools::pull_nhsn(
8281
api_endpoint = "https://data.cdc.gov/resource/mpgq-jmmr.json",
8382
columns = c("totalconfc19newadm"),
8483
start_date = first_full_weekending_date
85-
)
86-
87-
output_file <- fs::path(output_dirpath, "time-series", ext = "parquet")
88-
hubverse_format_nhsn_data <- raw_nhsn_data |>
84+
) |>
8985
dplyr::rename(
9086
observation = "totalconfc19newadm",
91-
date = "weekendingdate",
92-
state = "jurisdiction"
87+
date = "weekendingdate"
9388
) |>
9489
dplyr::mutate(
9590
date = as.Date(.data$date),
9691
observation = as.numeric(.data$observation),
97-
state = stringr::str_replace(.data$state, "USA", "US")
92+
jurisdiction = stringr::str_replace(.data$jurisdiction, "USA", "US")
9893
) |>
99-
dplyr::filter(!stringr::str_detect(.data$state, "Region")) |>
10094
dplyr::mutate(
101-
location = forecasttools::us_location_recode(.data$state, "abbr", "code"),
95+
location = forecasttools::us_location_recode(
96+
.data$jurisdiction,
97+
"abbr",
98+
"code"
99+
),
102100
as_of = !!today,
103101
target = "wk inc covid hosp"
104102
) |>
105103
dplyr::filter(!(location %in% !!excluded_locations))
106104

107-
hubverse_format_nhsn_data |>
105+
hubverse_format_nhsn_data <- nhsn_data |> dplyr::select(-"jurisdiction")
106+
107+
nhsn_data |>
108108
dplyr::rename(
109-
value = observation
109+
value = "observation",
110+
state = "jurisdiction"
110111
) |>
111112
dplyr::select(-c("as_of", "target")) |>
112113
readr::write_csv(
@@ -131,11 +132,6 @@ get_target_data <- function(
131132
observation = as.numeric(.data$percent_visits_covid) / 100,
132133
) |>
133134
dplyr::mutate(
134-
state = forecasttools::us_location_recode(
135-
.data$geography,
136-
"name",
137-
"abbr"
138-
),
139135
location = forecasttools::us_location_recode(
140136
.data$geography,
141137
"name",
@@ -146,13 +142,13 @@ get_target_data <- function(
146142
) |>
147143
dplyr::select(
148144
"date",
149-
"state",
150145
"observation",
151146
"location",
152147
"as_of",
153148
"target"
154149
)
155150

151+
output_file <- fs::path(output_dirpath, "time-series", ext = "parquet")
156152
forecasttools::read_tabular_file(output_file) |>
157153
dplyr::bind_rows(hubverse_format_nhsn_data, hubverse_format_nssp_data) |>
158154
forecasttools::write_tabular_file(output_file)

target-data/time-series.parquet

-5.94 KB
Binary file not shown.

0 commit comments

Comments
 (0)