1- # Functions to import and save data for predictions and observations
2- # Examples:
3- # forecasts <- get_forecasts()
4- # obs <- get_observed()
5- # forecasts <- left_join(forecasts, obs,
6- # by = c("location", "target_end_date"))
7- # anomalies <- get_anomalies()
8- # forecasts <- anti_join(forecasts, anomalies,
9- # by = c("target_end_date", "location"))
10-
111library(here )
122library(dplyr )
133library(readr )
@@ -16,57 +6,10 @@ library(arrow)
166library(tidyr )
177library(ggplot2 )
188library(stringr )
19- theme_set(theme_minimal())
20-
21- # Prediction data ------------------------------------------------------
22- get_forecasts <- function (data_type = " death" ) {
23- forecasts <- arrow :: read_parquet(here(" data" ,
24- " covid19-forecast-hub-europe.parquet" )) | >
25- filter(grepl(data_type , target ))
26-
27- forecasts <- forecasts | >
28- separate(target , into = c(" horizon" , " target_variable" ),
29- sep = " wk ahead " ) | >
30- # set forecast date to corresponding submission date
31- mutate(
32- horizon = as.numeric(horizon ),
33- forecast_date = target_end_date - weeks(horizon ) + days(1 )) | >
34- rename(prediction = value ) | >
35- select(location , forecast_date ,
36- horizon , target_end_date ,
37- model , quantile , prediction )
38-
39- # Exclusions
40- # dates should be between start of hub and until end of JHU data
41- forecasts <- forecasts | >
42- filter(forecast_date > = as.Date(" 2021-03-07" ) &
43- target_end_date < = as.Date(" 2023-03-10" ))
44- # only keep forecasts up to 4 weeks ahead
45- forecasts <- filter(forecasts , horizon < = 4 )
46-
47- # only include predictions from models with all quantiles
48- rm_quantiles <- forecasts | >
49- group_by(model , forecast_date , location ) | >
50- summarise(q = length(unique(quantile ))) | >
51- filter(q < 23 )
52- forecasts <- anti_join(forecasts , rm_quantiles ,
53- by = c(" model" , " forecast_date" , " location" ))
54- forecasts <- filter(forecasts , ! is.na(quantile )) # remove "median"
55-
56- # remove duplicates
57- forecasts <- forecasts | >
58- group_by_all() | >
59- mutate(duplicate = row_number()) | >
60- ungroup() | >
61- filter(duplicate == 1 ) | >
62- select(- duplicate )
63-
64- return (forecasts )
65- }
9+ library(purrr )
6610
6711# Observed data ---------------------------------------------------------
68- # Get raw values
69- get_observed <- function (data_type = " death" ) {
12+ walk(c(" case" , " death" ), \(data_type ) {
7013 file_name <- paste0(" truth_JHU-Incident%20" , str_to_title(data_type ), " s.csv" )
7114 obs <- read_csv(paste0(
7215 " https://raw.githubusercontent.com/covid19-forecast-hub-europe/" ,
@@ -98,67 +41,13 @@ get_observed <- function(data_type = "death") {
9841 " Stable" )))))
9942 obs <- obs | >
10043 select(location , target_end_date , observed , trend )
101- return (obs )
102- }
103-
104- # Observed data ---------------------------------------------------------
105- # Get raw values
106- get_pop <- function () {
107- pop <- read_csv(paste0(
108- " https://raw.githubusercontent.com/european-modelling-hubs/" ,
109- " covid19-forecast-hub-europe/main/data-locations/locations_eu.csv"
110- ), show_col_types = FALSE ) | >
111- select(location , population )
112- return (pop )
113- }
114-
115- # Plot observed data and trend classification
116- plot_observed <- function () {
117- obs <- import_observed()
118- obs | >
119- ggplot(aes(x = target_end_date , y = log(observed ))) +
120- geom_point(col = trend ) +
121- geom_line(alpha = 0.3 ) +
122- scale_x_date() +
123- labs(x = NULL , y = " Log observed" , col = " Trend" ,
124- caption = " Trend (coloured points) of weekly change in 3-week moving average" ) +
125- theme(legend.position = " bottom" , ) +
126- facet_wrap(facets = " location" , ncol = 1 ,
127- strip.position = " left" )
128-
129- ggsave(filename = here(" output/fig-trends.pdf" ),
130- height = 50 , width = 15 , limitsize = FALSE )
131- }
132-
133- # Anomalies
134- get_anomalies <- function () {
135- read_csv(" https://raw.githubusercontent.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/5a2a8d48e018888f652e981c95de0bf05a838135/data-truth/anomalies/anomalies.csv" ) | >
136- filter(target_variable == " inc death" ) | >
137- select(- target_variable ) | >
138- mutate(anomaly = TRUE )
44+ write_csv(obs , here(" data" , paste0(" observed-" , data_type , " .csv" )))
13945}
14046
141- # Plot anomalies
142- plot_anomalies <- function () {
143- obs <- get_observed()
144- anomalies <- get_anomalies()
145-
146- obs <- left_join(obs , anomalies ) | >
147- group_by(location ) | >
148- mutate(anomaly = replace_na(anomaly , FALSE ))
149-
150- obs | >
151- ggplot(aes(x = target_end_date ,
152- y = log(observed ),
153- col = anomaly )) +
154- geom_line() +
155- geom_point(size = 0.3 ) +
156- scale_x_date() +
157- labs(x = NULL , y = " Log observed" ) +
158- theme(legend.position = " bottom" , ) +
159- facet_wrap(facets = " location" , ncol = 1 ,
160- strip.position = " left" )
161-
162- ggsave(filename = here(" output/fig-anomalies.pdf" ),
163- height = 50 , width = 15 , limitsize = FALSE )
164- }
47+ # Population data ---------------------------------------------------------
48+ pop <- read_csv(paste0(
49+ " https://raw.githubusercontent.com/european-modelling-hubs/" ,
50+ " covid19-forecast-hub-europe/main/data-locations/locations_eu.csv"
51+ ), show_col_types = FALSE ) | >
52+ select(location , population )
53+ write_csv(pop , here(" data" , paste0(" populations.csv" )))
0 commit comments