-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcases-same-start-point-log10.R
More file actions
87 lines (74 loc) · 3.07 KB
/
cases-same-start-point-log10.R
File metadata and controls
87 lines (74 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Compare growth slopes across countries, with every series aligned to the
# day it passed the same cumulative case count (min_start_value).
min_start_value <- 1000
countries <- c("DE", "US", "IT", "ES", "FR", "UK", "KR", "AT", "CA", "BER")
#install.packages("readxl")
#install.packages("httr")
library(readxl)
library(httr)
library(reshape2)
library(ggplot2)
library(dplyr)
library(scales)
# Project helpers; EcdcData() and BerlinData() are presumably defined in one
# of these two files -- confirm.
source("load_data.R")
source("utilities.R")

# EcdcData() returns a list; this script reads its `data`,
# `latest_data_date` and `retrieved_date` elements.
ecdc_list <- EcdcData(countries)
latest_data_date <- ecdc_list$latest_data_date
retrieved_date <- ecdc_list$retrieved_date

# Berlin figures come from a public CSV on GitHub.
# Local alternative: berlin <- BerlinData("data\\daily.csv")
berlin <- BerlinData(
  "https://raw.githubusercontent.com/jakubvalenta/covid-berlin-data/master/covid_berlin_data_incl_hospitalized.csv"
)

# Stack Berlin on top of the ECDC rows and derive a proper Date column from
# dateRep (parsed as year-month-day).
ecdc <- rbind(berlin, ecdc_list$data) %>%
  mutate(date = as.Date(dateRep, "%Y-%m-%d"))
# Running total per geoId: the ECDC data carries new cases per day, so
# accumulate them in date order within each geoId.
ecdc <- ecdc %>%
  arrange(date) %>%
  group_by(geoId) %>%
  mutate(casesTot = cumsum(cases)) %>%
  ungroup()

# First day on which each geoId had more than min_start_value total cases.
# A geoId that never crossed the threshold does not appear in start_dates.
start_dates <- ecdc %>%
  filter(casesTot > min_start_value) %>%
  group_by(geoId) %>%
  summarise(start_date = min(date))

# Turn the date into a per-geoId day index (0 = the start date) that can be
# used as a shared x axis. The inner join also drops every geoId that has no
# start date, so no separate filter is needed. The join column is named once:
# listing it twice is rejected by newer dplyr ("Join columns must be unique").
ecdc <- ecdc %>%
  inner_join(start_dates, by = "geoId") %>%
  mutate(casesIndexDate = as.integer(date - start_date))

# Last observation of every series: used to place the text label at the end
# of each line.
labels <- ecdc %>%
  group_by(geoId) %>%
  top_n(1, casesIndexDate) %>%
  ungroup()

# Total at the end of each line, used as tick positions on the secondary
# axis. Taken from the last row per geoId rather than top_n(1, casesTot),
# which returns every tied row when the total is flat and picks the maximum
# instead of the final value when the total dips.
y_ends <- labels$casesTot
# Doubling times (in days) for the dashed gray reference lines.
doubling_days <- c(2, 3, 4, 5)

# Total cases per geoId on a log10 y axis, x axis = days since the series
# passed min_start_value. Rows before day 0 are not drawn.
ggplot(
  filter(ecdc, casesIndexDate >= 0),
  aes(x = casesIndexDate, y = casesTot, color = geoId)
) +
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept for compatibility with the version in use.
  geom_line(size = 0.1) +
  # Reference lines are given in log10 units because the y scale is log10:
  # a series that doubles every d days has slope log10(2) / d, and all lines
  # start at min_start_value on day 0.
  lapply(doubling_days, function(days) {
    geom_abline(
      intercept = log10(min_start_value),
      slope = log10(2) / days,
      linetype = "dashed",
      color = "gray"
    )
  }) +
  # geoId label just right of each line's last point; x, y and colour are
  # inherited from the plot-level mapping.
  geom_text(
    data = labels,
    aes(label = geoId),
    hjust = -0.5,
    vjust = 0.5
  ) +
  ggtitle(
    "Total cases over time",
    subtitle = paste("Synchronized with day '0' as when the country had",min_start_value, "cases.")
  ) +
  labs(caption = paste("Data from ECDC data set",latest_data_date,"retrieved",retrieved_date)) +
  theme(legend.position = "none") +
  # Secondary (right-hand) axis repeats the same scale but places its ticks
  # at the final total of each series.
  scale_y_continuous(
    "Total cases",
    trans = "log10",
    labels = comma,
    sec.axis = sec_axis(
      ~ .,
      breaks = y_ends,
      labels = comma
    ),
    expand = expansion(mult = c(0, 0.1))
  ) +
  # Extra room on the right so the text labels are not clipped.
  scale_x_continuous(
    paste("Days since",min_start_value,"cases"),
    expand = expansion(mult = c(0, .1))
  )