From 788ebf18939c5f628216e40931e7beafb5689bd0 Mon Sep 17 00:00:00 2001
From: jebyrnes <jarrett.byrnes@umb.edu>
Date: Wed, 13 May 2020 21:54:16 -0400
Subject: [PATCH 1/2] Create covid19R compliant JHU coronavirus dataset

---
 data_raw/data_covid19R.R  |  53 ++++++++++++++++++++
 data_raw/get_code_table.R | 100 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100644 data_raw/data_covid19R.R
 create mode 100644 data_raw/get_code_table.R

diff --git a/data_raw/data_covid19R.R b/data_raw/data_covid19R.R
new file mode 100644
index 0000000..f3d46d8
--- /dev/null
+++ b/data_raw/data_covid19R.R
@@ -0,0 +1,53 @@
+#----------------------------------------------------
+# Creating a covid19R compliant JHU coronavirus data set
+# using coronavirus data
+# https://github.com/CSSEGISandData/COVID-19
+
+`%>%` <- magrittr::`%>%`
+setwd(here::here())
+source("data_raw/get_code_table.R")
+
+
+# the initial data: git_df must already exist in the session (or uncomment below)
+# git_df <- read.csv("https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus.csv",
+#                    stringsAsFactors = FALSE)
+
+# create valid locations
+git_df_long_location <- git_df %>%
+  dplyr::mutate(country = ifelse(country=="Korea, South", "South Korea", country),
+                province = ifelse(province=="Bonaire, Sint Eustatius and Saba",
+                                  "Bonaire and Sint Eustatius and Saba", province)
+                ) %>%
+  tidyr::unite(location, province, country, sep = ", ") %>%
+  dplyr::rename(data_type = type,
+                value = cases) %>%
+
+  # fix some bad location names
+  dplyr::mutate(
+    location = gsub("^\\, ", "", location),
+    location_type = ifelse(grepl("\\,", location), "state", "country"))
+
+code_table <- get_code_table()
+
+# add codes (join on the shared location column)
+coronavirus_covid19 <- git_df_long_location %>%
+  dplyr::left_join(code_table, by = "location")
+# fix data types
+coronavirus_covid19 <- coronavirus_covid19 %>%
+  dplyr::mutate(data_type = dplyr::case_when(
+    data_type == "confirmed" ~ "cases_new",
+    data_type == "recovered" ~ "recovered_new",
+    data_type == "death" ~ "deaths_new",
+
+  ))
+
+
+# data checks
+sum(is.na(coronavirus_covid19$location_code)) #make sure codes combine a-ok - will be >0 due to cruise ships
+nrow(coronavirus_covid19)- nrow(git_df_long_location) #should be 0, or there was a one to many match
+
+# write out
+#write.csv(coronavirus, "csv/coronavirus_covid19format.csv", row.names = FALSE)
+print("covid19R compliant data done...")
+
+
diff --git a/data_raw/get_code_table.R b/data_raw/get_code_table.R
new file mode 100644
index 0000000..bb71ca6
--- /dev/null
+++ b/data_raw/get_code_table.R
@@ -0,0 +1,100 @@
+# Build the lookup table that maps location names used in the JHU data to
+# ISO 3166-2 codes. Downloads the iso-3166-2.js code list (needs network
+# access) and returns a data frame with the columns location, location_code
+# and location_code_type (always "iso_3166_2").
+get_code_table <- function() {
+  `%>%` <- magrittr::`%>%` # so the function also works when sourced alone
+  # get iso 3166 2 codes
+  iso_codes <-
+    read.csv(
+      "https://github.com/olahol/iso-3166-2.js/raw/master/data.csv",
+      col.names = c(
+        "Country", "iso_3166_2", "name", "type", "Country_iso_3166_2"
+      ),
+      # NOTE(review): presumably "." so the literal code "NA" (Namibia) is not
+      # parsed as a missing value - confirm
+      na.strings = ".",
+      # keep text as character; as factors (the default before R 4.0) the
+      # ifelse() calls below would return integer level codes, not names
+      stringsAsFactors = FALSE
+    ) %>%
+    dplyr::mutate(
+      name = ifelse(iso_3166_2 == "NL-BQ1",
+        "Bonaire and Sint Eustatius and Saba", name
+      ),
+      Country = ifelse(iso_3166_2 == "VG-VG", "British Virgin Islands", Country),
+      Country = ifelse(Country_iso_3166_2 == "CD",
+        "The Democratic Republic Of The Congo", Country
+      ),
+      Country = ifelse(Country_iso_3166_2 == "CZ", "Czechia", Country)
+    )
+
+  country_code <- iso_codes %>%
+    dplyr::mutate(location = Country) %>%
+    dplyr::group_by(location) %>%
+    dplyr::summarize(location_code = Country_iso_3166_2[1]) %>%
+    dplyr::bind_rows(
+      data.frame(location = "Cabo Verde", location_code = "CV"),
+      data.frame(location = "Greenland, Denmark", location_code = "GL"),
+      data.frame(location = "Cote d'Ivoire", location_code = "CI")
+    ) #do not know why these were missing
+
+  province_code <- iso_codes %>%
+    dplyr::mutate(type = ifelse(iso_3166_2 == "NL-AW", "Province", type)) %>% # problem with Aruba
+    dplyr::filter(type != "Country") %>%
+    dplyr::mutate(location = paste(name, Country, sep = ", ")) %>%
+    dplyr::group_by(location) %>%
+    dplyr::summarize(location_code = iso_3166_2[1]) %>%
+    dplyr::bind_rows(
+      data.frame(location = "Channel Islands, United Kingdom", location_code = "GB-CHA"),
+      data.frame(location = "Tibet, China", location_code = "CN-XZ"),
+      data.frame(location = "Inner Mongolia, China", location_code = "CN-NM")
+    )
+
+  # rename to the location spellings this table is joined on (JHU names)
+  code_table <- dplyr::bind_rows(country_code, province_code) %>%
+    dplyr::mutate(
+      location_code_type = "iso_3166_2",
+      location = dplyr::case_when(
+        location == "Cayman Islands" ~ "Cayman Islands, United Kingdom",
+        location == "Anguilla" ~ "Anguilla, United Kingdom",
+        location == "Kinshasa, The Democratic Republic Of The Congo" ~ "Congo (Kinshasa)",
+        location == "Brazzaville, Congo" ~ "Congo (Brazzaville)",
+        location == "Brunei Darussalam" ~ "Brunei",
+        location == "Myanmar" ~ "Burma",
+        location ==  "Falkland Islands" ~ "Falkland Islands (Malvinas), United Kingdom",
+        location == "Swaziland" ~ "Eswatini",
+        location == "Bermuda" ~ "Bermuda, United Kingdom",
+        location == "Curaçao" ~ "Curacao, Netherlands",
+        location == "French Polynesia" ~ "French Polynesia, France",
+        location == "British Virgin Islands" ~ "British Virgin Islands, United Kingdom",
+        location == "Faroe Islands" ~ "Faroe Islands, Denmark",
+        location == "French Guiana" ~ "French Guiana, France",
+        location == "Gibraltar" ~ "Gibraltar, United Kingdom",
+        location == "Vatican City" ~ "Holy See",
+        location == "Isle of Man" ~ "Isle of Man, United Kingdom",
+        location == "Kosovo-Metohija, Serbia" ~ "Kosovo",
+        location == "Macau" ~ "Macau, China",
+        location == "Montserrat" ~ "Montserrat, United Kingdom",
+        location == "New Caledonia" ~ "New Caledonia, France",
+        location == "Macedonia, the Former Yugoslav Republic Of" ~ "North Macedonia",
+        location == "Reunion" ~ "Reunion, France",
+        location == "Saint-Barthélemy, France" ~ "Saint Barthelemy, France",
+        location == "Saint Kitts And Nevis" ~ "Saint Kitts and Nevis",
+        location == "Saint-Pierre-et-Miquelon, France" ~ "Saint Pierre and Miquelon, France",
+        location == "Saint Vincent And The Grenadines" ~ "Saint Vincent and the Grenadines",
+        location == "St. Maarten" ~ "Sint Maarten, Netherlands",
+        location == "Korea, Republic of" ~ "South Korea",
+        location == "Saint-Martin, France" ~ "St Martin, France",
+        location == "Taiwan" ~ "Taiwan*",
+        location == "East Timor" ~ "Timor-Leste",
+        location == "Turks & Caicos Islands" ~ "Turks and Caicos Islands, United Kingdom",
+        location == "United States" ~ "US",
+        location == "Viet Nam" ~ "Vietnam",
+        location == "Gaza, Palestine" ~ "West Bank and Gaza",
+        TRUE ~ location
+      )
+    )
+
+  code_table
+}

From 858cebeeaf44d7c2dd5abd7688f21dffc46b84e8 Mon Sep 17 00:00:00 2001
From: jebyrnes <jarrett.byrnes@umb.edu>
Date: Wed, 13 May 2020 22:08:18 -0400
Subject: [PATCH 2/2] Adding covid19R format

---
 R/refresh_coronavirus.R  | 56 ++++++++++++++++++++++++++++++++++++++++
 data_raw/data_covid19R.R | 34 ++++++++++++++----------
 2 files changed, 77 insertions(+), 13 deletions(-)
 create mode 100644 R/refresh_coronavirus.R

diff --git a/R/refresh_coronavirus.R b/R/refresh_coronavirus.R
new file mode 100644
index 0000000..83e5fec
--- /dev/null
+++ b/R/refresh_coronavirus.R
@@ -0,0 +1,56 @@
+#' Refresh the 2019 Novel Coronavirus COVID-19 (2019-nCoV) Dataset in the Covid19R Project Format
+#'
+#' Daily summary of the Coronavirus (COVID-19) cases by state/province.
+#'
+#' @return A data.frame object with the columns:
+#' * date - The date in YYYY-MM-DD form
+#' * location - The name of the location as provided by the data source.
+#' * location_type - The type of location using the covid19R controlled vocabulary.
+#' * location_code - A standardized location code using a national or international standard. Drawn from \href{https://github.com/olahol/iso-3166-2.js/}{iso-3166-2.js}'s version
+#' * location_code_type - The type of standardized location code being used according to the covid19R controlled vocabulary. Here we use `iso_3166_2`
+#' * data_type - the type of data in that given row using the covid19R controlled vocabulary. Includes cases_new, deaths_new, recovered_new.
+#' * value - number of cases of each data type
+#' @export refresh_coronavirus_jhu
+#' @source coronavirus - Johns Hopkins University Center for Systems Science and Engineering (JHU CCSE) Coronavirus \href{https://systems.jhu.edu/research/public-health/ncov/}{website}
+#'
+#' @examples
+#' \dontrun{
+#' # update the data
+#' jhu_covid19_dat <- refresh_coronavirus_jhu()
+#' }
+#'
+refresh_coronavirus_jhu <- function(){
+  csv_url <- "https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus_covid19format.csv"
+  utils::read.csv(csv_url, stringsAsFactors = FALSE)
+}
+
+
+
+#' Get information about the datasets provided by the coronavirus package
+#'
+#' @description Returns information about the datasets in this package for covid19R harvesting
+#'
+#' @return a one-row data.frame of information about the datasets in this package
+#' @export get_info_coronavirus
+#'
+#' @examples
+#' \dontrun{
+#' # get the dataset info from this package
+#' get_info_coronavirus()
+#' }
+#'
+get_info_coronavirus <- function(){
+  data.frame(
+    data_set_name = "coronavirus_jhu",
+    package_name = "coronavirus",
+    function_to_get_data = "refresh_coronavirus_jhu*",
+    data_details = "The 2019 Novel Coronavirus COVID-19 (2019-nCoV) Dataset from the Johns Hopkins University Center for Systems Science and Engineering",
+    data_url = "https://systems.jhu.edu/research/public-health/ncov/",
+    license_url = "https://github.com/CSSEGISandData/COVID-19/",
+    data_types = "cases_new, recovered_new, deaths_new",
+    location_types = "country, state",
+    spatial_extent = "global",
+    has_geospatial_info = TRUE, # the covid19R-format data keep lat/long columns
+    stringsAsFactors = FALSE
+  )
+}
diff --git a/data_raw/data_covid19R.R b/data_raw/data_covid19R.R
index f3d46d8..70f2132 100644
--- a/data_raw/data_covid19R.R
+++ b/data_raw/data_covid19R.R
@@ -14,18 +14,23 @@ source("data-raw/dplyr::left_join")
 
 # create valid locations
 git_df_long_location <- git_df %>%
-  dplyr::mutate(country = ifelse(country=="Korea, South", "South Korea", country),
-                province = ifelse(province=="Bonaire, Sint Eustatius and Saba",
-                                  "Bonaire and Sint Eustatius and Saba", province)
-                ) %>%
+  dplyr::mutate(
+    country = ifelse(country == "Korea, South", "South Korea", country),
+    province = ifelse(province == "Bonaire, Sint Eustatius and Saba",
+      "Bonaire and Sint Eustatius and Saba", province
+    )
+  ) %>%
   tidyr::unite(location, province, country, sep = ", ") %>%
-  dplyr::rename(data_type = type,
-                value = cases) %>%
+  dplyr::rename(
+    data_type = type,
+    value = cases
+  ) %>%
 
   # fix some bad location names
   dplyr::mutate(
     location = gsub("^\\, ", "", location),
-    location_type = ifelse(grepl("\\,", location), "state", "country"))
+    location_type = ifelse(grepl("\\,", location), "state", "country")
+  )
 
 code_table <- get_code_table()
 
@@ -38,16 +43,19 @@ coronavirus_covid19 <- coronavirus_covid19 %>%
     data_type == "confirmed" ~ "cases_new",
     data_type == "recovered" ~ "recovered_new",
     data_type == "death" ~ "deaths_new",
-
   ))
 
+coronavirus_covid19 <- coronavirus_covid19 %>%
+  dplyr::select(
+    date, location, location_type,
+    location_code, location_code_type,
+    data_type, value, lat, long
+  )
 
 # data checks
-sum(is.na(coronavirus_covid19$location_code)) #make sure codes combine a-ok - will be >0 due to cruise ships
-nrow(coronavirus_covid19)- nrow(git_df_long_location) #should be 0, or there was a one to many match
+sum(is.na(coronavirus_covid19$location_code)) # make sure codes combine a-ok - will be >0 due to cruise ships
+nrow(coronavirus_covid19) - nrow(git_df_long_location) # should be 0, or there was a one to many match
 
 # write out
-#write.csv(coronavirus, "csv/coronavirus_covid19format.csv", row.names = FALSE)
+# write.csv(coronavirus_covid19, "csv/coronavirus_covid19format.csv", row.names = FALSE)
 print("covid19R compliant data done...")
-
-