Hopeful fixes for Rmd vignette travis bugs, part deux

rudeboybert · rudeboybert · commit 964f257bad65 · 2019-07-30T16:57:55.000-04:00
diff --git a/vignettes/comics_gender.Rmd b/vignettes/comics_gender.Rmd
@@ -18,13 +18,58 @@ This vignette is based on [538 study : Comic Books Are Still Made By Men, For Me
 library(fivethirtyeight)
 library(ggplot2)
 library(dplyr)
+library(readr)
+library(tidyr)
+library(lubridate)
+library(janitor)
 library(knitr)
 library(grid)
 library(fmsb)
 library(wordcloud)
 library(gridExtra)
 ```
 
+
+#### Load the full dataset
+
+Load the full dataset using the code in the `?comic_characters` help file. This is necessary because `fivethirtyeight::comic_characters` only contains a
+preview of the first 10 rows of the full dataset.
+
+```{r, warning = FALSE, message = FALSE}
+# DC characters: download the wikia CSV, clean the column names, tag the publisher.
+comic_characters_dc <- read_csv(
+  "https://github.com/fivethirtyeight/data/raw/master/comic-characters/dc-wikia-data.csv"
+) %>%
+  clean_names() %>%
+  mutate(publisher = "DC")
+
+# Marvel characters: download the wikia CSV, clean the column names, tag the publisher.
+comic_characters_marvel <- read_csv(
+  "https://github.com/fivethirtyeight/data/raw/master/comic-characters/marvel-wikia-data.csv"
+) %>%
+  clean_names() %>%
+  mutate(publisher = "Marvel")
+
+# Stack the DC and Marvel tables, split `first_appearance` on ", ", and build `date`/`align`:
+comic_characters <-
+  comic_characters_dc %>%
+  bind_rows(comic_characters_marvel) %>%
+  separate(first_appearance, c("year2", "month"), ", ", remove = FALSE) %>%
+  mutate(
+    # If month was missing, set as January; day is always set to 01:
+    month = ifelse(is.na(month), "01", month),
+    day = "01",
+    # Note some years are missing, so `date` is NA for those rows:
+    date = ymd(paste(year, month, day, sep = "-")),
+    # Levels must match the raw `align` values exactly (unmatched values become NA):
+    align = factor(
+      align, ordered = TRUE,
+      levels = c("Bad Characters", "Reformed Criminals", "Neutral Characters", "Good Characters"))
+  ) %>%
+  select(publisher, everything(), -c(year2, day))
+```
+
+
 #### Overview plots 
 
 * percentage of Gender per publisher.
diff --git a/vignettes/trump_twitter.Rmd b/vignettes/trump_twitter.Rmd
@@ -145,9 +145,9 @@ plotSentByTime <- function(trump_tweet_times, timeGroupVar) {
   timeVarLabel <- str_to_title(timeVar)
   
   trump_tweet_time_sent <- trump_tweet_times %>% 
-    rename_(timeGroup = timeVar) %>% 
+    rename(timeGroup = !! timeVar) %>% 
     group_by(timeGroup) %>% 
-    summarise(score = mean(score, na.rm=TRUE),Count = n()) %>% 
+    summarise(score = mean(score, na.rm=TRUE), Count = n()) %>% 
     ungroup()
 
   ggplot(trump_tweet_time_sent, aes(x=timeGroup, y=Count, fill = score)) +
@@ -159,22 +159,22 @@ plotSentByTime <- function(trump_tweet_times, timeGroupVar) {
 
 
 ```{r plot_hour, fig.width=7, warning=FALSE}
-plotSentByTime(trump_tweet_times, hour)
+plotSentByTime(trump_tweet_times, "hour")
 ```
 
 * Trump tweets the least between 4 and 10 am. 
 * Trump's tweets are most positive during the 10am hour. 
 
 
 ```{r plot_weekday, fig.width=7, warning=FALSE}
-plotSentByTime(trump_tweet_times, weekday)
+plotSentByTime(trump_tweet_times, "weekday")
 ```
 
 * Trump tweeted the most on Tuesday and Wednesday 
 * Trump was most positive in the second part of the work week (Wed, Thurs, Fri)
 
 ```{r plot_month, fig.width=7, warning=FALSE}
-plotSentByTime(trump_tweet_times, month_over_time)
+plotSentByTime(trump_tweet_times, "month_over_time")
 ```
 
 * In this dataset, the number of tweets decreased after November 2015 and drastically dropped off after March 2016.  It is unclear if this is a result of actual decrease in tweeting frequency or a result of the data collection process.