diff --git a/chapters/chapter1.md b/chapters/chapter1.md index 936f622..156fb8c 100644 --- a/chapters/chapter1.md +++ b/chapters/chapter1.md @@ -57,7 +57,7 @@ The first time you run a code exercise, it may take a little while for your Dock **Wherever you see `___` in a code exercise, replace it with the correct code as instructed. Run the code (via the button) to see if it will run, and submit it (via the other button) to check if it's correct.** -The `tidyverse` metapackage is loaded for you, so you can use readr and ggplot2. +The tidyverse metapackage is loaded for you, so you can use ggplot2 and other functions for exploratory data analysis. The arrow package is also loaded, so you can read our data stored as a [Parquet](https://parquet.apache.org/) file. - Take a look at the `cars2018` object using `glimpse()`. - Use the appropriate column from the data set in the call to `aes()` so you can plot a histogram of fuel efficiency (miles per gallon, `mpg`). diff --git a/data/c1_car_vars.parquet b/data/c1_car_vars.parquet new file mode 100644 index 0000000..05f507a Binary files /dev/null and b/data/c1_car_vars.parquet differ diff --git a/data/c1_car_vars.rds b/data/c1_car_vars.rds deleted file mode 100644 index 052b098..0000000 Binary files a/data/c1_car_vars.rds and /dev/null differ diff --git a/data/c1_fit_lm.rds b/data/c1_fit_lm.rds index e743c02..e489a70 100644 Binary files a/data/c1_fit_lm.rds and b/data/c1_fit_lm.rds differ diff --git a/data/c1_fit_rf.rds b/data/c1_fit_rf.rds index 186df0f..43c6e38 100644 Binary files a/data/c1_fit_rf.rds and b/data/c1_fit_rf.rds differ diff --git a/data/c1_lm_res.rds b/data/c1_lm_res.rds index 4674e5f..025425a 100644 Binary files a/data/c1_lm_res.rds and b/data/c1_lm_res.rds differ diff --git a/data/c1_rf_res.rds b/data/c1_rf_res.rds index 21fdc59..e0ff900 100644 Binary files a/data/c1_rf_res.rds and b/data/c1_rf_res.rds differ diff --git a/data/c1_test.parquet b/data/c1_test.parquet new file mode 100644 index 0000000..c31ee00 
Binary files /dev/null and b/data/c1_test.parquet differ diff --git a/data/c1_test.rds b/data/c1_test.rds deleted file mode 100644 index 5313f5f..0000000 Binary files a/data/c1_test.rds and /dev/null differ diff --git a/data/c1_train.parquet b/data/c1_train.parquet new file mode 100644 index 0000000..5425548 Binary files /dev/null and b/data/c1_train.parquet differ diff --git a/data/c1_train.rds b/data/c1_train.rds deleted file mode 100644 index 5e886e4..0000000 Binary files a/data/c1_train.rds and /dev/null differ diff --git a/data/c1_train_10_percent.parquet b/data/c1_train_10_percent.parquet new file mode 100644 index 0000000..0040478 Binary files /dev/null and b/data/c1_train_10_percent.parquet differ diff --git a/data/c1_train_10_percent.rds b/data/c1_train_10_percent.rds deleted file mode 100644 index 626d2ed..0000000 Binary files a/data/c1_train_10_percent.rds and /dev/null differ diff --git a/exercises/exc_01_03.R b/exercises/exc_01_03.R index 2bc942e..628a4b6 100644 --- a/exercises/exc_01_03.R +++ b/exercises/exc_01_03.R @@ -1,5 +1,6 @@ library(tidyverse) -cars2018 <- read_csv("data/cars2018.csv") +library(arrow) +cars2018 <- read_parquet("data/cars2018.parquet") # Print the cars2018 object glimpse(___) diff --git a/exercises/exc_01_04.R b/exercises/exc_01_04.R index 23a64ed..a5469e3 100644 --- a/exercises/exc_01_04.R +++ b/exercises/exc_01_04.R @@ -1,5 +1,6 @@ library(tidyverse) -cars2018 <- read_csv("data/cars2018.csv") +library(arrow) +cars2018 <- read_parquet("data/cars2018.parquet") # Deselect the 2 columns to create cars_vars car_vars <- cars2018 %>% diff --git a/exercises/exc_01_06.R b/exercises/exc_01_06.R index 233d538..0dcad81 100644 --- a/exercises/exc_01_06.R +++ b/exercises/exc_01_06.R @@ -1,4 +1,5 @@ -car_vars <- readRDS("data/c1_car_vars.rds") +library(arrow) +car_vars <- read_parquet("data/c1_car_vars.parquet") # Load tidymodels ___ diff --git a/exercises/exc_01_07_1.R b/exercises/exc_01_07_1.R index 5517a62..296de33 100644 --- 
a/exercises/exc_01_07_1.R +++ b/exercises/exc_01_07_1.R @@ -1,5 +1,6 @@ -car_train <- readRDS("data/c1_train.rds") -car_test <- readRDS("data/c1_test.rds") +library(arrow) +car_train <- read_parquet("data/c1_train.parquet") +car_test <- read_parquet("data/c1_test.parquet") # Load tidymodels ___ diff --git a/exercises/exc_01_07_2.R b/exercises/exc_01_07_2.R index f3c5903..88ba759 100644 --- a/exercises/exc_01_07_2.R +++ b/exercises/exc_01_07_2.R @@ -1,7 +1,7 @@ library(tidymodels) - -car_train <- readRDS("data/c1_train.rds") -car_test <- readRDS("data/c1_test.rds") +library(arrow) +car_train <- read_parquet("data/c1_train.parquet") +car_test <- read_parquet("data/c1_test.parquet") # Build a random forest model specification rf_mod <- ___ %>% diff --git a/exercises/exc_01_08.R b/exercises/exc_01_08.R index c6d966b..ecb1619 100644 --- a/exercises/exc_01_08.R +++ b/exercises/exc_01_08.R @@ -1,6 +1,8 @@ -car_train <- readRDS("data/c1_train.rds") -fit_lm <- readRDS("data/c1_fit_lm.rds") -fit_rf <- readRDS("data/c1_fit_rf.rds") +library(tidyverse) +library(arrow) +car_train <- read_parquet("data/c1_train.parquet") +fit_lm <- read_rds("data/c1_fit_lm.rds") +fit_rf <- read_rds("data/c1_fit_rf.rds") # Load tidymodels library(___) diff --git a/exercises/exc_01_09.R b/exercises/exc_01_09.R index f705d50..cbdc116 100644 --- a/exercises/exc_01_09.R +++ b/exercises/exc_01_09.R @@ -1,8 +1,9 @@ library(tidymodels) - -car_test <- readRDS("data/c1_test.rds") -fit_lm <- readRDS("data/c1_fit_lm.rds") -fit_rf <- readRDS("data/c1_fit_rf.rds") +library(tidyverse) +library(arrow) +car_test <- read_parquet("data/c1_test.parquet") +fit_lm <- read_rds("data/c1_fit_lm.rds") +fit_rf <- read_rds("data/c1_fit_rf.rds") # Create the new columns results <- ___ %>% diff --git a/exercises/exc_01_11.R b/exercises/exc_01_11.R index 3cfcdcf..351c591 100644 --- a/exercises/exc_01_11.R +++ b/exercises/exc_01_11.R @@ -1,6 +1,6 @@ library(tidymodels) - -car_train <- readRDS("data/c1_train_10_percent.rds") +library(arrow) 
+car_train <- read_parquet("data/c1_train_10_percent.parquet") lm_mod <- linear_reg() %>% set_engine("lm") diff --git a/exercises/exc_01_12_1.R b/exercises/exc_01_12_1.R index fc4e120..ce7015b 100644 --- a/exercises/exc_01_12_1.R +++ b/exercises/exc_01_12_1.R @@ -1,7 +1,8 @@ +library(tidyverse) library(tidymodels) -lm_res <- readRDS("data/c1_lm_res.rds") -rf_res <- readRDS("data/c1_rf_res.rds") +lm_res <- read_rds("data/c1_lm_res.rds") +rf_res <- read_rds("data/c1_rf_res.rds") results <- bind_rows(___ %>% collect_predictions() %>% diff --git a/exercises/exc_01_12_2.R b/exercises/exc_01_12_2.R index 391ce4c..4945bdc 100644 --- a/exercises/exc_01_12_2.R +++ b/exercises/exc_01_12_2.R @@ -1,7 +1,8 @@ +library(tidyverse) library(tidymodels) -lm_res <- readRDS("data/c1_lm_res.rds") -rf_res <- readRDS("data/c1_rf_res.rds") +lm_res <- read_rds("data/c1_lm_res.rds") +rf_res <- read_rds("data/c1_rf_res.rds") results <- bind_rows(lm_res %>% collect_predictions() %>% diff --git a/exercises/solution_01_03.R b/exercises/solution_01_03.R index eaf3da6..25ca521 100644 --- a/exercises/solution_01_03.R +++ b/exercises/solution_01_03.R @@ -1,5 +1,6 @@ library(tidyverse) -cars2018 <- read_csv("data/cars2018.csv") +library(arrow) +cars2018 <- read_parquet("data/cars2018.parquet") # Print the cars2018 object glimpse(cars2018) diff --git a/exercises/solution_01_04.R b/exercises/solution_01_04.R index 5b66023..bb5ee1e 100644 --- a/exercises/solution_01_04.R +++ b/exercises/solution_01_04.R @@ -1,5 +1,6 @@ library(tidyverse) -cars2018 <- read_csv("data/cars2018.csv") +library(arrow) +cars2018 <- read_parquet("data/cars2018.parquet") # Deselect the 2 columns to create cars_vars car_vars <- cars2018 %>% diff --git a/exercises/solution_01_06.R b/exercises/solution_01_06.R index b04020b..1bdb87b 100644 --- a/exercises/solution_01_06.R +++ b/exercises/solution_01_06.R @@ -1,4 +1,5 @@ -car_vars <- readRDS("data/c1_car_vars.rds") +library(arrow) +car_vars <- 
read_parquet("data/c1_car_vars.parquet") # Load tidymodels library(tidymodels) diff --git a/exercises/solution_01_07_1.R b/exercises/solution_01_07_1.R index fdd39ec..92302c3 100644 --- a/exercises/solution_01_07_1.R +++ b/exercises/solution_01_07_1.R @@ -1,5 +1,6 @@ -car_train <- readRDS("data/c1_train.rds") -car_test <- readRDS("data/c1_test.rds") +library(arrow) +car_train <- read_parquet("data/c1_train.parquet") +car_test <- read_parquet("data/c1_test.parquet") # Load tidymodels library(tidymodels) diff --git a/exercises/solution_01_07_2.R b/exercises/solution_01_07_2.R index 7d4063e..c59ba55 100644 --- a/exercises/solution_01_07_2.R +++ b/exercises/solution_01_07_2.R @@ -1,7 +1,7 @@ library(tidymodels) - -car_train <- readRDS("data/c1_train.rds") -car_test <- readRDS("data/c1_test.rds") +library(arrow) +car_train <- read_parquet("data/c1_train.parquet") +car_test <- read_parquet("data/c1_test.parquet") # Build a random forest model specification rf_mod <- rand_forest() %>% diff --git a/exercises/solution_01_08.R b/exercises/solution_01_08.R index d92a646..075de79 100644 --- a/exercises/solution_01_08.R +++ b/exercises/solution_01_08.R @@ -1,6 +1,8 @@ -car_train <- readRDS("data/c1_train.rds") -fit_lm <- readRDS("data/c1_fit_lm.rds") -fit_rf <- readRDS("data/c1_fit_rf.rds") +library(tidyverse) +library(arrow) +car_train <- read_parquet("data/c1_train.parquet") +fit_lm <- read_rds("data/c1_fit_lm.rds") +fit_rf <- read_rds("data/c1_fit_rf.rds") # Load tidymodels library(tidymodels) diff --git a/exercises/solution_01_09.R b/exercises/solution_01_09.R index e1628b7..b6303ad 100644 --- a/exercises/solution_01_09.R +++ b/exercises/solution_01_09.R @@ -1,8 +1,9 @@ library(tidymodels) - -car_test <- readRDS("data/c1_test.rds") -fit_lm <- readRDS("data/c1_fit_lm.rds") -fit_rf <- readRDS("data/c1_fit_rf.rds") +library(tidyverse) +library(arrow) +car_test <- read_parquet("data/c1_test.parquet") +fit_lm <- read_rds("data/c1_fit_lm.rds") +fit_rf <- read_rds("data/c1_fit_rf.rds") # Create 
the new columns results <- car_test %>% diff --git a/exercises/solution_01_11.R b/exercises/solution_01_11.R index 9a40edf..9dc2028 100644 --- a/exercises/solution_01_11.R +++ b/exercises/solution_01_11.R @@ -1,6 +1,6 @@ library(tidymodels) - -car_train <- readRDS("data/c1_train_10_percent.rds") +library(arrow) +car_train <- read_parquet("data/c1_train_10_percent.parquet") lm_mod <- linear_reg() %>% set_engine("lm") diff --git a/exercises/solution_01_12_1.R b/exercises/solution_01_12_1.R index 2a43cf6..3e3ca71 100644 --- a/exercises/solution_01_12_1.R +++ b/exercises/solution_01_12_1.R @@ -1,7 +1,8 @@ +library(tidyverse) library(tidymodels) -lm_res <- readRDS("data/c1_lm_res.rds") -rf_res <- readRDS("data/c1_rf_res.rds") +lm_res <- read_rds("data/c1_lm_res.rds") +rf_res <- read_rds("data/c1_rf_res.rds") results <- bind_rows(lm_res %>% collect_predictions() %>% diff --git a/exercises/solution_01_12_2.R b/exercises/solution_01_12_2.R index 391ce4c..4945bdc 100644 --- a/exercises/solution_01_12_2.R +++ b/exercises/solution_01_12_2.R @@ -1,7 +1,8 @@ +library(tidyverse) library(tidymodels) -lm_res <- readRDS("data/c1_lm_res.rds") -rf_res <- readRDS("data/c1_rf_res.rds") +lm_res <- read_rds("data/c1_lm_res.rds") +rf_res <- read_rds("data/c1_rf_res.rds") results <- bind_rows(lm_res %>% collect_predictions() %>%