RepData_PeerAssessment1_TZs
Zsolt Terjek
2023-06-03

Loading the libraries
library(tidyverse)
ℹ Use the conflicted package (http://conflicted.r-lib.org/) to force all conflicts to become errors
library(mice)
Reading the data downloaded to the desktop
# Read the activity measurements from the CSV file saved on the desktop.
activity <- read.csv(
  file = "C:/Users/zterjek/Desktop/activity.csv",
  header = TRUE
)
Calculate the total number of steps taken per day and visualize it with a histogram
# Sum the recorded steps for each date. The formula interface of aggregate()
# omits rows with missing values by default (na.action = na.omit).
perdaysum <- aggregate(
  steps ~ date,
  data = activity,
  FUN = sum
)

# Distribution of the daily totals. The x expression is what hist() uses for
# its default title and axis label.
hist(
  perdaysum$steps,
  col = "steelblue",
  breaks = 20
)
Calculate and report the mean and median of the total number of steps taken per day
# Mean and median of the per-day step totals, then print both for the report.
perdaymean <- with(perdaysum, mean(steps))
perdaymed <- with(perdaysum, median(steps))
print(perdaymean)
print(perdaymed)
Calculate the average number of steps taken per interval and visualize it with a line chart
# Average the steps of each interval across all dates (rows with missing
# values are omitted by the formula interface's default na.action).
perintervalmean <- aggregate(
  steps ~ interval,
  data = activity,
  FUN = mean
)

# Line chart of the average steps against the interval identifier.
plot(
  perintervalmean$interval,
  perintervalmean$steps,
  lwd = 1.5,
  col = "darkred",
  type = "l"
)
Calculate and report the interval with the highest average number of steps
# Highest per-interval average, and the row(s) of the summary that attain it.
# The comparison is exact because the maximum is taken from the same column.
maxstepsperinterval <- max(perintervalmean[["steps"]])
maxstepsinterval <- perintervalmean %>%
  filter(steps == maxstepsperinterval)

# Report the interval identifier(s) of that maximum.
maxstepsinterval[["interval"]]
Calculate and report the interval with the highest average number of steps (repeated from the previous section)
# `maxstepsperinterval` and `maxstepsinterval` are already computed by the
# identical statements directly above; recomputing them here was a copy-paste
# duplicate, so only the report line is kept.
maxstepsinterval$interval
Impute the missing values with predictive mean matching
# Inspect the missing-data pattern of the raw data before imputing.
md.pattern(activity)

# Impute the data with predictive mean matching ("pmm"). The imputation is
# stochastic, so a fixed seed is passed to make the report reproducible.
# `mice::complete()` is namespaced because tidyr (attached by tidyverse) also
# exports a `complete()`.
imputation <- mice(activity, method = "pmm", seed = 20230603)
activity_imputed <- data.frame(
  original = activity$steps,
  imputed_steps = mice::complete(imputation)$steps
)

# Add a row-number key to both tables; derived from the data instead of a
# hard-coded row count so the script works for any input length.
activity_imputed$num <- seq_len(nrow(activity_imputed))
activity$num <- seq_len(nrow(activity))

# Join on the key, then drop the first two columns of the result: the key
# `num` (merge() puts the `by` column first) and the column that follows it
# (presumably the raw `steps` column -- confirm against the CSV column order).
merged_activity <- merge(activity, activity_imputed, by = "num")
merged_activity <- merged_activity[, -c(1, 2)]
merged_activity <- rename(merged_activity, original_steps = original)

# Inspect the missing-data pattern again after imputation.
md.pattern(merged_activity)
Calculate the total number of steps taken per day in the imputed dataset and visualize it with a histogram
# Sum the imputed step counts for each date.
perdaysum_merged <- aggregate(
  imputed_steps ~ date,
  data = merged_activity,
  FUN = sum
)

# Distribution of the daily totals after imputation.
hist(
  perdaysum_merged$imputed_steps,
  col = "darkgreen",
  breaks = 20
)
Calculate and report the mean and median of the total number of steps taken per day in the imputed dataset
# Mean and median of the per-day totals of the imputed data, then print both.
perdaymean_merged <- with(perdaysum_merged, mean(imputed_steps))
perdaymed_merged <- with(perdaysum_merged, median(imputed_steps))
print(perdaymean_merged)
print(perdaymed_merged)
Classify each date as a weekday or a weekend day
# Convert the date column to Date so the day of the week can be derived.
merged_activity$date <- as.Date(merged_activity$date)

# Label each row as "Weekend" or "Weekday". The day of the week is taken from
# POSIXlt's numeric `wday` field (0 = Sunday ... 6 = Saturday) rather than
# from weekdays(), whose names depend on the session locale: matching them
# against the Hungarian names "szombat"/"vasárnap" classified every day as a
# weekday under any other locale.
merged_activity$daytype <- as.factor(
  ifelse(
    as.POSIXlt(merged_activity$date)$wday %in% c(0L, 6L),
    "Weekend",
    "Weekday"
  )
)

# Calculate the average steps taken by intervals, visualize it with a line
# chart with weekdays and weekends separated
# Average the imputed steps for every (day type, interval) pair. summarise()
# replaces the superseded summarize_at(), and `.groups = "drop"` returns an
# ungrouped result so no leftover grouping leaks into later steps.
grouped <- merged_activity %>%
  group_by(daytype, interval) %>%
  summarise(imputed_steps = mean(imputed_steps), .groups = "drop")

# One line-chart panel per day type, stacked vertically. The facet variable is
# referenced by column name so it is looked up in the plot data; the previous
# `grouped$daytype` pulled the vector from the global object instead, which
# silently misaligns if the plotted data is ever filtered or reordered.
ggplot(grouped, aes(interval, imputed_steps)) +
  geom_line(aes(col = daytype)) +
  facet_wrap(~daytype, nrow = 2, ncol = 1)