-
Notifications
You must be signed in to change notification settings - Fork 37.1k
/
Copy pathPA1_template.Rmd
89 lines (53 loc) · 2.5 KB
/
PA1_template.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#Call the libraries
library(tidyverse)
library(mice)
# Load the raw activity data from the CSV file saved on the desktop
activity <- read.csv(file = "C:/Users/zterjek/Desktop/activity.csv")
# Sum the steps within each date, then show how the daily totals are
# distributed with a 20-break histogram
perdaysum <- aggregate(steps ~ date, data = activity, FUN = sum)
hist(perdaysum$steps, col = "steelblue", breaks = 20)
# Mean and median of the daily step totals; the bare names below print
# the two values into the report
perdaymean <- mean(x = perdaysum$steps)
perdaymed <- median(x = perdaysum$steps)
perdaymean
perdaymed
# Average the steps within each interval (across all dates) and draw the
# resulting profile as a line chart
perintervalmean <- aggregate(steps ~ interval, data = activity, FUN = mean)
plot(perintervalmean$interval, perintervalmean$steps,
     lwd = 1.5, col = "darkred", type = "l")
# Locate the interval(s) whose average step count equals the overall
# maximum and print the interval identifier
maxstepsperinterval <- max(perintervalmean$steps)
maxstepsinterval <- perintervalmean %>%
  filter(steps == maxstepsperinterval)
maxstepsinterval$interval
# Show the missing-data pattern of the raw data
md.pattern(activity)
# Impute the missing values with predictive mean matching (pmm).
# The imputation is stochastic, so a fixed seed is passed to mice() to make
# the imputed values (and every figure derived from them) reproducible.
activity_imputed <- data.frame(
  original = activity$steps,
  imputed_steps = complete(mice(activity, method = "pmm", seed = 12345))$steps
)
# Row index used as the merge key. It is derived from the data instead of
# being hard-coded, so the script keeps working if the row count changes.
activity_imputed$num <- seq_len(nrow(activity_imputed))
activity$num <- seq_len(nrow(activity))
merged_activity <- merge(activity, activity_imputed, by = "num")
# Drop the merge key and the raw steps column by name (not by position);
# the raw values stay available through the `original` column.
merged_activity <- merged_activity[
  , !(names(merged_activity) %in% c("num", "steps")), drop = FALSE
]
merged_activity <- rename(merged_activity, original_steps = original)
# Missing-data pattern after imputation, to compare with the raw pattern
md.pattern(merged_activity)
# Daily step totals recomputed from the imputed values, shown as a
# 20-break histogram
perdaysum_merged <- aggregate(imputed_steps ~ date,
                              data = merged_activity, FUN = sum)
hist(perdaysum_merged$imputed_steps, col = "darkgreen", breaks = 20)
# Mean and median of the daily totals in the imputed dataset; the bare
# names below print the two values into the report
perdaymean_merged <- mean(x = perdaysum_merged$imputed_steps)
perdaymed_merged <- median(x = perdaysum_merged$imputed_steps)
perdaymean_merged
perdaymed_merged
# Label every observation as falling on a weekday or on a weekend.
# weekdays() returns day names in the session locale, so matching them
# against fixed strings only works under one language setting. POSIXlt's
# numeric `wday` field (0 = Sunday ... 6 = Saturday) is locale-independent.
merged_activity$date <- as.Date(merged_activity$date)
merged_activity$daytype <- as.factor(
  ifelse(as.POSIXlt(merged_activity$date)$wday %in% c(0L, 6L),
         "Weekend", "Weekday")
)
# Average the imputed steps per interval separately for weekdays and
# weekends. `.groups = "drop"` returns an ungrouped result so that no
# leftover grouping leaks into later operations.
grouped <- merged_activity %>%
  group_by(daytype, interval) %>%
  summarise(imputed_steps = mean(imputed_steps), .groups = "drop")
# One panel per day type, stacked vertically. The facet refers to the
# `daytype` column of the plot data rather than reaching outside it with `$`.
ggplot(grouped, aes(interval, imputed_steps)) +
  geom_line(aes(col = daytype)) +
  facet_wrap(~ daytype, nrow = 2, ncol = 1)