Skip to content

Commit cc58741

Browse files
Add helper functions for data analysis in R
This script contains helper functions for data analysis, including functions to retrieve data, calculate performance metrics, and visualize results by team.
1 parent 9950269 commit cc58741

1 file changed

Lines changed: 120 additions & 0 deletions

File tree

code/15_workshop/api/functions.R

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# functions.R
2+
# A script of helper functions to perform your analysis
3+
# Source this file from your app, reporter, or API of choice to use these helpers.
4+
5+
# pairs with this data: https://docs.google.com/spreadsheets/d/15yBf5tapFSq-n6668GQ_fa3LQNafJUtxxJdjPhbaT3c/edit?usp=sharing
6+
# pairs with this form: https://forms.gle/R8axC5GAMXcm7VWd6
7+
8+
# 1. SETUP ----------------------------------
9+
10+
# Load any requisite packages
11+
library(dplyr)
12+
library(readr)
13+
library(ggplot2)
14+
15+
# 2. HELPER FUNCTIONS ------------------------
16+
17+
# Let's write some helper functions to procure data and summarize that data
18+
19+
#' @name get_data
#' @description Download the shared order spreadsheet as CSV and return its
#'   last `n` rows, restricted to the columns used by the other helpers.
#' @param n:int number of trailing rows to retrieve (presumably the most
#'   recent entries, if the sheet is appended to chronologically). Coerced
#'   with `as.integer()`, so a numeric string such as "100" is accepted.
#' @return A data frame with columns Timestamp, order_id, task, team, person,
#'   done, and elapsed. Raises an error if `n` is not a single non-negative
#'   whole number.
get_data <- function(n) {
  n <- as.integer(n)

  # Fail early with a clear message: an NA count is meaningless, and a
  # negative count would make tail() drop the *first* rows instead of
  # keeping the last ones.
  if (length(n) != 1L || is.na(n) || n < 0L) {
    stop("`n` must be a single non-negative whole number.", call. = FALSE)
  }

  # CSV export endpoint of the shared sheet (share link -> download link)
  url <- "https://docs.google.com/spreadsheets/d/15yBf5tapFSq-n6668GQ_fa3LQNafJUtxxJdjPhbaT3c/export?format=csv"

  # Download, keep the last n rows, then keep only the analysis columns
  url %>%
    read_csv(show_col_types = FALSE) %>%
    tail(n) %>%
    select(Timestamp, order_id, task, team, person, done, elapsed)
}
36+
37+
38+
# Test it!
39+
# data = get_data(1000)
40+
41+
42+
#' @name get_n
#' @description Function to get back tallies about orders
#' @param data data.frame of order stage data; must contain the columns
#'   `order_id` and `done`
#' @return A data frame (one row for ungrouped input) with:
#'   `n_orders`, the number of rows in `data`;
#'   `n_customers`, the number of distinct `order_id` values;
#'   `n_customers_served`, the number of distinct `order_id` values that have
#'   at least one row where `done` is TRUE.
get_n <- function(data) {
  data %>%
    summarize(
      n_orders = n(),
      n_customers = n_distinct(order_id),
      # which() keeps only positions where `done == TRUE` is actually TRUE.
      # Indexing with the raw logical would turn every NA in `done` into an
      # NA id, which then gets counted as one extra "served" customer.
      n_customers_served = n_distinct(order_id[which(done == TRUE)])
    )
}
57+
58+
# Try it!
59+
# get_n(data)
60+
61+
62+
#' @name get_time_avg
#' @description Function to get back the average completion time: `elapsed`
#'   is summed per `order_id` over the rows where `done` is TRUE, and those
#'   per-order totals are then averaged.
#' @param data data.frame of order stage data; must contain the columns
#'   `order_id`, `done`, and `elapsed`
#' @return A one-row data frame with the column `avg_time_to_completion`.
get_time_avg <- function(data) {
  # Keep only the rows flagged as done
  finished <- filter(data, done == TRUE)

  # Total elapsed time within each order
  per_order <- finished %>%
    group_by(order_id) %>%
    summarise(total_time = sum(elapsed)) %>%
    ungroup()

  # Mean of the per-order totals
  summarise(per_order, avg_time_to_completion = mean(total_time))
}
76+
77+
# Try it!
78+
# get_time_avg(data)
79+
80+
81+
#' @name get_time_threshold
#' @description Function to count the orders whose total elapsed time (summed
#'   over rows where `done` is TRUE) falls strictly between two thresholds.
#' @param data data.frame of order stage data; must contain the columns
#'   `order_id`, `done`, and `elapsed`
#' @param upper:int Order must take less time than this to be counted.
#' @param lower:int Order must take more time than this to be counted. Defaults to 0
#' @return A one-row data frame with `n_orders` (the count) plus the `upper`
#'   and `lower` thresholds that were applied.
get_time_threshold <- function(data, upper, lower = 0) {
  # Total elapsed time per order, using only the rows flagged as done
  per_order <- filter(data, done == TRUE) %>%
    group_by(order_id) %>%
    summarise(total_time = sum(elapsed)) %>%
    ungroup()

  # Count totals strictly inside (lower, upper) and echo the bounds back
  summarise(
    per_order,
    n_orders = sum(lower < total_time & total_time < upper),
    upper = upper,
    lower = lower
  )
}
99+
100+
# Try it!
101+
# get_time_threshold(data, upper = 16)
102+
103+
104+
#' @name get_viz_by_team
#' @description Draw a horizontal bar chart of the mean `elapsed` value per
#'   person, using only the rows belonging to one team.
#' @param data data.frame of order stage data; must contain the columns
#'   `team`, `person`, and `elapsed`
#' @param .team:str Filter by team (eg. "Front")
#' @return A ggplot object with one bar per person.
get_viz_by_team <- function(data, .team = "Front") {
  # Rows for the requested team only
  team_rows <- filter(data, team == .team)

  # Mean elapsed time for each person on that team
  avg_by_person <- summarise(
    group_by(team_rows, person),
    time_avg = mean(elapsed)
  )

  # Bars are built from the summary table; coord_flip() lays them out sideways
  bars <- geom_col(data = avg_by_person, mapping = aes(x = person, y = time_avg))
  ggplot() + bars + coord_flip()
}
118+
119+
# Try it!
120+
# get_viz_by_team(data, .team = "Front")

0 commit comments

Comments
 (0)