-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake_perverse_calibration_plot.R
More file actions
67 lines (57 loc) · 2.66 KB
/
Copy pathmake_perverse_calibration_plot.R
File metadata and controls
67 lines (57 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
library(ggplot2)
library(dplyr)
library(gridExtra)
# This script produces the plot which illustrates that calibration is an insufficient criterion to
# guarantee fair risk scores. Checked.
setwd("~/cost-of-fairness/")
theme_set(
theme_bw(base_size = 15) +
theme(panel.grid.major = element_blank()) +
theme(panel.grid.minor = element_blank()) +
theme(legend.background = element_rect(fill = 'transparent'))
)
test_calibration = function(df, col){
# Given a dataframe with risk scores in column "col" and outcome in column "outcome",
# returns a dataframe showing whether the risk scores are approximately calibrated in each bin.
df$p_hat = plyr::round_any(df[,col], accuracy = .03)
calibration_df = df %>%
group_by(p_hat) %>%
summarise(true_p = mean(outcome), n = n()) %>%
filter(n > 5000)
return(calibration_df)
}
# Simulate data by drawing from a beta distribution.
lambda = 10
phi = .2
n = 1e6
risk_scores = rbeta(n = n, shape1 = lambda * phi, shape2 = lambda * (1 - phi))
noisy_risk_scores = risk_scores + rnorm(n = n, mean = 0, sd = .5)
outcome = runif(n = n) < risk_scores
df = data.frame(risk_scores, noisy_risk_scores, outcome)
# recalibrate noisy risk scores
model = glm(outcome ~ noisy_risk_scores, data = df, family = binomial)
df$fitted_noisy_risk_scores = predict(model, df, type = 'response')
density_plot = ggplot(df) +
geom_density(aes(x = risk_scores), color = 'black') +
geom_density(aes(x = fitted_noisy_risk_scores), color = 'red') +
geom_vline(xintercept = .3, linetype = 'dashed') + # draw line for threshold.
xlab("Risk score") + ylab("") +
scale_y_continuous(expand = c(0, 0), lim = c(0, 30), breaks = NULL) +
scale_x_continuous(labels = scales::percent, expand = c(0, 0), lim = c(0, 1.05))
original_calibration = test_calibration(df, 'risk_scores') %>% mutate(noisy = 'not.noisy')
noisy_calibration = test_calibration(df, 'fitted_noisy_risk_scores') %>% mutate(noisy = 'noisy')
df = rbind(original_calibration, noisy_calibration)
calibration_plot = ggplot(df) +
geom_point(aes(x = p_hat, y = true_p, size = n, group = noisy, color = noisy), shape = 1) +
xlab("Risk score") +
ylab("True probability") +
scale_x_continuous(labels = scales::percent, limits = c(0, .5)) +
scale_y_continuous(labels = scales::percent, limits = c(0, .5)) +
geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
theme(legend.position="none") +
scale_color_manual(values = c('red', 'black')) +
scale_size_area(max_size = 15)
print(calibration_plot)
pdf("figures/calibration_counterexample.pdf", width = 10, height = 5)
grid.arrange(density_plot, calibration_plot, ncol = 2, nrow = 1)
dev.off()