-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparadox21.R
69 lines (54 loc) · 2.18 KB
/
paradox21.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
dayprobs = c(.0026123,.0026785,.0026838,.0026426,.0026702,.0027424,.0028655,
.0028954,.0029407,.0027705,.0026842,.0026864)
daysmo = c(31,28,31,30,31,30,31,31,30,31,30,31)
daysmo2 = c(31,28.25,31,30,31,30,31,31,30,31,30,31)
# need both: the former is how the probs are reported,
# while the latter allows leap days
moprob = daysmo * dayprobs
mob = sample(1:12,10000 * 367,rep=TRUE,prob=moprob)
dob = sapply(mob,function(x) ceiling(sample((4*daysmo2[x]),1)/4) )
# The ceiling() function isn't vectorized, so we make the equivalent
# using sapply().
mobdob = paste(mob,dob)
# convert the vector into a matrix with the maximum
# group size as the number of columns
# as noted above, this could safely be truncated, with great savings
mdmat = matrix(mobdob, ncol=367, nrow=10000)
matchn = function(x) {
for (i in 1:367){
if (length(unique(x[1:i])) != i) break
}
return(i)
}
groups = apply(mdmat, 1, matchn)
bdprobs = cumsum(table(groups)/10000)
# find the N with each group number, divide by number of groups
# and get the cumulative sum
rgroups = as.numeric(names(bdprobs))
# extract the group sizes from the table
probs = 1 - ((factorial(rgroups) * choose(365,rgroups)) / 365**rgroups)
# calculate the analytic answer, for any group size
# for which there was an observed simulated value
diffs = bdprobs - probs
diffpcts = diffs/probs
addsecondy <- function(x, y, origy, yname="Y2") {
prevlimits <- range(origy)
axislimits <- range(y)
axis(side=4, at=prevlimits[1] + diff(prevlimits)*c(0:5)/5,
labels=round(axislimits[1] + diff(axislimits)*c(0:5)/5, 3))
mtext(yname, side=4)
newy <- (y-axislimits[1])/(diff(axislimits)/diff(prevlimits)) +
prevlimits[1]
points(x, newy, pch=2)
abline(h=(-axislimits[1])/(diff(axislimits)/diff(prevlimits)) +
prevlimits[1])
}
plottwoy <- function(x, y1, y2, xname="X", y1name="Y1", y2name="Y2")
{
plot(x, y1, ylab=y1name, xlab=xname)
abline(h=0)
addsecondy(x, y2, y1, yname=y2name)
}
plottwoy(rgroups, diffs, diffpcts, xname="Number in group",
y1name="Diff in prob", y2name="Diff in percent")
legend(80, .0013, pch=1:2, legend=c("Diffs", "Pcts"))