-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpaired_bootstrap.py
80 lines (55 loc) · 2.73 KB
/
paired_bootstrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# author = "lorenzoscottb"
# adapted from the boostsa Python library
import numpy as np
from tqdm import tqdm
def paird_bootstrap(targs, h0_preds, h1_preds, metric, num_rounds=1000, sample_size=.1,
                    alpha=0.05):
    """
    Nonparametric paired bootstrap test comparing the metric scores of two models.

    The absolute score difference observed on the full data is compared with
    the absolute differences obtained on resampled data. The returned value is
    the fraction of rounds in which the resampled difference is at least twice
    the observed one.

    Parameters
    -------------
    targs : list or numpy array with shape (n_datapoints,)
        Gold standard / dataset items.
    h0_preds : list or numpy array with shape (n_datapoints,)
        Output of model A, aligned item by item with ``targs``.
    h1_preds : list or numpy array with shape (n_datapoints,)
        Output of model B, aligned item by item with ``targs``.
    metric : function
        Evaluation metric, called as ``metric(preds, targs)``; the two returned
        scores are subtracted from each other.
    num_rounds : int (default: 1000)
        Number of resampling rounds. Set to 0 (or less) to derive it from the
        number of test items and ``alpha``:
        ``max(10000, n_datapoints / alpha)``.
    sample_size : float (default: .1)
        Fraction of the items drawn in every round. A value in [.05, .5] draws
        that fraction *without* replacement (boostsa strategy); the value 1
        draws a full-size sample *with* replacement.
    alpha : float (default: 0.05)
        Alpha threshold; only used to derive ``num_rounds`` when it is not given.

    Returns
    ----------
    float
        p-value under the null hypothesis.
    """
    assert sample_size == 1 or (.05 <= sample_size <= .5), 'sample_size must be between .05 and .5'

    targs = np.array(targs)
    h0_preds = np.array(h0_preds)
    h1_preds = np.array(h1_preds)
    overall_size = len(targs)

    # Number of rounds based on the number of items and alpha, when not given.
    if num_rounds <= 0:
        num_rounds = int(max(10000, int(overall_size * (1 / float(alpha)))))

    # Decide the strategy from the *fraction* before converting it to a count.
    # Testing the converted count against 1 would send every realistic call
    # down the with-replacement path, whatever fraction was requested.
    use_subsample = sample_size < 1
    # Never draw an empty subsample, even for very small datasets.
    subsample_size = max(1, int(overall_size * sample_size))

    original_diff = np.abs(metric(h1_preds, targs) - metric(h0_preds, targs))

    twice_diff = 0
    for _ in range(num_rounds):
        if use_subsample:
            # boostsa strategy: subsample without replacement
            i_sample = np.random.choice(overall_size, size=subsample_size, replace=False)
        else:
            # full-size sample with replacement
            i_sample = np.random.randint(0, overall_size, overall_size)

        sample_targs = targs[i_sample]
        sample_diff = np.abs(
            metric(h1_preds[i_sample], sample_targs) - metric(h0_preds[i_sample], sample_targs)
        )
        if sample_diff >= 2 * original_diff:
            twice_diff += 1

    return twice_diff / num_rounds