-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpairwise.py
More file actions
109 lines (88 loc) · 3.57 KB
/
pairwise.py
File metadata and controls
109 lines (88 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
This module provides functions to perform pairwise tests on a pandas DataFrame.
"""
import itertools
import pandas as pd
from scipy.stats import chi2_contingency, fisher_exact
def _bonferroni_correct(p_value, n_tests):
"""Return the Bonferroni-corrected p-value capped at 1.0."""
return min(p_value * n_tests, 1.0)
def chisquare(input: pd.DataFrame, col1: str, col2: str) -> pd.DataFrame:
    """
    Run a chi-square test for every pair of groups found in col1.

    col1 and col2 are cross-tabulated (col2 levels as rows, col1 groups as
    columns) and each pair of group columns is passed to
    scipy.stats.chi2_contingency. Every p-value is also reported after a
    Bonferroni correction for the number of pairs compared.
    col1 must be the fixed variable (e.g. Group).

    Arguments:
        input: a pandas DataFrame
        col1: name of the column with the fixed variable (e.g. Group)
        col2: name of the column with the dependent variable (e.g. Sex)

    Prints:
        the raw and Bonferroni-corrected chi-square p-value for each
        pairwise combination of the groups in col1

    Returns:
        pandas DataFrame with columns 'Variable1', 'Variable2', 'p-value',
        and 'bonferroni p-value', one row per compared pair (no rows when
        the cross-tabulation has fewer than two groups)
    """
    series1 = input[str(col1)]
    series2 = input[str(col2)]
    contingency = pd.crosstab(series2, series1)
    groups = contingency.columns

    # Build the pairs from the columns the cross-tabulation actually contains.
    # Counting series1.nunique() instead can overshoot: a group whose col2
    # values are all missing is counted there but gets no crosstab column,
    # which made the positional lookup below fail and inflated n_tests.
    pairs = list(itertools.combinations(range(len(groups)), 2))
    n_tests = len(pairs)

    print("Pairwise chi2 test comparison for combinations of: ",
          str(col1), " and variable: ", str(col2))

    results = []
    for a, b in pairs:
        table = contingency.iloc[:, [a, b]]
        # A col2 level observed in neither of the two groups yields an
        # all-zero row, i.e. a zero expected frequency, which
        # chi2_contingency rejects. Such a level carries no information for
        # this pair, so it is left out of the pairwise table.
        table = table.loc[table.sum(axis=1) > 0]
        # Only the p-value is used; statistic, dof and expected are ignored.
        _, p, _, _ = chi2_contingency(table)
        bonferroni_p = _bonferroni_correct(p, n_tests)
        print("chi2 test between: ", groups[a],
              " and ", groups[b])
        print("pvalue: ", p)
        print("Bonferroni corrected pvalue: ", bonferroni_p)
        results.append([groups[a], groups[b], p, bonferroni_p])
        print("\n")

    return pd.DataFrame(
        results,
        columns=['Variable1', 'Variable2', 'p-value', 'bonferroni p-value']
    )
def fisher(input: pd.DataFrame, col1: str, col2: str) -> pd.DataFrame:
    """
    Run Fisher's exact test for every pair of groups found in col1.

    col1 and col2 are cross-tabulated (col2 levels as rows, col1 groups as
    columns) and each pair of group columns is passed to
    scipy.stats.fisher_exact. Every p-value is also reported after a
    Bonferroni correction for the number of pairs compared.
    col1 must be the fixed variable (e.g. Group).

    NOTE(review): fisher_exact has historically accepted only 2x2 tables, so
    col2 presumably needs exactly two observed levels -- confirm against the
    installed SciPy version.

    Arguments:
        input: a pandas DataFrame
        col1: name of the column with the fixed variable (e.g. Group)
        col2: name of the column with the dependent variable (e.g. Sex)

    Prints:
        the raw and Bonferroni-corrected Fisher's exact test p-value for
        each pairwise combination of the groups in col1

    Returns:
        pandas DataFrame with columns 'Variable1', 'Variable2', 'p-value',
        and 'bonferroni p-value', one row per compared pair (no rows when
        the cross-tabulation has fewer than two groups)
    """
    series1 = input[str(col1)]
    series2 = input[str(col2)]
    contingency = pd.crosstab(series2, series1)
    groups = contingency.columns

    # Build the pairs from the columns the cross-tabulation actually contains.
    # Counting series1.nunique() instead can overshoot: a group whose col2
    # values are all missing is counted there but gets no crosstab column,
    # which made the positional lookup below fail and inflated n_tests.
    pairs = list(itertools.combinations(range(len(groups)), 2))
    n_tests = len(pairs)

    print("Pairwise Fisher's exact test comparison for combinations of: ",
          str(col1), " and variable: ", str(col2))

    results = []
    for a, b in pairs:
        table = contingency.iloc[:, [a, b]].to_numpy()
        # fisher_exact returns (statistic, p-value); only the p-value is kept.
        _, p = fisher_exact(table)
        bonferroni_p = _bonferroni_correct(p, n_tests)
        print("Fisher's exact test between: ", groups[a],
              " and ", groups[b])
        print("pvalue: ", p)
        print("Bonferroni corrected pvalue: ", bonferroni_p)
        results.append([groups[a], groups[b], p, bonferroni_p])
        print("\n")

    return pd.DataFrame(
        results,
        columns=['Variable1', 'Variable2', 'p-value', 'bonferroni p-value']
    )