Skip to content

Commit 36d3784

Browse files
[178390751]: remove the pairwise_significance legacy package
1 parent 7e8f84c commit 36d3784

File tree

5 files changed

+1
-214
lines changed

5 files changed

+1
-214
lines changed

src/cr/cube/cubepart.py

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from cr.cube.enums import CUBE_MEASURE as CM
2323
from cr.cube.min_base_size_mask import MinBaseSizeMask
2424
from cr.cube.matrix import Assembler
25-
from cr.cube.measures.pairwise_significance import PairwiseSignificance
25+
2626
from cr.cube.noa.smoothing import SingleSidedMovingAvgSmoother
2727
from cr.cube.scalar import MeansScalar
2828
from cr.cube.stripe.assembler import StripeAssembler
@@ -1046,12 +1046,6 @@ def sums(self):
10461046
"`.sums` is undefined for a cube-result without a sum measure"
10471047
)
10481048

1049-
@lazyproperty
1050-
def summary_pairwise_indices(self):
1051-
return PairwiseSignificance(
1052-
self, self._alpha, self._only_larger
1053-
).summary_pairwise_indices
1054-
10551049
@lazyproperty
10561050
def table_base(self):
10571051
"""Scalar or 1D/2D np.float64 ndarray of unweighted-N for table.
@@ -1184,26 +1178,6 @@ def zscores(self):
11841178

11851179
# ---implementation (helpers)-------------------------------------
11861180

1187-
@lazyproperty
1188-
def _alpha(self):
1189-
"""float confidence-interval threshold for pairwise-t (sig) tests."""
1190-
# TODO: remove me when summary_pairwise_indices, summary_p_vals and
1191-
# summary_t_stats will be moved from measures/pairwise_significance.py to
1192-
# matrix/measures.py
1193-
return self._columns_dimension.pairwise_significance_spec.alpha_values[0]
1194-
1195-
@lazyproperty
1196-
def _alpha_alt(self):
1197-
"""Alternate float confidence-interval threshold or None.
1198-
1199-
This is an optional secondary confidence interval allowing two-level
1200-
significance testing. Value is None if no alternate alpha was specified by user.
1201-
"""
1202-
# TODO: remove me when summary_pairwise_indices, summary_p_vals and
1203-
# summary_t_stats will be moved from measures/pairwise_significance.py to
1204-
# matrix/measures.py
1205-
return self._columns_dimension.pairwise_significance_spec.alpha_values[1]
1206-
12071181
@lazyproperty
12081182
def _assembler(self):
12091183
"""The Assembler object for this slice.
@@ -1241,21 +1215,6 @@ def _dimensions(self):
12411215
)
12421216
)
12431217

1244-
@lazyproperty
1245-
def _only_larger(self):
1246-
"""True if only the larger of reciprocal pairwise-t values should appear.
1247-
1248-
In general, pairwise-t tests are reciprocal. That is, if A is significant with
1249-
respect to B, then B is significant with respect to A. Having a letter in both
1250-
columns can produce a cluttered appearance. When this flag is set by the user,
1251-
only the cell in the reciprocal pair having the largest value gets a letter.
1252-
Defaults to True unless explicitly set False.
1253-
"""
1254-
# TODO: remove me when summary_pairwise_indices, summary_p_vals and
1255-
# summary_t_stats will be moved from measures/pairwise_significance.py to
1256-
# matrix/measures.py
1257-
return self._columns_dimension.pairwise_significance_spec.only_larger
1258-
12591218
@lazyproperty
12601219
def _rows_dimension(self):
12611220
return self._dimensions[0]

src/cr/cube/measures/__init__.py

Whitespace-only changes.

src/cr/cube/measures/pairwise_significance.py

Lines changed: 0 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +0,0 @@
1-
# encoding: utf-8
2-
3-
"""T-score based P-values of pairwise comparison of columns of a contingency table."""
4-
5-
import numpy as np
6-
from scipy.stats import t
7-
8-
from cr.cube.util import lazyproperty
9-
10-
11-
class PairwiseSignificance(object):
12-
"""Implementation of p-vals and t-tests for each column proportions comparison."""
13-
14-
def __init__(self, slice_, alpha=0.05, only_larger=True):
15-
self._slice = slice_
16-
self._alpha = alpha
17-
self._only_larger = only_larger
18-
19-
@lazyproperty
20-
def summary_pairwise_indices(self):
21-
"""ndarray containing tuples of pairwise indices for the column summary."""
22-
summary_pairwise_indices = np.empty(
23-
self.values[0].summary_t_stats.shape[0], dtype=object
24-
)
25-
summary_pairwise_indices[:] = [
26-
sig.summary_pairwise_indices for sig in self.values
27-
]
28-
return summary_pairwise_indices
29-
30-
@lazyproperty
31-
def values(self):
32-
"""list of _ColumnPairwiseSignificance tests.
33-
34-
Result has as many elements as there are columns in the slice. Each
35-
significance test contains `p_vals` and `t_stats` significance tests.
36-
"""
37-
return [
38-
_ColumnPairwiseSignificance(
39-
self._slice, col_idx, self._alpha, self._only_larger
40-
)
41-
for col_idx in range(self._slice.shape[1])
42-
]
43-
44-
45-
class _ColumnPairwiseSignificance(object):
46-
"""Value object providing matrix of T-score based pairwise-comparison P-values"""
47-
48-
def __init__(self, slice_, col_idx, alpha=0.05, only_larger=True):
49-
self._slice = slice_
50-
self._col_idx = col_idx
51-
self._alpha = alpha
52-
self._only_larger = only_larger
53-
54-
@lazyproperty
55-
def summary_p_vals(self):
56-
return 2 * (1 - t.cdf(abs(self.summary_t_stats), df=self._df))
57-
58-
@lazyproperty
59-
def summary_pairwise_indices(self):
60-
significance = self.summary_p_vals < self._alpha
61-
if self._only_larger:
62-
significance = np.logical_and(self.summary_t_stats < 0, significance)
63-
return tuple(np.where(significance)[0])
64-
65-
@lazyproperty
66-
def summary_t_stats(self):
67-
col_margin_props = self._slice.columns_base / self._slice.table_margin
68-
diff = col_margin_props - col_margin_props[self._col_idx]
69-
var_props = (
70-
col_margin_props * (1.0 - col_margin_props) / self._slice.table_margin
71-
)
72-
se_diff = np.sqrt(var_props + var_props[self._col_idx])
73-
return diff / se_diff
74-
75-
@lazyproperty
76-
def _df(self):
77-
selected_unweighted_n = (
78-
self._slice.columns_base[self._col_idx]
79-
if self._slice.columns_base.ndim < 2
80-
else self._slice.columns_base[:, self._col_idx][:, None]
81-
)
82-
return self._slice.columns_base + selected_unweighted_n - 2

tests/integration/test_pairwise_significance.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -489,26 +489,6 @@ def test_cat_x_cat_pruning_and_hs_scale_means_pairwise_t_tests(self):
489489
[1.0, 0.1046981, 0.059721, 0.2918845]
490490
)
491491

492-
def test_cat_x_cat_summary_pairwise_indices(self):
493-
# Only larger
494-
slice_ = Cube(CR.PAIRWISE_HIROTSU_OCCUPATION_X_ILLNESS).partitions[0]
495-
pairwise_indices = slice_.summary_pairwise_indices
496-
expected_indices = np.array([(2,), (0, 2), ()], dtype=tuple)
497-
np.testing.assert_array_equal(pairwise_indices, expected_indices)
498-
499-
# Larger and smaller
500-
transforms = {
501-
"columns_dimension": {
502-
"pairwise_significance": {"pairwise_indices": {"only_larger": False}},
503-
},
504-
}
505-
slice_ = Cube(
506-
CR.PAIRWISE_HIROTSU_OCCUPATION_X_ILLNESS, transforms=transforms
507-
).partitions[0]
508-
pairwise_indices = slice_.summary_pairwise_indices
509-
expected_indices = np.array([(1, 2), (0, 2), (0, 1)], dtype="i,i")
510-
np.testing.assert_array_equal(pairwise_indices, expected_indices)
511-
512492
def test_cat_x_cat_wgtd_pairwise_t_tests(self):
513493
"""The weights on this cube demonstrate much higher variance (less
514494
extreme t values, and higher associated p-values) than if weighted_n

tests/unit/test_cubepart.py

Lines changed: 0 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,6 @@ def it_knows_whether_it_is_empty(self, shape, expected_value, shape_prop_):
159159

160160
assert is_empty is expected_value
161161

162-
def but_it_returns_None_when_no_secondary_alpha_specified(self, cube_):
163-
cube_.alpha_values = (0.05, None)
164-
165-
assert _Slice(cube_, None, None, None, None).pairwise_indices_alt is None
166-
167162
def it_knows_the_population_fraction(self, cube_):
168163
cube_.population_fraction = 0.5
169164
slice_ = _Slice(cube_, None, None, None, None)
@@ -198,14 +193,11 @@ def it_provides_the_scale_mean_pairwise_indices(
198193
self,
199194
_assembler_prop_,
200195
assembler_,
201-
PairwiseSignificance_,
202196
dimension_,
203197
_dimensions_prop_,
204198
):
205199
assembler_.scale_mean_pairwise_indices = ((2,), (0,), ())
206200
_assembler_prop_.return_value = assembler_
207-
PairwiseSignificance_.alpha_values = (0.05, None)
208-
dimension_.pairwise_significance_spec = PairwiseSignificance_
209201
_dimensions_prop_.return_value = (None, dimension_)
210202
slice_ = _Slice(None, None, None, None, None)
211203

@@ -225,14 +217,11 @@ def it_provides_the_secondary_scale_mean_pairwise_indices(
225217
self,
226218
_assembler_prop_,
227219
assembler_,
228-
PairwiseSignificance_,
229220
dimension_,
230221
_dimensions_prop_,
231222
):
232223
assembler_.scale_mean_pairwise_indices_alt = ((2,), (0,), ())
233224
_assembler_prop_.return_value = assembler_
234-
PairwiseSignificance_.alpha_values = (0.05, 0.01)
235-
dimension_.pairwise_significance_spec = PairwiseSignificance_
236225
_dimensions_prop_.return_value = (None, dimension_)
237226
slice_ = _Slice(None, None, None, None, None)
238227

@@ -293,61 +282,6 @@ def it_constructs_its_assembler_instance_to_help(
293282
Assembler_.assert_called_once_with(cube_, (dimension_, dimension_), slice_idx)
294283
assert assembler is assembler_
295284

296-
def it_knows_the_primary_alpha_value_to_help(
297-
self, dimension_, _dimensions_prop_, PairwiseSignificance_
298-
):
299-
"""alpha is the primary confidence-interval threshold specified by the user."""
300-
PairwiseSignificance_.alpha_values = (0.042, 0.084)
301-
dimension_.pairwise_significance_spec = PairwiseSignificance_
302-
_dimensions_prop_.return_value = (None, dimension_)
303-
slice_ = _Slice(None, None, None, None, None)
304-
305-
assert slice_._alpha == 0.042
306-
307-
@pytest.mark.parametrize(
308-
"alpha_values, expected_value", (((0.042, 0.084), 0.084), ((0.042, None), None))
309-
)
310-
def it_knows_the_secondary_alpha_value_to_help(
311-
self,
312-
dimension_,
313-
_dimensions_prop_,
314-
PairwiseSignificance_,
315-
alpha_values,
316-
expected_value,
317-
):
318-
PairwiseSignificance_.alpha_values = alpha_values
319-
dimension_.pairwise_significance_spec = PairwiseSignificance_
320-
_dimensions_prop_.return_value = (None, dimension_)
321-
slice_ = _Slice(None, None, None, None, None)
322-
323-
assert slice_._alpha_alt == expected_value
324-
325-
def but_it_returns_None_when_no_secondary_alpha_specified(
326-
self,
327-
dimension_,
328-
_dimensions_prop_,
329-
PairwiseSignificance_,
330-
):
331-
PairwiseSignificance_.alpha_values = (0.05, None)
332-
dimension_.pairwise_significance_spec = PairwiseSignificance_
333-
_dimensions_prop_.return_value = (None, dimension_)
334-
slice_ = _Slice(None, None, None, None, None)
335-
336-
assert slice_._alpha_alt is None
337-
338-
def it_knows_the_only_larger_flag_state_to_help(
339-
self,
340-
dimension_,
341-
_dimensions_prop_,
342-
PairwiseSignificance_,
343-
):
344-
PairwiseSignificance_.only_larger = False
345-
dimension_.pairwise_significance_spec = PairwiseSignificance_
346-
_dimensions_prop_.return_value = (None, dimension_)
347-
slice_ = _Slice(None, None, None, None, None)
348-
349-
assert slice_._only_larger is False
350-
351285
# fixture components ---------------------------------------------
352286

353287
@pytest.fixture
@@ -374,10 +308,6 @@ def _dimensions_prop_(self, request):
374308
def dimension_types_prop_(self, request):
375309
return property_mock(request, _Slice, "dimension_types")
376310

377-
@pytest.fixture
378-
def PairwiseSignificance_(self, request):
379-
return class_mock(request, "cr.cube.cubepart.PairwiseSignificance")
380-
381311
@pytest.fixture
382312
def shape_prop_(self, request):
383313
return property_mock(request, _Slice, "shape")

0 commit comments

Comments
 (0)