Skip to content

Commit d01af40

Browse files
authored
Merge pull request #36 from josesho/v0.2.3
v0.2.3
2 parents cf35623 + a4693f5 commit d01af40

File tree

7 files changed

+61
-61
lines changed

7 files changed

+61
-61
lines changed

dabest/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@
2323
from ._stats_tools import effsize as effsize
2424
from ._classes import TwoGroupsEffectSize
2525

26-
__version__ = "0.2.2"
26+
__version__ = "0.2.3"

dabest/_classes.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -124,15 +124,27 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
124124
value_vars=all_plot_groups,
125125
value_name=self.__yvar,
126126
var_name=self.__xvar)
127+
128+
# Lines 131 to 140 added in v0.2.3.
129+
# Fixes a bug that jammed up when the xvar column was already
130+
# a pandas Categorical. Now we check for this and act appropriately.
131+
if isinstance(plot_data[self.__xvar].dtype,
132+
pd.CategoricalDtype) is True:
133+
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
134+
plot_data[self.__xvar].cat.reorder_categories(all_plot_groups,
135+
ordered=True,
136+
inplace=True)
137+
else:
138+
plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
139+
categories=all_plot_groups,
140+
ordered=True)
127141

128-
plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
129-
categories=all_plot_groups,
130-
ordered=True)
131142

132143
self.__plot_data = plot_data
133-
144+
134145
self.__all_plot_groups = all_plot_groups
135146

147+
136148
# Sanity check that all idxs are paired, if so desired.
137149
if paired is True:
138150
if id_col is None:

dabest/tests/test_02_plotting.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,13 @@ def test_cummings_unpaired():
7979

8080
rand_swarm_ylim = (np.random.uniform(base_mean-10, base_mean, 1),
8181
np.random.uniform(base_mean, base_mean+10, 1))
82-
rand_contrast_ylim = (-base_mean/3, base_mean/3)
82+
83+
if base_mean == 0:
84+
# Have to set the contrast ylim, because the way I dynamically generate
85+
# the contrast ylims will flunk out with base_mean = 0.
86+
rand_contrast_ylim = (-0.5, 0.5)
87+
else:
88+
rand_contrast_ylim = (-base_mean/3, base_mean/3)
8389

8490
f1 = multi_2group_unpaired.mean_diff.plot(swarm_ylim=rand_swarm_ylim,
8591
contrast_ylim=rand_contrast_ylim,
@@ -89,18 +95,12 @@ def test_cummings_unpaired():
8995
rawswarm_axes = f1.axes[0]
9096
contrast_axes = f1.axes[1]
9197

92-
# Check ylims match the desired ones.
98+
# Check swarm ylims match the desired ones.
9399
assert rawswarm_axes.get_ylim()[0] == pytest.approx(rand_swarm_ylim[0])
94100
assert rawswarm_axes.get_ylim()[1] == pytest.approx(rand_swarm_ylim[1])
95-
96-
# This needs to be rounded, because if the base mean is 0,
97-
# the ylim might be -0.001, which will not match 0.
98-
if base_mean == 0:
99-
ylim_low = np.round(contrast_axes.get_ylim()[0])
100-
else:
101-
ylim_low = contrast_axes.get_ylim()[0]
102-
assert ylim_low == pytest.approx(rand_contrast_ylim[0])
103-
101+
102+
# Check contrast ylims match the desired ones.
103+
assert contrast_axes.get_ylim()[0] == pytest.approx(rand_contrast_ylim[0])
104104
assert contrast_axes.get_ylim()[1] == pytest.approx(rand_contrast_ylim[1])
105105

106106
# Check xtick labels.

dabest/tests/test_03_confint.py

+18-21
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,20 @@
1212

1313

1414

15-
def test_unpaired_ci(reps=50, ci=95):
16-
n = 10
17-
N = 10000
18-
19-
20-
21-
# Create data for hedges g and cohens d
15+
def test_unpaired_ci(reps=40, ci=95):
16+
17+
POPULATION_N = 10000
18+
SAMPLE_N = 10
19+
20+
# Create data for hedges g and cohens d.
2221
CONTROL_MEAN = np.random.randint(1, 1000)
2322
POP_SD = np.random.randint(1, 15)
2423
POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)
2524

2625
TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)
27-
norm_rvs_kwargs = dict(scale=POP_SD, size=n)
28-
c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
29-
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_rvs_kwargs)
26+
norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)
27+
c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
28+
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs)
3029

3130
std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})
3231

@@ -36,10 +35,9 @@ def test_unpaired_ci(reps=50, ci=95):
3635
CONTROL_MEAN = np.random.randint(1, 1000)
3736
POP_SD = np.random.randint(1, 15)
3837
TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)
39-
40-
norm_rvs_kwargs = dict(scale=POP_SD, size=n)
41-
c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
42-
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_rvs_kwargs)
38+
39+
c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
40+
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)
4341

4442
mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})
4543

@@ -49,11 +47,11 @@ def test_unpaired_ci(reps=50, ci=95):
4947
MEDIAN_DIFFERENCE = np.random.randint(-5, 5)
5048
A = np.random.randint(-7, 7)
5149

52-
skew_kwargs = dict(a=A, scale=5, size=N)
50+
skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)
5351
skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)
5452
skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)
5553

56-
sample_kwargs = dict(size=n, replace=False)
54+
sample_kwargs = dict(replace=False, size=SAMPLE_N)
5755
skewsample1 = np.random.choice(skewpop1, **sample_kwargs)
5856
skewsample2 = np.random.choice(skewpop2, **sample_kwargs)
5957

@@ -65,13 +63,11 @@ def test_unpaired_ci(reps=50, ci=95):
6563
CD_DIFFERENCE = np.random.randint(1, 10)
6664
SD = np.abs(CD_DIFFERENCE)
6765

68-
N = 10000
69-
pop_kwargs = dict(scale=SD, size=N)
66+
pop_kwargs = dict(scale=SD, size=POPULATION_N)
7067
pop1 = norm.rvs(loc=100, **pop_kwargs)
7168
pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)
7269

73-
n = 20
74-
sample_kwargs = dict(size=n, replace=False)
70+
sample_kwargs = dict(replace=False, size=SAMPLE_N)
7571
sample1 = np.random.choice(pop1, **sample_kwargs)
7672
sample2 = np.random.choice(pop2, **sample_kwargs)
7773

@@ -129,7 +125,8 @@ def test_unpaired_ci(reps=50, ci=95):
129125
error_count_cliffs_delta += 1
130126

131127

132-
max_errors = reps * (100 - ci) / 100
128+
max_errors = int(np.ceil(reps * (100 - ci) / 100))
129+
133130
assert error_count_cohens_d <= max_errors
134131
assert error_count_hedges_g <= max_errors
135132
assert error_count_mean_diff <= max_errors

docs/source/index.rst

+8-23
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ DABEST
99
-----------------------------------------------
1010
Data Analysis with Bootstrap-coupled ESTimation
1111
-----------------------------------------------
12-
*version 0.2.2*
12+
*version 0.2.3*
1313

1414
Analyze your data with estimation statistics!
1515
---------------------------------------------
@@ -19,31 +19,16 @@ Analyze your data with estimation statistics!
1919

2020
News
2121
----
22-
April 2019:
23-
- v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero. See the :doc:`release-notes`.
22+
May 2019:
23+
- v0.2.3 released. This fixes a bug where x-columns that were pandas Categorical objects were not handled properly. See the :doc:`release-notes`.
2424

25-
March 2019:
26-
- v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting. See the :doc:`release-notes`.
25+
April 2019:
26+
- v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero.
2727

28-
- Release of v0.2.0. This is a major update that makes several breaking changes to the API.
28+
March 2019:
29+
- v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting.
30+
- v0.2.0 released. This is a major update that makes several breaking changes to the API.
2931

30-
January 2019:
31-
- Release of v0.1.7. Added `cumming_vertical_spacing` option.
32-
33-
October 2018:
34-
- Release of v0.1.6. Added more keywords for control of plot elements.
35-
36-
July 2018:
37-
- Release of v0.1.5. *bugfix for setup and package management*
38-
- Release of v0.1.4.
39-
40-
June 2018:
41-
- Release of v0.1.3. Also added a short tutorial for dabest in R.
42-
43-
December 2017:
44-
- We have made a `webapp <https://www.estimationstats.com>`_ that produces Gardner-Altman and Cumming plots!
45-
46-
4732
Contents
4833
--------
4934

docs/source/release-notes.rst

+6
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44
Release Notes
55
=============
66

7+
v0.2.3
8+
------
9+
10+
This release fixes a bug where a supplied ``x`` column that was a :py:mod:`pandas` :py:class:`Categorical` object was not handled correctly when the ``idx`` did not include all of the original categories.
11+
12+
713
v0.2.2
814
------
915

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def check_dependencies():
8989
author_email='[email protected]',
9090
maintainer='Joses W. Ho',
9191
maintainer_email='[email protected]',
92-
version='0.2.2',
92+
version='0.2.3',
9393
description=DESCRIPTION,
9494
long_description=LONG_DESCRIPTION,
9595
packages=find_packages(),

0 commit comments

Comments
 (0)