Merge pull request #36 from josesho/v0.2.3

josesho · web-flow · commit d01af40b35b7 · 2019-05-07T11:33:11.000+08:00
v0.2.3
diff --git a/dabest/__init__.py b/dabest/__init__.py
@@ -23,4 +23,4 @@
 from ._stats_tools import effsize as effsize
 from ._classes import TwoGroupsEffectSize 
 
-__version__ = "0.2.2"
+__version__ = "0.2.3"
diff --git a/dabest/_classes.py b/dabest/_classes.py
@@ -124,15 +124,27 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
                                 value_vars=all_plot_groups,
                                 value_name=self.__yvar,
                                 var_name=self.__xvar)
+        
+        # Lines 131 to 140 added in v0.2.3.
+        # Fixes a bug that jammed up when the xvar column was already 
+        # a pandas Categorical. Now we check for this and act appropriately.
+        if isinstance(plot_data[self.__xvar].dtype, 
+                      pd.CategoricalDtype) is True:
+            plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
+            plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, 
+                                                          ordered=True, 
+                                                          inplace=True)
+        else:
+            plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
+                                               categories=all_plot_groups,
+                                               ordered=True)
 
-        plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
-                                           categories=all_plot_groups,
-                                           ordered=True)
 
         self.__plot_data = plot_data
-
+        
         self.__all_plot_groups = all_plot_groups
 
+
         # Sanity check that all idxs are paired, if so desired.
         if paired is True:
             if id_col is None:
diff --git a/dabest/tests/test_02_plotting.py b/dabest/tests/test_02_plotting.py
@@ -79,7 +79,13 @@ def test_cummings_unpaired():
 
     rand_swarm_ylim = (np.random.uniform(base_mean-10, base_mean, 1),
                        np.random.uniform(base_mean, base_mean+10, 1))
-    rand_contrast_ylim = (-base_mean/3, base_mean/3)
+                       
+    if base_mean == 0:
+        # Have to set the contrast ylim, because the way I dynamically generate
+        # the contrast ylims will flunk out with base_mean = 0.
+        rand_contrast_ylim = (-0.5, 0.5)
+    else:
+        rand_contrast_ylim = (-base_mean/3, base_mean/3)
 
     f1 = multi_2group_unpaired.mean_diff.plot(swarm_ylim=rand_swarm_ylim,
                                               contrast_ylim=rand_contrast_ylim,
@@ -89,18 +95,12 @@ def test_cummings_unpaired():
     rawswarm_axes = f1.axes[0]
     contrast_axes = f1.axes[1]
 
-    # Check ylims match the desired ones.
+    # Check swarm ylims match the desired ones.
     assert rawswarm_axes.get_ylim()[0] == pytest.approx(rand_swarm_ylim[0])
     assert rawswarm_axes.get_ylim()[1] == pytest.approx(rand_swarm_ylim[1])
-    
-    # This needs to be rounded, because if the base mean is 0,
-    # the ylim might be -0.001, which will not match 0.
-    if base_mean == 0:
-        ylim_low = np.round(contrast_axes.get_ylim()[0])
-    else:
-        ylim_low = contrast_axes.get_ylim()[0]
-    assert ylim_low == pytest.approx(rand_contrast_ylim[0])
-    
+
+    # Check contrast ylims match the desired ones.
+    assert contrast_axes.get_ylim()[0] == pytest.approx(rand_contrast_ylim[0])
     assert contrast_axes.get_ylim()[1] == pytest.approx(rand_contrast_ylim[1])
 
     # Check xtick labels.
diff --git a/dabest/tests/test_03_confint.py b/dabest/tests/test_03_confint.py
@@ -12,21 +12,20 @@
 
 
 
-def test_unpaired_ci(reps=50, ci=95):
-    n = 10
-    N = 10000
-
-
-
-    # Create data for hedges g and cohens d
+def test_unpaired_ci(reps=40, ci=95):
+    
+    POPULATION_N = 10000
+    SAMPLE_N = 10
+    
+    # Create data for hedges g and cohens d.
     CONTROL_MEAN = np.random.randint(1, 1000)
     POP_SD       = np.random.randint(1, 15)
     POP_D        = np.round(np.random.uniform(-2, 2, 1)[0], 2)
 
     TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)
-    norm_rvs_kwargs = dict(scale=POP_SD, size=n)
-    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
-    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_rvs_kwargs)
+    norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)
+    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
+    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs)
 
     std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})
 
@@ -36,10 +35,9 @@ def test_unpaired_ci(reps=50, ci=95):
     CONTROL_MEAN = np.random.randint(1, 1000)
     POP_SD       = np.random.randint(1, 15)
     TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)
-
-    norm_rvs_kwargs = dict(scale=POP_SD, size=n)
-    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
-    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_rvs_kwargs)
+    
+    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
+    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)
 
     mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})
 
@@ -49,11 +47,11 @@ def test_unpaired_ci(reps=50, ci=95):
     MEDIAN_DIFFERENCE = np.random.randint(-5, 5)
     A = np.random.randint(-7, 7)
 
-    skew_kwargs = dict(a=A, scale=5, size=N)
+    skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)
     skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)
     skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)
 
-    sample_kwargs = dict(size=n, replace=False)
+    sample_kwargs = dict(replace=False, size=SAMPLE_N)
     skewsample1 = np.random.choice(skewpop1, **sample_kwargs)
     skewsample2 = np.random.choice(skewpop2, **sample_kwargs)
 
@@ -65,13 +63,11 @@ def test_unpaired_ci(reps=50, ci=95):
     CD_DIFFERENCE = np.random.randint(1, 10)
     SD = np.abs(CD_DIFFERENCE)
 
-    N = 10000
-    pop_kwargs = dict(scale=SD, size=N)
+    pop_kwargs = dict(scale=SD, size=POPULATION_N)
     pop1 = norm.rvs(loc=100, **pop_kwargs)
     pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)
 
-    n = 20
-    sample_kwargs = dict(size=n, replace=False)
+    sample_kwargs = dict(replace=False, size=SAMPLE_N)
     sample1 = np.random.choice(pop1, **sample_kwargs)
     sample2 = np.random.choice(pop2, **sample_kwargs)
 
@@ -129,7 +125,8 @@ def test_unpaired_ci(reps=50, ci=95):
             error_count_cliffs_delta += 1
 
 
-    max_errors = reps * (100 - ci) / 100
+    max_errors = int(np.ceil(reps * (100 - ci) / 100))
+
     assert error_count_cohens_d     <= max_errors
     assert error_count_hedges_g     <= max_errors
     assert error_count_mean_diff    <= max_errors
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -9,7 +9,7 @@ DABEST
 -----------------------------------------------
 Data Analysis with Bootstrap-coupled ESTimation
 -----------------------------------------------
-*version 0.2.2*
+*version 0.2.3*
 
 Analyze your data with estimation statistics!
 ---------------------------------------------
@@ -19,31 +19,16 @@ Analyze your data with estimation statistics!
 
 News
 ----
-April 2019:
-  - v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero. See the :doc:`release-notes`.
+May 2019:
+  - v0.2.3 released. This release fixes a bug where x-columns that were already pandas Categorical objects were not handled properly. See the :doc:`release-notes`.
 
-March 2019:
-  - v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting. See the :doc:`release-notes`.
+April 2019:
+  - v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero.
   
-  - Release of v0.2.0. This is a major update that makes several breaking changes to the API. 
+March 2019:
+  - v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting.
+  - v0.2.0 released. This is a major update that makes several breaking changes to the API. 
   
-January 2019:
-  - Release of v0.1.7. Added `cumming_vertical_spacing` option.
-
-October 2018:
-  - Release of v0.1.6. Added more keywords for control of plot elements.
-
-July 2018:
- - Release of v0.1.5. *bugfix for setup and package management*
- - Release of v0.1.4.
-
-June 2018:
-  - Release of v0.1.3. Also added a short tutorial for dabest in R.
-
-December 2017:
-  - We have made a `webapp <https://www.estimationstats.com>`_ that produces Gardner-Altman and Cumming plots!
-
-
 Contents
 --------
 
diff --git a/docs/source/release-notes.rst b/docs/source/release-notes.rst
@@ -4,6 +4,12 @@
 Release Notes
 =============
 
+v0.2.3
+------
+
+This release fixes a bug where a supplied ``x`` column that was already a :py:mod:`pandas` :py:class:`Categorical` object was not handled correctly when ``idx`` did not include all of the original categories.
+
+
 v0.2.2
 ------
 
diff --git a/setup.py b/setup.py
@@ -89,7 +89,7 @@ def check_dependencies():
         author_email='joseshowh@gmail.com',
         maintainer='Joses W. Ho',
         maintainer_email='joseshowh@gmail.com',
-        version='0.2.2',
+        version='0.2.3',
         description=DESCRIPTION,
         long_description=LONG_DESCRIPTION,
         packages=find_packages(),