Merge pull request #236 from knaaptime/builddocs

knaaptime · web-flow · commit 3b3ee00b40e5 · 2025-02-03T17:45:04.000-08:00
use ax instead of plt
diff --git a/segregation/inference/inference_wrappers.py b/segregation/inference/inference_wrappers.py
@@ -12,9 +12,13 @@
 from tqdm.auto import tqdm
 
 from .._base import MultiGroupIndex
-from .comparative import (DUAL_SIMULATORS, _estimate_counterfac_difference,
-                          _estimate_random_label_difference,
-                          _generate_counterfactual, _prepare_random_label)
+from .comparative import (
+    DUAL_SIMULATORS,
+    _estimate_counterfac_difference,
+    _estimate_random_label_difference,
+    _generate_counterfactual,
+    _prepare_random_label,
+)
 from .randomization import SIMULATORS, simulate_null
 
 
@@ -45,12 +49,12 @@ def _infer_segregation(
 
         * ``bootstrap``:
         generates bootstrap replications of the units with replacement of the same size of the
-        original data. This procedure creates a confidence interval for the index statistic to test 
+        original data. This procedure creates a confidence interval for the index statistic to test
         whether the null value lies within.
 
         * ``evenness``:
         assumes that each spatial unit has the same global probability of drawing elements from the
-        minority group of the fixed total unit population (binomial distribution). 
+        minority group of the fixed total unit population (binomial distribution).
 
         * ``person_permutation``:
         randomly allocates individuals into units keeping the total population of each
@@ -224,7 +228,6 @@ def __init__(
         n_jobs=-1,
         **kwargs,
     ):
-
         aux = _infer_segregation(
             seg_class,
             iterations_under_null,
@@ -433,7 +436,6 @@ def _compare_segregation(
         "share",
         "dual_composition",
     ]:
-
         if isinstance(seg_class_1, MultiGroupIndex):
             raise ValueError("Not implemented for MultiGroup indexes.")
 
@@ -575,7 +577,6 @@ def __init__(
         index_kwargs_2=None,
         **kwargs,
     ):
-
         aux = _compare_segregation(
             seg_class_1,
             seg_class_2,
@@ -620,21 +621,19 @@ def plot(self, color="darkblue", color2="darkred", kde=True, ax=None, **kwargs):
             import seaborn as sns
         except ImportError:
             warnings.warn("This method relies on importing `matplotlib` and `seaborn`")
+        if ax is None:
+            _, ax = plt.subplots()
 
         if self._null_approach == "bootstrap":
             ax = sns.histplot(self.est_sim[0], color=color, kde=kde, ax=ax, **kwargs)
             ax = sns.histplot(self.est_sim[1], color=color2, kde=kde, ax=ax, **kwargs)
-            plt.title(
-                "{} (Diff. value = {})".format(
-                    self._class_name, round(self.est_point_diff, 3)
-                )
+            ax.set_title(
+                f"{self._class_name} (Diff. value = {round(self.est_point_diff, 3)})"
             )
         else:
             ax = sns.histplot(self.est_sim, color=color, kde=kde, ax=ax, **kwargs)
-            plt.axvline(self.est_point_diff, color="red")
-            plt.title(
-                "{} (Diff. value = {})".format(
-                    self._class_name, round(self.est_point_diff, 3)
-                )
+            ax.vlines(self.est_point_diff, 0, ax.get_ylim()[1], color="red")
+            ax.set_title(
+                f"{self._class_name} (Diff. value = {round(self.est_point_diff, 3)})"
             )
         return ax
diff --git a/segregation/inference/randomization.py b/segregation/inference/randomization.py
@@ -17,14 +17,18 @@ def _generate_estimate(input):
     else:
         df = input[0].data.copy()
     if input[0].index_type == "singlegroup":
-        df = input[1](df, group=input[0].group_pop_var, total=input[0].total_pop_var,)
+        df = input[1](
+            df,
+            group=input[0].group_pop_var,
+            total=input[0].total_pop_var,
+        )
         estimate = (
             input[0]
             .__class__(df, input[0].group_pop_var, input[0].total_pop_var, **input[2])
             .statistic
         )
     else:
-        df = input[1](df, groups=input[0].groups)
+        df = input[1](df, groups=input[0].groups, verbose=input[3])
         estimate = input[0].__class__(df, input[0].groups, **input[2]).statistic
     return estimate
 
@@ -36,6 +40,7 @@ def simulate_null(
     n_jobs=-1,
     backend="loky",
     index_kwargs=None,
+    verbose=False,
 ):
     """Simulate a series of index values in parallel to serve as a null distribution.
 
@@ -56,6 +61,8 @@ def simulate_null(
     index_kwargs : dict, optional
         additional keyword arguments used to fit the index, such as distance or network
         if estimating a spatial index; by default None
+    verbose : bool, optional
+        whether to print warning statements; by default False
 
     Returns
     -------
@@ -67,13 +74,13 @@ def simulate_null(
     if n_jobs == -1:
         n_jobs = multiprocessing.cpu_count()
     estimates = Parallel(n_jobs=n_jobs, backend=backend)(
-        delayed(_generate_estimate)((seg_class, sim_func, index_kwargs))
+        delayed(_generate_estimate)((seg_class, sim_func, index_kwargs, verbose))
         for i in tqdm(range(iterations))
     )
     return pd.Series(estimates)
 
 
-def simulate_person_permutation(df, group=None, total=None, groups=None):
+def simulate_person_permutation(df, group=None, total=None, groups=None, verbose=False):
     """Simulate the permutation of individuals across spatial units.
 
     Parameters
@@ -145,7 +152,7 @@ def simulate_person_permutation(df, group=None, total=None, groups=None):
     return gpd.GeoDataFrame(df, geometry=geoms.geometry.name)
 
 
-def simulate_evenness(df, group=None, total=None, groups=None):
+def simulate_evenness(df, group=None, total=None, groups=None, verbose=True):
     """Simulate even redistribution of population groups across spatial units.
 
     Parameters
@@ -192,17 +199,17 @@ def simulate_evenness(df, group=None, total=None, groups=None):
         global_prob_vector = df.sum(axis=0) / df.sum().sum()
         t = df[groups].sum(axis=1).astype(int)
 
-        simul = list(
-            map(lambda i: list(np.random.multinomial(i, global_prob_vector)), t)
-        )
+        simul = [list(np.random.multinomial(i, global_prob_vector)) for i in t]
         output = pd.DataFrame(simul, columns=groups)
     if geoms:
         return gpd.GeoDataFrame(output, geometry=geoms, crs=crs)
 
     return output
 
 
-def simulate_systematic_randomization(df, group=None, total=None, groups=None):
+def simulate_systematic_randomization(
+    df, group=None, total=None, groups=None, verbose=True
+):
     """Simulate systematic redistribution of population groups across spatial units.
 
     Parameters
@@ -211,7 +218,7 @@ def simulate_systematic_randomization(df, group=None, total=None, groups=None):
         geodataframe with population data to be randomized
     group : str, optional
         name of column on geodataframe that holds the group total
-        (for use with singlegroup indices). 
+        (for use with singlegroup indices).
     total : str, optional
         name of column on geodataframe that holds the total population for
         each unit. For singlegroup indices, this parameter is required. For
@@ -242,16 +249,18 @@ def simulate_systematic_randomization(df, group=None, total=None, groups=None):
     Reference: :cite:`allen2015more`
     """
     if groups:
-        if not total:
+        if not total and verbose:
             warn(
-                "No `total` argument passed. Assuming population groups are exhaustive"
+                "No `total` argument passed. Assuming population groups are exhaustive",
+                stacklevel=2,
             )
             total = "total"
         df[total] = df[groups].sum(axis=1)
     if group:
-        assert (
-            total
-        ), "If simulating a single group, you must also supply a total population column"
+        if not total:
+            raise ValueError(
+                "If simulating a single group, you must also supply a total population column"
+            )
         df["other_group_pop"] = df[total] - df[group]
         groups = [group, "other_group_pop"]
 
@@ -320,7 +329,9 @@ def simulate_geo_permutation(df, **kwargs):
     return data
 
 
-def simulate_systematic_geo_permutation(df, group=None, total=None, groups=None):
+def simulate_systematic_geo_permutation(
+    df, group=None, total=None, groups=None, verbose=True
+):
     """Simulate systematic redistribution followed by random permutation of geographic units.
 
     Parameters
@@ -342,12 +353,16 @@ def simulate_systematic_geo_permutation(df, group=None, total=None, groups=None)
     geopandas.GeoDataFrame
         geodataframe with systematically randomized population groups
     """
-    df = simulate_systematic_randomization(df, group=group, total=total, groups=groups)
+    df = simulate_systematic_randomization(
+        df, group=group, total=total, groups=groups, verbose=verbose
+    )
     df = simulate_geo_permutation(df)
     return df
 
 
-def simulate_evenness_geo_permutation(df, group=None, total=None, groups=None):
+def simulate_evenness_geo_permutation(
+    df, group=None, total=None, groups=None, verbose=True
+):
     """Simulate evenness followed by random permutation of geographic units.
 
     Parameters
diff --git a/segregation/singlegroup/gini.py b/segregation/singlegroup/gini.py
@@ -2,15 +2,20 @@
 
 __author__ = "Renan X. Cortes <renanc@ucr.edu>, Sergio J. Rey <sergio.rey@ucr.edu> and Elijah Knaap <elijah.knaap@ucr.edu>"
 
+import os
+
 import geopandas as gpd
-import numpy as np
 
-from .._base import SingleGroupIndex, SpatialImplicitIndex
+# must be set prior to importing numpy
+# <https://github.com/numba/numba/issues/5275>
+os.environ["KMP_WARNINGS"] = "off"
 
+import numpy as np
 
+from .._base import SingleGroupIndex, SpatialImplicitIndex
 
 try:
-    from numba import njit, jit, prange, boolean
+    from numba import boolean, jit, njit, prange
 except (ImportError, ModuleNotFoundError):
 
     def jit(*dec_args, **dec_kwargs):
@@ -28,7 +33,11 @@ def intercepted_function(f, *f_args, **f_kwargs):
     prange = range
     boolean = bool
 
-@njit(parallel=True, fastmath=True,)
+
+@njit(
+    parallel=True,
+    fastmath=True,
+)
 def _gini_vecp(pi: np.ndarray, ti: np.ndarray):
     """Memory efficient calculation of Gini
 
@@ -41,25 +50,23 @@ def _gini_vecp(pi: np.ndarray, ti: np.ndarray):
 
     Returns
     ----------
-    
+
     implicit: float
              Gini coefficient
     """
 
-
     n = ti.shape[0]
     num = np.zeros(1)
     T = ti.sum()
     P = pi.sum() / T
     pi = np.where(ti == 0, 0, pi / ti)
     T = ti.sum()
-    for i in prange(n-1):
-        num += (ti[i] * ti[i+1:] * np.abs(pi[i] - pi[i+1:])).sum()
+    for i in prange(n - 1):
+        num += (ti[i] * ti[i + 1 :] * np.abs(pi[i] - pi[i + 1 :])).sum()
     num *= 2
-    den = (2 * T * T * P * (1-P))
+    den = 2 * T * T * P * (1 - P)
     return (num / den)[0]
 
-    
 
 def _gini_seg(data, group_pop_var, total_pop_var):
     """Calculate Gini segregation index.
@@ -107,6 +114,7 @@ def _gini_seg(data, group_pop_var, total_pop_var):
 
     return G, data
 
+
 class Gini(SingleGroupIndex, SpatialImplicitIndex):
     """Gini Index.
 
@@ -154,7 +162,7 @@ def __init__(
         decay=None,
         function="triangular",
         precompute=None,
-        **kwargs
+        **kwargs,
     ):
         """Init."""
         SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)