Skip to content

Commit 3b3ee00

Browse files
authored
Merge pull request #236 from knaaptime/builddocs
use ax instead of plt
2 parents bd169f1 + a467c81 commit 3b3ee00

File tree

3 files changed

+68
-46
lines changed

3 files changed

+68
-46
lines changed

segregation/inference/inference_wrappers.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,13 @@
1212
from tqdm.auto import tqdm
1313

1414
from .._base import MultiGroupIndex
15-
from .comparative import (DUAL_SIMULATORS, _estimate_counterfac_difference,
16-
_estimate_random_label_difference,
17-
_generate_counterfactual, _prepare_random_label)
15+
from .comparative import (
16+
DUAL_SIMULATORS,
17+
_estimate_counterfac_difference,
18+
_estimate_random_label_difference,
19+
_generate_counterfactual,
20+
_prepare_random_label,
21+
)
1822
from .randomization import SIMULATORS, simulate_null
1923

2024

@@ -45,12 +49,12 @@ def _infer_segregation(
4549
4650
* ``bootstrap``:
4751
generates bootstrap replications of the units with replacement of the same size of the
48-
original data. This procedure creates a confidence interval for the index statistic to test
52+
original data. This procedure creates a confidence interval for the index statistic to test
4953
whether the null value lies within.
5054
5155
* ``evenness``:
5256
assumes that each spatial unit has the same global probability of drawing elements from the
53-
minority group of the fixed total unit population (binomial distribution).
57+
minority group of the fixed total unit population (binomial distribution).
5458
5559
* ``person_permutation``:
5660
randomly allocates individuals into units keeping the total population of each
@@ -224,7 +228,6 @@ def __init__(
224228
n_jobs=-1,
225229
**kwargs,
226230
):
227-
228231
aux = _infer_segregation(
229232
seg_class,
230233
iterations_under_null,
@@ -433,7 +436,6 @@ def _compare_segregation(
433436
"share",
434437
"dual_composition",
435438
]:
436-
437439
if isinstance(seg_class_1, MultiGroupIndex):
438440
raise ValueError("Not implemented for MultiGroup indexes.")
439441

@@ -575,7 +577,6 @@ def __init__(
575577
index_kwargs_2=None,
576578
**kwargs,
577579
):
578-
579580
aux = _compare_segregation(
580581
seg_class_1,
581582
seg_class_2,
@@ -620,21 +621,19 @@ def plot(self, color="darkblue", color2="darkred", kde=True, ax=None, **kwargs):
620621
import seaborn as sns
621622
except ImportError:
622623
warnings.warn("This method relies on importing `matplotlib` and `seaborn`")
624+
if ax is None:
625+
_, ax = plt.subplots()
623626

624627
if self._null_approach == "bootstrap":
625628
ax = sns.histplot(self.est_sim[0], color=color, kde=kde, ax=ax, **kwargs)
626629
ax = sns.histplot(self.est_sim[1], color=color2, kde=kde, ax=ax, **kwargs)
627-
plt.title(
628-
"{} (Diff. value = {})".format(
629-
self._class_name, round(self.est_point_diff, 3)
630-
)
630+
ax.set_title(
631+
f"{self._class_name} (Diff. value = {round(self.est_point_diff, 3)})"
631632
)
632633
else:
633634
ax = sns.histplot(self.est_sim, color=color, kde=kde, ax=ax, **kwargs)
634-
plt.axvline(self.est_point_diff, color="red")
635-
plt.title(
636-
"{} (Diff. value = {})".format(
637-
self._class_name, round(self.est_point_diff, 3)
638-
)
635+
ax.vlines(self.est_point_diff, 0, ax.get_ylim()[1], color="red")
636+
ax.set_title(
637+
f"{self._class_name} (Diff. value = {round(self.est_point_diff, 3)})"
639638
)
640639
return ax

segregation/inference/randomization.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,18 @@ def _generate_estimate(input):
1717
else:
1818
df = input[0].data.copy()
1919
if input[0].index_type == "singlegroup":
20-
df = input[1](df, group=input[0].group_pop_var, total=input[0].total_pop_var,)
20+
df = input[1](
21+
df,
22+
group=input[0].group_pop_var,
23+
total=input[0].total_pop_var,
24+
)
2125
estimate = (
2226
input[0]
2327
.__class__(df, input[0].group_pop_var, input[0].total_pop_var, **input[2])
2428
.statistic
2529
)
2630
else:
27-
df = input[1](df, groups=input[0].groups)
31+
df = input[1](df, groups=input[0].groups, verbose=input[3])
2832
estimate = input[0].__class__(df, input[0].groups, **input[2]).statistic
2933
return estimate
3034

@@ -36,6 +40,7 @@ def simulate_null(
3640
n_jobs=-1,
3741
backend="loky",
3842
index_kwargs=None,
43+
verbose=False,
3944
):
4045
"""Simulate a series of index values in parallel to serve as a null distribution.
4146
@@ -56,6 +61,8 @@ def simulate_null(
5661
index_kwargs : dict, optional
5762
additional keyword arguments used to fit the index, such as distance or network
5863
if estimating a spatial index; by default None
64+
verbose : bool, optional
65+
whether to emit warning messages while simulating; by default False
5966
6067
Returns
6168
-------
@@ -67,13 +74,13 @@ def simulate_null(
6774
if n_jobs == -1:
6875
n_jobs = multiprocessing.cpu_count()
6976
estimates = Parallel(n_jobs=n_jobs, backend=backend)(
70-
delayed(_generate_estimate)((seg_class, sim_func, index_kwargs))
77+
delayed(_generate_estimate)((seg_class, sim_func, index_kwargs, verbose))
7178
for i in tqdm(range(iterations))
7279
)
7380
return pd.Series(estimates)
7481

7582

76-
def simulate_person_permutation(df, group=None, total=None, groups=None):
83+
def simulate_person_permutation(df, group=None, total=None, groups=None, verbose=False):
7784
"""Simulate the permutation of individuals across spatial units.
7885
7986
Parameters
@@ -145,7 +152,7 @@ def simulate_person_permutation(df, group=None, total=None, groups=None):
145152
return gpd.GeoDataFrame(df, geometry=geoms.geometry.name)
146153

147154

148-
def simulate_evenness(df, group=None, total=None, groups=None):
155+
def simulate_evenness(df, group=None, total=None, groups=None, verbose=True):
149156
"""Simulate even redistribution of population groups across spatial units.
150157
151158
Parameters
@@ -192,17 +199,17 @@ def simulate_evenness(df, group=None, total=None, groups=None):
192199
global_prob_vector = df.sum(axis=0) / df.sum().sum()
193200
t = df[groups].sum(axis=1).astype(int)
194201

195-
simul = list(
196-
map(lambda i: list(np.random.multinomial(i, global_prob_vector)), t)
197-
)
202+
simul = [list(np.random.multinomial(i, global_prob_vector)) for i in t]
198203
output = pd.DataFrame(simul, columns=groups)
199204
if geoms:
200205
return gpd.GeoDataFrame(output, geometry=geoms, crs=crs)
201206

202207
return output
203208

204209

205-
def simulate_systematic_randomization(df, group=None, total=None, groups=None):
210+
def simulate_systematic_randomization(
211+
df, group=None, total=None, groups=None, verbose=True
212+
):
206213
"""Simulate systematic redistribution of population groups across spatial units.
207214
208215
Parameters
@@ -211,7 +218,7 @@ def simulate_systematic_randomization(df, group=None, total=None, groups=None):
211218
geodataframe with population data to be randomized
212219
group : str, optional
213220
name of column on geodataframe that holds the group total
214-
(for use with singlegroup indices).
221+
(for use with singlegroup indices).
215222
total : str, optional
216223
name of column on geodataframe that holds the total population for
217224
each unit. For singlegroup indices, this parameter is required. For
@@ -242,16 +249,18 @@ def simulate_systematic_randomization(df, group=None, total=None, groups=None):
242249
Reference: :cite:`allen2015more`
243250
"""
244251
if groups:
245-
if not total:
252+
if not total and verbose:
246253
warn(
247-
"No `total` argument passed. Assuming population groups are exhaustive"
254+
"No `total` argument passed. Assuming population groups are exhaustive",
255+
stacklevel=2,
248256
)
249257
total = "total"
250258
df[total] = df[groups].sum(axis=1)
251259
if group:
252-
assert (
253-
total
254-
), "If simulating a single group, you must also supply a total population column"
260+
if not total:
261+
raise ValueError(
262+
"If simulating a single group, you must also supply a total population column"
263+
)
255264
df["other_group_pop"] = df[total] - df[group]
256265
groups = [group, "other_group_pop"]
257266

@@ -320,7 +329,9 @@ def simulate_geo_permutation(df, **kwargs):
320329
return data
321330

322331

323-
def simulate_systematic_geo_permutation(df, group=None, total=None, groups=None):
332+
def simulate_systematic_geo_permutation(
333+
df, group=None, total=None, groups=None, verbose=True
334+
):
324335
"""Simulate systematic redistribution followed by random permutation of geographic units.
325336
326337
Parameters
@@ -342,12 +353,16 @@ def simulate_systematic_geo_permutation(df, group=None, total=None, groups=None)
342353
geopandas.GeoDataFrame
343354
geodataframe with systematically randomized population groups
344355
"""
345-
df = simulate_systematic_randomization(df, group=group, total=total, groups=groups)
356+
df = simulate_systematic_randomization(
357+
df, group=group, total=total, groups=groups, verbose=verbose
358+
)
346359
df = simulate_geo_permutation(df)
347360
return df
348361

349362

350-
def simulate_evenness_geo_permutation(df, group=None, total=None, groups=None):
363+
def simulate_evenness_geo_permutation(
364+
df, group=None, total=None, groups=None, verbose=True
365+
):
351366
"""Simulate evenness followed by random permutation of geographic units.
352367
353368
Parameters

segregation/singlegroup/gini.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,20 @@
22

33
__author__ = "Renan X. Cortes <[email protected]>, Sergio J. Rey <[email protected]> and Elijah Knaap <[email protected]>"
44

5+
import os
6+
57
import geopandas as gpd
6-
import numpy as np
78

8-
from .._base import SingleGroupIndex, SpatialImplicitIndex
9+
# the KMP_WARNINGS environment variable must be set prior to importing numpy
10+
# <https://github.com/numba/numba/issues/5275>
11+
os.environ["KMP_WARNINGS"] = "off"
912

13+
import numpy as np
1014

15+
from .._base import SingleGroupIndex, SpatialImplicitIndex
1116

1217
try:
13-
from numba import njit, jit, prange, boolean
18+
from numba import boolean, jit, njit, prange
1419
except (ImportError, ModuleNotFoundError):
1520

1621
def jit(*dec_args, **dec_kwargs):
@@ -28,7 +33,11 @@ def intercepted_function(f, *f_args, **f_kwargs):
2833
prange = range
2934
boolean = bool
3035

31-
@njit(parallel=True, fastmath=True,)
36+
37+
@njit(
38+
parallel=True,
39+
fastmath=True,
40+
)
3241
def _gini_vecp(pi: np.ndarray, ti: np.ndarray):
3342
"""Memory efficient calculation of Gini
3443
@@ -41,25 +50,23 @@ def _gini_vecp(pi: np.ndarray, ti: np.ndarray):
4150
4251
Returns
4352
----------
44-
53+
4554
implicit: float
4655
Gini coefficient
4756
"""
4857

49-
5058
n = ti.shape[0]
5159
num = np.zeros(1)
5260
T = ti.sum()
5361
P = pi.sum() / T
5462
pi = np.where(ti == 0, 0, pi / ti)
5563
T = ti.sum()
56-
for i in prange(n-1):
57-
num += (ti[i] * ti[i+1:] * np.abs(pi[i] - pi[i+1:])).sum()
64+
for i in prange(n - 1):
65+
num += (ti[i] * ti[i + 1 :] * np.abs(pi[i] - pi[i + 1 :])).sum()
5866
num *= 2
59-
den = (2 * T * T * P * (1-P))
67+
den = 2 * T * T * P * (1 - P)
6068
return (num / den)[0]
6169

62-
6370

6471
def _gini_seg(data, group_pop_var, total_pop_var):
6572
"""Calculate Gini segregation index.
@@ -107,6 +114,7 @@ def _gini_seg(data, group_pop_var, total_pop_var):
107114

108115
return G, data
109116

117+
110118
class Gini(SingleGroupIndex, SpatialImplicitIndex):
111119
"""Gini Index.
112120
@@ -154,7 +162,7 @@ def __init__(
154162
decay=None,
155163
function="triangular",
156164
precompute=None,
157-
**kwargs
165+
**kwargs,
158166
):
159167
"""Init."""
160168
SingleGroupIndex.__init__(self, data, group_pop_var, total_pop_var)

0 commit comments

Comments
 (0)