
Commit bd169f1

Merge pull request #235 from knaaptime/builddocs
fix crs concat
2 parents 8090618 + c58a661 commit bd169f1

File tree

1 file changed (+68, -25 lines)


segregation/inference/comparative.py

Lines changed: 68 additions & 25 deletions
@@ -7,7 +7,9 @@
 from .randomization import simulate_person_permutation
 
 
-def _prepare_comparative_data(df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2):
+def _prepare_comparative_data(
+    df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2
+):
     df1 = df1.copy()
     df2 = df2.copy()
     if hasattr(df1, "geometry"):
@@ -23,7 +25,6 @@ def _prepare_comparative_data(df1, df2, group_pop_var1, group_pop_var2, total_po
     return df1, df2
 
 
-
 def _generate_counterfactual(
     data1,
     data2,
@@ -64,7 +65,12 @@ def _generate_counterfactual(
 
     """
     df1, df2 = DUAL_SIMULATORS[counterfactual_approach](
-        data1, data2, group_pop_var1, total_pop_var1, group_pop_var2, total_pop_var2,
+        data1,
+        data2,
+        group_pop_var1,
+        total_pop_var1,
+        group_pop_var2,
+        total_pop_var2,
     )
     df1["group_composition"] = (df1[group_pop_var1] / df1[total_pop_var1]).fillna(0)
     df2["group_composition"] = (df2[group_pop_var2] / df2[total_pop_var2]).fillna(0)
@@ -83,7 +89,12 @@ def _generate_counterfactual(
 
 
 def sim_composition(
-    df1, df2, group_pop_var1, total_pop_var1, group_pop_var2, total_pop_var2,
+    df1,
+    df2,
+    group_pop_var1,
+    total_pop_var1,
+    group_pop_var2,
+    total_pop_var2,
 ):
     """Simulate the spatial distribution of a population group in a region using the CDF of a comparison region.
 
@@ -112,7 +123,9 @@ def sim_composition(
     two pandas.DataFrame
         dataframes with simulated population columns appended
     """
-    df1, df2 = _prepare_comparative_data(df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2)
+    df1, df2 = _prepare_comparative_data(
+        df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2
+    )
 
     df1["group_composition"] = (df1[group_pop_var1] / df1[total_pop_var1]).fillna(0)
     df2["group_composition"] = (df2[group_pop_var2] / df2[total_pop_var2]).fillna(0)
@@ -133,7 +146,12 @@ def sim_composition(
 
 
 def sim_dual_composition(
-    df1, df2, group_pop_var1, total_pop_var1, group_pop_var2, total_pop_var2,
+    df1,
+    df2,
+    group_pop_var1,
+    total_pop_var1,
+    group_pop_var2,
+    total_pop_var2,
 ):
     """Apply the 'composition' for both minority and complementary groups.
 
@@ -158,7 +176,9 @@ def sim_dual_composition(
         dataframes with simulated population columns appended
 
     """
-    df1, df2 = _prepare_comparative_data(df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2)
+    df1, df2 = _prepare_comparative_data(
+        df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2
+    )
 
     df1["group_composition"] = (df1[group_pop_var1] / df1[total_pop_var1]).fillna(0)
     df2["group_composition"] = (df2[group_pop_var2] / df2[total_pop_var2]).fillna(0)
@@ -198,7 +218,12 @@ def sim_dual_composition(
 
 
 def sim_share(
-    df1, df2, group_pop_var1, total_pop_var1, group_pop_var2, total_pop_var2,
+    df1,
+    df2,
+    group_pop_var1,
+    total_pop_var1,
+    group_pop_var2,
+    total_pop_var2,
 ):
     """Simulate the spatial population distribution of a region using the CDF of a comparison region.
 
@@ -228,7 +253,9 @@ def sim_share(
         dataframes with simulated population columns appended
 
     """
-    df1, df2 = _prepare_comparative_data(df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2)
+    df1, df2 = _prepare_comparative_data(
+        df1, df2, group_pop_var1, group_pop_var2, total_pop_var1, total_pop_var2
+    )
 
     df1["compl_pop_var"] = df1[total_pop_var1] - df1[group_pop_var1]
     df2["compl_pop_var"] = df2[total_pop_var2] - df2[group_pop_var2]
@@ -298,7 +325,6 @@ def _prepare_random_label(seg_class_1, seg_class_2):
     data_2["grouping_variable"] = "Group_2"
 
     if isinstance(seg_class_1, SingleGroupIndex):
-
         # This step is just to make sure the each frequency column is integer for the approaches and from the same type in order to be able to stack them
         data_1.loc[:, (seg_class_1.group_pop_var, seg_class_1.total_pop_var)] = (
             data_1.loc[:, (seg_class_1.group_pop_var, seg_class_1.total_pop_var)]
@@ -308,7 +334,11 @@ def _prepare_random_label(seg_class_1, seg_class_2):
 
         # random permutation needs the columns to have the same names
        data_1 = data_1[
-            [seg_class_1.group_pop_var, seg_class_1.total_pop_var, "grouping_variable",]
+            [
+                seg_class_1.group_pop_var,
+                seg_class_1.total_pop_var,
+                "grouping_variable",
+            ]
         ]
         data_1.columns = ["group", "total", "grouping_variable"]
 
@@ -318,14 +348,17 @@ def _prepare_random_label(seg_class_1, seg_class_2):
             .astype(int)
         )
         data_2 = data_2[
-            [seg_class_2.group_pop_var, seg_class_2.total_pop_var, "grouping_variable",]
+            [
+                seg_class_2.group_pop_var,
+                seg_class_2.total_pop_var,
+                "grouping_variable",
+            ]
         ]
         data_2.columns = ["group", "total", "grouping_variable"]
 
         stacked_data = pd.concat([data_1, data_2], axis=0)
 
     elif isinstance(seg_class_1, MultiGroupIndex):
-
         groups_list = seg_class_1.groups
 
         for i in range(len(groups_list)):
@@ -334,7 +367,9 @@ def _prepare_random_label(seg_class_1, seg_class_2):
 
         if seg_class_1.groups != seg_class_2.groups:
             raise ValueError("MultiGroup groups should be the same")
-
+        # geometry has been discarded, but the CRS can cause concatenation problems
+        data_1.crs = None
+        data_2.crs = None
         stacked_data = pd.concat([data_1, data_2], ignore_index=True)
     return stacked_data
 
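The three added lines above are the substance of the "fix crs concat" commit: by this point the geometry column has already been dropped, but each frame can still carry its own CRS metadata, and pd.concat may refuse to stack frames whose CRS disagree. The sketch below is illustrative only and is not part of the commit; the EPSG codes and column names are invented, and whether the mismatched concat raises or passes silently depends on the geopandas version installed.

# Illustrative sketch of the failure mode guarded against above (assumed
# example; EPSG codes and columns are arbitrary, behavior varies by version).
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

gdf1 = gpd.GeoDataFrame(
    {"group": [10, 20], "total": [100, 200]},
    geometry=[Point(0, 0), Point(1, 1)],
    crs="EPSG:4326",
)
gdf2 = gdf1.to_crs("EPSG:3857")  # same data, different CRS

try:
    pd.concat([gdf1, gdf2], ignore_index=True)
except ValueError as err:
    # recent geopandas: "Cannot determine common CRS for concatenation inputs"
    print(err)

# Clearing the CRS once the geometry is no longer needed (the approach the
# commit takes) lets the two frames stack; only the attribute columns matter here.
gdf1.crs = None
gdf2.crs = None
stacked = pd.concat([gdf1, gdf2], ignore_index=True)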
@@ -343,7 +378,7 @@ def _estimate_random_label_difference(data):
     # note: if estimating a spatial implicit index, then "space" has already been accounted for...
     # when the index is computed, the underlying data are transformed to represent the *accessible* population
     # so when calculating the simulated difference, we need to pop spatial implicit parameters
-
+
     stacked_data = data[0]
     function = data[1]
     index_args_1 = data[2]
@@ -352,18 +387,20 @@ def _estimate_random_label_difference(data):
     groups = data[5]
     approach = data[6]
     for args in [index_args_1, index_args_2]:
-        if 'network' in args:
-            args.pop('network')
-        elif 'distance' in args:
-            args.pop('distance')
+        if "network" in args:
+            args.pop("network")
+        elif "distance" in args:
+            args.pop("distance")
 
-    if approach == 'person_permutation':
-        grouping = stacked_data['grouping_variable'].copy().values
+    if approach == "person_permutation":
+        grouping = stacked_data["grouping_variable"].copy().values
         if groups:
             stacked_data = simulate_person_permutation(stacked_data, groups=groups)
         else:
-            stacked_data = simulate_person_permutation(stacked_data, group='group', total='total')
-        stacked_data['grouping_variable'] = grouping
+            stacked_data = simulate_person_permutation(
+                stacked_data, group="group", total="total"
+            )
+        stacked_data["grouping_variable"] = grouping
 
     else:
         stacked_data["grouping_variable"] = np.random.permutation(
@@ -414,7 +451,10 @@ def _estimate_counterfac_difference(data):
     data_1_test = data_1.drop([group_1], axis=1)
 
     simulations_1 = function(
-        data_1_test, "test_group_pop_var", total_1, **index_args_1,
+        data_1_test,
+        "test_group_pop_var",
+        total_1,
+        **index_args_1,
     )[0]
 
     # Dropping to avoid confusion in the next iteration
@@ -431,7 +471,10 @@ def _estimate_counterfac_difference(data):
     data_2_test = data_2.drop([group_2], axis=1)
 
     simulations_2 = function(
-        data_2_test, "test_group_pop_var", total_2, **index_args_2,
+        data_2_test,
+        "test_group_pop_var",
+        total_2,
+        **index_args_2,
     )[0]
 
     # Dropping to avoid confusion in the next iteration
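For context on the public helpers whose signatures were only reflowed in this diff, a hypothetical usage sketch follows. The module path comes from the file shown above; the column names and toy data are invented, and, per the docstring, the call returns the two frames with simulated population columns appended.

# Hypothetical usage of sim_composition from the file changed in this commit
# (toy data; column names are arbitrary, not taken from the library).
import pandas as pd

from segregation.inference.comparative import sim_composition

df1 = pd.DataFrame({"group_pop": [50, 10, 0], "total_pop": [100, 80, 60]})
df2 = pd.DataFrame({"group_pop": [5, 40, 30], "total_pop": [90, 70, 120]})

sim1, sim2 = sim_composition(
    df1, df2, "group_pop", "total_pop", "group_pop", "total_pop"
)
print(sim1.columns.tolist())  # original columns plus the simulated ones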
