77from .randomization import simulate_person_permutation
88
99
10- def _prepare_comparative_data (df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2 ):
10+ def _prepare_comparative_data (
11+ df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2
12+ ):
1113 df1 = df1 .copy ()
1214 df2 = df2 .copy ()
1315 if hasattr (df1 , "geometry" ):
@@ -23,7 +25,6 @@ def _prepare_comparative_data(df1, df2, group_pop_var1, group_pop_var2, total_po
2325 return df1 , df2
2426
2527
26-
2728def _generate_counterfactual (
2829 data1 ,
2930 data2 ,
@@ -64,7 +65,12 @@ def _generate_counterfactual(
6465
6566 """
6667 df1 , df2 = DUAL_SIMULATORS [counterfactual_approach ](
67- data1 , data2 , group_pop_var1 , total_pop_var1 , group_pop_var2 , total_pop_var2 ,
68+ data1 ,
69+ data2 ,
70+ group_pop_var1 ,
71+ total_pop_var1 ,
72+ group_pop_var2 ,
73+ total_pop_var2 ,
6874 )
6975 df1 ["group_composition" ] = (df1 [group_pop_var1 ] / df1 [total_pop_var1 ]).fillna (0 )
7076 df2 ["group_composition" ] = (df2 [group_pop_var2 ] / df2 [total_pop_var2 ]).fillna (0 )
@@ -83,7 +89,12 @@ def _generate_counterfactual(
8389
8490
8591def sim_composition (
86- df1 , df2 , group_pop_var1 , total_pop_var1 , group_pop_var2 , total_pop_var2 ,
92+ df1 ,
93+ df2 ,
94+ group_pop_var1 ,
95+ total_pop_var1 ,
96+ group_pop_var2 ,
97+ total_pop_var2 ,
8798):
8899 """Simulate the spatial distribution of a population group in a region using the CDF of a comparison region.
89100
@@ -112,7 +123,9 @@ def sim_composition(
112123 two pandas.DataFrame
113124 dataframes with simulated population columns appended
114125 """
115- df1 , df2 = _prepare_comparative_data (df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2 )
126+ df1 , df2 = _prepare_comparative_data (
127+ df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2
128+ )
116129
117130 df1 ["group_composition" ] = (df1 [group_pop_var1 ] / df1 [total_pop_var1 ]).fillna (0 )
118131 df2 ["group_composition" ] = (df2 [group_pop_var2 ] / df2 [total_pop_var2 ]).fillna (0 )
@@ -133,7 +146,12 @@ def sim_composition(
133146
134147
135148def sim_dual_composition (
136- df1 , df2 , group_pop_var1 , total_pop_var1 , group_pop_var2 , total_pop_var2 ,
149+ df1 ,
150+ df2 ,
151+ group_pop_var1 ,
152+ total_pop_var1 ,
153+ group_pop_var2 ,
154+ total_pop_var2 ,
137155):
138156 """Apply the 'composition' for both minority and complementary groups.
139157
@@ -158,7 +176,9 @@ def sim_dual_composition(
158176 dataframes with simulated population columns appended
159177
160178 """
161- df1 , df2 = _prepare_comparative_data (df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2 )
179+ df1 , df2 = _prepare_comparative_data (
180+ df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2
181+ )
162182
163183 df1 ["group_composition" ] = (df1 [group_pop_var1 ] / df1 [total_pop_var1 ]).fillna (0 )
164184 df2 ["group_composition" ] = (df2 [group_pop_var2 ] / df2 [total_pop_var2 ]).fillna (0 )
@@ -198,7 +218,12 @@ def sim_dual_composition(
198218
199219
200220def sim_share (
201- df1 , df2 , group_pop_var1 , total_pop_var1 , group_pop_var2 , total_pop_var2 ,
221+ df1 ,
222+ df2 ,
223+ group_pop_var1 ,
224+ total_pop_var1 ,
225+ group_pop_var2 ,
226+ total_pop_var2 ,
202227):
203228 """Simulate the spatial population distribution of a region using the CDF of a comparison region.
204229
@@ -228,7 +253,9 @@ def sim_share(
228253 dataframes with simulated population columns appended
229254
230255 """
231- df1 , df2 = _prepare_comparative_data (df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2 )
256+ df1 , df2 = _prepare_comparative_data (
257+ df1 , df2 , group_pop_var1 , group_pop_var2 , total_pop_var1 , total_pop_var2
258+ )
232259
233260 df1 ["compl_pop_var" ] = df1 [total_pop_var1 ] - df1 [group_pop_var1 ]
234261 df2 ["compl_pop_var" ] = df2 [total_pop_var2 ] - df2 [group_pop_var2 ]
@@ -298,7 +325,6 @@ def _prepare_random_label(seg_class_1, seg_class_2):
298325 data_2 ["grouping_variable" ] = "Group_2"
299326
300327 if isinstance (seg_class_1 , SingleGroupIndex ):
301-
302328 # This step is just to make sure the each frequency column is integer for the approaches and from the same type in order to be able to stack them
303329 data_1 .loc [:, (seg_class_1 .group_pop_var , seg_class_1 .total_pop_var )] = (
304330 data_1 .loc [:, (seg_class_1 .group_pop_var , seg_class_1 .total_pop_var )]
@@ -308,7 +334,11 @@ def _prepare_random_label(seg_class_1, seg_class_2):
308334
309335 # random permutation needs the columns to have the same names
310336 data_1 = data_1 [
311- [seg_class_1 .group_pop_var , seg_class_1 .total_pop_var , "grouping_variable" ,]
337+ [
338+ seg_class_1 .group_pop_var ,
339+ seg_class_1 .total_pop_var ,
340+ "grouping_variable" ,
341+ ]
312342 ]
313343 data_1 .columns = ["group" , "total" , "grouping_variable" ]
314344
@@ -318,14 +348,17 @@ def _prepare_random_label(seg_class_1, seg_class_2):
318348 .astype (int )
319349 )
320350 data_2 = data_2 [
321- [seg_class_2 .group_pop_var , seg_class_2 .total_pop_var , "grouping_variable" ,]
351+ [
352+ seg_class_2 .group_pop_var ,
353+ seg_class_2 .total_pop_var ,
354+ "grouping_variable" ,
355+ ]
322356 ]
323357 data_2 .columns = ["group" , "total" , "grouping_variable" ]
324358
325359 stacked_data = pd .concat ([data_1 , data_2 ], axis = 0 )
326360
327361 elif isinstance (seg_class_1 , MultiGroupIndex ):
328-
329362 groups_list = seg_class_1 .groups
330363
331364 for i in range (len (groups_list )):
@@ -334,7 +367,9 @@ def _prepare_random_label(seg_class_1, seg_class_2):
334367
335368 if seg_class_1 .groups != seg_class_2 .groups :
336369 raise ValueError ("MultiGroup groups should be the same" )
337-
370+ # geometry has been discarded, but the CRS can cause concatenation problems
371+ data_1 .crs = None
372+ data_2 .crs = None
338373 stacked_data = pd .concat ([data_1 , data_2 ], ignore_index = True )
339374 return stacked_data
340375
@@ -343,7 +378,7 @@ def _estimate_random_label_difference(data):
343378 # note: if estimating a spatial implicit index, then "space" has already been accounted for...
344379 # when the index is computed, the underlying data are transformed to represent the *accessible* population
345380 # so when calculating the simulated difference, we need to pop spatial implicit parameters
346-
381+
347382 stacked_data = data [0 ]
348383 function = data [1 ]
349384 index_args_1 = data [2 ]
@@ -352,18 +387,20 @@ def _estimate_random_label_difference(data):
352387 groups = data [5 ]
353388 approach = data [6 ]
354389 for args in [index_args_1 , index_args_2 ]:
355- if ' network' in args :
356- args .pop (' network' )
357- elif ' distance' in args :
358- args .pop (' distance' )
390+ if " network" in args :
391+ args .pop (" network" )
392+ elif " distance" in args :
393+ args .pop (" distance" )
359394
360- if approach == ' person_permutation' :
361- grouping = stacked_data [' grouping_variable' ].copy ().values
395+ if approach == " person_permutation" :
396+ grouping = stacked_data [" grouping_variable" ].copy ().values
362397 if groups :
363398 stacked_data = simulate_person_permutation (stacked_data , groups = groups )
364399 else :
365- stacked_data = simulate_person_permutation (stacked_data , group = 'group' , total = 'total' )
366- stacked_data ['grouping_variable' ] = grouping
400+ stacked_data = simulate_person_permutation (
401+ stacked_data , group = "group" , total = "total"
402+ )
403+ stacked_data ["grouping_variable" ] = grouping
367404
368405 else :
369406 stacked_data ["grouping_variable" ] = np .random .permutation (
@@ -414,7 +451,10 @@ def _estimate_counterfac_difference(data):
414451 data_1_test = data_1 .drop ([group_1 ], axis = 1 )
415452
416453 simulations_1 = function (
417- data_1_test , "test_group_pop_var" , total_1 , ** index_args_1 ,
454+ data_1_test ,
455+ "test_group_pop_var" ,
456+ total_1 ,
457+ ** index_args_1 ,
418458 )[0 ]
419459
420460 # Dropping to avoid confusion in the next iteration
@@ -431,7 +471,10 @@ def _estimate_counterfac_difference(data):
431471 data_2_test = data_2 .drop ([group_2 ], axis = 1 )
432472
433473 simulations_2 = function (
434- data_2_test , "test_group_pop_var" , total_2 , ** index_args_2 ,
474+ data_2_test ,
475+ "test_group_pop_var" ,
476+ total_2 ,
477+ ** index_args_2 ,
435478 )[0 ]
436479
437480 # Dropping to avoid confusion in the next iteration
0 commit comments