|
11 | 11 | import time |
12 | 12 | from logging_logic import log_execution_time |
13 | 13 |
|
14 | | -@orca.step('household_rebalancing') |
| 14 | +STEP_NAME = "household_rebalancing" |
| 15 | + |
| 16 | +@orca.step(STEP_NAME) |
15 | 17 | def household_rebalancing(households, persons, year, get_new_households, get_new_person_id, rebalanced_households, rebalanced_persons): |
| 18 | + """ |
| 19 | + Adjust household counts to match control totals by geography and household size. |
| 20 | +
|
| 21 | + This step compares current household counts with control totals and duplicates or removes |
| 22 | + households as needed. It maintains population consistency by also updating the persons table |
| 23 | + and stores removed records for tracking purposes. |
| 24 | +
|
| 25 | + Parameters |
| 26 | + ---------- |
| 27 | + households : orca.Table |
| 28 | + The households table containing household-level attributes. |
| 29 | + persons : orca.Table |
| 30 | + The persons table containing individual-level attributes. |
| 31 | + year : int |
| 32 | + The current simulation year. |
| 33 | + get_new_households : callable |
| 34 | + Function to generate new unique household IDs. |
| 35 | + get_new_person_id : callable |
| 36 | + Function to generate new unique person IDs. |
| 37 | + rebalanced_households : orca.Table |
| 38 | + Table for storing removed household records. |
| 39 | + rebalanced_persons : orca.Table |
| 40 | + Table for storing removed person records. |
| 41 | +
|
| 42 | + Returns |
| 43 | + ------- |
| 44 | + None |
| 45 | +
|
| 46 | + Notes |
| 47 | + ----- |
| 48 | + - Modifies households and persons tables in place by adding/removing records. |
| 49 | + - Uses module configuration to determine control table and column mappings. |
| 50 | + - Records marital status before and after the rebalancing in the marital_rebalanced table. |
| 51 | + - Skips processing if no control data exists for the current year. |
| 52 | + - Households are sampled with replacement when more must be duplicated than are available. |
| 53 | + """ |
16 | 54 | start_time = time.time() |
17 | 55 |
|
18 | 56 | # Load calibration config |
@@ -43,8 +81,6 @@ def household_rebalancing(households, persons, year, get_new_households, get_new |
43 | 81 | indices = index_df.groupby([GEOID_COL, CONTROL_COL]).indices |
44 | 82 | current_count = index_df.groupby([GEOID_COL, CONTROL_COL]).size() |
45 | 83 | hh_difference = control_table_wrapped.local.loc[year].astype({GEOID_COL: "str", CONTROL_COL:"str"}).set_index([GEOID_COL, CONTROL_COL])[value_column].loc[current_count.index] - current_count |
46 | | - |
47 | | - # TODO: Add assertions about rows being enough to make the sampling |
48 | 84 |
|
49 | 85 | to_remove_hh = [] |
50 | 86 | to_duplicate_hh = [] |
|
0 commit comments