more docs

yamilbknsu · yamilbknsu · commit c66913decc0d · 2025-09-12T10:02:29.000-04:00
diff --git a/demos/models/rebalancing.py b/demos/models/rebalancing.py
@@ -11,8 +11,46 @@
 import time
 from logging_logic import log_execution_time
 
-@orca.step('household_rebalancing')
+STEP_NAME = "household_rebalancing"
+
+@orca.step(STEP_NAME)
 def household_rebalancing(households, persons, year, get_new_households, get_new_person_id, rebalanced_households, rebalanced_persons):
+    """
+    Adjust household counts to match control totals by geography and household size.
+
+    This step compares current household counts with control totals and duplicates or removes
+    households as needed. It maintains population consistency by also updating the persons table
+    and stores removed records for tracking purposes.
+
+    Parameters
+    ----------
+    households : orca.Table
+        The households table containing household-level attributes.
+    persons : orca.Table
+        The persons table containing individual-level attributes.
+    year : int
+        The current simulation year.
+    get_new_households : callable
+        Function to generate new unique household IDs.
+    get_new_person_id : callable
+        Function to generate new unique person IDs.
+    rebalanced_households : orca.Table
+        Table for storing removed household records.
+    rebalanced_persons : orca.Table
+        Table for storing removed person records.
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    - Modifies households and persons tables in place by adding/removing records.
+    - Uses module configuration to determine control table and column mappings.
+    - Tracks marital status before and after operations in marital_rebalanced table.
+    - Skips processing if no control data exists for the current year.
+    - Sampling with replacement occurs when duplicating more households than available.
+    """
     start_time = time.time()
 
     # Load calibration config
@@ -43,8 +81,6 @@ def household_rebalancing(households, persons, year, get_new_households, get_new
     indices = index_df.groupby([GEOID_COL, CONTROL_COL]).indices
     current_count = index_df.groupby([GEOID_COL, CONTROL_COL]).size()
     hh_difference = control_table_wrapped.local.loc[year].astype({GEOID_COL: "str", CONTROL_COL:"str"}).set_index([GEOID_COL, CONTROL_COL])[value_column].loc[current_count.index] - current_count
-    
-    # TODO: Add assertions about rows being enough to make the sampling
 
     to_remove_hh = []
     to_duplicate_hh = []
diff --git a/docs/source/api/configuration_module.rst b/docs/source/api/configuration_module.rst
@@ -3,7 +3,7 @@ DEMOS Configuration Structure
 
 .. autoclass:: demos.config.DEMOSConfig
    :members:
-   :exclude-members: __init__, update, require_persons_and_households
+   :exclude-members: __init__, update, require_persons_and_households, model_post_init
 
 Individual Modules configuration
 --------------------------------
diff --git a/docs/source/api/rebalancing_module.rst b/docs/source/api/rebalancing_module.rst
@@ -0,0 +1,31 @@
+Household Rebalancing Module
+============================
+
+This module adjusts the synthetic population to match observed household size distributions by geography.
+It duplicates or removes households to align current counts with control totals, maintaining population
+consistency while preserving demographic characteristics. The module tracks marital status before and
+after rebalancing operations and stores removed households/persons in separate tables.
+
+Key features:
+
+- Matches household counts to control totals by geography and household size.
+- Duplicates households (and their members) when counts are below target.
+- Removes households (and their members) when counts exceed target.
+- Preserves demographic characteristics through exact duplication/removal.
+- Tracks marital status changes for validation purposes.
+- Stores removed records in rebalanced_households and rebalanced_persons tables.
+
+Caveats:
+
+- Requires a control table with year, geography, household size, and target count columns.
+- The control table must be indexed by 'year' and must have exactly 3 columns.
+- Sampling with replacement is used when duplicating more households than available.
+- Most errors are handled with assertions; users should ensure data consistency.
+- Geographic and household size columns must match between households and control tables.
+
+Module function
+---------------
+
+Module configuration options: :py:class:`~demos.config.HHRebalancingModuleConfig`
+
+.. autofunction:: demos.models.rebalancing.household_rebalancing
diff --git a/docs/source/pages/intro.md b/docs/source/pages/intro.md
@@ -27,6 +27,7 @@ This document summarizes instructions to install, configure and run DEMOS. Secti
     ```
   
     **Build Docker Image** *(Development only)*
+    > NOTE: Ideally, the DEMOS Docker image would be hosted in a public image registry so that users do not need to build it themselves.
     ```bash
     docker build -t demos:0.0.1 --platform=linux/amd64 -f Dockerfile .
     ```
@@ -86,7 +87,7 @@ open build/html/index.html
 
 ## 2. Preparing Your Configuration
 
-DEMOS is configured via a TOML file (see [example configuration](default_configuration) for a full example).  
+DEMOS is configured via a TOML file (see [example configuration](default_configuration) for a full example and [configuration API](../api/configuration_module.rst) for descriptions of each of the accepted parameters).  
 At minimum, you must define the `persons` and `households` tables:
 
 ```toml
@@ -170,17 +171,20 @@ To run DEMOS, organize your files as follows:
 ```
 DEMOS_NREL/
 ├── configuration/
-│   └── demos_config.toml # Main configuration file (TOML)
+│   └── demos_config.toml                   # Main configuration file (TOML)
 ├── data/
-|   ├── custom_mpo_06197001_model_data.h5 # Example HDF5 data file
-│   ├── relmap_06197001.csv # Example CSV data file
-│   ├── income_rates_06197001.csv # Example CSV data file
-│   ├── hsize_ct_06197001.csv # Example CSV data file 
-|   └── calibrated_configs/ # Here is were the parameters of the estimated models go
+|   ├── custom_mpo_06197001_model_data.h5   # Example HDF5 data file
+│   ├── relmap_06197001.csv                 # Example CSV data file
+│   ├── income_rates_06197001.csv           # Example CSV data file
+│   ├── hsize_ct_06197001.csv               # Example CSV data file 
+|   └── calibrated_configs/                 # Here is where the parameters of the estimated models go
 |       └── ...
 ├── demos/ # Source code 
-|   └── simulate.py # Main entry point for running DEMOS 
-├── docs/ # Documentation
+|   ├── simulate.py                         # Main entry point for running DEMOS
+|   ├── models/                             # Logic of individual modules
+|   |   └── ...
+|   └── ... 
+├── docs/                                   # Documentation
 └── ...
 ```