foss-transportationmodeling
diff --git a/popgen/__init__.py b/popgen/__init__.py
Lines changed: 3 additions & 3 deletions
diff --git a/popgen/config.py b/popgen/config.py
Lines changed: 17 additions & 3 deletions
diff --git a/popgen/data.py b/popgen/data.py
Lines changed: 35 additions & 25 deletions
diff --git a/popgen/draw.py b/popgen/draw.py
Lines changed: 14 additions & 14 deletions
@@ -1,3 +1,3 @@
-__author__ = 'Karthik Konduri'
-__email__ = '[email protected]'
-__version__ = '2.0'
+from .project import Project
+
+__version__ = '2.0.b1'
@@ -1,12 +1,20 @@
+import yaml
+
+
+class ConfigError(Exception):
+    pass
+
+
 def wrap_config_value(value):
     """The method is used to wrap YAML elements as Config objects. So the
     YAML properties can be accessed using attribute access.
-    E.g. If config object - x for the following YAML is given as:
+    E.g. if the config object x is specified by the following YAML:
 
     attribbute1:
         attribute2     : 'Value'
 
     then attribute access x.attribute1.attribute2 is used to access "Value".
+    Also, x.attribute1 gives access to the dictionary {attribute2: 'Value'}
     """
     if isinstance(value, basestring):
         return value
@@ -38,8 +46,8 @@ def return_value(self, key):
         try:
             value = self._data[key]
         except KeyError, e:
-            raise KeyError("Key - %s doesn't exist in the YAML configuration"
-                           % key)
+            raise ConfigError(
+                "Key - %s doesn't exist in the YAML configuration" % key)
         return value
 
     def __len__(self):
@@ -57,6 +65,12 @@ def return_list(self):
     def return_dict(self):
         return self._data
 
+    def write_to_file(self, filepath):
+        with open(filepath, 'w') as outfile:
+            outfile.write(yaml.dump(self._data,
+                                    default_flow_style=False))
+
+
 if __name__ == "__main__":
     import yaml
 
 
@@ -3,6 +3,8 @@
 import pandas as pd
 import numpy as np
 
+from config import ConfigError
+
 
 class DB(object):
     """This class returns a Inputs object that can be used to handle all
@@ -45,9 +47,11 @@ def load_data(self):
         self.region_marginals = self.get_data(region_marginals_config,
                                               header=[0, 1])
 
+        self._enumerate_geo_ids()
+
     def get_data(self, config, header=0):
         config_dict = config.return_dict()
-        #print config_dict, type(config_dict)
+        # print config_dict, type(config_dict)
         data_dict = {}
         for item in config_dict:
             full_location = os.path.abspath(config_dict[item])
@@ -56,30 +60,38 @@ def get_data(self, config, header=0):
             data_dict[item].loc[:,
                                 data_dict[item].index.name] = (data_dict[item]
                                                                .index.values)
-            #print data_dict[item]
-        #print data_dict.keys()
+            # print data_dict[item]
+        # print data_dict.keys()
         return data_dict
 
-    def enumerate_geo_ids(self):
+    def _enumerate_geo_ids(self):
         geo_to_sample = self.geo["geo_to_sample"]
-        self.geo_ids = geo_to_sample.index.values
-        self.sample_geo_ids = np.unique(geo_to_sample[self._inputs_config
-                                                      .column_names
-                                                      .sample_geo].values)
-
+        self.geo_ids_all = geo_to_sample.index.tolist()
+        # self.sample_geo_ids = np.unique(geo_to_sample[self._inputs_config
+        #                                              .column_names
+        #                                              .sample_geo].values)
         region_to_geo = self.geo["region_to_geo"]
-        self.region_ids = np.unique(region_to_geo.index.values)
+        self.region_ids_all = np.unique(region_to_geo.index.values).tolist()
 
-        #region_to_sample = self.geo["region_to_sample"]
-        #self.region_ids = np.unique(region_to_sample.index.values)
+        # region_to_sample = self.geo["region_to_sample"]
+        # self.region_ids = np.unique(region_to_sample.index.values)
 
     def get_geo_ids_for_region(self, region_id):
         geo_name = self._inputs_config.column_names.geo
-        return self.geo["region_to_geo"].loc[region_id, geo_name].copy()
-
-    def enumerate_geo_ids_to_synthesize(self):
-        #TODO: Implement this to only synthesize a few geographies
-        pass
+        return (
+            self.geo["region_to_geo"].loc[region_id, geo_name].copy().tolist())
+
+    def enumerate_geo_ids_for_scenario(self, scenario_config):
+        try:
+            self.region_ids = scenario_config.geos_to_synthesize.region.ids
+            self.geo_ids = []
+            for region_id in self.region_ids:
+                self.geo_ids += self.get_geo_ids_for_region(region_id)
+        except ConfigError, e:
+            print "KeyError", e
+            self.geo_ids = self.geo_ids_all
+            # self.sample_geo_ids = self.sample_geo_ids_all
+            self.region_ids = self.region_ids_all
 
     def return_variables_cats(self, entity, variable_names):
         variables_cats = {}
@@ -95,12 +107,10 @@ def check_data(self):
         self.check_sample_marginals_consistency()
         self.check_marginals()
 
-    def check_sample_margianls_consistency(self):
-        #TODO: check consistency in variables across files
-        #TODO: check consistency in categories across files
-        pass
-
-    def check_marginals(self):
-        #TODO: check consistency in marginals across
-        #TODO: check geo ids, sample geo ids, region ids across files
+    def check(self):
+        # TODO: check if the ids entered are consistent with the region ids
+        # TODO: check consistency in variables across files
+        # TODO: check consistency in categories across files
+        # TODO: check consistency in marginals across files (scope unconfirmed)
+        # TODO: check geo ids, sample geo ids, region ids across files
         pass
@@ -21,16 +21,16 @@ def __init__(self, scenario_config, geo_ids, geo_row_idx, geo_frequencies,
         self.pvalue_tolerance = (
             self.scenario_config.parameters.draws.pvalue_tolerance)
         self.geo_id_rows_syn_dict = {}
+        self.performance_columns = ["p_value", "iterations",
+                                    "chi_sq_stat"]
         self.draws_performance = pd.DataFrame(
-            index=self.geo_ids, columns=["p_value", "iterations",
-                                                 "chi_sq_stat"])
+            index=self.geo_ids, columns=self.performance_columns)
 
     def draw_population(self):
         np.random.seed(self.seed)
-        #print "Drawing Households"
-        performance_columns = ["p_value", "iterations", "chi_sq_stat"]
-        for geo_id in self.geo_ids:
-            #print "For geo:", geo_id
+        # print "Drawing Households"
+        for geo_id in self.geo_ids[:20]:
+            # print "For geo:", geo_id
             geo_sample_weights = self.region_sample_weights.loc[:, geo_id]
             geo_cumulative_weights = (self._return_cumulative_probability(
                                       geo_sample_weights))
@@ -42,7 +42,7 @@ def draw_population(self):
 
             p_value_max = -1
             for iter in range(self.iterations):
-                #print "Iter is:", iter, self.iterations
+                # print "Iter is:", iter, self.iterations
                 seed = self.seed + iter
                 geo_id_rows_syn = self._pick_households(
                     geo_id_frequencies, geo_cumulative_weights)
@@ -54,7 +54,7 @@ def draw_population(self):
                                                        geo_id_rows_syn, iter,
                                                        stat, True)
                     self.draws_performance.loc[geo_id,
-                                               performance_columns] = (
+                                               self.performance_columns] = (
                         p_value_max, iter, stat_max)
                     break
                 elif p_value > p_value_max:
@@ -63,15 +63,15 @@ def draw_population(self):
                                                        geo_id_rows_syn, iter,
                                                        stat, False)
                     self.draws_performance.loc[geo_id,
-                                               performance_columns] = (
+                                               self.performance_columns] = (
                         p_value_max, iter, stat_max)
 
-            #print "Max found:", max_found, geo_id_frequencies.sum()
-            #print "Max iter: %d, %f, %f" % (iter_max, p_value_max, stat_max)
-            #self.syn_population.add_records_for_geo_id(
+            # print "Max found:", max_found, geo_id_frequencies.sum()
+            # print "Max iter: %d, %f, %f" % (iter_max, p_value_max, stat_max)
+            # self.syn_population.add_records_for_geo_id(
             #    geo_id, geo_id_rows_syn_max)
             self.geo_id_rows_syn_dict[geo_id] = geo_id_rows_syn_max
-        #print self.draws_performance
+        # print self.draws_performance
 
     def _return_cumulative_probability(self, geo_sample_weights):
         geo_cumulative_weights = {}
@@ -81,7 +81,7 @@ def _return_cumulative_probability(self, geo_sample_weights):
             weights = geo_sample_weights.take(rows)
             geo_cumulative_weights[column] = (weights / weights.sum()).cumsum()
 
-            #print geo_cumulative_weights[column]
+            # print geo_cumulative_weights[column]
         return geo_cumulative_weights
 
     def _pick_households(self, geo_id_frequencies, geo_cumulative_weights):