11import numpy as np
22import pandas as pd
33
4+ import time
5+
6+
47#TODO: Reimplement all DS processing in the Syn_Population Class
58class Reweighting_DS (object ):
69 def __init__ (self ):
@@ -23,7 +26,8 @@ def get_row_idx(self, sample_restruct):
2326 for column in sample_restruct .columns .values .tolist ():
2427 rows = np .where (sample_restruct [column ] > 0 )[0 ]
2528 row_idx [column ] = rows
26- contrib [column ] = sample_restruct [column ].values
29+ contrib [column ] = np .array (
30+ sample_restruct [column ].values , order = "F" )
2731 return (row_idx , contrib )
2832
2933 def get_stacked_sample_restruct (self , sample_restruct_list ):
@@ -116,9 +120,10 @@ def run_ipu(self, region_constraints, geo_constraints):
116120 len_geo_ids = len (geo_ids )
117121 sample_weights = np .ones ((self .region_stacked .shape [0 ],
118122 len_geo_ids ),
119- dtype = float , order = "C " )
123+ dtype = float , order = "F " )
120124 #print "Outer iterations", self.outer_iterations
121125 for iter in range (self .outer_iterations ):
126+ #t = time.time()
122127 #print "Region: %s and Iter: %s" % (region_id, iter)
123128 if region_constraints is not None :
124129 sample_weights = (self ._adjust_sample_weights (
@@ -142,6 +147,7 @@ def run_ipu(self, region_constraints, geo_constraints):
142147 sample_weights [:, index ],
143148 geo_constraints .loc [geo_id ])
144149 pass
150+ #print ("One outer iteration complete in %.4f" % (time.time() - t))
145151 self ._populate_sample_weights (sample_weights , region_id , geo_ids )
146152 #print self.average_deviations
147153 print "\t sample_weights sum:" , sample_weights .sum ()
@@ -154,53 +160,36 @@ def _adjust_sample_weights(self, sample_weights, constraints,
154160 else :
155161 row_idx = self .region_row_idx
156162 contrib = self .region_contrib
157-
158- sample_weights = np .ascontiguousarray (sample_weights )
159-
163+ t = time .time ()
160164 for i in range (iters ):
161165 for column in reversed (constraints .index ):
162166 #TODO: the reversed iteration of list needs to be replaced with
163167 #a user specified ordering of the constraints
164168 if geo is False :
165- #t = time.time()
166- #weighted_sum = (sample_weights
167- # .sum(axis=1).dot(contrib[column]))
168- #print "Time taken: %.4f" % (time.time() - t)
169- #t = time.time()
170169 weighted_sum = (
171170 sample_weights .T .dot (contrib [column ])
172171 ).sum ()
173- #print "Time taken2: %.4ff" % (time.time() - t)
174- #print weighted_sum, weighted_sum1
175172 else :
176173 weighted_sum = sample_weights .dot (contrib [column ])
174+
175+ if weighted_sum == 0 :
176+ print ("""Weighted sum for column %s in iter %d"""
177+ """is zero so don't adjust""" % (column , i ))
178+ continue
179+
177180 adjustment = constraints [column ]/ weighted_sum
178181 sample_weights [row_idx [column ]] *= adjustment
179182
180- if (sample_weights [row_idx [column ]] == 0 ).any ():
181- zero_weights = sample_weights == 0
182- sample_weights [zero_weights ] = (
183- np .finfo (np .float64 ).tiny )
184- #print column, constraints[column], weighted_sum, adjustment
185- #raw_input("Zero sample weights adjusted")
186-
187- #if ((sample_weights == 0).any() or
188- # pd.isnull(sample_weights).any()):
189- # print constraints
190- # print column, constraints[column], weighted_sum, adjustment
191- # raw_input("Invalid row value of zero or null")
192183 return sample_weights
193184
194185 def _calculate_populate_average_deviation (
195186 self , geo_id , iter , sample_weights , constraints ):
196187 diff_sum = 0
197- sample_weights = np .ascontiguousarray (sample_weights )
198188
199189 for column in constraints .index :
200190 weighted_sum = sample_weights .dot (self .geo_contrib [column ])
201191 diff_sum += np .abs (weighted_sum - constraints [column ])
202192 average_diff = diff_sum / constraints .shape [0 ]
203- #print average_diff, sample_weights.sum()
204193 self .average_deviations .loc [geo_id , iter ] = average_diff
205194
206195 def _populate_sample_weights (self , sample_weights , region_id , geo_ids ):
0 commit comments