Commit 48e08b4

The IPU procedure was modified to remove the consistency check in which zero weights were replaced with the smallest representable float value. That was a workaround in the earlier code for issues arising from floating-point precision. In this version, if the sum of weights corresponding to the set of rows for a column constraint is zero, the subsequent weight-update steps for that constraint are skipped. This is a very extreme case, and the earlier workaround led to performance inefficiencies; the solution implemented here is appropriate and should not have any detriment to the IPU procedure or its output.
1 parent 57cab0b commit 48e08b4
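Below is a minimal, self-contained sketch of the behaviour described in the commit message for a single column constraint. It is illustrative only, not the PopGen code: the helper name adjust_column and the toy arrays are assumptions, and the real _adjust_sample_weights method iterates over the full constraint set with a 2-D weights array.

import numpy as np

def adjust_column(sample_weights, contrib, row_idx, constraint):
    # Weighted contribution of the sample rows to this column constraint.
    weighted_sum = sample_weights.dot(contrib)
    if weighted_sum == 0:
        # Extreme corner case: skip the adjustment entirely instead of
        # replacing zero weights with np.finfo(np.float64).tiny as before.
        return sample_weights
    adjustment = constraint / weighted_sum
    # Rescale only the rows that contribute to this constraint.
    sample_weights[row_idx] *= adjustment
    return sample_weights

# Toy example: three sample rows, the first two contribute to the constraint.
weights = np.ones(3)
contrib = np.array([2.0, 1.0, 0.0])
rows = np.where(contrib > 0)[0]
print(adjust_column(weights, contrib, rows, constraint=6.0))      # [2. 2. 1.]
print(adjust_column(weights, np.zeros(3), rows, constraint=6.0))  # unchanged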

File tree

1 file changed: +15 -26 lines changed


popgen/reweighting.py

Lines changed: 15 additions & 26 deletions
@@ -1,6 +1,9 @@
 import numpy as np
 import pandas as pd
 
+import time
+
+
 #TODO: Reimplement all DS processing in the Syn_Population Class
 class Reweighting_DS(object):
     def __init__(self):
@@ -23,7 +26,8 @@ def get_row_idx(self, sample_restruct):
         for column in sample_restruct.columns.values.tolist():
             rows = np.where(sample_restruct[column] > 0)[0]
             row_idx[column] = rows
-            contrib[column] = sample_restruct[column].values
+            contrib[column] = np.array(
+                sample_restruct[column].values, order="F")
         return (row_idx, contrib)
 
     def get_stacked_sample_restruct(self, sample_restruct_list):
@@ -116,9 +120,10 @@ def run_ipu(self, region_constraints, geo_constraints):
         len_geo_ids = len(geo_ids)
         sample_weights = np.ones((self.region_stacked.shape[0],
                                   len_geo_ids),
-                                 dtype=float, order="C")
+                                 dtype=float, order="F")
         #print "Outer iterations", self.outer_iterations
         for iter in range(self.outer_iterations):
+            #t = time.time()
             #print "Region: %s and Iter: %s" % (region_id, iter)
             if region_constraints is not None:
                 sample_weights = (self._adjust_sample_weights(
@@ -142,6 +147,7 @@ def run_ipu(self, region_constraints, geo_constraints):
                        sample_weights[:, index],
                        geo_constraints.loc[geo_id])
                 pass
+            #print ("One outer iteration complete in %.4f" % (time.time() - t))
             self._populate_sample_weights(sample_weights, region_id, geo_ids)
             #print self.average_deviations
         print "\tsample_weights sum:", sample_weights.sum()
@@ -154,53 +160,36 @@ def _adjust_sample_weights(self, sample_weights, constraints,
         else:
             row_idx = self.region_row_idx
             contrib = self.region_contrib
-
-        sample_weights = np.ascontiguousarray(sample_weights)
-
+        t = time.time()
         for i in range(iters):
             for column in reversed(constraints.index):
                 #TODO: the reversed iteration of list needs to be replaced with
                 #a user specified ordering of the constraints
                 if geo is False:
-                    #t = time.time()
-                    #weighted_sum = (sample_weights
-                    #                .sum(axis=1).dot(contrib[column]))
-                    #print "Time taken: %.4f" % (time.time() - t)
-                    #t = time.time()
                     weighted_sum = (
                         sample_weights.T.dot(contrib[column])
                         ).sum()
-                    #print "Time taken2: %.4ff" % (time.time() - t)
-                    #print weighted_sum, weighted_sum1
                 else:
                     weighted_sum = sample_weights.dot(contrib[column])
+
+                if weighted_sum == 0:
+                    print ("""Weighted sum for column %s in iter %d"""
+                           """is zero so don't adjust""" % (column, i))
+                    continue
+
                 adjustment = constraints[column]/weighted_sum
                 sample_weights[row_idx[column]] *= adjustment
 
-                if (sample_weights[row_idx[column]] == 0).any():
-                    zero_weights = sample_weights == 0
-                    sample_weights[zero_weights] = (
-                        np.finfo(np.float64).tiny)
-                    #print column, constraints[column], weighted_sum, adjustment
-                    #raw_input("Zero sample weights adjusted")
-
-                #if ((sample_weights == 0).any() or
-                #        pd.isnull(sample_weights).any()):
-                #    print constraints
-                #    print column, constraints[column], weighted_sum, adjustment
-                #    raw_input("Invalid row value of zero or null")
         return sample_weights
 
     def _calculate_populate_average_deviation(
             self, geo_id, iter, sample_weights, constraints):
         diff_sum = 0
-        sample_weights = np.ascontiguousarray(sample_weights)
 
         for column in constraints.index:
             weighted_sum = sample_weights.dot(self.geo_contrib[column])
             diff_sum += np.abs(weighted_sum - constraints[column])
         average_diff = diff_sum/constraints.shape[0]
-        #print average_diff, sample_weights.sum()
         self.average_deviations.loc[geo_id, iter] = average_diff
 
     def _populate_sample_weights(self, sample_weights, region_id, geo_ids):
