Skip to content

Commit 0745388

Browse files
author
Hussain Jafari
committed
PR feedback
1 parent 3e04940 commit 0745388

File tree

3 files changed

+21
-5
lines changed

3 files changed

+21
-5
lines changed

src/pseudopeople/dataset.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,19 @@ def _reformat_dates_for_noising(self) -> None:
168168
def drop_non_schema_columns(
169169
data: pd.DataFrame, dataset_schema: DatasetSchema
170170
) -> pd.DataFrame:
171+
"""Returns data with only the columns in the dataset schema.
172+
173+
Parameters
174+
----------
175+
data
176+
The pd.DataFrame to update.
177+
dataset_schema
178+
A DatasetSchema which contains the columns of interest in its columns attribute.
179+
180+
Returns
181+
-------
182+
A pd.DataFrame containing only the columns in the dataset schema.
183+
"""
171184
return data[[c.name for c in dataset_schema.columns]]
172185

173186
@staticmethod

src/pseudopeople/noise_functions.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ def duplicate_with_guardian(
216216
if index_to_copy.empty:
217217
continue
218218
noised_group_df = group_df.loc[index_to_copy]
219-
noised_group_df["old_housing_type"] = noised_group_df["housing_type"]
220219
noised_group_df[GUARDIAN_DUPLICATION_ADDRESS_COLUMNS] = group_df.loc[
221220
index_to_copy,
222221
[f"{guardian}_" + column for column in GUARDIAN_DUPLICATION_ADDRESS_COLUMNS],
@@ -231,9 +230,7 @@ def duplicate_with_guardian(
231230
].map(HOUSING_TYPE_GUARDIAN_DUPLICATION_RELATONSHIP_MAP)
232231

233232
# Clean columns
234-
duplicated_rows_df = duplicated_rows_df[
235-
list(dataset.data.columns) + ["old_housing_type"]
236-
]
233+
duplicated_rows_df = duplicated_rows_df[dataset.data.columns]
237234

238235
# Add duplicated rows to the original data and make sure that households
239236
# are grouped together by sorting by date and household_id

tests/integration/release/test_release.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,12 @@ def test_guardian_duplication(
241241
duplicated = noised.loc[noised["simulant_id"].duplicated()]
242242
duplicated["age"] = duplicated["age"].astype(int)
243243

244+
# add the unnoised (original) housing type to the duplicated simulants
245+
old_housing_data = unnoised[["simulant_id", "housing_type"]].rename(
246+
{"housing_type": "unnoised_housing_type"}, axis=1
247+
)
248+
duplicated = duplicated.merge(old_housing_data)
249+
244250
# separate tests for household under 18 and for college under 24
245251
for probability_name, age, housing_type in zip(
246252
[
@@ -267,7 +273,7 @@ def test_guardian_duplication(
267273
)
268274
]
269275
duplicated_in_group = duplicated.loc[
270-
(duplicated["age"] < age) & (duplicated["old_housing_type"] == housing_type)
276+
(duplicated["age"] < age) & (duplicated["unnoised_housing_type"] == housing_type)
271277
]
272278

273279
fuzzy_checker.fuzzy_assert_proportion(

0 commit comments

Comments
 (0)