88import numpy as np
99import numpy .typing as npt
1010import pandas as pd
11+ from pytest_check import check
1112from vivarium_testing_utils import FuzzyChecker
1213
1314from pseudopeople .configuration import Keys , get_configuration
@@ -34,7 +35,8 @@ def run_column_noising_tests(
3435
3536 # Check that originally missing data remained missing
3637 originally_missing_idx = check_original .index [check_original [col .name ].isna ()]
37- assert check_noised .loc [originally_missing_idx , col .name ].isna ().all ()
38+ with check :
39+ assert check_noised .loc [originally_missing_idx , col .name ].isna ().all ()
3840
3941 # Check for noising where applicable
4042 to_compare_idx = shared_idx .difference (originally_missing_idx )
@@ -43,7 +45,8 @@ def run_column_noising_tests(
4345 check_original .loc [to_compare_idx , col .name ].values
4446 != check_noised .loc [to_compare_idx , col .name ].values
4547 )
46- assert different_check .any ()
48+ with check :
49+ assert different_check .any ()
4750
4851 noise_level = different_check .sum ()
4952
@@ -64,7 +67,8 @@ def run_column_noising_tests(
6467 == check_noised .loc [to_compare_idx , col .name ].values
6568 )
6669
67- assert same_check .all ()
70+ with check :
71+ assert same_check .all ()
6872
6973
7074def run_omit_row_or_do_not_respond_tests (
@@ -87,15 +91,20 @@ def run_omit_row_or_do_not_respond_tests(
8791 ]:
8892 # Census and household surveys have do_not_respond and omit_row.
8993 # For all other datasets they are mutually exclusive
90- assert len (noise_types ) == 2
94+ with check :
95+ assert len (noise_types ) == 2
9196 else :
92- assert len (noise_types ) < 2
97+ with check :
98+ assert len (noise_types ) < 2
9399 if not noise_types : # Check that there are no missing indexes
94- assert noised_data .index .symmetric_difference (original_data .index ).empty
100+ with check :
101+ assert noised_data .index .symmetric_difference (original_data .index ).empty
95102 else : # Check that there are some omissions
96103 # TODO: assert levels are as expected
97- assert noised_data .index .difference (original_data .index ).empty
98- assert not original_data .index .difference (noised_data .index ).empty
104+ with check :
105+ assert noised_data .index .difference (original_data .index ).empty
106+ with check :
107+ assert not original_data .index .difference (noised_data .index ).empty
99108
100109
101110def validate_column_noise_level (
@@ -158,7 +167,8 @@ def validate_column_noise_level(
158167 [1 - p for p in token_probability ]
159168 )
160169 else :
161- assert isinstance (tokens_per_string , pd .Series )
170+ with check :
171+ assert isinstance (tokens_per_string , pd .Series )
162172 avg_probability_any_token_noised = (
163173 1 - (1 - token_probability ) ** tokens_per_string
164174 ).mean ()
0 commit comments