1717from loguru import logger
1818from vivarium_testing_utils import FuzzyChecker
1919
20+ from pseudopeople .configuration import Keys , get_configuration
21+ from pseudopeople .noise_entities import NOISE_TYPES
22+ from pseudopeople .schema_entities import COLUMNS , DATASET_SCHEMAS
23+ from tests .integration .conftest import CELL_PROBABILITY
24+
2025
2126def pytest_addoption (parser : argparsing .Parser ) -> None :
2227 parser .addoption ("--runslow" , action = "store_true" , default = False , help = "run slow tests" )
28+ parser .addoption (
29+ "--release" , action = "store_true" , default = False , help = "run release tests"
30+ )
2331 parser .addoption (
2432 "--limit" ,
2533 action = "store" ,
@@ -38,15 +46,24 @@ def pytest_configure(config: Config) -> None:
3846
3947
4048def pytest_collection_modifyitems (config : Config , items : list [Function ]) -> None :
49+ skip_release = pytest .mark .skip (reason = "need --release to run" )
50+ if not config .getoption ("--release" ):
51+ for item in items :
52+ if "release" in item .keywords :
53+ item .add_marker (skip_release )
54+
4155 if config .getoption ("--runslow" ):
4256 # --runslow given in cli: do not skip slow tests
4357 return
58+
4459 skip_slow = pytest .mark .skip (reason = "need --runslow option to run" )
4560 for item in items :
4661 # Automatically tag all tests in the tests/integration dir as slow
47- if item .parent and Path (item .parent .path ).parent .stem == "integration" :
48- item .add_marker (pytest .mark .slow )
49- if "slow" in item .keywords :
62+ test_in_slow_directory = (
63+ item .parent and Path (item .parent .path ).parent .stem == "integration"
64+ )
65+ test_is_slow = "slow" in item .keywords
66+ if test_in_slow_directory or test_is_slow :
5067 item .add_marker (skip_slow )
5168
5269 # Limit the number of permutations of parametrised tests to run.
@@ -88,3 +105,50 @@ def fuzzy_checker(output_directory: Path) -> Generator[FuzzyChecker, None, None]
88105 yield checker
89106
90107 checker .save_diagnostic_output (output_directory )
108+
109+
@pytest.fixture(scope="session")
def config() -> dict[str, Any]:
    """Build the custom noising configuration shared by the tests.

    Starts from the default configuration returned by ``get_configuration()``
    and, for every dataset in it:

    * replaces the row-noise section so that each row noise type (except
      ``duplicate_with_guardian``) has a row probability of 0.05, and
    * sets the cell probability of every noise type on every noisable column
      to ``CELL_PROBABILITY``.

    Afterwards two dataset-specific overrides are applied: guardian
    duplication is switched off for the census dataset and noising of the
    SSA ``ssa_event_type`` column is switched off.

    Returns:
        The adjusted configuration as a nested dictionary.
    """
    row_probability = 0.05
    noise_config = get_configuration().to_dict()  # start from the defaults

    # Raise the row noise probabilities to 5% and point every column's
    # cell probability at CELL_PROBABILITY.
    for dataset_name, dataset_config in noise_config.items():
        dataset_schema = DATASET_SCHEMAS.get_dataset_schema(dataset_name)

        row_noise = {}
        for noise_type in dataset_schema.row_noise_types:
            # Guardian duplication is configured separately further down.
            if noise_type != NOISE_TYPES.duplicate_with_guardian:
                row_noise[noise_type.name] = {Keys.ROW_PROBABILITY: row_probability}
        noise_config[dataset_schema.name][Keys.ROW_NOISE] = row_noise

        # Snapshot the columns first, then skip the ones without noise types.
        for col in list(dataset_schema.columns):
            if not col.noise_types:
                continue
            column_noise = {}
            for noise_type in col.noise_types:
                column_noise[noise_type.name] = {Keys.CELL_PROBABILITY: CELL_PROBABILITY}
            dataset_config[Keys.COLUMN_NOISE][col.name] = column_noise

    # FIXME: Remove when record_id is added as the truth deck for datasets.
    # For integration tests, we will NOT duplicate rows with guardian duplication.
    # This is because we want to be able to compare the noised and unnoised data
    # and a big assumption we make is that simulant_id and household_id are the
    # truth decks in our datasets.
    no_guardian_duplication = {
        Keys.ROW_PROBABILITY_IN_HOUSEHOLDS_UNDER_18: 0.0,
        Keys.ROW_PROBABILITY_IN_COLLEGE_GROUP_QUARTERS_UNDER_24: 0.0,
    }
    census_row_noise = noise_config[DATASET_SCHEMAS.census.name][Keys.ROW_NOISE]
    census_row_noise[NOISE_TYPES.duplicate_with_guardian.name] = no_guardian_duplication

    # Update SSA dataset to noise 'ssn' but NOT noise 'ssa_event_type' since that
    # will be used as an identifier along with simulant_id
    # TODO: Noise ssa_event_type when record IDs are implemented (MIC-4039)
    event_type_noise = {}
    for noise_type in COLUMNS.ssa_event_type.noise_types:
        event_type_noise[noise_type.name] = {Keys.CELL_PROBABILITY: 0}
    ssa_column_noise = noise_config[DATASET_SCHEMAS.ssa.name][Keys.COLUMN_NOISE]
    ssa_column_noise[COLUMNS.ssa_event_type.name] = event_type_noise

    return noise_config
0 commit comments