4646
4747SEED = 0
4848CLI_DEFAULT_OUTPUT_DIR = (
49- "/mnt/team/simulation_science/priv/engineering/pseudopeople_release_testing "
49+ "/mnt/team/simulation_science/priv/engineering/pseudopeople/release_testing "
5050)
51+ # some of these defaults are here for ease of testing and will be removed later
5152CLI_DEFAULT_DATASET = "acs"
5253CLI_DEFAULT_POP = "sample"
5354CLI_DEFAULT_YEAR = 2020
@@ -62,44 +63,48 @@ def pytest_addoption(parser: pytest.Parser) -> None:
6263 parser .addoption (
6364 "--output-dir" ,
6465 action = "store" ,
65- default = CLI_DEFAULT_OUTPUT_DIR ,
66- help = "The output directory to write to. Defaults to /mnt/team/simulation_science/priv/engineering/pseudopeople_release_testing." ,
66+ help = f"The output directory to write to. Defaults to { CLI_DEFAULT_OUTPUT_DIR } ." ,
6767 )
6868 parser .addoption (
6969 "--dataset" ,
7070 action = "store" ,
71- default = CLI_DEFAULT_DATASET ,
7271 help = "The dataset to generate. Options are 'census', 'acs', 'cps', 'ssa', 'tax_w2_1099', 'wic', and 'tax_1040'. No argument will default to acs." ,
7372 )
7473 parser .addoption (
7574 "--population" ,
7675 action = "store" ,
77- default = CLI_DEFAULT_POP ,
7876 help = "The simulated population to generate. Options are 'USA', 'RI', and 'sample'. sample will generate very small sample data." ,
7977 )
8078 parser .addoption (
8179 "--year" ,
8280 action = "store" ,
83- default = CLI_DEFAULT_YEAR ,
8481 help = "The year to subset our data to." ,
8582 )
8683 parser .addoption (
8784 "--state" ,
8885 action = "store" ,
89- default = CLI_DEFAULT_STATE ,
9086 help = "The state to subset our data to (if using full USA population) using 2-letter abbreviations. No argument means no subsetting will be done." ,
9187 )
9288 parser .addoption (
9389 "--engine" ,
9490 action = "store" ,
95- default = CLI_DEFAULT_ENGINE ,
9691 help = "The engine used to generate data. Options are 'pandas' and 'dask'." ,
9792 )
9893
9994
10095############
10196# Fixtures #
10297############
@pytest.fixture(scope="session")
def release_output_dir(request: pytest.FixtureRequest) -> Path | None:
    """Create a timestamped output directory for this test session.

    Reads the ``--output-dir`` command-line option. When the option is unset
    or empty, nothing is created and ``None`` is returned. Otherwise a
    subdirectory named after the current local time (``YYYYmmdd_HHMMSS``) is
    created beneath the requested directory.

    Args:
        request: pytest's built-in ``request`` fixture, used to reach the
            session configuration and its command-line options.

    Returns:
        The resolved path of the newly created directory, or ``None`` when
        ``--output-dir`` was not supplied.

    Raises:
        FileExistsError: If the timestamped directory already exists.
    """
    output_dir_name = request.config.getoption("--output-dir", default=None)
    if not output_dir_name:
        return None
    output_dir = Path(output_dir_name) / time.strftime("%Y%m%d_%H%M%S")
    # exist_ok=False so a name collision fails loudly instead of silently
    # reusing a directory that already holds another run's output.
    output_dir.mkdir(parents=True, exist_ok=False)
    return output_dir.resolve()
106+
107+
103108@pytest .fixture (scope = "session" )
104109def dataset_params (
105110 request : pytest .FixtureRequest ,
@@ -130,6 +135,7 @@ def dataset_params(
130135
131136@pytest .fixture (scope = "session" )
132137def noised_data (
138+ release_output_dir : Path ,
133139 dataset_params : tuple [str | int | Callable [..., pd .DataFrame ] | None , ...],
134140 request : pytest .FixtureRequest ,
135141 config : dict [str , Any ],
@@ -148,8 +154,7 @@ def noised_data(
148154 }
149155 if dataset_func != generate_social_security :
150156 kwargs ["state" ] = state
151- timestamped_dir = request .config .getoption ("--output-dir" )
152- profiling_dir = Path (timestamped_dir ) / "profiling"
157+ profiling_dir = Path (release_output_dir ) / "profiling"
153158 profiling_dir .mkdir (parents = True , exist_ok = True )
154159 noised_data = profile_data_generation (profiling_dir )(dataset_func )(** kwargs )
155160 if engine == "dask" :
0 commit comments