Skip to content

Commit 48c09da

Browse files
full scale test missingness (#501)
Category: feature. JIRA issue: MIC-5515. Add a full-scale test_dataset_missingness test, update typing, and add default values to the overload functions for generating data with dask. Testing: ran tests on acs, cps, and wic for RI and USA.
1 parent 8ca7d61 commit 48c09da

File tree

6 files changed

+207
-120
lines changed

6 files changed

+207
-120
lines changed

src/pseudopeople/interface.py

Lines changed: 96 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -236,26 +236,26 @@ def _get_data_changelog_version(changelog: Path) -> Version:
236236

237237
@overload
238238
def generate_decennial_census(
239-
source: Path | str | None = None,
240-
seed: int = 0,
241-
config: Path | str | dict[str, Any] | None = None,
242-
year: int | None = 2020,
243-
state: str | None = None,
244-
verbose: bool = False,
245-
engine: Literal["pandas"] = "pandas",
239+
source: Path | str | None = ...,
240+
seed: int = ...,
241+
config: Path | str | dict[str, Any] | None = ...,
242+
year: int | None = ...,
243+
state: str | None = ...,
244+
verbose: bool = ...,
245+
engine: Literal["pandas"] = ...,
246246
) -> pd.DataFrame:
247247
...
248248

249249

250250
@overload
251251
def generate_decennial_census(
252-
source: Path | str | None,
253-
seed: int,
254-
config: Path | str | dict[str, Any] | None,
255-
year: int | None,
256-
state: str | None,
257-
verbose: bool,
258-
engine: Literal["dask"],
252+
source: Path | str | None = ...,
253+
seed: int = ...,
254+
config: Path | str | dict[str, Any] | None = ...,
255+
year: int | None = ...,
256+
state: str | None = ...,
257+
verbose: bool = ...,
258+
engine: Literal["dask"] = ...,
259259
) -> dd.DataFrame:
260260
...
261261

@@ -360,26 +360,26 @@ def generate_decennial_census(
360360

361361
@overload
362362
def generate_american_community_survey(
363-
source: Path | str | None = None,
364-
seed: int = 0,
365-
config: Path | str | dict[str, Any] | None = None,
366-
year: int | None = 2020,
367-
state: str | None = None,
368-
verbose: bool = False,
369-
engine: Literal["pandas"] = "pandas",
363+
source: Path | str | None = ...,
364+
seed: int = ...,
365+
config: Path | str | dict[str, Any] | None = ...,
366+
year: int | None = ...,
367+
state: str | None = ...,
368+
verbose: bool = ...,
369+
engine: Literal["pandas"] = ...,
370370
) -> pd.DataFrame:
371371
...
372372

373373

374374
@overload
375375
def generate_american_community_survey(
376-
source: Path | str | None,
377-
seed: int,
378-
config: Path | str | dict[str, Any] | None,
379-
year: int | None,
380-
state: str | None,
381-
verbose: bool,
382-
engine: Literal["dask"],
376+
source: Path | str | None = ...,
377+
seed: int = ...,
378+
config: Path | str | dict[str, Any] | None = ...,
379+
year: int | None = ...,
380+
state: str | None = ...,
381+
verbose: bool = ...,
382+
engine: Literal["dask"] = ...,
383383
) -> dd.DataFrame:
384384
...
385385

@@ -499,26 +499,26 @@ def generate_american_community_survey(
499499

500500
@overload
501501
def generate_current_population_survey(
502-
source: Path | str | None = None,
503-
seed: int = 0,
504-
config: Path | str | dict[str, Any] | None = None,
505-
year: int | None = 2020,
506-
state: str | None = None,
507-
verbose: bool = False,
508-
engine: Literal["pandas"] = "pandas",
502+
source: Path | str | None = ...,
503+
seed: int = ...,
504+
config: Path | str | dict[str, Any] | None = ...,
505+
year: int | None = ...,
506+
state: str | None = ...,
507+
verbose: bool = ...,
508+
engine: Literal["pandas"] = ...,
509509
) -> pd.DataFrame:
510510
...
511511

512512

513513
@overload
514514
def generate_current_population_survey(
515-
source: Path | str | None,
516-
seed: int,
517-
config: Path | str | dict[str, Any] | None,
518-
year: int | None,
519-
state: str | None,
520-
verbose: bool,
521-
engine: Literal["dask"],
515+
source: Path | str | None = ...,
516+
seed: int = ...,
517+
config: Path | str | dict[str, Any] | None = ...,
518+
year: int | None = ...,
519+
state: str | None = ...,
520+
verbose: bool = ...,
521+
engine: Literal["dask"] = ...,
522522
) -> dd.DataFrame:
523523
...
524524

@@ -639,26 +639,26 @@ def generate_current_population_survey(
639639

640640
@overload
641641
def generate_taxes_w2_and_1099(
642-
source: Path | str | None = None,
643-
seed: int = 0,
644-
config: Path | str | dict[str, Any] | None = None,
645-
year: int | None = 2020,
646-
state: str | None = None,
647-
verbose: bool = False,
648-
engine: Literal["pandas"] = "pandas",
642+
source: Path | str | None = ...,
643+
seed: int = ...,
644+
config: Path | str | dict[str, Any] | None = ...,
645+
year: int | None = ...,
646+
state: str | None = ...,
647+
verbose: bool = ...,
648+
engine: Literal["pandas"] = ...,
649649
) -> pd.DataFrame:
650650
...
651651

652652

653653
@overload
654654
def generate_taxes_w2_and_1099(
655-
source: Path | str | None,
656-
seed: int,
657-
config: Path | str | dict[str, Any] | None,
658-
year: int | None,
659-
state: str | None,
660-
verbose: bool,
661-
engine: Literal["dask"],
655+
source: Path | str | None = ...,
656+
seed: int = ...,
657+
config: Path | str | dict[str, Any] | None = ...,
658+
year: int | None = ...,
659+
state: str | None = ...,
660+
verbose: bool = ...,
661+
engine: Literal["dask"] = ...,
662662
) -> dd.DataFrame:
663663
...
664664

@@ -763,26 +763,26 @@ def generate_taxes_w2_and_1099(
763763

764764
@overload
765765
def generate_women_infants_and_children(
766-
source: Path | str | None = None,
767-
seed: int = 0,
768-
config: Path | str | dict[str, Any] | None = None,
769-
year: int | None = 2020,
770-
state: str | None = None,
771-
verbose: bool = False,
772-
engine: Literal["pandas"] = "pandas",
766+
source: Path | str | None = ...,
767+
seed: int = ...,
768+
config: Path | str | dict[str, Any] | None = ...,
769+
year: int | None = ...,
770+
state: str | None = ...,
771+
verbose: bool = ...,
772+
engine: Literal["pandas"] = ...,
773773
) -> pd.DataFrame:
774774
...
775775

776776

777777
@overload
778778
def generate_women_infants_and_children(
779-
source: Path | str | None,
780-
seed: int,
781-
config: Path | str | dict[str, Any] | None,
782-
year: int | None,
783-
state: str | None,
784-
verbose: bool,
785-
engine: Literal["dask"],
779+
source: Path | str | None = ...,
780+
seed: int = ...,
781+
config: Path | str | dict[str, Any] | None = ...,
782+
year: int | None = ...,
783+
state: str | None = ...,
784+
verbose: bool = ...,
785+
engine: Literal["dask"] = ...,
786786
) -> dd.DataFrame:
787787
...
788788

@@ -892,24 +892,24 @@ def generate_women_infants_and_children(
892892

893893
@overload
894894
def generate_social_security(
895-
source: Path | str | None = None,
896-
seed: int = 0,
897-
config: Path | str | dict[str, Any] | None = None,
898-
year: int | None = 2020,
899-
verbose: bool = False,
900-
engine: Literal["pandas"] = "pandas",
895+
source: Path | str | None = ...,
896+
seed: int = ...,
897+
config: Path | str | dict[str, Any] | None = ...,
898+
year: int | None = ...,
899+
verbose: bool = ...,
900+
engine: Literal["pandas"] = ...,
901901
) -> pd.DataFrame:
902902
...
903903

904904

905905
@overload
906906
def generate_social_security(
907-
source: Path | str | None,
908-
seed: int,
909-
config: Path | str | dict[str, Any] | None,
910-
year: int | None,
911-
verbose: bool,
912-
engine: Literal["dask"],
907+
source: Path | str | None = ...,
908+
seed: int = ...,
909+
config: Path | str | dict[str, Any] | None = ...,
910+
year: int | None = ...,
911+
verbose: bool = ...,
912+
engine: Literal["dask"] = ...,
913913
) -> dd.DataFrame:
914914
...
915915

@@ -1004,26 +1004,26 @@ def generate_social_security(
10041004

10051005
@overload
10061006
def generate_taxes_1040(
1007-
source: Path | str | None = None,
1008-
seed: int = 0,
1009-
config: Path | str | dict[str, Any] | None = None,
1010-
year: int | None = 2020,
1011-
state: str | None = None,
1012-
verbose: bool = False,
1013-
engine: Literal["pandas"] = "pandas",
1007+
source: Path | str | None = ...,
1008+
seed: int = ...,
1009+
config: Path | str | dict[str, Any] | None = ...,
1010+
year: int | None = ...,
1011+
state: str | None = ...,
1012+
verbose: bool = ...,
1013+
engine: Literal["pandas"] = ...,
10141014
) -> pd.DataFrame:
10151015
...
10161016

10171017

10181018
@overload
10191019
def generate_taxes_1040(
1020-
source: Path | str | None,
1021-
seed: int,
1022-
config: Path | str | dict[str, Any] | None,
1023-
year: int | None,
1024-
state: str | None,
1025-
verbose: bool,
1026-
engine: Literal["dask"],
1020+
source: Path | str | None = ...,
1021+
seed: int = ...,
1022+
config: Path | str | dict[str, Any] | None = ...,
1023+
year: int | None = ...,
1024+
state: str | None = ...,
1025+
verbose: bool = ...,
1026+
engine: Literal["dask"] = ...,
10271027
) -> dd.DataFrame:
10281028
...
10291029

src/pseudopeople/noise_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,12 @@ def duplicate_with_guardian(
145145
formatted_group_data = {}
146146
# Get dataframe for each dependent group to merge with guardians
147147
in_households_under_18 = dataset.data.loc[
148-
(dataset.data["age"] < 18)
148+
(dataset.data["age"].astype(int) < 18)
149149
& (dataset.data["housing_type"] == "Household")
150150
& (dataset.data["guardian_1"].notna())
151151
]
152152
in_college_under_24 = dataset.data.loc[
153-
(dataset.data["age"] < 24)
153+
(dataset.data["age"].astype(int) < 24)
154154
& (dataset.data["housing_type"] == "College")
155155
& (dataset.data["guardian_1"].notna())
156156
]

tests/integration/release/conftest.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import time
66
from collections.abc import Callable
77
from pathlib import Path
8-
from typing import Any
8+
from typing import Any, Literal
99

1010
import pandas as pd
1111
import pytest
@@ -39,8 +39,8 @@
3939
"cps": "current_population_survey",
4040
"census": "decennial_census",
4141
"ssa": "social_security",
42-
"taxes_1040": "taxes_1040",
43-
"taxes_w2_and_1099": "taxes_w2_and_1099",
42+
"tax_1040": "taxes_1040",
43+
"tax_w2_1099": "taxes_w2_and_1099",
4444
"wic": "women_infants_and_children",
4545
}
4646

@@ -112,7 +112,14 @@ def release_output_dir(request: pytest.FixtureRequest) -> Path:
112112
@pytest.fixture(scope="session")
113113
def dataset_params(
114114
request: pytest.FixtureRequest,
115-
) -> tuple[str | int | Callable[..., pd.DataFrame] | None, ...]:
115+
) -> tuple[
116+
str,
117+
Callable[..., pd.DataFrame],
118+
str | None,
119+
int | None,
120+
str | None,
121+
Literal["pandas", "dask"],
122+
]:
116123
dataset_name = request.config.getoption("--dataset")
117124
try:
118125
dataset_func = DATASET_GENERATION_FUNCS[dataset_name]

0 commit comments

Comments
 (0)