Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 96 additions & 96 deletions src/pseudopeople/interface.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From https://mypy.readthedocs.io/en/stable/more_types.html#function-overloading:

The default values of a function’s arguments don’t affect its signature – only the absence or presence of a default value does. So in order to reduce redundancy, it’s possible to replace default values in overload definitions with `...` (the Ellipsis literal) as a placeholder.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to know. I guess we should replace the defaults in all of our overloads with `...`, then.

Original file line number Diff line number Diff line change
Expand Up @@ -236,26 +236,26 @@ def _get_data_changelog_version(changelog: Path) -> Version:

@overload
def generate_decennial_census(
source: Path | str | None = None,
seed: int = 0,
config: Path | str | dict[str, Any] | None = None,
year: int | None = 2020,
state: str | None = None,
verbose: bool = False,
engine: Literal["pandas"] = "pandas",
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["pandas"] = ...,
) -> pd.DataFrame:
...


@overload
def generate_decennial_census(
source: Path | str | None,
seed: int,
config: Path | str | dict[str, Any] | None,
year: int | None,
state: str | None,
verbose: bool,
engine: Literal["dask"],
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["dask"] = ...,
) -> dd.DataFrame:
...

Expand Down Expand Up @@ -360,26 +360,26 @@ def generate_decennial_census(

@overload
def generate_american_community_survey(
source: Path | str | None = None,
seed: int = 0,
config: Path | str | dict[str, Any] | None = None,
year: int | None = 2020,
state: str | None = None,
verbose: bool = False,
engine: Literal["pandas"] = "pandas",
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["pandas"] = ...,
) -> pd.DataFrame:
...


@overload
def generate_american_community_survey(
source: Path | str | None,
seed: int,
config: Path | str | dict[str, Any] | None,
year: int | None,
state: str | None,
verbose: bool,
engine: Literal["dask"],
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["dask"] = ...,
) -> dd.DataFrame:
...

Expand Down Expand Up @@ -499,26 +499,26 @@ def generate_american_community_survey(

@overload
def generate_current_population_survey(
source: Path | str | None = None,
seed: int = 0,
config: Path | str | dict[str, Any] | None = None,
year: int | None = 2020,
state: str | None = None,
verbose: bool = False,
engine: Literal["pandas"] = "pandas",
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["pandas"] = ...,
) -> pd.DataFrame:
...


@overload
def generate_current_population_survey(
source: Path | str | None,
seed: int,
config: Path | str | dict[str, Any] | None,
year: int | None,
state: str | None,
verbose: bool,
engine: Literal["dask"],
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["dask"] = ...,
) -> dd.DataFrame:
...

Expand Down Expand Up @@ -639,26 +639,26 @@ def generate_current_population_survey(

@overload
def generate_taxes_w2_and_1099(
source: Path | str | None = None,
seed: int = 0,
config: Path | str | dict[str, Any] | None = None,
year: int | None = 2020,
state: str | None = None,
verbose: bool = False,
engine: Literal["pandas"] = "pandas",
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["pandas"] = ...,
) -> pd.DataFrame:
...


@overload
def generate_taxes_w2_and_1099(
source: Path | str | None,
seed: int,
config: Path | str | dict[str, Any] | None,
year: int | None,
state: str | None,
verbose: bool,
engine: Literal["dask"],
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["dask"] = ...,
) -> dd.DataFrame:
...

Expand Down Expand Up @@ -763,26 +763,26 @@ def generate_taxes_w2_and_1099(

@overload
def generate_women_infants_and_children(
source: Path | str | None = None,
seed: int = 0,
config: Path | str | dict[str, Any] | None = None,
year: int | None = 2020,
state: str | None = None,
verbose: bool = False,
engine: Literal["pandas"] = "pandas",
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["pandas"] = ...,
) -> pd.DataFrame:
...


@overload
def generate_women_infants_and_children(
source: Path | str | None,
seed: int,
config: Path | str | dict[str, Any] | None,
year: int | None,
state: str | None,
verbose: bool,
engine: Literal["dask"],
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["dask"] = ...,
) -> dd.DataFrame:
...

Expand Down Expand Up @@ -892,24 +892,24 @@ def generate_women_infants_and_children(

@overload
def generate_social_security(
source: Path | str | None = None,
seed: int = 0,
config: Path | str | dict[str, Any] | None = None,
year: int | None = 2020,
verbose: bool = False,
engine: Literal["pandas"] = "pandas",
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
verbose: bool = ...,
engine: Literal["pandas"] = ...,
) -> pd.DataFrame:
...


@overload
def generate_social_security(
source: Path | str | None,
seed: int,
config: Path | str | dict[str, Any] | None,
year: int | None,
verbose: bool,
engine: Literal["dask"],
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
verbose: bool = ...,
engine: Literal["dask"] = ...,
) -> dd.DataFrame:
...

Expand Down Expand Up @@ -1004,26 +1004,26 @@ def generate_social_security(

@overload
def generate_taxes_1040(
source: Path | str | None = None,
seed: int = 0,
config: Path | str | dict[str, Any] | None = None,
year: int | None = 2020,
state: str | None = None,
verbose: bool = False,
engine: Literal["pandas"] = "pandas",
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["pandas"] = ...,
) -> pd.DataFrame:
...


@overload
def generate_taxes_1040(
source: Path | str | None,
seed: int,
config: Path | str | dict[str, Any] | None,
year: int | None,
state: str | None,
verbose: bool,
engine: Literal["dask"],
source: Path | str | None = ...,
seed: int = ...,
config: Path | str | dict[str, Any] | None = ...,
year: int | None = ...,
state: str | None = ...,
verbose: bool = ...,
engine: Literal["dask"] = ...,
) -> dd.DataFrame:
...

Expand Down
4 changes: 2 additions & 2 deletions src/pseudopeople/noise_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,12 @@ def duplicate_with_guardian(
formatted_group_data = {}
# Get dataframe for each dependent group to merge with guardians
in_households_under_18 = dataset.data.loc[
(dataset.data["age"] < 18)
(dataset.data["age"].astype(int) < 18)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the columns are all strings, right? How was this ever working?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This column's dtype is int when the data is first read in and while it is being noised during data generation, but it is str/object in our tests, which apply noise to post-processed, unnoised data.

& (dataset.data["housing_type"] == "Household")
& (dataset.data["guardian_1"].notna())
]
in_college_under_24 = dataset.data.loc[
(dataset.data["age"] < 24)
(dataset.data["age"].astype(int) < 24)
& (dataset.data["housing_type"] == "College")
& (dataset.data["guardian_1"].notna())
]
Expand Down
4 changes: 2 additions & 2 deletions src/pseudopeople/noise_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _get_census_omission_noise_levels(
.astype(str)
.map(data_values.DO_NOT_RESPOND_ADDITIVE_PROBABILITY_BY_RACE)
)
ages = pd.Series(np.arange(population["age"].max() + 1))
ages = pd.Series(np.arange(population["age"].astype(int).max() + 1))
for sex in ["Female", "Male"]:
effect_by_age_bin = data_values.DO_NOT_RESPOND_ADDITIVE_PROBABILITY_BY_SEX_AGE[sex]
# NOTE: calling pd.cut on a large array with an IntervalIndex is slow,
Expand All @@ -44,7 +44,7 @@ def _get_census_omission_noise_levels(
)
sex_mask = population["sex"] == sex
probabilities[sex_mask] += (
population[sex_mask]["age"].map(effect_by_age).astype(float)
population[sex_mask]["age"].astype(int).map(effect_by_age).astype(float)
)
probabilities[probabilities < 0.0] = 0.0
probabilities[probabilities > 1.0] = 1.0
Expand Down
15 changes: 11 additions & 4 deletions tests/integration/release/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import time
from collections.abc import Callable
from pathlib import Path
from typing import Any
from typing import Any, Literal

import pandas as pd
import pytest
Expand Down Expand Up @@ -39,8 +39,8 @@
"cps": "current_population_survey",
"census": "decennial_census",
"ssa": "social_security",
"taxes_1040": "taxes_1040",
"taxes_w2_and_1099": "taxes_w2_and_1099",
"tax_1040": "taxes_1040",
"tax_w2_1099": "taxes_w2_and_1099",
"wic": "women_infants_and_children",
}

Expand Down Expand Up @@ -112,7 +112,14 @@ def release_output_dir(request: pytest.FixtureRequest) -> Path:
@pytest.fixture(scope="session")
def dataset_params(
request: pytest.FixtureRequest,
) -> tuple[str | int | Callable[..., pd.DataFrame] | None, ...]:
) -> tuple[
str,
Callable[..., pd.DataFrame],
str | None,
int | None,
str | None,
Literal["pandas", "dask"],
]:
dataset_name = request.config.getoption("--dataset")
try:
dataset_func = DATASET_GENERATION_FUNCS[dataset_name]
Expand Down
Loading