Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ python:
# Doc builds will fail if there are any warnings
sphinx:
fail_on_warning: true
configuration: docs/conf.py

formats:
- pdf
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ exclude = [
[[tool.mypy.overrides]]
module = [
"scipy.*",
"pytest_check",
]
ignore_missing_imports = true
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"tqdm",
"layered_config_tree>=2.1.0",
"loguru",
"pytest_check",
# type stubs
"pandas-stubs",
"types-PyYAML",
Expand Down
1 change: 1 addition & 0 deletions tests/integration/release/test_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import pytest
from _pytest.fixtures import FixtureRequest
from pytest_check import check
from vivarium_testing_utils import FuzzyChecker

from pseudopeople.dataset import Dataset
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/release/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)
def test_release_tests(pytest_args: list[str]) -> None:
os.chdir(Path(__file__).parent) # need this to access cli options from conftest.py
base_cmd = ["pytest", "--release", "test_release.py"]
base_cmd = ["pytest", "--release", "test_release.py", "--check-max-tb=1000"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is this doing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This flag controls how many failure tracebacks pytest-check reports at once and, oddly enough, it defaults to 1.

cmd = base_cmd + pytest_args
result = subprocess.run(cmd, capture_output=True, text=True)
assert result.returncode == 0
Expand Down
28 changes: 19 additions & 9 deletions tests/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import numpy.typing as npt
import pandas as pd
from pytest_check import check
from vivarium_testing_utils import FuzzyChecker

from pseudopeople.configuration import Keys, get_configuration
Expand All @@ -34,7 +35,8 @@ def run_column_noising_tests(

# Check that originally missing data remained missing
originally_missing_idx = check_original.index[check_original[col.name].isna()]
assert check_noised.loc[originally_missing_idx, col.name].isna().all()
with check:
assert check_noised.loc[originally_missing_idx, col.name].isna().all()

# Check for noising where applicable
to_compare_idx = shared_idx.difference(originally_missing_idx)
Expand All @@ -43,7 +45,8 @@ def run_column_noising_tests(
check_original.loc[to_compare_idx, col.name].values
!= check_noised.loc[to_compare_idx, col.name].values
)
assert different_check.any()
with check:
assert different_check.any()

noise_level = different_check.sum()

Expand All @@ -64,7 +67,8 @@ def run_column_noising_tests(
== check_noised.loc[to_compare_idx, col.name].values
)

assert same_check.all()
with check:
assert same_check.all()


def run_omit_row_or_do_not_respond_tests(
Expand All @@ -87,15 +91,20 @@ def run_omit_row_or_do_not_respond_tests(
]:
# Census and household surveys have do_not_respond and omit_row.
# For all other datasets they are mutually exclusive
assert len(noise_types) == 2
with check:
assert len(noise_types) == 2
else:
assert len(noise_types) < 2
with check:
assert len(noise_types) < 2
if not noise_types: # Check that there are no missing indexes
assert noised_data.index.symmetric_difference(original_data.index).empty
with check:
assert noised_data.index.symmetric_difference(original_data.index).empty
else: # Check that there are some omissions
# TODO: assert levels are as expected
assert noised_data.index.difference(original_data.index).empty
assert not original_data.index.difference(noised_data.index).empty
with check:
assert noised_data.index.difference(original_data.index).empty
with check:
assert not original_data.index.difference(noised_data.index).empty
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these be in two separate checks?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is confusing to me



def validate_column_noise_level(
Expand Down Expand Up @@ -158,7 +167,8 @@ def validate_column_noise_level(
[1 - p for p in token_probability]
)
else:
assert isinstance(tokens_per_string, pd.Series)
with check:
assert isinstance(tokens_per_string, pd.Series)
avg_probability_any_token_noised = (
1 - (1 - token_probability) ** tokens_per_string
).mean()
Expand Down
Loading