-
Notifications
You must be signed in to change notification settings - Fork 4
batch error logs #484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
batch error logs #484
Changes from 8 commits
b89c5dc
4930a25
ab4f072
4f51104
19d3846
1cfab6d
12e4249
e4be3b5
7266440
8a71f53
643a7bd
305ac06
49e8e6b
d76d577
40b1c3b
990510f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| import pandas as pd | ||
| import pytest | ||
| from _pytest.fixtures import FixtureRequest | ||
| from pytest_check import check # type: ignore | ||
| from vivarium_testing_utils import FuzzyChecker | ||
|
|
||
| from pseudopeople.dataset import Dataset | ||
|
|
@@ -60,11 +61,12 @@ def test_unnoised_id_cols(dataset_name: str, request: FixtureRequest) -> None: | |
| original = initialize_dataset_with_sample(dataset_name) | ||
| noised_data = request.getfixturevalue("noised_data") | ||
| check_noised, check_original, _ = _get_common_datasets(original, noised_data) | ||
| assert ( | ||
| ( | ||
| check_original.reset_index()[unnoised_id_cols] | ||
| == check_noised.reset_index()[unnoised_id_cols] | ||
| with check: | ||
|
||
| assert ( | ||
| ( | ||
| check_original.reset_index()[unnoised_id_cols] | ||
| == check_noised.reset_index()[unnoised_id_cols] | ||
| ) | ||
| .all() | ||
| .all() | ||
| ) | ||
| .all() | ||
| .all() | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ | |
| ) | ||
| def test_release_tests(pytest_args: list[str]) -> None: | ||
| os.chdir(Path(__file__).parent) # need this to access cli options from conftest.py | ||
| base_cmd = ["pytest", "--release", "test_release.py"] | ||
| base_cmd = ["pytest", "--release", "test_release.py", "--check-max-tb=1000"] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is this doing?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This flag controls how many failures are shown at once and, oddly enough, it defaults to 1. |
||
| cmd = base_cmd + pytest_args | ||
| result = subprocess.run(cmd, capture_output=True, text=True) | ||
| assert result.returncode == 0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| import numpy as np | ||
| import numpy.typing as npt | ||
| import pandas as pd | ||
| from pytest_check import check # type: ignore | ||
| from vivarium_testing_utils import FuzzyChecker | ||
|
|
||
| from pseudopeople.configuration import Keys, get_configuration | ||
|
|
@@ -34,7 +35,8 @@ def run_column_noising_tests( | |
|
|
||
| # Check that originally missing data remained missing | ||
| originally_missing_idx = check_original.index[check_original[col.name].isna()] | ||
| assert check_noised.loc[originally_missing_idx, col.name].isna().all() | ||
| with check: | ||
| assert check_noised.loc[originally_missing_idx, col.name].isna().all() | ||
|
|
||
| # Check for noising where applicable | ||
| to_compare_idx = shared_idx.difference(originally_missing_idx) | ||
|
|
@@ -43,7 +45,8 @@ def run_column_noising_tests( | |
| check_original.loc[to_compare_idx, col.name].values | ||
| != check_noised.loc[to_compare_idx, col.name].values | ||
| ) | ||
| assert different_check.any() | ||
| with check: | ||
| assert different_check.any() | ||
|
|
||
| noise_level = different_check.sum() | ||
|
|
||
|
|
@@ -64,7 +67,8 @@ def run_column_noising_tests( | |
| == check_noised.loc[to_compare_idx, col.name].values | ||
| ) | ||
|
|
||
| assert same_check.all() | ||
| with check: | ||
| assert same_check.all() | ||
|
|
||
|
|
||
| def run_omit_row_or_do_not_respond_tests( | ||
|
|
@@ -87,15 +91,19 @@ def run_omit_row_or_do_not_respond_tests( | |
| ]: | ||
| # Census and household surveys have do_not_respond and omit_row. | ||
| # For all other datasets they are mutually exclusive | ||
| assert len(noise_types) == 2 | ||
| with check: | ||
| assert len(noise_types) == 2 | ||
| else: | ||
| assert len(noise_types) < 2 | ||
| with check: | ||
| assert len(noise_types) < 2 | ||
| if not noise_types: # Check that there are no missing indexes | ||
| assert noised_data.index.symmetric_difference(original_data.index).empty | ||
| with check: | ||
| assert noised_data.index.symmetric_difference(original_data.index).empty | ||
| else: # Check that there are some omissions | ||
| # TODO: assert levels are as expected | ||
| assert noised_data.index.difference(original_data.index).empty | ||
| assert not original_data.index.difference(noised_data.index).empty | ||
| with check: | ||
| assert noised_data.index.difference(original_data.index).empty | ||
| assert not original_data.index.difference(noised_data.index).empty | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should these be in two separate checks?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is confusing to me. |
||
|
|
||
|
|
||
| def validate_column_noise_level( | ||
|
|
@@ -158,7 +166,8 @@ def validate_column_noise_level( | |
| [1 - p for p in token_probability] | ||
| ) | ||
| else: | ||
| assert isinstance(tokens_per_string, pd.Series) | ||
| with check: | ||
| assert isinstance(tokens_per_string, pd.Series) | ||
| avg_probability_any_token_noised = ( | ||
| 1 - (1 - token_probability) ** tokens_per_string | ||
| ).mean() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just wondering — why does this produce a type error?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There aren't any type stubs available for pytest_check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can just ignore that in the project-level configuration file rather than in this specific file.