Skip to content

Commit 588d3a8

Browse files
Merge pull request #63 from ihmeuw/develop
release v0.5.0
2 parents efa6aa9 + f22c9d8 commit 588d3a8

File tree

16 files changed

+651
-253
lines changed

16 files changed

+651
-253
lines changed

.github/CODEOWNERS

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
# default owners
2-
* @albrja @hussain-jafari @mattkappel @ramittal @rmudambi @stevebachmeier
2+
* @ihmeuw/vivarium-dev
3+
/docs/* @ihmeuw/vivarium-research @ihmeuw/vivarium-dev
4+
*.rst @ihmeuw/vivarium-research @ihmeuw/vivarium-dev

CHANGELOG.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
**0.4.0 - 04/11/23**
1+
**0.5.0 - 04/13/23**
2+
- Bugfix to apply incorrect selection noising at the expected probability
3+
- Implement the omission noise function
4+
- Implement schema for output columns and their dtypes
5+
- Implement a year filter to the form generation functions
6+
- Support a path to data root directory as form generation function argument
7+
- Update documentation
8+
9+
**0.4.0 - 04/11/23**
210
- Generate default configuration instead of maintaining a static file
311
- Read sample data if no data argument is provided
412
- Update sample datasets

README.rst

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ or build it from source with
1919

2020
``> python setup.py install``
2121

22-
This will make the ``pseudopeople`` library available to python and install a
23-
command-line executable called ``...`` that you can use to verify your
24-
installation with
25-
26-
``> ... test``
22+
Documentation
23+
======================
24+
You can view documentation at https://pseudopeople.readthedocs.io/en/latest/

docs/source/index.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ Pseudopeople is a package intended to add configurable noise to a simulated
66
census-scale data-set written using standard
77
`scientific Python <https://www.scipy.org/>`_ tools.
88

9+
The GitHub repo can be found at: https://github.com/ihmeuw/pseudopeople
10+
911
.. toctree::
1012
:maxdepth: 2
1113

1214
installation
15+
noise_functions/index
1316
tutorials/index
1417
concepts/index
1518
api_reference/index
16-
glossary
19+
glossary
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
.. _noise_functions_main:
2+
3+
=================
4+
Noise Functions
5+
=================
6+
7+
.. contents::
8+
:depth: 2
9+
:local:
10+
:backlinks: none

src/pseudopeople/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
__summary__ = "pseudopeople is package which adds noise to simulated census-scale data using standard scientific Python tools."
1414
__uri__ = "https://github.com/ihmeuw/pseudopeople"
1515

16-
__version__ = "0.4.0"
16+
__version__ = "0.5.0"
1717

1818
__author__ = "The pseudopeople developers"
1919
__email__ = "vivarium.dev@gmail.com"

src/pseudopeople/entity_types.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Callable, Dict
33

44
import pandas as pd
5+
from loguru import logger
56
from vivarium import ConfigTree
67
from vivarium.framework.randomness import RandomnessStream
78

@@ -53,6 +54,7 @@ class ColumnNoiseType:
5354
noise_function: Callable[[pd.Series, ConfigTree, RandomnessStream, Any], pd.Series]
5455
row_noise_level: float = 0.01
5556
token_noise_level: float = 0.1
57+
noise_level_scaling_function: Callable[[str], float] = lambda x: 1.0
5658
additional_parameters: Dict[str, Any] = None
5759

5860
def __call__(
@@ -62,18 +64,27 @@ def __call__(
6264
randomness_stream: RandomnessStream,
6365
additional_key: Any,
6466
) -> pd.Series:
65-
# TODO: this is a temporary hack to account for all string columns having been made categorical
66-
# We should record expected output dtype in the columns data structure
67-
if column.dtype.name == "category":
68-
column = column.astype(str)
69-
else:
70-
column = column.copy()
71-
noise_level = configuration.row_noise_level
67+
column = column.copy()
68+
noise_level = configuration.row_noise_level * self.noise_level_scaling_function(
69+
column.name
70+
)
7271
to_noise_idx = get_index_to_noise(
7372
column, noise_level, randomness_stream, f"{self.name}_{additional_key}"
7473
)
74+
if to_noise_idx.empty:
75+
logger.debug(
76+
f"No cells chosen to noise for noise function {self.name} on column {column.name}. "
77+
"This is likely due to a combination of the configuration noise levels and the input data."
78+
)
79+
return column
7580
noised_data = self.noise_function(
7681
column.loc[to_noise_idx], configuration, randomness_stream, additional_key
7782
)
83+
84+
# Coerce noised column dtype back to original column's if it has changed
85+
if noised_data.dtype.name != column.dtype.name:
86+
noised_data = noised_data.astype(column.dtype)
87+
7888
column.loc[to_noise_idx] = noised_data
89+
7990
return column

0 commit comments

Comments
 (0)