Skip to content

Commit c3bf67b

Browse files
authored
Merge pull request #123 from ihmeuw/develop
rc/v0.6.2
2 parents 178775d + 4704ad6 commit c3bf67b

File tree

4 files changed

+38
-19
lines changed

4 files changed

+38
-19
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
**0.6.2 - 04/21/23**
2+
- Updated documentation
3+
- Updated progress bar behavior
4+
15
**0.6.1 - 04/21/23**
26
- Updated documentation
37
- Standardized configuration key names

src/pseudopeople/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
__summary__ = "pseudopeople is a package which adds noise to simulated census-scale data using standard scientific Python tools."
1414
__uri__ = "https://github.com/ihmeuw/pseudopeople"
1515

16-
__version__ = "0.6.1"
16+
__version__ = "0.6.2"
1717

1818
__author__ = "The pseudopeople developers"
1919
__email__ = "vivarium.dev@gmail.com"

src/pseudopeople/interface.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,12 @@
3131
"""
3232

3333
from pathlib import Path
34-
from typing import Dict, List, Union
34+
from typing import Dict, List, Optional, Union
3535

3636
import pandas as pd
3737
import pyarrow.parquet as pq
3838
from loguru import logger
39+
from tqdm import tqdm
3940

4041
from pseudopeople.configuration import get_configuration
4142
from pseudopeople.constants import paths
@@ -90,8 +91,14 @@ def _generate_dataset(
9091
"Please provide the path to the unmodified root data directory."
9192
)
9293
noised_dataset = []
93-
for data_path in data_paths:
94-
logger.info(f"Loading data from {data_path}.")
94+
iterator = (
95+
tqdm(data_paths, desc="Noising data", leave=False)
96+
if len(data_paths) > 1
97+
else data_paths
98+
)
99+
# for data_path in tqdm(data_paths, desc="Noising data", leave=False):
100+
for data_path in iterator:
101+
logger.debug(f"Loading data from {data_path}.")
95102
data = _load_data_from_path(data_path, year_filter)
96103

97104
data = _reformat_dates_for_noising(data, dataset)
@@ -106,6 +113,8 @@ def _generate_dataset(
106113
# again after concat (https://github.com/pandas-dev/pandas/issues/51362)
107114
noised_dataset = _coerce_dtypes(noised_dataset, dataset)
108115

116+
logger.debug("*** Finished ***")
117+
109118
return noised_dataset
110119

111120

@@ -162,7 +171,7 @@ def generate_decennial_census(
162171
source: Union[Path, str] = None,
163172
seed: int = 0,
164173
config: Union[Path, str, Dict[str, Dict]] = None,
165-
year: int = 2020,
174+
year: Optional[int] = 2020,
166175
verbose: bool = False,
167176
) -> pd.DataFrame:
168177
"""
@@ -175,8 +184,9 @@ def generate_decennial_census(
175184
:param config: An optional override to the default configuration. Can be a path
176185
to a configuration YAML file or a dictionary.
177186
:param year: The year (format YYYY) to include in the dataset. Must be a decennial
178-
year (e.g. 2020, 2030, 2040). Will return an empty pd.DataFrame if there is no
179-
data with this year.
187+
year (e.g. 2020, 2030, 2040). Will return an empty pd.DataFrame if there are no
188+
data with this year. If None is provided, data from all years are
189+
included in the dataset.
180190
:param verbose: Log with verbosity if True.
181191
:return: A pd.DataFrame of simulated decennial census data.
182192
:raises ConfigurationError: An incorrect config is provided.
@@ -193,7 +203,7 @@ def generate_american_community_survey(
193203
source: Union[Path, str] = None,
194204
seed: int = 0,
195205
config: Union[Path, str, Dict[str, Dict]] = None,
196-
year: int = 2020,
206+
year: Optional[int] = 2020,
197207
verbose: bool = False,
198208
) -> pd.DataFrame:
199209
"""
@@ -212,7 +222,8 @@ def generate_american_community_survey(
212222
:param config: An optional override to the default configuration. Can be a path
213223
to a configuration YAML file or a dictionary.
214224
:param year: The survey date year (format YYYY) to include in the dataset. Will
215-
return an empty pd.DataFrame if there is no data with this year.
225+
return an empty pd.DataFrame if there are no data with this year. If None is
226+
provided, data from all years are included in the dataset.
216227
:param verbose: Log with verbosity if True.
217228
:return: A pd.DataFrame of simulated ACS data.
218229
:raises ConfigurationError: An incorrect config is provided.
@@ -235,7 +246,7 @@ def generate_current_population_survey(
235246
source: Union[Path, str] = None,
236247
seed: int = 0,
237248
config: Union[Path, str, Dict[str, Dict]] = None,
238-
year: int = 2020,
249+
year: Optional[int] = 2020,
239250
verbose: bool = False,
240251
) -> pd.DataFrame:
241252
"""
@@ -255,7 +266,8 @@ def generate_current_population_survey(
255266
:param config: An optional override to the default configuration. Can be a path
256267
to a configuration YAML file or a dictionary.
257268
:param year: The survey date year (format YYYY) to include in the dataset. Will
258-
return an empty pd.DataFrame if there is no data with this year.
269+
return an empty pd.DataFrame if there are no data with this year. If None is
270+
provided, data from all years are included in the dataset.
259271
:param verbose: Log with verbosity if True.
260272
:return: A pd.DataFrame of simulated CPS data.
261273
:raises ConfigurationError: An incorrect config is provided.
@@ -278,7 +290,7 @@ def generate_taxes_w2_and_1099(
278290
source: Union[Path, str] = None,
279291
seed: int = 0,
280292
config: Union[Path, str, Dict[str, Dict]] = None,
281-
year: int = 2020,
293+
year: Optional[int] = 2020,
282294
verbose: bool = False,
283295
) -> pd.DataFrame:
284296
"""
@@ -291,7 +303,8 @@ def generate_taxes_w2_and_1099(
291303
:param config: An optional override to the default configuration. Can be a path
292304
to a configuration YAML file or a dictionary.
293305
:param year: The tax year (format YYYY) to include in the dataset. Will return
294-
an empty pd.DataFrame if there is no data with this year.
306+
an empty pd.DataFrame if there are no data with this year. If None is provided,
307+
data from all years are included in the dataset.
295308
:param verbose: Log with verbosity if True.
296309
:return: A pd.DataFrame of simulated W2 and 1099 tax data.
297310
:raises ConfigurationError: An incorrect config is provided.
@@ -309,7 +322,7 @@ def generate_women_infants_and_children(
309322
source: Union[Path, str] = None,
310323
seed: int = 0,
311324
config: Union[Path, str, Dict[str, Dict]] = None,
312-
year: int = 2020,
325+
year: Optional[int] = 2020,
313326
verbose: bool = False,
314327
) -> pd.DataFrame:
315328
"""
@@ -327,7 +340,8 @@ def generate_women_infants_and_children(
327340
:param config: An optional override to the default configuration. Can be a path
328341
to a configuration YAML file or a dictionary.
329342
:param year: The year (format YYYY) to include in the dataset. Will return an
330-
empty pd.DataFrame if there is no data with this year.
343+
empty pd.DataFrame if there are no data with this year. If None is provided,
344+
data from all years are included in the dataset.
331345
:param verbose: Log with verbosity if True.
332346
:return: A pd.DataFrame of simulated WIC data.
333347
:raises ConfigurationError: An incorrect config is provided.
@@ -345,7 +359,7 @@ def generate_social_security(
345359
source: Union[Path, str] = None,
346360
seed: int = 0,
347361
config: Union[Path, str, Dict[str, Dict]] = None,
348-
year: int = 2020,
362+
year: Optional[int] = 2020,
349363
verbose: bool = False,
350364
) -> pd.DataFrame:
351365
"""
@@ -358,8 +372,9 @@ def generate_social_security(
358372
:param config: An optional override to the default configuration. Can be a path
359373
to a configuration YAML file or a dictionary.
360374
:param year: The latest year (format YYYY) to include in the dataset; will also
361-
include all previous years. Will return an empty pd.DataFrame if there is no
362-
data on or before this year.
375+
include all previous years. Will return an empty pd.DataFrame if there are no
376+
data on or before this year. If None is provided, data from all years are
377+
included in the dataset.
363378
:param verbose: Log with verbosity if True.
364379
:return: A pd.DataFrame of simulated SSA data.
365380
:raises ConfigurationError: An incorrect config is provided.

src/pseudopeople/noise.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def noise_dataset(
4949
randomness = get_randomness_stream(dataset.name, seed)
5050

5151
noise_configuration = configuration[dataset.name]
52-
for noise_type in tqdm(NOISE_TYPES, desc="Applying noise", unit="type"):
52+
for noise_type in tqdm(NOISE_TYPES, desc="Applying noise", unit="type", leave=False):
5353
if isinstance(noise_type, RowNoiseType):
5454
if (
5555
Keys.ROW_NOISE in noise_configuration

0 commit comments

Comments
 (0)