Skip to content

Commit ae36bf5

Browse files
committed
Limit age columns for business coherence
1 parent 9a356f0 commit ae36bf5

1 file changed

Lines changed: 28 additions & 0 deletions

File tree

pointblank/generate/generators.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from __future__ import annotations
66

77
import random
8+
import re
89
import string
10+
from dataclasses import replace as dc_replace
911
from datetime import date, datetime, time, timedelta
1012
from typing import TYPE_CHECKING, Any, Callable
1113

@@ -448,6 +450,13 @@ def generate_column(
448450
PERSON_RELATED_PRESETS = {"name", "name_full", "first_name", "last_name", "email", "user_name"}
449451
BUSINESS_RELATED_PRESETS = {"job", "company"}
450452

453+
# Default working-age bounds applied when business coherence is active
454+
_WORKING_AGE_MIN = 22
455+
_WORKING_AGE_MAX = 65
456+
457+
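# Pattern matching column names that contain "age" as a whole underscore-delimited
# token (case-insensitive), e.g. "age", "user_age", "age_years" — but not "page".
# It only inspects the name; the integer-type check is done separately by the caller.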
458+
_AGE_COLUMN_RE = re.compile(r"(?:^|_)age(?:$|_)", re.IGNORECASE)
459+
451460

452461
def _get_coherence_needs(fields: dict[str, Field]) -> tuple[bool, bool, bool]:
453462
"""Check what coherence is needed for the given fields."""
@@ -549,6 +558,25 @@ def generate_dataframe(
549558
if needs_business:
550559
coherent_presets.update(BUSINESS_RELATED_PRESETS)
551560

561+
# When business coherence is active, constrain age-like integer columns to
562+
# working-age range so generated data doesn't have 15-year-old professionals
563+
# or 85-year-old active employees with fictitious employers.
564+
if needs_business:
565+
fields = dict(fields) # shallow copy so we don't mutate the caller's dict
566+
for col_name, col_field in list(fields.items()):
567+
if col_field.is_integer() and _AGE_COLUMN_RE.search(col_name):
568+
cur_min = getattr(col_field, "min_val", None)
569+
cur_max = getattr(col_field, "max_val", None)
570+
new_min = (
571+
max(cur_min, _WORKING_AGE_MIN) if cur_min is not None else _WORKING_AGE_MIN
572+
)
573+
new_max = (
574+
min(cur_max, _WORKING_AGE_MAX) if cur_max is not None else _WORKING_AGE_MAX
575+
)
576+
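# Only replace if the bounds actually changed.
# NOTE(review): when the caller's own range lies entirely outside the working-age
# window (e.g. min_val=70, or max_val=17), new_min ends up greater than new_max --
# confirm how Field handles an inverted range, or skip clamping in that case.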
577+
if new_min != cur_min or new_max != cur_max:
578+
fields[col_name] = dc_replace(col_field, min_val=new_min, max_val=new_max)
579+
552580
# Generate data for each column
553581
data: dict[str, list[Any]] = {}
554582
for col_name, field in fields.items():

0 commit comments

Comments
 (0)