Skip to content

Commit 3fd6cbc

Browse files
chg ! rebase
1 parent fd2034f commit 3fd6cbc

File tree

2 files changed

+36
-32
lines changed

2 files changed

+36
-32
lines changed

src/country_workspace/contrib/aurora/pipeline.py

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from country_workspace.contrib.aurora.client import AuroraClient
66
from country_workspace.models import AsyncJob, Batch, Household, Individual
77
from country_workspace.utils.config import BatchNameConfig, FailIfAlienConfig
8-
from country_workspace.utils.fields import uppercase_field_value, clean_field_names
8+
from country_workspace.utils.fields import clean_field_names
99

1010

1111
class Config(BatchNameConfig, FailIfAlienConfig):
@@ -70,12 +70,11 @@ def create_household(batch: Batch, data: dict[str, Any], prefix: str) -> Househo
7070
ValueError: If multiple household entries are found in the provided data.
7171
7272
"""
73-
flex_fields = _collect_by_prefix(data, prefix)
74-
if len(flex_fields) > 1:
73+
hh_data = _collect_by_prefix(data, prefix)
74+
if len(hh_data) > 1:
7575
raise ValueError("Multiple households found")
76-
flex_fields = next(iter(flex_fields.values()), {})
77-
return batch.program.households.create(batch=batch, flex_fields=clean_field_names(flex_fields))
78-
# return batch.program.households.create(batch=batch, flex_fields=flex_fields)
76+
flex_fields = clean_field_names(next(iter(hh_data.values()), {}))
77+
return batch.program.households.create(batch=batch, flex_fields=flex_fields)
7978

8079

8180
def create_individuals(
@@ -97,24 +96,21 @@ def create_individuals(
9796
household, individuals = None, []
9897
head_found = False
9998

100-
# for raw_individual in data.values():
101-
# individual = clean_field_names(raw_individual)
102-
# if not head_found:
10399
inds_data = _collect_by_prefix(data, cfg.get("individuals_column_prefix"))
104100

105101
if inds_data and cfg["master_detail"] and (hh_prefix := cfg.get("household_column_prefix")):
106102
household = create_household(batch, data, hh_prefix)
107103

108-
for individual in inds_data.values():
109-
household_label_column = cfg.get("household_label_column")
110-
if household and household_label_column and not head_found:
111-
head_found = _update_household_label_from_individual(household, individual, household_label_column)
104+
for ind_data in inds_data.values():
105+
flex_fields = clean_field_names(ind_data)
106+
if household and (hh_label := cfg.get("household_label_column")) and not head_found:
107+
head_found = _update_household_label_from_individual(household, flex_fields, hh_label)
112108
individuals.append(
113109
Individual(
114110
batch=batch,
115111
household_id=household.pk if household else None,
116-
name=individual.get("given_name", ""),
117-
flex_fields=individual,
112+
name=flex_fields.get("given_name", ""),
113+
flex_fields=flex_fields,
118114
)
119115
)
120116
return batch.program.individuals.bulk_create(individuals, batch_size=1000)
@@ -144,25 +140,24 @@ def _collect_by_prefix(data: dict[str, Any], prefix: str) -> dict[str, dict[str,
144140
result = {}
145141
for k, v in data.items():
146142
if (stripped := k.removeprefix(prefix)) != k:
147-
index, field = stripped.split("_", 1)
148-
result.setdefault(index, {})[field] = uppercase_field_value(field, v)
149-
# for key, value in data.items():
150-
# if not key.startswith(prefix):
151-
# continue
152-
# index, field = key.removeprefix(prefix).split("_", 1)
153-
# clean_field = clean_field_name(field)
154-
# result.setdefault(index, {})[clean_field] = uppercase_field_value(clean_field, value)
155-
# return result
143+
try:
144+
index, field = stripped.split("_", 1)
145+
result.setdefault(index, {})[field] = v
146+
except ValueError:
147+
raise ValueError(
148+
f"Field name '{k}' after removing prefix '{prefix}' must contain at least two underscores."
149+
)
150+
return result
156151

157152

158153
def _update_household_label_from_individual(
159-
household: Household, individual: Mapping[str, Any], household_label_column: str
154+
household: Household, ind_data: Mapping[str, Any], household_label_column: str
160155
) -> bool:
161156
"""Update the household's name based on an individual's role and specified name field.
162157
163158
Args:
164159
household (Household): The household instance to update.
165-
individual (dict[str, Any]): A dictionary containing the individual's data,
160+
ind_data (Mapping[str, Any]): A mapping containing the individual's data,
166161
including relationship status and potential household name.
167162
household_label_column (str): The key in the individual's data that stores
168163
the name to assign to the household.
@@ -171,8 +166,8 @@ def _update_household_label_from_individual(
171166
bool: True if the household name was updated (individual is head and name provided), False otherwise.
172167
173168
"""
174-
is_head = any(individual.get(k) == RELATIONSHIP_HEAD for k in individual if k.startswith(RELATIONSHIP_FIELDNAME))
175-
name = individual.get(household_label_column)
169+
is_head = any(ind_data.get(k) == RELATIONSHIP_HEAD for k in ind_data if k == RELATIONSHIP_FIELDNAME)
170+
name = ind_data.get(household_label_column)
176171
if is_head and name:
177172
household.name = name
178173
household.save(update_fields=["name"])

src/country_workspace/utils/fields.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212

1313
TO_REMOVE = "_h_c", "_h_f", "_i_c", "_i_f"
14+
TO_UPPERCASE = "relationship", "gender", "disability", "residence_status"
1415

1516

1617
def clean_field_name(v: str) -> str:
@@ -27,12 +28,21 @@ def clean_field_name(v: str) -> str:
2728

2829

2930
def clean_field_names(record: Record) -> Record:
30-
return {clean_field_name(k): v for k, v in record.items()}
31+
"""Clean all field names in a record by normalizing them.
32+
33+
Args:
34+
record (dict): A dictionary with field names as keys and their values.
35+
36+
Returns:
37+
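dict: A new dictionary with cleaned field names; string values whose original field name starts with one of the TO_UPPERCASE prefixes are uppercased, all other values are unchanged.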
38+
39+
"""
40+
return {clean_field_name(k): uppercase_field_value(k, v) for k, v in record.items()}
3141

3242

3343
def uppercase_field_value(k: str, v: Any) -> str:
3444
"""
35-
Convert the given field value to uppercase if applicable.
45+
Convert the given field value to uppercase if its name starts with specific prefixes.
3646
3747
Args:
3848
k (str): The name of the field.
@@ -42,5 +52,4 @@ def uppercase_field_value(k: str, v: Any) -> str:
4252
str: The uppercase value if applicable or the original value.
4353
4454
"""
45-
to_uppercase = ("relationship", "gender", "disability", "residence_status")
46-
return v.upper() if isinstance(v, str) and k in to_uppercase else v
55+
return v.upper() if isinstance(v, str) and any(k.startswith(prefix) for prefix in TO_UPPERCASE) else v

0 commit comments

Comments
 (0)