Skip to content

Commit 8387600

Browse files
AB#241769: aurora import according to use beneficiary groups (#63)
* chg ! aurora import takes into account beneficiary groups * chg ! rebase
1 parent fc2f57f commit 8387600

File tree

6 files changed

+190
-118
lines changed

6 files changed

+190
-118
lines changed

src/country_workspace/contrib/aurora/forms.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,37 +6,37 @@
66

77
class ImportAuroraForm(forms.Form):
88
batch_name = forms.CharField(required=False, help_text="Label for this batch.")
9-
109
registration = forms.ModelChoiceField(
1110
queryset=Registration.objects.none(),
1211
help_text="What type of registrations are being imported.",
1312
)
14-
1513
household_column_prefix = forms.CharField(
16-
initial="household_",
17-
help_text="Household's column group prefix",
14+
initial="household_", help_text="Household's column group prefix", required=False
1815
)
19-
2016
individuals_column_prefix = forms.CharField(
2117
initial="individuals_",
2218
help_text="Individuals' column group prefix",
2319
)
24-
2520
household_label_column = forms.CharField(
2621
required=False,
2722
initial="family_name",
2823
help_text="Which Individual's column should be used as label for the household.",
2924
)
30-
3125
check_before = forms.BooleanField(
3226
required=False, help_text="Prevent import if errors if data is not valid against data checker."
3327
)
34-
3528
fail_if_alien = forms.BooleanField(
3629
required=False, help_text="Fails if it finds fields which do not exists in data checker."
3730
)
3831

3932
def __init__(self, *args: tuple, program: Program | None = None, **kwargs: dict) -> None:
4033
super().__init__(*args, **kwargs)
34+
self.program = program
4135
if program:
4236
self.fields["registration"].queryset = Registration.objects.filter(project__program=program, active=True)
37+
if not (program.beneficiary_group and program.beneficiary_group.master_detail):
38+
self.fields = {
39+
key: value
40+
for key, value in self.fields.items()
41+
if key not in ("household_column_prefix", "household_label_column")
42+
}

src/country_workspace/contrib/aurora/pipeline.py

Lines changed: 76 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,62 @@
1-
from typing import Any, Mapping
1+
from typing import Any, Mapping, Final, NotRequired
22

33
from django.db.transaction import atomic
44

55
from country_workspace.contrib.aurora.client import AuroraClient
66
from country_workspace.models import AsyncJob, Batch, Household, Individual
7-
from country_workspace.utils.config import BatchNameConfig, FailIfAlienConfig
8-
from country_workspace.utils.fields import clean_field_names, uppercase_field_value
7+
from country_workspace.utils.config import BatchNameConfig
8+
from country_workspace.utils.fields import clean_field_names
99

1010

11-
class Config(BatchNameConfig, FailIfAlienConfig):
11+
class Config(BatchNameConfig):
1212
registration_reference_pk: str | None
13-
household_column_prefix: str
13+
master_detail: bool
14+
household_column_prefix: NotRequired[str]
1415
individuals_column_prefix: str
15-
household_label_column: str
16+
household_label_column: NotRequired[str]
17+
18+
19+
RELATIONSHIP_HEAD: Final[str] = "HEAD"
20+
RELATIONSHIP_FIELDNAME: Final[str] = "relationship"
1621

1722

1823
def import_from_aurora(job: AsyncJob) -> dict[str, int]:
1924
"""Import data from the Aurora system into the database within an atomic transaction.
2025
2126
Args:
2227
job (AsyncJob): The job instance containing the configuration and context for data import.
23-
Expected keys in `job.config`:
24-
- "batch_name" (str): The name for the newly created batch.
25-
- "registration_reference_pk" (int): The unique identifier of the registration to import.
26-
- "household_column_prefix" (str, optional): The prefix for household-related columns.
27-
- "individuals_column_prefix" (str, optional): The prefix for individual-related columns.
28-
- "household_label_column" (str, optional): The column name used to determine the household label.
28+
Expected keys in `job.config` correspond to the `Config` TypedDict.
2929
3030
Returns:
31-
dict[str, int]: A dictionary with the counts of successfully created records:
32-
- "households": The number of households imported.
33-
- "individuals": The total number of individuals imported.
31+
dict[str, int]: Counts of imported records:
32+
- "households": Number of households imported (0 if `master_detail` is False or None).
33+
- "individuals": Total number of individuals imported.
3434
3535
"""
36-
config: Config = job.config
37-
total_hh = total_ind = 0
38-
batch = Batch.objects.create(
39-
name=config["batch_name"],
40-
program=job.program,
41-
country_office=job.program.country_office,
42-
imported_by=job.owner,
43-
source=Batch.BatchSource.AURORA,
44-
)
45-
client = AuroraClient()
4636
with atomic():
47-
for record in client.get(f"registration/{config['registration_reference_pk']}/records/"):
48-
inds_data = _collect_by_prefix(record["flatten"], config.get("individuals_column_prefix"))
49-
if inds_data:
50-
hh = create_household(batch, record["flatten"], config.get("household_column_prefix"))
51-
total_hh += 1
52-
total_ind += len(
53-
create_individuals(
54-
household=hh,
55-
data=inds_data,
56-
household_label_column=config.get("household_label_column"),
57-
)
58-
)
59-
return {"households": total_hh, "individuals": total_ind}
37+
total = {"households": 0, "individuals": 0}
38+
cfg: Config = job.config
39+
40+
batch = Batch.objects.create(
41+
name=cfg["batch_name"],
42+
program=job.program,
43+
country_office=job.program.country_office,
44+
imported_by=job.owner,
45+
source=Batch.BatchSource.AURORA,
46+
)
47+
48+
client = AuroraClient()
49+
for record in client.get(f"registration/{cfg['registration_reference_pk']}/records/"):
50+
individuals = create_individuals(batch, record["flatten"], cfg)
51+
total["individuals"] += len(individuals)
52+
if cfg["master_detail"] and individuals and individuals[0].household_id:
53+
total["households"] += 1
54+
55+
return total
6056

6157

6258
def create_household(batch: Batch, data: dict[str, Any], prefix: str) -> Household:
63-
"""
64-
Create a Household object from the provided data and associate it with a batch.
59+
"""Create a Household object from the provided data and associate it with a batch.
6560
6661
Args:
6762
batch (Batch): The batch to which the household will be linked.
@@ -75,41 +70,50 @@ def create_household(batch: Batch, data: dict[str, Any], prefix: str) -> Househo
7570
ValueError: If multiple household entries are found in the provided data.
7671
7772
"""
78-
flex_fields = _collect_by_prefix(data, prefix)
79-
if len(flex_fields) > 1:
73+
hh_data = _collect_by_prefix(data, prefix)
74+
if len(hh_data) > 1:
8075
raise ValueError("Multiple households found")
81-
flex_fields = next(iter(flex_fields.values()), {})
82-
return batch.program.households.create(batch=batch, flex_fields=clean_field_names(flex_fields))
76+
flex_fields = clean_field_names(next(iter(hh_data.values()), {}))
77+
return batch.program.households.create(batch=batch, flex_fields=flex_fields)
8378

8479

85-
def create_individuals(household: Household, data: dict[str, Any], household_label_column: str) -> list[Individual]:
86-
"""Create and associate Individual objects with a given Household.
80+
def create_individuals(
81+
batch: Batch,
82+
data: dict[str, Any],
83+
cfg: Config,
84+
) -> list[Individual]:
85+
"""Create and associate Individual objects with an optional Household.
8786
8887
Args:
89-
household (Household): The household to which the individuals will be linked.
90-
data (dict[str, Any]): A dictionary mapping indices to individual details.
91-
household_label_column (str): The key in the individual data used to determine the household label.
88+
batch (Batch): The batch to which individuals will be linked.
89+
data (dict[str, Any]): The flattened record; individuals (and, when `master_detail` is set, the household) are extracted from it by column prefix.
90+
cfg (Config): Import settings; reads `individuals_column_prefix`, `master_detail`, `household_column_prefix` and `household_label_column`.
9291
9392
Returns:
9493
list[Individual]: A list of successfully created Individual instances.
9594
9695
"""
97-
individuals = []
96+
household, individuals = None, []
9897
head_found = False
9998

100-
for raw_individual in data.values():
101-
individual = clean_field_names(raw_individual)
102-
if not head_found:
103-
head_found = _update_household_label_from_individual(household, individual, household_label_column)
99+
inds_data = _collect_by_prefix(data, cfg.get("individuals_column_prefix"))
100+
101+
if inds_data and cfg["master_detail"] and (hh_prefix := cfg.get("household_column_prefix")):
102+
household = create_household(batch, data, hh_prefix)
103+
104+
for ind_data in inds_data.values():
105+
flex_fields = clean_field_names(ind_data)
106+
if household and (hh_label := cfg.get("household_label_column")) and not head_found:
107+
head_found = _update_household_label_from_individual(household, flex_fields, hh_label)
104108
individuals.append(
105109
Individual(
106-
batch=household.batch,
107-
household_id=household.pk,
108-
name=individual.get("given_name", ""),
109-
flex_fields=individual,
110-
),
110+
batch=batch,
111+
household_id=household.pk if household else None,
112+
name=flex_fields.get("given_name", ""),
113+
flex_fields=flex_fields,
114+
)
111115
)
112-
return household.program.individuals.bulk_create(individuals)
116+
return batch.program.individuals.bulk_create(individuals, batch_size=1000)
113117

114118

115119
def _collect_by_prefix(data: dict[str, Any], prefix: str) -> dict[str, dict[str, Any]]:
@@ -125,6 +129,9 @@ def _collect_by_prefix(data: dict[str, Any], prefix: str) -> dict[str, dict[str,
125129
and, for specific fields, values converted to uppercase. Returns an empty dictionary if no
126130
matching keys are found.
127131
132+
Raises:
133+
ValueError: If a key with the specified prefix does not contain an underscore after the prefix.
134+
128135
Examples:
129136
>>> data = {"user_0_relationship": "head", "user_0_gender": "male", "user_1_gender": "female"}
130137
>>> _collect_by_prefix(data, "user_")
@@ -136,19 +143,22 @@ def _collect_by_prefix(data: dict[str, Any], prefix: str) -> dict[str, dict[str,
136143
result = {}
137144
for k, v in data.items():
138145
if (stripped := k.removeprefix(prefix)) != k:
139-
index, field = stripped.split("_", 1)
140-
result.setdefault(index, {})[field] = uppercase_field_value(field, v)
146+
try:
147+
index, field = stripped.split("_", 1)
148+
result.setdefault(index, {})[field] = v
149+
except ValueError:
150+
raise ValueError(f"Field name '{k}' after removing prefix '{prefix}' must contain an underscore.")
141151
return result
142152

143153

144154
def _update_household_label_from_individual(
145-
household: Household, individual: Mapping[str, Any], household_label_column: str
155+
household: Household, ind_data: Mapping[str, Any], household_label_column: str
146156
) -> bool:
147157
"""Update the household's name based on an individual's role and specified name field.
148158
149159
Args:
150160
household (Household): The household instance to update.
151-
individual (dict[str, Any]): A dictionary containing the individual's data,
161+
ind_data (dict[str, Any]): A dictionary containing the individual's data,
152162
including relationship status and potential household name.
153163
household_label_column (str): The key in the individual's data that stores
154164
the name to assign to the household.
@@ -157,8 +167,8 @@ def _update_household_label_from_individual(
157167
bool: True if the household name was updated (individual is head and name provided), False otherwise.
158168
159169
"""
160-
is_head = any(individual.get(k, "").upper() == "HEAD" for k in individual if k.startswith("relationship"))
161-
name = individual.get(household_label_column)
170+
is_head = any(ind_data.get(k) == RELATIONSHIP_HEAD for k in ind_data if k == RELATIONSHIP_FIELDNAME)
171+
name = ind_data.get(household_label_column)
162172
if is_head and name:
163173
household.name = name
164174
household.save(update_fields=["name"])

src/country_workspace/utils/fields.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212

1313
TO_REMOVE = "_h_c", "_h_f", "_i_c", "_i_f"
14+
TO_UPPERCASE = "relationship", "gender", "disability", "residence_status"
1415

1516

1617
def clean_field_name(v: str) -> str:
@@ -27,12 +28,21 @@ def clean_field_name(v: str) -> str:
2728

2829

2930
def clean_field_names(record: Record) -> Record:
30-
return {clean_field_name(k): v for k, v in record.items()}
31+
"""Clean all field names in a record by normalizing them.
32+
33+
Args:
34+
record (dict): A dictionary with field names as keys and their values.
35+
36+
Returns:
37+
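dict: A new dictionary with cleaned field names; values are passed through `uppercase_field_value`, so string values of fields whose original name starts with a `TO_UPPERCASE` prefix are uppercased and all other values are unchanged.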
38+
39+
"""
40+
return {clean_field_name(k): uppercase_field_value(k, v) for k, v in record.items()}
3141

3242

3343
def uppercase_field_value(k: str, v: Any) -> str:
3444
"""
35-
Convert the given field value to uppercase if applicable.
45+
Convert the given field value to uppercase if its name starts with specific prefixes.
3646
3747
Args:
3848
k (str): The name of the field.
@@ -42,5 +52,4 @@ def uppercase_field_value(k: str, v: Any) -> str:
4252
str: The uppercase value if applicable or the original value.
4353
4454
"""
45-
to_uppercase = ("relationship", "gender", "disability", "residence_status")
46-
return v.upper() if isinstance(v, str) and k in to_uppercase else v
55+
return v.upper() if isinstance(v, str) and any(k.startswith(prefix) for prefix in TO_UPPERCASE) else v

src/country_workspace/workspaces/admin/program.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from django.utils.translation import gettext as _
1414
from strategy_field.utils import fqn
1515

16-
from country_workspace.contrib.aurora.pipeline import import_from_aurora
16+
from country_workspace.contrib.aurora.pipeline import import_from_aurora, Config as AuroraConfig
1717
from country_workspace.state import state
1818
from country_workspace.utils.fields import batch_name_default
1919

@@ -314,14 +314,21 @@ def import_rdi(self, request: HttpRequest, program: CountryProgram) -> "ImportFi
314314
def import_aurora(self, request: HttpRequest, program: "CountryProgram") -> "ImportAuroraForm|None":
315315
form = ImportAuroraForm(request.POST, prefix="aurora", program=program)
316316
if form.is_valid():
317-
registration_reference_pk = getattr(form.cleaned_data["registration"], "reference_pk", None)
318317
config: AuroraConfig = {
319318
"batch_name": form.cleaned_data["batch_name"] or batch_name_default(),
320-
"registration_reference_pk": registration_reference_pk,
321-
"household_column_prefix": form.cleaned_data["household_column_prefix"],
319+
"registration_reference_pk": getattr(form.cleaned_data.get("registration"), "reference_pk", None),
322320
"individuals_column_prefix": form.cleaned_data["individuals_column_prefix"],
323-
"household_label_column": form.cleaned_data["household_label_column"],
324-
"fail_if_alien": form.cleaned_data["fail_if_alien"],
321+
"master_detail": (
322+
master_detail := (program.beneficiary_group.master_detail if program.beneficiary_group else False)
323+
),
324+
**(
325+
{
326+
"household_column_prefix": form.cleaned_data.get("household_column_prefix"),
327+
"household_label_column": form.cleaned_data.get("household_label_column"),
328+
}
329+
if master_detail
330+
else {}
331+
),
325332
}
326333
job: AsyncJob = AsyncJob.objects.create(
327334
description="Aurora importing",

tests/contrib/aurora/stub.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,16 @@
131131
},
132132
],
133133
},
134+
"invalid_key": {
135+
"page": 1,
136+
"results": [
137+
{
138+
"id": 9,
139+
"flatten": {
140+
"individuals_wrong": "value",
141+
"household_invalid": "data",
142+
},
143+
},
144+
],
145+
},
134146
}

0 commit comments

Comments
 (0)