Skip to content

Commit c6c81a3

Browse files
chg ! import parsing to support custom prefixes and optional hh/ind data
1 parent b0612cf commit c6c81a3

File tree

6 files changed

+140
-190
lines changed

6 files changed

+140
-190
lines changed

docs/src/import_data/aurora.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,12 @@ menu, then press the **[Import Data]** button and select the **[Aurora]** tab. H
3636
By default, *<"Batch " + the current datetime>* is used.
3737

3838
- **Registration** – Select the specific Aurora registration to import. If needed, [synchronize](../interfaces.md#synchronize-unified-classifiers) unified classifiers before proceeding.
39-
- **Household Name Column** – Specify which Individual's column contains the Household's name.
39+
40+
- **Household column prefix** – A string added at the beginning of column names to indicate household-related data. It can appear in various forms (e.g., "household_" or "household-info") and is used to group these columns.
41+
42+
- **Individuals column prefix** – A string added at the beginning of column names to indicate individual-related data. It can appear in various forms (e.g., "individual-details_" or "personas_") and is used to group these columns.
43+
44+
- **Household label column** – Specify which Individual's column should be used as the label for the household.
4045

4146
By default, this is set to *family_name*.
4247

src/country_workspace/contrib/aurora/forms.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,26 @@ class ImportAuroraForm(forms.Form):
1212
help_text="What type of registrations are being imported.",
1313
)
1414

15-
household_name_column = forms.CharField(
15+
household_column_prefix = forms.CharField(
16+
initial="household_",
17+
help_text="Household's column group prefix",
18+
)
19+
20+
individuals_column_prefix = forms.CharField(
21+
initial="individuals_",
22+
help_text="Individuals' column group prefix",
23+
)
24+
25+
household_label_column = forms.CharField(
1626
required=False,
1727
initial="family_name",
18-
help_text="Which Individual's column contains the Household's name.",
28+
help_text="Which Individual's column should be used as label for the household.",
1929
)
2030

2131
check_before = forms.BooleanField(
2232
required=False, help_text="Prevent import if errors if data is not valid against data checker."
2333
)
34+
2435
fail_if_alien = forms.BooleanField(
2536
required=False, help_text="Fails if it finds fields which do not exists in data checker."
2637
)

src/country_workspace/contrib/aurora/pipeline.py

Lines changed: 44 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@ def import_from_aurora(job: AsyncJob) -> dict[str, int]:
1111
"""Import data from the Aurora system into the database within an atomic transaction.
1212
1313
Args:
14-
job (AsyncJob): The job instance containing the configuration and context for data synchronization.
14+
job (AsyncJob): The job instance containing the configuration and context for data import.
1515
Expected keys in `job.config`:
1616
- "batch_name" (str): The name for the newly created batch.
1717
- "registration_reference_pk" (int): The unique identifier of the registration to import.
18-
- "household_name_column" (str, optional): The column name used to determine the household head.
18+
- "household_column_prefix" (str, optional): The prefix for household-related columns.
19+
- "individuals_column_prefix" (str, optional): The prefix for individual-related columns.
20+
- "household_label_column" (str, optional): The column name used to determine the household label.
1921
2022
Returns:
2123
dict[str, int]: A dictionary with the counts of successfully created records:
@@ -24,40 +26,38 @@ def import_from_aurora(job: AsyncJob) -> dict[str, int]:
2426
2527
"""
2628
total_hh = total_ind = 0
27-
batch_name = job.config["batch_name"]
2829
batch = Batch.objects.create(
29-
name=batch_name,
30+
name=job.config["batch_name"],
3031
program=job.program,
3132
country_office=job.program.country_office,
3233
imported_by=job.owner,
3334
source=Batch.BatchSource.AURORA,
3435
)
35-
36-
registration = job.config["registration_reference_pk"]
3736
client = AuroraClient()
3837
with atomic():
39-
for record in client.get(f"registration/{registration}/records/"):
40-
hh = create_household(batch, record["flatten"])
41-
total_hh += 1
42-
total_ind += len(
43-
create_individuals(
44-
household=hh,
45-
data=record["flatten"],
46-
household_name_column=job.config.get("household_name_column"),
38+
for record in client.get(f"registration/{job.config['registration_reference_pk']}/records/"):
39+
inds_data = _collect_by_prefix(record["flatten"], job.config.get("individuals_column_prefix"))
40+
if inds_data:
41+
hh = create_household(batch, record["flatten"], job.config.get("household_column_prefix"))
42+
total_hh += 1
43+
total_ind += len(
44+
create_individuals(
45+
household=hh,
46+
data=inds_data,
47+
household_label_column=job.config.get("household_label_column"),
48+
)
4749
)
48-
)
49-
5050
return {"households": total_hh, "individuals": total_ind}
5151

5252

53-
def create_household(batch: Batch, data: dict[str, Any]) -> Household:
53+
def create_household(batch: Batch, data: dict[str, Any], prefix: str) -> Household:
5454
"""
5555
Create a Household object from the provided data and associate it with a batch.
5656
5757
Args:
5858
batch (Batch): The batch to which the household will be linked.
59-
data (dict[str, Any]): A dictionary containing household-related information,
60-
typically prefixed with "household_".
59+
data (dict[str, Any]): A dictionary containing household-related information.
60+
prefix (str): The prefix used to filter and group household-related information.
6161
6262
Returns:
6363
Household: The newly created household instance.
@@ -66,55 +66,38 @@ def create_household(batch: Batch, data: dict[str, Any]) -> Household:
6666
ValueError: If multiple household entries are found in the provided data.
6767
6868
"""
69-
flex_fields = _collect_by_prefix(data, prefix="household_")
70-
71-
if len(flex_fields) == 1:
72-
flex_fields = next(iter(flex_fields.values()))
73-
else:
69+
flex_fields = _collect_by_prefix(data, prefix)
70+
if len(flex_fields) > 1:
7471
raise ValueError("Multiple households found")
75-
7672
return batch.program.households.create(batch=batch, flex_fields=flex_fields)
7773

7874

79-
def create_individuals(
80-
household: Household,
81-
data: dict[str, Any],
82-
household_name_column: str,
83-
) -> list[Individual]:
75+
def create_individuals(household: Household, data: dict[str, Any], household_label_column: str) -> list[Individual]:
8476
"""Create and associate Individual objects with a given Household.
8577
8678
Args:
8779
household (Household): The household to which the individuals will be linked.
88-
data (dict[str, Any]): A dictionary containing individual details, typically
89-
structured with a prefix for multiple individuals.
90-
household_name_column (str): The key in the individual data used to determine
91-
the household head's name.
80+
data (dict[str, Any]): A dictionary mapping indices to individual details.
81+
household_label_column (str): The key in the individual data used to determine the household label.
9282
9383
Returns:
9484
list[Individual]: A list of successfully created Individual instances.
9585
96-
Raises:
97-
ValueError: If no household head is identified in the provided data.
98-
9986
"""
10087
individuals = []
10188
head_found = False
10289

103-
individuals_data = _collect_by_prefix(data, prefix="individuals_")
104-
for individual in individuals_data.values():
90+
for individual in data.values():
10591
if not head_found:
106-
head_found = _update_household_name_from_individual(household, individual, household_name_column)
107-
fullname_field = next((k for k in individual if k.startswith("given_name")), None)
92+
head_found = _update_household_label_from_individual(household, individual, household_label_column)
10893
individuals.append(
10994
Individual(
11095
batch=household.batch,
11196
household_id=household.pk,
112-
name=individual.get(fullname_field, ""),
97+
name=individual.get("given_name", ""),
11398
flex_fields=individual,
11499
),
115100
)
116-
if not head_found:
117-
raise ValueError(f"No head of household {household.flex_fields} found")
118101
return household.program.individuals.bulk_create(individuals)
119102

120103

@@ -127,28 +110,16 @@ def _collect_by_prefix(data: dict[str, Any], prefix: str) -> dict[str, dict[str,
127110
128111
Returns:
129112
dict[str, dict[str, Any]]: A dictionary where each key is an index extracted from the original keys,
130-
and each value is a dictionary of the corresponding grouped fields (with normalized field names and,
131-
for specific fields, values converted to uppercase).
132-
133-
Raises:
134-
ValueError: If no matching data is found with the specified prefix.
113+
and each value is a dictionary of the corresponding grouped fields with normalized field names
114+
and, for specific fields, values converted to uppercase. Returns an empty dictionary if no
115+
matching keys are found.
135116
136117
Examples:
137-
>>> data = {
138-
... "user_0_relationship_h_c": "head",
139-
... "user_0_gender_i_c": "male",
140-
... "user_0_other_key": "other",
141-
... "user_1_relationship_h_c": "son_daughter",
142-
... "user_1_gender_i_c": "female",
143-
... "user_1_other_key": "moreover",
144-
... }
118+
>>> data = {"user_0_relationship": "head", "user_0_gender": "male", "user_1_gender": "female"}
145119
>>> _collect_by_prefix(data, "user_")
146-
{'0': {'relationship': 'HEAD', 'gender': 'MALE', 'other_key': 'other'},
147-
'1': {'relationship': 'SON_DAUGHTER', 'gender': 'FEMALE', 'other_key': 'moreover'}}
148-
>>> _collect_by_prefix(data, "nonexistent_")
149-
Traceback (most recent call last):
150-
...
151-
ValueError: No data found with prefix 'nonexistent_'
120+
{'0': {'relationship': 'HEAD', 'gender': 'MALE'}, '1': {'gender': 'FEMALE'}}
121+
>>> _collect_by_prefix(data, "other_")
122+
{}
152123
153124
"""
154125
result = {}
@@ -157,33 +128,29 @@ def _collect_by_prefix(data: dict[str, Any], prefix: str) -> dict[str, dict[str,
157128
index, field = stripped.split("_", 1)
158129
field_clean = clean_field_name(field)
159130
result.setdefault(index, {})[field_clean] = uppercase_field_value(field_clean, v)
160-
if not result:
161-
raise ValueError(f"No data found with prefix '{prefix}'")
162131
return result
163132

164133

165-
def _update_household_name_from_individual(
166-
household: Household,
167-
individual: dict[str, Any],
168-
household_name_column: str,
134+
def _update_household_label_from_individual(
135+
household: Household, individual: dict[str, Any], household_label_column: str
169136
) -> bool:
170137
"""Update the household's name based on an individual's role and specified name field.
171138
172139
Args:
173140
household (Household): The household instance to update.
174141
individual (dict[str, Any]): A dictionary containing the individual's data,
175142
including relationship status and potential household name.
176-
household_name_column (str): The key in the individual's data that stores
143+
household_label_column (str): The key in the individual's data that stores
177144
the name to assign to the household.
178145
179146
Returns:
180-
bool: True if the household name was updated, False otherwise.
147+
bool: True if the household name was updated (individual is head and name provided), False otherwise.
181148
182149
"""
183-
if any(individual.get(k) == "HEAD" for k in individual if k.startswith("relationship")):
184-
name = individual.get(household_name_column)
185-
if name:
186-
household.name = name
187-
household.save()
188-
return True
150+
is_head = any(individual.get(k) == "HEAD" for k in individual if k.startswith("relationship"))
151+
name = individual.get(household_label_column)
152+
if is_head and name:
153+
household.name = name
154+
household.save(update_fields=["name"])
155+
return True
189156
return False

src/country_workspace/workspaces/admin/program.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,9 @@ def import_aurora(self, request: HttpRequest, program: "CountryProgram") -> "Imp
298298
config={
299299
"batch_name": form.cleaned_data["batch_name"] or BATCH_NAME_DEFAULT,
300300
"registration_reference_pk": registration_reference_pk,
301-
"household_name_column": form.cleaned_data["household_name_column"],
301+
"household_column_prefix": form.cleaned_data["household_column_prefix"],
302+
"individuals_column_prefix": form.cleaned_data["individuals_column_prefix"],
303+
"household_label_column": form.cleaned_data["household_label_column"],
302304
},
303305
)
304306
job.queue()

0 commit comments

Comments
 (0)