1- from typing import Any , Mapping
1+ from typing import Any , Mapping , Final , NotRequired
22
33from django .db .transaction import atomic
44
55from country_workspace .contrib .aurora .client import AuroraClient
66from country_workspace .models import AsyncJob , Batch , Household , Individual
7- from country_workspace .utils .config import BatchNameConfig , FailIfAlienConfig
8- from country_workspace .utils .fields import uppercase_field_value , clean_field_names
7+ from country_workspace .utils .config import BatchNameConfig
8+ from country_workspace .utils .fields import clean_field_names
99
1010
class Config(BatchNameConfig):
    """Configuration consumed by the Aurora import job.

    Extends ``BatchNameConfig``. The import code in this module also reads a
    ``batch_name`` key, presumably declared by that base class (verify there).
    """

    # Identifier of the Aurora registration whose records are fetched
    # (interpolated into the ``registration/<pk>/records/`` endpoint).
    registration_reference_pk: str | None
    # When true, and a household prefix is configured, one household is
    # created for each record that contains individuals.
    master_detail: bool
    # Prefix of the flattened-record keys that hold household data.
    household_column_prefix: NotRequired[str]
    # Prefix of the flattened-record keys that hold individuals data.
    individuals_column_prefix: str
    # Key in an individual's data whose value is used as the household name.
    household_label_column: NotRequired[str]
17+
18+
# Relationship value identifying the individual who is the head of a household.
RELATIONSHIP_HEAD: Final[str] = "HEAD"
# Key in an individual's data under which the relationship value is looked up.
RELATIONSHIP_FIELDNAME: Final[str] = "relationship"
1621
1722
def import_from_aurora(job: AsyncJob) -> dict[str, int]:
    """Import data from the Aurora system into the database within an atomic transaction.

    A new batch is created for the job's program, every record of the configured
    Aurora registration is fetched, and the individuals (plus, optionally, one
    household per record) found in each record are stored.

    Args:
        job (AsyncJob): The job instance containing the configuration and context for data import.
            Expected keys in `job.config` correspond to the `Config` TypedDict.

    Returns:
        dict[str, int]: Counts of imported records:
            - "households": Number of records whose individuals were linked to a household
              (always 0 when `master_detail` is falsy).
            - "individuals": Total number of individuals imported.

    """
    cfg: Config = job.config
    total = {"households": 0, "individuals": 0}

    # Batch creation and all record imports share one transaction, so a failure
    # on any record rolls back the whole import.
    with atomic():
        batch = Batch.objects.create(
            name=cfg["batch_name"],
            program=job.program,
            country_office=job.program.country_office,
            imported_by=job.owner,
            source=Batch.BatchSource.AURORA,
        )

        client = AuroraClient()
        for record in client.get(f"registration/{cfg['registration_reference_pk']}/records/"):
            individuals = create_individuals(batch, record["flatten"], cfg)
            total["individuals"] += len(individuals)
            # Every individual of a record gets the same household id, so
            # inspecting the first one is enough to know a household exists.
            if cfg["master_detail"] and individuals and individuals[0].household_id:
                total["households"] += 1

    return total
6056
6157
def create_household(batch: Batch, data: dict[str, Any], prefix: str) -> Household:
    """Create a Household object from the provided data and associate it with a batch.

    Args:
        batch (Batch): The batch to which the household will be linked.
        data (dict[str, Any]): Flattened record data; only keys starting with `prefix` are used.
        prefix (str): The prefix that identifies household-related keys in `data`.

    Returns:
        Household: The household created in the batch's program.

    Raises:
        ValueError: If multiple household entries are found in the provided data.

    """
    hh_data = _collect_by_prefix(data, prefix)
    if len(hh_data) > 1:
        raise ValueError("Multiple households found")
    # When no key matches the prefix, the household is still created, using an
    # empty mapping as the source of its flex fields.
    flex_fields = clean_field_names(next(iter(hh_data.values()), {}))
    return batch.program.households.create(batch=batch, flex_fields=flex_fields)
8378
8479
def create_individuals(
    batch: Batch,
    data: dict[str, Any],
    cfg: Config,
) -> list[Individual]:
    """Create Individual objects for one record, optionally linked to a new Household.

    Individuals are collected from `data` using `cfg["individuals_column_prefix"]`.
    A household is created only when individuals were found, `cfg["master_detail"]`
    is truthy and a non-empty `household_column_prefix` is configured. If a
    `household_label_column` is configured as well, the household name is taken
    from the first individual accepted by `_update_household_label_from_individual`.

    Args:
        batch (Batch): The batch to which individuals (and the household) will be linked.
        data (dict[str, Any]): Flattened record data containing individual and household keys.
        cfg (Config): Configuration dictionary containing the settings for the import process.

    Returns:
        list[Individual]: The Individual instances returned by `bulk_create`.

    """
    inds_data = _collect_by_prefix(data, cfg.get("individuals_column_prefix"))

    household = None
    if inds_data and cfg["master_detail"] and (hh_prefix := cfg.get("household_column_prefix")):
        household = create_household(batch, data, hh_prefix)

    # The label column does not change between individuals; look it up once.
    label_column = cfg.get("household_label_column")
    household_id = household.pk if household is not None else None

    individuals = []
    head_found = False
    for ind_data in inds_data.values():
        flex_fields = clean_field_names(ind_data)
        # Stop trying to label the household once one individual has set its name.
        if household is not None and label_column and not head_found:
            head_found = _update_household_label_from_individual(household, flex_fields, label_column)
        individuals.append(
            Individual(
                batch=batch,
                household_id=household_id,
                name=flex_fields.get("given_name", ""),
                flex_fields=flex_fields,
            )
        )
    return batch.program.individuals.bulk_create(individuals, batch_size=1000)
113117
114118
def _collect_by_prefix(data: dict[str, Any], prefix: str) -> dict[str, dict[str, Any]]:
    """Group the entries of `data` whose keys start with `prefix` by their index.

    Matching keys are expected to look like ``<prefix><index>_<field>``. The prefix
    is removed, the remainder is split at its first underscore, and the value is
    stored unchanged under ``result[index][field]``.

    Args:
        data (dict[str, Any]): The flat mapping to scan.
        prefix (str): The prefix selecting the keys to collect. Keys that do not
            start with it are ignored; an empty prefix matches nothing.

    Returns:
        dict[str, dict[str, Any]]: A mapping of index to a ``{field: value}`` dictionary.
            Returns an empty dictionary if no matching keys are found.

    Raises:
        ValueError: If a key with the specified prefix does not contain an underscore after the prefix.

    Examples:
        >>> data = {"user_0_relationship": "head", "user_0_gender": "male", "user_1_gender": "female"}
        >>> _collect_by_prefix(data, "user_")
        {'0': {'relationship': 'head', 'gender': 'male'}, '1': {'gender': 'female'}}

    """
    result: dict[str, dict[str, Any]] = {}
    for k, v in data.items():
        stripped = k.removeprefix(prefix)
        if stripped == k:
            # Key does not start with the prefix (or the prefix is empty).
            continue
        # partition() reports a missing separator explicitly, so no exception
        # has to be caught and re-raised to detect a malformed key.
        index, separator, field = stripped.partition("_")
        if not separator:
            raise ValueError(f"Field name '{k}' after removing prefix '{prefix}' must contain an underscore.")
        result.setdefault(index, {})[field] = v
    return result
142152
143153
144154def _update_household_label_from_individual (
145- household : Household , individual : Mapping [str , Any ], household_label_column : str
155+ household : Household , ind_data : Mapping [str , Any ], household_label_column : str
146156) -> bool :
147157 """Update the household's name based on an individual's role and specified name field.
148158
149159 Args:
150160 household (Household): The household instance to update.
151- individual (dict[str, Any]): A dictionary containing the individual's data,
161+ ind_data (dict[str, Any]): A dictionary containing the individual's data,
152162 including relationship status and potential household name.
153163 household_label_column (str): The key in the individual's data that stores
154164 the name to assign to the household.
@@ -157,8 +167,8 @@ def _update_household_label_from_individual(
157167 bool: True if the household name was updated (individual is head and name provided), False otherwise.
158168
159169 """
160- is_head = any (individual .get (k , "" ). upper () == "HEAD" for k in individual if k . startswith ( "relationship" ) )
161- name = individual .get (household_label_column )
170+ is_head = any (ind_data .get (k ) == RELATIONSHIP_HEAD for k in ind_data if k == RELATIONSHIP_FIELDNAME )
171+ name = ind_data .get (household_label_column )
162172 if is_head and name :
163173 household .name = name
164174 household .save (update_fields = ["name" ])
0 commit comments