11import io
22from base64 import b64encode
3- from enum import Enum
43from collections import defaultdict
54from collections .abc import Iterable , Generator
5+ from enum import StrEnum
66from typing import Any , Mapping , cast , NotRequired
77
88import openpyxl
1212from openpyxl .drawing .image import Image as RDIImage
1313
1414from country_workspace .contrib .kobo .api .data .helpers import VALUE_FORMAT
15+ from country_workspace .datasources .utils import datetime_to_date , date_to_iso_string
1516from country_workspace .models import AsyncJob , Batch , Household , Individual
1617from country_workspace .utils .config import BatchNameConfig , FailIfAlienConfig
1718from country_workspace .utils .fields import Record , clean_field_names
1819from country_workspace .utils .functional import compose
1920from country_workspace .validators .beneficiaries import validate_beneficiaries
20- from country_workspace .datasources .utils import datetime_to_date , date_to_iso_string
21-
2221
2322RDI = str | io .BytesIO
2423Sheet = Iterable [Record ]
@@ -39,10 +38,10 @@ class Config(BatchNameConfig, FailIfAlienConfig):
3938 first_line : int
4039
4140
42- class SheetName (Enum ):
43- HOUSEHOLDS : int = 0
44- INDIVIDUALS : int = 1
45- PEOPLE : int = 2
41+ class SheetName (StrEnum ):
42+ HOUSEHOLDS = "Households"
43+ INDIVIDUALS = "Individuals"
44+ PEOPLE = "People"
4645
4746
4847class ColumnConfigurationError (Exception ):
@@ -65,16 +64,16 @@ def __str__(self) -> str:
6564
6665
6766class SheetNotFoundError (Exception ):
68- def __init__ (self , sheet_indices : int | tuple [int , ...]) -> None :
69- if isinstance (sheet_indices , int ):
70- sheet_indices = (sheet_indices ,)
71- super ().__init__ (sheet_indices )
72- self .sheet_indices = sheet_indices
67+ def __init__ (self , sheet_names : str | tuple [str , ...]) -> None :
68+ if isinstance (sheet_names , str ):
69+ sheet_names = (sheet_names ,)
70+ super ().__init__ (sheet_names )
71+ self .sheet_names = sheet_names
7372
7473 def __str__ (self ) -> str :
75- if len (self .sheet_indices ) == 1 :
76- return f"Sheet with index { self .sheet_indices [0 ]} was not found in the provided file."
77- indices_str = ", " .join (map (str , self .sheet_indices ))
74+ if len (self .sheet_names ) == 1 :
75+ return f"Sheet with index { self .sheet_names [0 ]} was not found in the provided file."
76+ indices_str = ", " .join (map (str , self .sheet_names ))
7877 return f"Sheets with indices { indices_str } were not found in the provided file."
7978
8079
@@ -175,10 +174,10 @@ def image_content(rdi_image: RDIImage) -> tuple[str | None, str]:
175174 return content_type , content
176175
177176
178- def extract_images (filepath : str , * sheet_indices : int ) -> Generator [Mapping [int , Mapping [int , str ]], None , None ]:
177+ def extract_images (filepath : str , * sheet_names : str ) -> Generator [Mapping [int , Mapping [int , str ]], None , None ]:
179178 workbook = openpyxl .load_workbook (filepath )
180- for i in sheet_indices :
181- worksheet = workbook . worksheets [ i ]
179+ for n in sheet_names :
180+ worksheet = workbook [ n ]
182181 images : dict [int , dict [int , str ]] = defaultdict (dict )
183182 for rdi_image in worksheet ._images :
184183 row , column = image_location (rdi_image )
@@ -195,19 +194,19 @@ def merge_images(sheet: Sheet, sheet_images: Mapping[int, Mapping[int, str]]) ->
195194 yield row
196195
197196
def read_sheets(config: Config, filepath: str, *sheet_names: str) -> Generator[Sheet, None, None]:
    """Yield the rows of each requested worksheet with its images merged in.

    Cell values are passed through ``datetime_to_date`` composed with
    ``date_to_iso_string``. When ``config["master_detail"]`` is truthy, each
    sheet is additionally passed through ``filter_rows_with_household_pk``.

    Args:
        config: import configuration; ``config["master_detail"]`` is read here.
        filepath: location of the workbook to read.
        *sheet_names: names of the worksheets to read, in the order to yield them.

    Yields:
        One iterable of records per sheet obtained from the workbook.

    Raises:
        SheetNotFoundError: if reading the sheets fails with ``IndexError`` or
            ``KeyError``.
    """
    # Read the flag before entering the ``try`` so that a missing config key
    # is not misreported as a missing sheet by the ``KeyError`` handler below.
    master_detail = config["master_detail"]
    cell_mapper = compose(datetime_to_date, date_to_iso_string)
    try:
        sheets = open_xls_multi(filepath, indices_or_names=list(sheet_names), value_mapper=cell_mapper)
        sheet_images = extract_images(filepath, *sheet_names)
        for (_, sheet), images in zip(sheets, sheet_images, strict=False):
            sheet_with_images = merge_images(sheet, images)
            if master_detail:
                yield filter_rows_with_household_pk(config, sheet_with_images)
            else:
                yield sheet_with_images
    except (IndexError, KeyError) as e:
        # ``extract_images`` looks sheets up by name (``workbook[name]``), which
        # raises ``KeyError`` for an unknown name; ``IndexError`` is still
        # handled as before.
        raise SheetNotFoundError(sheet_names) from e
211210
212211
213212def import_from_rdi (job : AsyncJob ) -> dict [str , int ]:
@@ -226,16 +225,14 @@ def import_from_rdi(job: AsyncJob) -> dict[str, int]:
226225
227226
def _import_master_detail(job: AsyncJob, batch: Batch, config: dict) -> dict[str, int]:
    """Import the "Households" and "Individuals" sheets of ``job.file``.

    Households are processed first; the resulting mapping is then handed to
    ``process_beneficiaries`` together with the individuals sheet, and finally
    to ``validate_beneficiaries``.

    Returns:
        A dict with the sizes of the household and individual mappings under
        the keys ``"household"`` and ``"individual"``.
    """
    # ``SheetName`` members are strings, so they are passed as sheet names directly.
    household_sheet, individual_sheet = read_sheets(config, job.file, SheetName.HOUSEHOLDS, SheetName.INDIVIDUALS)
    household_mapping = process_households(household_sheet, job, batch, config)
    individuals_mapping = process_beneficiaries(individual_sheet, job, batch, config, household_mapping)
    validate_beneficiaries(config, household_mapping)
    return {"household": len(household_mapping), "individual": len(individuals_mapping)}
236233
237234
def _import_people_only(job: AsyncJob, batch: Batch, config: dict) -> dict[str, int]:
    """Import the "People" sheet of ``job.file``.

    The sheet is processed with ``process_beneficiaries`` and the resulting
    mapping is passed to ``validate_beneficiaries``.

    Returns:
        A dict with the size of the people mapping under the key ``"people"``.
    """
    # Exactly one sheet is requested, so exactly one is unpacked.
    (people_sheet,) = read_sheets(config, job.file, SheetName.PEOPLE)
    people_mapping = process_beneficiaries(people_sheet, job, batch, config)
    validate_beneficiaries(config, people_mapping)
    return {"people": len(people_mapping)}
0 commit comments