|
1 | | -"""Concours cleaner adapter.""" |
2 | | - |
| 1 | +import unicodedata |
3 | 2 | from datetime import datetime |
4 | | -from typing import List, Optional |
| 3 | +from typing import Dict, List, Optional, Tuple |
5 | 4 |
|
6 | 5 | import polars as pl |
7 | 6 | from django.utils import timezone |
@@ -262,34 +261,51 @@ def _map_category(self, category_str: Optional[str]) -> Optional[Category]: |
262 | 261 | else: |
263 | 262 | return Category.HORS_CATEGORIE |
264 | 263 |
|
| 264 | + @staticmethod |
| 265 | + def _normalize(text: str) -> str: |
| 266 | + nfkd = unicodedata.normalize("NFKD", text) |
| 267 | + return nfkd.encode("ascii", "ignore").decode("ascii").lower() |
| 268 | + |
def _map_ministry(self, ministry_str: Optional[str]) -> Ministry:
    """Map a free-text ministry name to a ``Ministry`` member.

    The input is normalized with ``self._normalize`` (lower-case, accents
    stripped) and scored against a keyword table: each keyword found at the
    start of a word counts for one point. The entry with the highest score
    wins; on a tie the entry listed first in the table is kept.

    Args:
        ministry_str: Ministry name as found in the source data.

    Returns:
        The ``Ministry`` member whose keywords best match the input.

    Raises:
        InvalidMinistryError: If the input is empty/``None`` or no keyword
            matches.
    """
    if not ministry_str:
        raise InvalidMinistryError("Unknown ministry")

    # A sequence of pairs rather than a dict: the same Ministry may appear
    # on several rows, whereas a dict literal silently keeps only the last
    # value of a repeated key (which used to discard the agriculture
    # keywords in favour of the "armees" row).
    ministry_keywords: Tuple[Tuple[Ministry, Tuple[str, ...]], ...] = (
        (Ministry.METEO_FRANCE, ("meteo",)),
        (Ministry.MC, ("culture",)),
        (Ministry.MEAE, ("europe", "etrangeres")),
        (Ministry.PREMIER_MINISTRE, ("premier",)),
        (Ministry.MEF, ("economie", "finances")),
        (Ministry.MAA, ("agriculture", "alimentaire")),
        (Ministry.MTE, ("ecologique", "cohesion")),
        (Ministry.MESRI, ("recherche", "enseignement superieur")),
        (Ministry.MEN, ("education", "jeunesse")),
        (Ministry.MTEI, ("travail", "plein emploi", "insertion")),
        (Ministry.MJ, ("justice",)),
        (Ministry.MSS, ("sante", "solidarites")),
        (Ministry.MI, ("interieur",)),
        (Ministry.CONSEIL_ETAT, ("conseil d'etat", "conseil")),
        (Ministry.CAISSE_DES_DEPOTS_ET_CONSIGNATIONS, ("caisse", "depots")),
        (Ministry.COUR_COMPTES, ("cour", "comptes")),
        # NOTE(review): "armees" is mapped to MAA, the same member used for
        # agriculture above. This presumably should be a dedicated member
        # for the armed-forces ministry - confirm against the Ministry enum.
        (Ministry.MAA, ("armees",)),
    )

    def as_words(text: str) -> str:
        # Punctuation becomes a separator and every word gets a leading
        # space, so searching for " kw" only succeeds at the start of a
        # word ("culture" must not match inside "agriculture", nor "cour"
        # inside "concours"), while suffix variations are still tolerated.
        cleaned = "".join(ch if ch.isalnum() else " " for ch in text)
        return "".join(f" {word}" for word in cleaned.split())

    haystack = as_words(self._normalize(ministry_str))
    best_match: Optional[Ministry] = None
    best_score = 0

    for ministry, keywords in ministry_keywords:
        score = sum(1 for kw in keywords if as_words(kw) in haystack)
        # Strict comparison: the first entry reaching a given score is kept.
        if score > best_score:
            best_match, best_score = ministry, score

    if best_match is None:
        raise InvalidMinistryError(f"Unknown ministry: {ministry_str}")
    return best_match
293 | 309 |
|
294 | 310 | def _map_access_modalities( |
295 | 311 | self, access_mod_list: List[str] |
|
0 commit comments