Skip to content

Commit 92dac9c

Browse files
authored
🐛(ingestion-infrastructure) make ministry mapping more robust (#548)
* 🐛(ingestion-infrastructure) make ministry mapping more robust * 📝(ingestion-infrastructure) delete docstring
1 parent 1cbd37f commit 92dac9c

2 files changed

Lines changed: 43 additions & 32 deletions

File tree

src/web/domain/value_objects/ministry.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
1-
"""Ministry value object."""
2-
31
from enum import Enum
42

53

64
class Ministry(Enum):
7-
"""Enumeration of ministry types."""
8-
95
MAA = "MAA"
106
MESRI = "MESRI"
117
MEF = "MEF"
@@ -29,5 +25,4 @@ class Ministry(Enum):
2925
IGN = "IGN"
3026

3127
def __str__(self):
32-
"""Return string representation."""
3328
return self.value

src/web/infrastructure/gateways/ingestion/concours_cleaner.py

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
"""Concours cleaner adapter."""
2-
1+
import unicodedata
32
from datetime import datetime
4-
from typing import List, Optional
3+
from typing import Dict, List, Optional, Tuple
54

65
import polars as pl
76
from django.utils import timezone
@@ -262,34 +261,51 @@ def _map_category(self, category_str: Optional[str]) -> Optional[Category]:
262261
else:
263262
return Category.HORS_CATEGORIE
264263

264+
@staticmethod
265+
def _normalize(text: str) -> str:
266+
nfkd = unicodedata.normalize("NFKD", text)
267+
return nfkd.encode("ascii", "ignore").decode("ascii").lower()
268+
265269
def _map_ministry(self, ministry_str: Optional[str]) -> Ministry:
    """Map a free-text ministry label to a ``Ministry`` member.

    The label is normalized (case, accents, punctuation) and compared,
    word by word, against a keyword list per ministry. The ministry with
    the most matching keywords wins; on a tie the first one declared in
    the table is kept.

    Raises:
        InvalidMinistryError: if the label is empty or matches no keyword.
    """
    if not ministry_str:
        raise InvalidMinistryError("Unknown ministry")

    # Keywords are written already normalized (lowercase, no accents).
    # Each ministry must appear only once: a repeated dict key silently
    # replaces the earlier entry.
    ministry_keywords: Dict[Ministry, Tuple[str, ...]] = {
        Ministry.METEO_FRANCE: ("meteo",),
        Ministry.MC: ("culture",),
        Ministry.MEAE: ("europe", "etrangeres"),
        Ministry.PREMIER_MINISTRE: ("premier",),
        Ministry.MEF: ("economie", "finances"),
        # NOTE(review): "armees" used to sit under a second Ministry.MAA
        # key, which discarded the agriculture keywords. It is kept on MAA
        # to preserve that entry as written; confirm whether the armed
        # forces should map to a different Ministry member.
        Ministry.MAA: ("agriculture", "alimentaire", "armees"),
        Ministry.MTE: ("ecologique", "cohesion"),
        Ministry.MESRI: ("recherche", "enseignement superieur"),
        Ministry.MEN: ("education", "jeunesse"),
        Ministry.MTEI: ("travail", "plein emploi", "insertion"),
        Ministry.MJ: ("justice",),
        Ministry.MSS: ("sante", "solidarites"),
        Ministry.MI: ("interieur",),
        Ministry.CONSEIL_ETAT: ("conseil d'etat", "conseil"),
        Ministry.CAISSE_DES_DEPOTS_ET_CONSIGNATIONS: ("caisse", "depots"),
        Ministry.COUR_COMPTES: ("cour", "comptes"),
    }

    def _as_words(text: str) -> str:
        # Punctuation and separators become spaces *before* normalization,
        # so that characters without an ASCII form (e.g. a typographic
        # apostrophe) split words instead of vanishing and fusing them.
        spaced = "".join(
            " " if unicodedata.category(ch)[0] in "PZ" else ch for ch in text
        )
        # Surrounding spaces let a plain substring test act as a
        # whole-word (or whole-phrase) match.
        return f" {' '.join(self._normalize(spaced).split())} "

    haystack = _as_words(ministry_str)
    best_match: Optional[Ministry] = None
    best_score = 0

    for ministry, keywords in ministry_keywords.items():
        # Whole-word matching: "culture" must not match "agriculture".
        score = sum(1 for kw in keywords if _as_words(kw) in haystack)
        if score > best_score:
            best_score = score
            best_match = ministry

    if best_match is None:
        raise InvalidMinistryError(f"Unknown ministry: {ministry_str}")
    return best_match
293309

294310
def _map_access_modalities(
295311
self, access_mod_list: List[str]

0 commit comments

Comments
 (0)