Skip to content

Commit ca9a0f4

Browse files
committed
CR fixes
1 parent 4f8479d commit ca9a0f4

File tree

3 files changed

+209
-101
lines changed

3 files changed

+209
-101
lines changed

presidio-analyzer/presidio_analyzer/nlp_engine/nlp_engine_provider.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,37 @@ def _get_full_conf_path(
139139
@staticmethod
140140
def _validate_yaml_config_format(nlp_configuration: Dict) -> None:
141141
"""Validate the YAML configuration file format."""
142-
required_fields = ["nlp_engine_name", "ner_model_configuration", "models"]
143-
for field in required_fields:
144-
if field not in nlp_configuration:
145-
raise ValueError(f"Configuration file is missing '{field}'.")
142+
logger = logging.getLogger("presidio-analyzer")
143+
144+
for key in ("nlp_engine_name", "models"):
145+
if key not in nlp_configuration:
146+
raise ValueError(f"Configuration file is missing '{key}'.")
147+
148+
if nlp_configuration.get("ner_model_configuration"):
149+
return
150+
151+
cfg_langs = {
152+
str(l).lower()
153+
for l in nlp_configuration.get("supported_languages", []) or []
154+
}
155+
156+
recog_langs = {
157+
str(l).lower()
158+
for l in (
159+
nlp_configuration.get("recognizer_registry", {})
160+
.get("supported_languages", [])
161+
or []
162+
)
163+
}
164+
165+
requested_langs = cfg_langs | recog_langs
166+
english_only = not requested_langs or requested_langs == {"en"}
167+
168+
if english_only:
169+
logger.warning("ner_model_configuration is missing, Default English configuration will be used.")
170+
else:
171+
raise ValueError(
172+
"Configuration file is missing 'ner_model_configuration', "
173+
"which is required when requested languages are not only English. "
174+
f"Detected languages: {sorted(requested_langs)}"
175+
)

presidio-analyzer/tests/conf/test_stanza.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ models:
33
-
44
lang_code: en
55
model_name: en
6-
ner_model_configuration:

0 commit comments

Comments
 (0)