@@ -139,7 +139,37 @@ def _get_full_conf_path(
139
139
@staticmethod
140
140
def _validate_yaml_config_format (nlp_configuration : Dict ) -> None :
141
141
"""Validate the YAML configuration file format."""
142
- required_fields = ["nlp_engine_name" , "ner_model_configuration" , "models" ]
143
- for field in required_fields :
144
- if field not in nlp_configuration :
145
- raise ValueError (f"Configuration file is missing '{ field } '." )
142
+ logger = logging .getLogger ("presidio-analyzer" )
143
+
144
+ for key in ("nlp_engine_name" , "models" ):
145
+ if key not in nlp_configuration :
146
+ raise ValueError (f"Configuration file is missing '{ key } '." )
147
+
148
+ if nlp_configuration .get ("ner_model_configuration" ):
149
+ return
150
+
151
+ cfg_langs = {
152
+ str (l ).lower ()
153
+ for l in nlp_configuration .get ("supported_languages" , []) or []
154
+ }
155
+
156
+ recog_langs = {
157
+ str (l ).lower ()
158
+ for l in (
159
+ nlp_configuration .get ("recognizer_registry" , {})
160
+ .get ("supported_languages" , [])
161
+ or []
162
+ )
163
+ }
164
+
165
+ requested_langs = cfg_langs | recog_langs
166
+ english_only = not requested_langs or requested_langs == {"en" }
167
+
168
+ if english_only :
169
+ logger .warning ("ner_model_configuration is missing, Default English configuration will be used." )
170
+ else :
171
+ raise ValueError (
172
+ "Configuration file is missing 'ner_model_configuration', "
173
+ "which is required when requested languages are not only English. "
174
+ f"Detected languages: { sorted (requested_langs )} "
175
+ )
0 commit comments