22
33from presidio_anonymizer import AnonymizerEngine
44
5+ from llm_guard .exception import LLMGuardValidationError
56from llm_guard .input_scanners .anonymize import (
67 ALL_SUPPORTED_LANGUAGES ,
78 DEFAULT_ENTITY_TYPES ,
@@ -42,6 +43,7 @@ def __init__(
4243 recognizer_conf : NERConfig | None = None ,
4344 threshold : float = 0.5 ,
4445 use_onnx : bool = False ,
46+ language : str = "en" ,
4547 ) -> None :
4648 """
4749 Initializes an instance of the Sensitive class.
@@ -55,6 +57,11 @@ def __init__(
5557 threshold (float): Acceptance threshold. Default is 0.5.
5658 use_onnx (bool): Use ONNX model for inference. Default is False.
5759 """
60+ if language not in ALL_SUPPORTED_LANGUAGES :
61+ raise LLMGuardValidationError (
62+ f"Language must be in the list of allowed: { ALL_SUPPORTED_LANGUAGES } "
63+ )
64+
5865 if not entity_types :
5966 LOGGER .debug (
6067 "No entity types provided, using default" , default_entity_types = DEFAULT_ENTITY_TYPES
@@ -74,7 +81,10 @@ def __init__(
7481 use_onnx = use_onnx ,
7582 )
7683 self ._analyzer = get_analyzer (
77- transformers_recognizer , get_regex_patterns (regex_patterns ), [], ALL_SUPPORTED_LANGUAGES
84+ transformers_recognizer ,
85+ get_regex_patterns (regex_patterns ),
86+ [],
87+ list (set (["en" , language ])),
7888 )
7989 self ._anonymizer = AnonymizerEngine ()
8090
0 commit comments