Commits (showing changes from 12 of 17 commits):

919c957 add optional negative_context support to EntityRecognizer (TheSabari07, Apr 12, 2026)
94b30ec Merge branch 'microsoft:main' into feature/negative-context (TheSabari07, Apr 12, 2026)
dfa2f6b add negative_context support to PatternRecognizer with serialization (TheSabari07, Apr 12, 2026)
27b664b apply negative context penalty in LemmaContextAwareEnhancer (TheSabari07, Apr 12, 2026)
1ae1105 add YAML parsing support for negative_context (TheSabari07, Apr 12, 2026)
505dd8a align default handling for context fields in EntityRecognizer (TheSabari07, Apr 12, 2026)
51447b0 ensure negative_context reduces score correctly in enhancer (TheSabari07, Apr 12, 2026)
0acd355 safely handle negative_context in recognizer loader (TheSabari07, Apr 12, 2026)
48acd01 Merge branch 'main' into feature/negative-context (omri374, Apr 17, 2026)
f66b40e feat: introduce negative_context support across recognizers and conte… (TheSabari07, Apr 19, 2026)
01d9208 test: add comprehensive tests for negative_context feature (TheSabari07, Apr 19, 2026)
53c2a14 feat: add negative_context usage in US SSN recognizer (TheSabari07, Apr 19, 2026)
19a8e32 docs: add negative_context explanation to context tutorial (TheSabari07, Apr 30, 2026)
5d7e2e8 fix: allow disabling negative_context by passing empty list (TheSabari07, Apr 30, 2026)
817517d refactor: move negative_context to end of analyze() parameters (TheSabari07, Apr 30, 2026)
dd25ca5 test: use keyword arguments for analyze() in negative context tests (TheSabari07, Apr 30, 2026)
d6f5216 Merge branch 'main' into feature/negative-context (omri374, May 4, 2026)
24 changes: 19 additions & 5 deletions presidio-analyzer/presidio_analyzer/analyzer_engine.py
Expand Up @@ -157,6 +157,7 @@ def analyze(
return_decision_process: Optional[bool] = False,
ad_hoc_recognizers: Optional[List[EntityRecognizer]] = None,
context: Optional[List[str]] = None,
negative_context: Optional[List[str]] = None,
allow_list: Optional[List[str]] = None,
allow_list_match: Optional[str] = "exact",
regex_flags: Optional[int] = re.DOTALL | re.MULTILINE | re.IGNORECASE,
Comment on lines +150 to 162

Copilot AI (Apr 26, 2026): Adding negative_context between context and allow_list changes the positional-argument order of a public API (AnalyzerEngine.analyze). To avoid breaking callers who pass allow_list/allow_list_match positionally, consider adding the new parameter at the end, or making the parameters after entities keyword-only.

Collaborator: @TheSabari07 please move it to the bottom of the parameter list
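The keyword-only approach the review suggests can be sketched as follows (a toy stand-in, not Presidio's actual signature): a bare `*` in the parameter list forces everything after it to be passed by keyword, so inserting a new parameter later can no longer shift existing positional arguments.

```python
from typing import Dict, List, Optional


def analyze(
    text: str,
    language: str,
    *,  # parameters below are keyword-only; their order can change safely
    context: Optional[List[str]] = None,
    negative_context: Optional[List[str]] = None,
    allow_list: Optional[List[str]] = None,
) -> Dict[str, List[str]]:
    """Toy stand-in for AnalyzerEngine.analyze, illustrating keyword-only params."""
    return {
        "context": context or [],
        "negative_context": negative_context or [],
        "allow_list": allow_list or [],
    }


# Callers must use keywords, so positional calls past `language` fail fast:
result = analyze("My phone number is 212-555-5555", "en", allow_list=["212-555-5555"])
```

With this shape, a caller who previously wrote `analyze(text, lang, None, None, my_allow_list)` gets an immediate TypeError instead of silently binding `my_allow_list` to the wrong parameter.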

Expand All @@ -178,11 +179,17 @@ def analyze(
for this specific request.
:param context: List of context words to enhance confidence score if matched
with the recognized entity's recognizer context
:param negative_context: List of negative context words to reduce confidence
score if matched with the recognized entity's context. Works in addition to
recognizer-level negative context configuration.
:param allow_list: List of words that the user defines as being allowed to keep
in the text
:param allow_list_match: How the allow_list should be interpreted; either as "exact" or as "regex".
- If `regex`, results which match with any regex condition in the allow_list would be allowed and not be returned as potential PII.
- if `exact`, results which exactly match any value in the allow_list would be allowed and not be returned as potential PII.
:param allow_list_match: How the allow_list should be interpreted; either as
"exact" or as "regex".
- If `regex`, results which match with any regex condition in the allow_list
would be allowed and not be returned as potential PII.
- If `exact`, results which exactly match any value in the allow_list would be
allowed and not be returned as potential PII.
:param regex_flags: regex flags to be used for when allow_list_match is "regex"
:param nlp_artifacts: precomputed NlpArtifacts
:return: an array of the found entities in the text
Expand All @@ -197,7 +204,11 @@ def analyze(
analyzer = AnalyzerEngine()

# Call analyzer to get results
results = analyzer.analyze(text='My phone number is 212-555-5555', entities=['PHONE_NUMBER'], language='en')
results = analyzer.analyze(
text='My phone number is 212-555-5555',
entities=['PHONE_NUMBER'],
language='en',
)
print(results)
```

Expand Down Expand Up @@ -245,7 +256,7 @@ def analyze(
results.extend(current_results)

results = self._enhance_using_context(
text, results, nlp_artifacts, recognizers, context
text, results, nlp_artifacts, recognizers, context, negative_context
)

if self.log_decision_process:
Expand Down Expand Up @@ -275,6 +286,7 @@ def _enhance_using_context(
nlp_artifacts: NlpArtifacts,
recognizers: List[EntityRecognizer],
context: Optional[List[str]] = None,
negative_context: Optional[List[str]] = None,
) -> List[RecognizerResult]:
"""
Enhance confidence score using context words.
Expand All @@ -287,6 +299,7 @@ def _enhance_using_context(
accuracy of the context enhancement process
:param recognizers: the list of recognizers
:param context: list of context words
:param negative_context: list of negative context words to reduce confidence
"""
results = []

Expand Down Expand Up @@ -325,6 +338,7 @@ def _enhance_using_context(
nlp_artifacts=nlp_artifacts,
recognizers=recognizers,
context=context,
negative_context=negative_context,
)

return results
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class ContextAwareEnhancer:
:param min_score_with_context_similarity: Minimum confidence score
:param context_prefix_count: how many words before the entity to match context
:param context_suffix_count: how many words after the entity to match context
:param negative_context_penalty: How much to reduce confidence when negative
context words are found. Default 0.3. Applied after positive context boost.
"""

MIN_SCORE = 0
Expand All @@ -31,11 +33,13 @@ def __init__(
min_score_with_context_similarity: float,
context_prefix_count: int,
context_suffix_count: int,
negative_context_penalty: float = 0.3,
):
self.context_similarity_factor = context_similarity_factor
self.min_score_with_context_similarity = min_score_with_context_similarity
self.context_prefix_count = context_prefix_count
self.context_suffix_count = context_suffix_count
self.negative_context_penalty = negative_context_penalty

@abstractmethod
def enhance_using_context(
Expand All @@ -45,6 +49,7 @@ def enhance_using_context(
nlp_artifacts: NlpArtifacts,
recognizers: List[EntityRecognizer],
context: Optional[List[str]] = None,
negative_context: Optional[List[str]] = None,
) -> List[RecognizerResult]:
Comment on lines 31 to 53

Copilot AI (Apr 26, 2026): This change adds a new negative_context keyword argument to ContextAwareEnhancer.enhance_using_context, and AnalyzerEngine now always passes it. Any user-provided custom enhancer implementing the old signature will raise TypeError; consider a backward-compatible call pattern (e.g., only pass negative_context if the enhancer accepts it) or clearly document this as a breaking change.
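One backward-compatible call pattern, sketched here with hypothetical enhancer classes (not Presidio's real ones), is to inspect the enhancer's signature and pass `negative_context` only when the method accepts it:

```python
import inspect


class OldEnhancer:
    """Legacy custom enhancer written against the pre-PR signature."""

    def enhance_using_context(self, results, context=None):
        return ("old", results, context)


class NewEnhancer:
    """Enhancer updated for the new negative_context parameter."""

    def enhance_using_context(self, results, context=None, negative_context=None):
        return ("new", results, context, negative_context)


def call_enhancer(enhancer, results, context=None, negative_context=None):
    """Pass negative_context only if the enhancer's signature accepts it."""
    params = inspect.signature(enhancer.enhance_using_context).parameters
    if "negative_context" in params:
        return enhancer.enhance_using_context(
            results, context=context, negative_context=negative_context
        )
    # Legacy enhancer: drop the new argument instead of raising TypeError.
    return enhancer.enhance_using_context(results, context=context)
```

The trade-off is that legacy enhancers silently ignore negative context; logging a warning in the fallback branch would make the degradation visible.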
"""
Update results in case surrounding words are relevant to the context words.
Expand All @@ -62,5 +67,6 @@ def enhance_using_context(
accuracy of the context enhancement process
:param recognizers: the list of recognizers
:param context: list of context words
:param negative_context: list of negative context words to reduce confidence
"""
return raw_results
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ class LemmaContextAwareEnhancer(ContextAwareEnhancer):
"""
A class representing lemma-based context-aware enhancer logic.

Context words might enhance confidence score of a recognized entity,
Context words might enhance or reduce the confidence score of a recognized entity:
- Positive context: boosts confidence (e.g., "social" for SSN)
- Negative context: reduces confidence (e.g., "test" for SSN)

LemmaContextAwareEnhancer is a lemma-based implementation of context-aware logic;
it compares the spaCy lemmas of the words surrounding the matched entity to the
given context and the recognizer's context words,
Expand All @@ -30,6 +33,8 @@ class LemmaContextAwareEnhancer(ContextAwareEnhancer):
- "whole_word": Match context words only as whole words
(e.g., 'lic' matches 'lic' but not 'duplicate').
Prevents false positives.
:param negative_context_penalty: How much to reduce confidence when negative
context words are found. Default 0.3. Applied after positive context boost.
"""

def __init__(
Expand All @@ -39,12 +44,14 @@ def __init__(
context_prefix_count: int = 5,
context_suffix_count: int = 0,
context_matching_mode: str = "substring",
negative_context_penalty: float = 0.3,
):
super().__init__(
context_similarity_factor=context_similarity_factor,
min_score_with_context_similarity=min_score_with_context_similarity,
context_prefix_count=context_prefix_count,
context_suffix_count=context_suffix_count,
negative_context_penalty=negative_context_penalty,
)
if context_matching_mode not in ["whole_word", "substring"]:
raise ValueError(
Expand All @@ -60,6 +67,7 @@ def enhance_using_context(
nlp_artifacts: NlpArtifacts,
recognizers: List[EntityRecognizer],
context: Optional[List[str]] = None,
(omri374 marked this conversation as resolved.)
negative_context: Optional[List[str]] = None,
) -> List[RecognizerResult]:
"""
Update results in case the lemmas of surrounding words or input context
Expand All @@ -78,6 +86,7 @@ def enhance_using_context(
accuracy of the context enhancement process
:param recognizers: the list of recognizers
:param context: list of context words
:param negative_context: list of negative context words to reduce confidence
""" # noqa: D205,D400

# create a deep copy of the results object, so we can manipulate it
Expand All @@ -92,6 +101,12 @@ def enhance_using_context(
else:
context = [word.lower() for word in context]

# Create an empty list if None, or lowercase all negative context words in the list
if not negative_context:
negative_context = []
else:
negative_context = [word.lower() for word in negative_context]

# Sanity
if nlp_artifacts is None:
logger.warning("NLP artifacts were not provided")
Expand Down Expand Up @@ -119,21 +134,14 @@ def enhance_using_context(
continue

# skip recognizer result if the recognizer doesn't support
# context enhancement
if not recognizer.context:
# context enhancement (either positive or negative)
if not (recognizer.context or recognizer.negative_context):
logger.debug(
"recognizer '%s' does not support context enhancement",
recognizer.name,
)
continue
(omri374 marked this conversation as resolved.)

# skip context enhancement if already boosted by recognizer level
(omri374 marked this conversation as resolved.)
if result.recognition_metadata.get(
RecognizerResult.IS_SCORE_ENHANCED_BY_CONTEXT_KEY
):
logger.debug("result score already boosted, skipping")
continue

# extract lemmatized context from the surrounding of the match
word = text[result.start : result.end]

Expand All @@ -144,20 +152,55 @@ def enhance_using_context(
# combine other sources of context with surrounding words
surrounding_words.extend(context)

supportive_context_word = self._find_supportive_word_in_context(
surrounding_words, recognizer.context, self.context_matching_mode
# Check if result was already boosted by recognizer to avoid double boost
already_boosted = result.recognition_metadata.get(
RecognizerResult.IS_SCORE_ENHANCED_BY_CONTEXT_KEY
)
if supportive_context_word != "":
result.score += self.context_similarity_factor
result.score = max(result.score, self.min_score_with_context_similarity)
result.score = min(result.score, ContextAwareEnhancer.MAX_SCORE)

# Update the explainability object with context information
# helped to improve the score
result.analysis_explanation.set_supportive_context_word(
supportive_context_word

# Apply positive context only if not already boosted
if not already_boosted:
supportive_context_word = self._find_supportive_word_in_context(
surrounding_words, recognizer.context, self.context_matching_mode
)
if supportive_context_word != "":
result.score += self.context_similarity_factor
result.score = max(
result.score, self.min_score_with_context_similarity
)
result.score = min(result.score, ContextAwareEnhancer.MAX_SCORE)

# Update the explainability object with context information
# helped to improve the score
result.analysis_explanation.set_supportive_context_word(
supportive_context_word
)
result.analysis_explanation.set_improved_score(result.score)

# Apply the negative context penalty if the recognizer defines negative_context
# or if negative_context is provided at runtime. This runs independently of the
# positive boost, so negative context is always taken into account.
effective_negative_context = []
if recognizer.negative_context:
effective_negative_context.extend(recognizer.negative_context)
if negative_context:
effective_negative_context.extend(negative_context)

if effective_negative_context:
negative_context_word = self._find_supportive_word_in_context(
surrounding_words,
effective_negative_context,
self.context_matching_mode,
)
result.analysis_explanation.set_improved_score(result.score)
if negative_context_word != "":
result.score -= self.negative_context_penalty
result.score = max(result.score, ContextAwareEnhancer.MIN_SCORE)
logger.debug(
"Applied negative context penalty for word '%s'",
negative_context_word,
)
# Update explanation to reflect the final score
# after negative penalty
result.analysis_explanation.set_improved_score(result.score)
return results

@staticmethod
Expand Down
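The scoring order implemented above (positive boost first, skipped when the recognizer already boosted the result, then the negative penalty, each step clamped) can be condensed into a standalone sketch. The 0.35 and 0.4 constants are assumed from Presidio's enhancer defaults; 0.3 is the penalty default introduced in this diff.

```python
MIN_SCORE = 0.0
MAX_SCORE = 1.0
CONTEXT_SIMILARITY_FACTOR = 0.35          # assumed enhancer default
MIN_SCORE_WITH_CONTEXT_SIMILARITY = 0.4   # assumed enhancer default
NEGATIVE_CONTEXT_PENALTY = 0.3            # default introduced in this PR


def adjust_score(score, has_positive, has_negative, already_boosted=False):
    """Mirror the enhancer's adjustment order on a single confidence score."""
    # Positive boost, skipped if the recognizer already applied its own boost.
    if has_positive and not already_boosted:
        score += CONTEXT_SIMILARITY_FACTOR
        score = max(score, MIN_SCORE_WITH_CONTEXT_SIMILARITY)
        score = min(score, MAX_SCORE)
    # Negative penalty is applied regardless of the boost, clamped at MIN_SCORE.
    if has_negative:
        score -= NEGATIVE_CONTEXT_PENALTY
        score = max(score, MIN_SCORE)
    return score
```

Because the penalty runs after the boost, a result surrounded by both "social" and "test" lands between the boosted and the raw score rather than being fully suppressed.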
6 changes: 5 additions & 1 deletion presidio-analyzer/presidio_analyzer/entity_recognizer.py
Expand Up @@ -29,6 +29,8 @@ class EntityRecognizer:
:param version: the recognizer current version
:param context: a list of words which can help boost confidence score
when they appear in context of the matched entity
:param negative_context: a list of words which can reduce confidence score
when they appear in context of the matched entity
"""

MIN_SCORE = 0
Expand All @@ -41,6 +43,7 @@ def __init__(
supported_language: str = "en",
version: str = "0.0.1",
context: Optional[List[str]] = None,
negative_context: Optional[List[str]] = None,
):
self.supported_entities = supported_entities

Expand All @@ -54,7 +57,8 @@ def __init__(
self.supported_language = supported_language
self.version = version
self.is_loaded = False
self.context = context if context else []
self.context = context if context is not None else []
self.negative_context = negative_context if negative_context is not None else []

self.load()
logger.info("Loaded recognizer: %s", self.name)
Expand Down
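The `is not None` idiom adopted in this hunk is what makes the empty-list opt-out work; a minimal sketch with hypothetical names:

```python
DEFAULT_NEGATIVE_CONTEXT = ["test", "example", "dummy"]


def resolve_negative_context(negative_context=None):
    """Fall back to defaults only when the caller passed nothing at all."""
    # A plain `if negative_context` check would treat [] the same as None,
    # silently re-enabling the defaults the caller tried to turn off.
    return (
        negative_context
        if negative_context is not None
        else DEFAULT_NEGATIVE_CONTEXT
    )
```

Passing `[]` therefore disables negative context entirely, while omitting the argument keeps the class-level defaults.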
10 changes: 10 additions & 0 deletions presidio-analyzer/presidio_analyzer/pattern_recognizer.py
Expand Up @@ -29,6 +29,8 @@ class PatternRecognizer(LocalRecognizer):
:param deny_list: A list of words to detect,
in case our recognizer uses a predefined list of words (deny list)
:param context: list of context words
:param negative_context: a list of words which can reduce confidence score
based on the context in which the entity appears
:param deny_list_score: confidence score for a term
identified using a deny-list
:param global_regex_flags: regex flags to be used in regex matching,
Expand All @@ -43,6 +45,7 @@ def __init__(
patterns: List[Pattern] = None,
deny_list: List[str] = None,
context: List[str] = None,
negative_context: Optional[List[str]] = None,
deny_list_score: float = 1.0,
global_regex_flags: Optional[int] = re.DOTALL | re.MULTILINE | re.IGNORECASE,
version: str = "0.0.1",
Expand All @@ -60,6 +63,8 @@ def __init__(
supported_entities=[supported_entity],
supported_language=supported_language,
name=name,
context=context,
negative_context=negative_context,
version=version,
)
if patterns is None:
Expand Down Expand Up @@ -273,6 +278,7 @@ def to_dict(self) -> Dict:
return_dict["patterns"] = [pat.to_dict() for pat in self.patterns]
return_dict["deny_list"] = self.deny_list
return_dict["context"] = self.context
return_dict["negative_context"] = self.negative_context
return_dict["supported_entity"] = return_dict["supported_entities"][0]
del return_dict["supported_entities"]

Expand All @@ -289,6 +295,10 @@ def from_dict(cls, entity_recognizer_dict: Dict) -> "PatternRecognizer":
patterns_list = [Pattern.from_dict(pat) for pat in patterns]
entity_recognizer_dict["patterns"] = patterns_list

# Ensure negative_context is safely loaded with a default
if "negative_context" not in entity_recognizer_dict:
entity_recognizer_dict["negative_context"] = None

# Transform supported_entities (plural) to supported_entity (singular)
# PatternRecognizer only accepts supported_entity (singular)
if (
Expand Down
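The serialization guard can be illustrated with a stripped-down recognizer (a hypothetical class, not Presidio's): `to_dict` always writes `negative_context`, and `from_dict` defaults the key so payloads serialized before this PR still load.

```python
from typing import Dict, List, Optional


class TinyRecognizer:
    """Minimal stand-in for a recognizer that serializes negative_context."""

    def __init__(self, supported_entity: str,
                 negative_context: Optional[List[str]] = None):
        self.supported_entity = supported_entity
        self.negative_context = (
            negative_context if negative_context is not None else []
        )

    def to_dict(self) -> Dict:
        return {
            "supported_entity": self.supported_entity,
            "negative_context": self.negative_context,
        }

    @classmethod
    def from_dict(cls, d: Dict) -> "TinyRecognizer":
        # Older serialized recognizers predate negative_context; default safely.
        d.setdefault("negative_context", None)
        return cls(d["supported_entity"], d["negative_context"])
```

The `setdefault` mirrors the `if "negative_context" not in entity_recognizer_dict` check in the diff; either way, a missing key round-trips to an empty list rather than a KeyError.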
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class UsSsnRecognizer(PatternRecognizer):

:param patterns: List of patterns to be used by this recognizer
:param context: List of context words to increase confidence in detection
:param negative_context: List of context words to decrease confidence in detection
:param supported_language: Language this recognizer supports
:param supported_entity: The entity this recognizer can detect
"""
Expand All @@ -32,20 +33,31 @@ class UsSsnRecognizer(PatternRecognizer):
"ssid",
]

NEGATIVE_CONTEXT = [
"test",
"example",
"dummy",
]

def __init__(
self,
patterns: Optional[List[Pattern]] = None,
context: Optional[List[str]] = None,
negative_context: Optional[List[str]] = None,
supported_language: str = "en",
supported_entity: str = "US_SSN",
name: Optional[str] = None,
):
patterns = patterns if patterns else self.PATTERNS
context = context if context else self.CONTEXT
negative_context = (
negative_context if negative_context else self.NEGATIVE_CONTEXT
Copilot AI (Apr 26, 2026): negative_context defaults can't be disabled: passing an explicit empty list will be treated as falsy and replaced with NEGATIVE_CONTEXT. Use an `is None` check (similar to how EntityRecognizer handles defaults) so callers can opt out by passing an empty list.

Suggested change:
patterns = patterns if patterns is not None else self.PATTERNS
context = context if context is not None else self.CONTEXT
negative_context = (
    negative_context
    if negative_context is not None
    else self.NEGATIVE_CONTEXT

Collaborator: @TheSabari07 please change this to make sure a user can disable negative context by passing an empty list.
)
super().__init__(
supported_entity=supported_entity,
patterns=patterns,
context=context,
negative_context=negative_context,
supported_language=supported_language,
name=name,
)
Expand Down