Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ All notable changes to this project will be documented in this file.

## [unreleased]


### Analyzer
#### Added
- Swedish PII recognizers for `SE_PERSONNUMMER` to identify Swedish Personal ID Numbers using pattern match and checksum. The recognizer also supports Swedish coordination numbers (samordningsnummer), issued to individuals who are not registered residents in Sweden but require identification. All disabled by default.

- German PII recognizers for `DE_TAX_ID` (Steueridentifikationsnummer, §§ 139a–139e AO, ISO 7064 Mod 11,10 checksum), `DE_TAX_NUMBER` (Steuernummer, § 139a AO, ELSTER and slash formats), `DE_PASSPORT` (Reisepassnummer, PassG § 4, ICAO Doc 9303), `DE_ID_CARD` (Personalausweisnummer, PAuswG), `DE_SOCIAL_SECURITY` (Rentenversicherungsnummer, § 147 SGB VI, DRV checksum), `DE_HEALTH_INSURANCE` (Krankenversicherungsnummer/KVNR, § 290 SGB V, GKV checksum), `DE_KFZ` (KFZ-Kennzeichen, FZV § 8), `DE_HANDELSREGISTER` (Handelsregisternummer HRA/HRB, §§ 9/14 HGB), and `DE_PLZ` (Postleitzahl, very low base confidence, context-only). All disabled by default.

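- Swedish PII recognizer for `SE_ORGANISATIONSNUMMER` to identify Swedish organisation numbers (Organisationsnummer), the ID number issued to all Swedish organisations, using pattern match and checksum. Disabled by default.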

## [2.2.362] - 2026-03-15
### General
#### Added
Expand Down
1 change: 1 addition & 0 deletions docs/supported_entities.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ For more information, refer to the [adding new recognizers documentation](analyz
### Sweden
| FieldType | Description | Detection Method |
|------------|---------------------------------------------------------------------------------------------------------|------------------------------------------|
| SE_ORGANISATIONSNUMMER | The Swedish Organisation Number (Organisationsnummer) is a unique 10-digit number issued to all Swedish organisations. | Pattern match, context, and checksum. |
| SE_PERSONNUMMER | The Swedish Personal ID Number is a unique 10/12-digit number issued to all Swedish residents. The recognizer also supports Samordningsnummer (coordination numbers) issued to individuals who are not (yet) registered residents but need a Swedish identifier (e.g., temporary workers, students). | Pattern match, context, and checksum. |

### Thai
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,12 @@ recognizers:
type: predefined
enabled: false

- name: SeOrganisationsnummerRecognizer
supported_languages:
- sv
type: predefined
enabled: false

- name: SePersonnummerRecognizer
supported_languages:
- sv
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@
from .country_specific.spain.es_nif_recognizer import EsNifRecognizer

# Sweden recognizers
from .country_specific.sweden.se_organisationsnummer_recognizer import (
SeOrganisationsnummerRecognizer,
)
from .country_specific.sweden.se_personnummer_recognizer import SePersonnummerRecognizer

# Thai recognizers
Expand Down Expand Up @@ -213,6 +216,7 @@
"KrRrnRecognizer",
"KrDriverLicenseRecognizer",
"KrFrnRecognizer",
"SeOrganisationsnummerRecognizer",
"ThTninRecognizer",
"SePersonnummerRecognizer",
"LangExtractRecognizer",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Sweden-specific recognizers."""

from .se_organisationsnummer_recognizer import SeOrganisationsnummerRecognizer
from .se_personnummer_recognizer import SePersonnummerRecognizer

# Names re-exported by the Sweden recognizers package.
__all__ = ["SeOrganisationsnummerRecognizer", "SePersonnummerRecognizer"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
"""Swedish Organisationsnummer recognizer."""

from __future__ import annotations

from typing import List, Optional

from presidio_analyzer import Pattern, PatternRecognizer


class SeOrganisationsnummerRecognizer(PatternRecognizer):
    """Recognize and validate Swedish organisation numbers.

    A candidate is accepted only when all of the following hold:

    * it contains exactly 10 digits, written ``NNNNNNXXXX`` or
      ``NNNNNN-XXXX`` (the hyphen is optional);
    * its third digit is 2 or greater, which keeps date-shaped personal
      numbers (month digit 0 or 1 in that position) from validating;
    * its last digit is a valid Luhn check digit for the first nine.

    :param patterns: Patterns to use; falls back to ``PATTERNS`` when
        ``None`` or empty.
    :param context: Context words handed to the base recognizer; falls
        back to ``CONTEXT`` when ``None`` or empty.
    :param supported_language: Language code this recognizer is
        registered for.
    :param supported_entity: Entity name reported for matches.
    :param name: Optional recognizer name forwarded to the base class.
    """

    PATTERNS = [
        # Word-bounded form: the number stands on its own in the text.
        Pattern(
            "Swedish Organisationsnummer (Medium)",
            r"\b\d{6}[-]?\d{4}\b",
            0.6,
        ),
        # Same shape without word boundaries, so it can also match when
        # the digits are glued to neighbouring word characters.
        Pattern(
            "Swedish Organisationsnummer (Weak)",
            r"\d{6}[-]?\d{4}",
            0.2,
        ),
    ]

    CONTEXT = [
        "organisationsnummer",
        "orgnr",
        "org nr",
        "företagsnummer",
    ]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "sv",
        supported_entity: str = "SE_ORGANISATIONSNUMMER",
        name: Optional[str] = None,
    ):
        """Set up the recognizer, using class defaults for missing args."""
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT

        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns,
            context=context,
            supported_language=supported_language,
            name=name,
        )

    # ---------------------------------------------------------
    # Helpers
    # ---------------------------------------------------------
    @staticmethod
    def _numeric_part(orgnr: str) -> str:
        """Return ``orgnr`` with every non-digit character removed."""
        return "".join(filter(str.isdigit, orgnr))

    @staticmethod
    def _is_luhn_valid(number: str) -> bool:
        """Check whether the last digit is the Luhn check digit.

        :param number: String that should consist of decimal digits only.
        :return: ``True`` when the Luhn sum of all digits is a multiple
            of 10; ``False`` otherwise, including for empty input or
            input holding characters that are not decimal digits.
        """
        # ``str.isdecimal`` is False for "" and for characters that
        # ``int()`` cannot parse (e.g. superscript digits that
        # ``str.isdigit`` lets through), so malformed input is rejected
        # instead of raising IndexError / ValueError.
        if not number.isdecimal():
            return False

        *payload, check_digit = (int(char) for char in number)

        total = check_digit
        # Walk leftwards from the digit next to the check digit and
        # double every second digit, starting with that first one.
        for position, digit in enumerate(reversed(payload)):
            if position % 2 == 0:
                digit *= 2
                if digit > 9:
                    # Same as summing the two digits of the product.
                    digit -= 9
            total += digit

        return total % 10 == 0

    @staticmethod
    def _has_valid_third_digit(number: str) -> bool:
        """Return ``True`` when the third character is a digit >= 2.

        Returns ``False`` when ``number`` is shorter than three
        characters or the third character is not a digit.
        """
        try:
            return int(number[2]) >= 2
        except (ValueError, IndexError):
            return False

    # ---------------------------------------------------------
    # Validation
    # ---------------------------------------------------------
    def validate_result(self, pattern_text: str) -> Optional[bool]:
        """Validate a regex match as an organisation number.

        :param pattern_text: Text matched by one of the patterns.
        :return: ``True`` when the digits of ``pattern_text`` number
            exactly ten, the third digit is at least 2 and the Luhn
            check passes; ``False`` otherwise.
        """
        num = self._numeric_part(pattern_text)

        if len(num) != 10:
            return False

        # Rule: third digit >= 2
        if not self._has_valid_third_digit(num):
            return False

        # Luhn check
        return self._is_luhn_valid(num)
71 changes: 71 additions & 0 deletions presidio-analyzer/tests/test_se_organisationsnummer_recognizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import pytest

from tests import assert_result
from presidio_analyzer.predefined_recognizers import SeOrganisationsnummerRecognizer


@pytest.fixture(scope="module")
def recognizer():
    """Provide a default-configured SeOrganisationsnummerRecognizer."""
    instance = SeOrganisationsnummerRecognizer()
    return instance


@pytest.fixture(scope="module")
def entities():
    """Provide the entity names passed to the recognizer's analyze call."""
    requested = ["SE_ORGANISATIONSNUMMER"]
    return requested


@pytest.mark.parametrize(
    "text, expected_len, expected_positions",
    [
        # Valid Swedish organisationsnummer.
        (
            "212000-0142",
            1,
            ((0, 11),),
        ),
        (
            "Our company identity code is: 212000-0142. Thank you.",
            1,
            ((30, 41),),
        ),
        (
            "2120000142",
            1,
            ((0, 10),),
        ),
        (
            "556703-7485",
            1,
            ((0, 11),),
        ),
        (
            "5567037485",
            1,
            ((0, 10),),
        ),
        (
            "556703-7485 är vårt orgnummer.",
            1,
            ((0, 11),),
        ),
        (
            "556703-7485 tillhör vårt företag.",
            1,
            ((0, 11),),
        ),
        # Invalid input: nothing may be reported.
        # 12-digit personal-number shapes; the embedded 10-digit part
        # fails the third-digit rule.
        ("19000309-3393", 0, ()),
        ("19001309-2393", 0, ()),
        # Non-digit characters break the pattern itself.
        ("55670x-7485", 0, ()),
        ("556703-7r85", 0, ()),
        # Well-formed shape, but the check digit is wrong (Luhn fails).
        ("556703-7486", 0, ()),
        # Well-formed shape, but the third digit is below 2.
        ("551703-7485", 0, ()),
    ],
)
def test_when_all_swedish_organisationsnummer_then_succeed(
    text, expected_len, expected_positions, recognizer, entities, max_score
):
    """Check match count, span and score for valid and invalid numbers."""
    results = recognizer.analyze(text, entities)
    assert len(results) == expected_len
    for res, (st_pos, fn_pos) in zip(results, expected_positions):
        assert_result(res, entities[0], st_pos, fn_pos, max_score)