Skip to content

Commit f243ddd

Browse files
authored
Add min_candidate_length parameter to PhoneNumberMatcher (#319)
1 parent 4212443 commit f243ddd

File tree

3 files changed

+41
-7
lines changed

3 files changed

+41
-7
lines changed

python/phonenumbers/phonenumbermatcher.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,8 @@ class PhoneNumberMatcher(object):
456456
_DONE = 2
457457

458458
def __init__(self, text, region,
459-
leniency=Leniency.VALID, max_tries=65535):
459+
leniency=Leniency.VALID, max_tries=65535,
460+
min_candidate_length=1):
460461
"""Creates a new instance.
461462
462463
Arguments:
@@ -471,6 +472,9 @@ def __init__(self, text, region,
471472
max_tries -- The maximum number of invalid numbers to try before
472473
giving up on the text. This is to cover degenerate cases where
473474
the text has a lot of false positives in it. Must be >= 0.
475+
min_candidate_length -- The minimum length (in characters) of a candidate phone number.
476+
Can be used to quickly skip candidates that are too short to be valid,
477+
depending on your use-case needs.
474478
"""
475479
if leniency is None:
476480
raise ValueError("Need a leniency value")
@@ -487,6 +491,8 @@ def __init__(self, text, region,
487491
self.leniency = leniency
488492
# The maximum number of retries after matching an invalid number.
489493
self._max_tries = int(max_tries)
494+
# The minimum length of a candidate phone number.
495+
self._min_candidate_length = int(min_candidate_length)
490496
# The iteration tristate.
491497
self._state = PhoneNumberMatcher._NOT_READY
492498
# The last successful match, None unless in state _READY
@@ -513,13 +519,18 @@ def _find(self, index):
513519
# 123 45 67 / 68).
514520
candidate = self._trim_after_first_match(_SECOND_NUMBER_START_PATTERN,
515521
candidate)
522+
candidate_len = len(candidate)
523+
524+
# UPSTREAM DIVERGENCE: min_candidate_length is a Python-specific
525+
# feature, not present in the upstream Java version.
526+
if candidate_len >= self._min_candidate_length:
527+
match = self._extract_match(candidate, start)
528+
if match is not None:
529+
return match
530+
self._max_tries -= 1
516531

517-
match = self._extract_match(candidate, start)
518-
if match is not None:
519-
return match
520532
# Move along
521-
index = start + len(candidate)
522-
self._max_tries -= 1
533+
index = start + candidate_len
523534
match = _PATTERN.search(self.text, index)
524535
return None
525536

python/phonenumbers/phonenumbermatcher.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,11 @@ class PhoneNumberMatcher:
5656
preferred_region: str | None
5757
leniency: int
5858
_max_tries: int
59+
_min_candidate_length: int
5960
_state: int
6061
_last_match: PhoneNumberMatch | None
6162
_search_index: int
62-
def __init__(self, text: str | None, region: str | None, leniency: int = ..., max_tries: int = ...) -> None: ...
63+
def __init__(self, text: str | None, region: str | None, leniency: int = ..., max_tries: int = ..., min_candidate_length: int = ...) -> None: ...
6364
def _find(self, index: int) -> PhoneNumberMatch | None: ...
6465
def _trim_after_first_match(self, pattern: Pattern[str], candidate: str) -> str: ...
6566
@classmethod

python/tests/phonenumbermatchertest.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,3 +988,25 @@ def testInternals(self):
988988
num_format = NumberFormat(pattern="(\\d{3})(\\d{3})(\\d{4})", format="\\1-\\2-\\3")
989989
self.assertEqual(["650", "253", "0000"],
990990
_get_national_number_groups(us_number, num_format))
991+
992+
def testMinCandidateLengthFiltersShortNumbers(self):
993+
# Python-specific test: min_candidate_length parameter
994+
text = "Call +1800-123-4567 or 415-666-7777 for help"
995+
# With min_candidate_length=13, the 12-character candidate "415-666-7777" should be skipped
996+
matcher = PhoneNumberMatcher(text, "US", Leniency.POSSIBLE, 65535, min_candidate_length=13)
997+
match = matcher.next() if matcher.has_next() else None
998+
self.assertIsNotNone(match)
999+
self.assertEqual("+1800-123-4567", match.raw_string)
1000+
# Should be no more matches
1001+
self.assertFalse(matcher.has_next())
1002+
1003+
def testMinCandidateLengthDoesNotConsumeMaxTries(self):
1004+
# Python-specific test: skipped short candidates don't consume max_tries
1005+
# Text with 5 short candidates followed by one valid number
1006+
text = "Try 123, 456, 789, 012, 345, then call 415-666-7777"
1007+
# With max_tries=1, if short candidates consumed tries, we'd fail to find the valid number
1008+
# But with min_candidate_length=10, short candidates are skipped without consuming tries
1009+
matcher = PhoneNumberMatcher(text, "US", Leniency.VALID, max_tries=1, min_candidate_length=10)
1010+
match = matcher.next() if matcher.has_next() else None
1011+
self.assertIsNotNone(match)
1012+
self.assertEqual("415-666-7777", match.raw_string)

0 commit comments

Comments
 (0)