Skip to content

Commit 6fb38df

Browse files
committed
✨ use rapidfuzz again
1 parent 84f7999 commit 6fb38df

File tree

8 files changed

+123
-22
lines changed

8 files changed

+123
-22
lines changed

Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ defity = "<2"
1414
defusedxml = "<2"
1515
dill = "<2"
1616
ecs-logging = "==2.*"
17-
editdistancek-rs = "==1.*"
1817
elastic-apm = "==6.*"
1918
elastic-enterprise-search = "<8.16"
2019
elasticsearch = {version = "==8.15.*", extras = ["async"]}
@@ -40,6 +39,7 @@ python-geoip-geolite2-yplan = "==2019.1224"
4039
python-geoip-yplan = "==1.*"
4140
pytz = "*"
4241
PyYAML = "==6.*"
42+
rapidfuzz = "==3.*"
4343
redis = {version = "==5.*", extras = ["hiredis"]}
4444
regex = "==2024.9.11"
4545
setproctitle = "==1.*"

Pipfile.lock

Lines changed: 96 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

an_website/hangman_solver/wordgame_solver.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,11 @@
1717

1818
from collections.abc import Collection
1919

20-
from editdistancek_rs import distance
2120
from hangman_solver import Language, read_words_with_length
2221
from typed_stream import Stream
2322

2423
from ..utils.request_handler import APIRequestHandler, HTMLRequestHandler
25-
from ..utils.utils import ModuleInfo
24+
from ..utils.utils import ModuleInfo, bounded_edit_distance
2625

2726

2827
def get_module_info() -> ModuleInfo:
@@ -56,7 +55,9 @@ def find_solutions(word: str, ignore: Collection[str]) -> Stream[str]:
5655
)
5756
)
5857
.exclude(ignore.__contains__)
59-
.filter(lambda test_word: distance(word, test_word, 2) == 1)
58+
.filter(
59+
lambda test_word: bounded_edit_distance(word, test_word, 2) == 1
60+
)
6061
)
6162

6263

an_website/quotes/create.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
from dataclasses import dataclass
1919
from typing import cast
2020

21-
from editdistancek_rs import distance
2221
from tornado.web import HTTPError, MissingArgumentError
2322

2423
from ..utils.data_parsing import parse_args
24+
from ..utils.utils import bounded_edit_distance
2525
from .utils import (
2626
AUTHORS_CACHE,
2727
QUOTES_CACHE,
@@ -117,7 +117,7 @@ async def get_authors(author_name: str) -> list[Author | str]:
117117
*(
118118
author
119119
for author in AUTHORS_CACHE.values()
120-
if distance(
120+
if bounded_edit_distance(
121121
author.name.lower(), author_name_lower, max_distance + 1
122122
)
123123
<= max_distance
@@ -168,7 +168,9 @@ async def get_quotes(quote_str: str) -> list[Quote | str]:
168168
*(
169169
quote
170170
for quote in QUOTES_CACHE.values()
171-
if distance(quote.quote.lower(), lower_quote_str, max_distance + 1)
171+
if bounded_edit_distance(
172+
quote.quote.lower(), lower_quote_str, max_distance + 1
173+
)
172174
<= max_distance
173175
),
174176
fix_quote_str(quote_str),

an_website/utils/utils.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,11 @@
5959
import elasticapm
6060
import regex
6161
from blake3 import blake3
62-
from editdistancek_rs import distance
6362
from elastic_transport import ApiError, TransportError
6463
from elasticsearch import AsyncElasticsearch
6564
from geoip import geolite2 # type: ignore[import-untyped]
6665
from openmoji_dist import VERSION as OPENMOJI_VERSION
66+
from rapidfuzz.distance.Levenshtein import distance
6767
from redis.asyncio import Redis
6868
from tornado.web import HTTPError, RequestHandler
6969
from typed_stream import Stream
@@ -300,6 +300,16 @@ def bool_to_str(val: bool) -> str:
300300
return "sure" if val else "nope"
301301

302302

303+
def bounded_edit_distance(s1: str, s2: str, /, k: int) -> int:
    """Return the edit distance between two strings, capped at k.

    Distances larger than k are reported as k, so k is the
    maximum number returned.
    """
    # With score_cutoff=k the imported distance function reports k + 1
    # once the real distance exceeds k; min() folds that back down to k.
    return min(distance(s1, s2, score_cutoff=k), k)
311+
312+
303313
def country_code_to_flag(code: str) -> str:
304314
"""Convert a two-letter ISO country code to a flag emoji."""
305315
return "".join(chr(ord(char) + 23 * 29 * 191) for char in code.upper())
@@ -526,7 +536,9 @@ def get_close_matches( # based on difflib.get_close_matches
526536
result: list[tuple[float, str]] = []
527537
for possibility in possibilities:
528538
if max_dist := max(word_len, len(possibility)):
529-
dist = distance(possibility, word, 1 + int(cutoff * max_dist))
539+
dist = bounded_edit_distance(
540+
possibility, word, 1 + int(cutoff * max_dist)
541+
)
530542
if (ratio := dist / max_dist) <= cutoff:
531543
bisect.insort(result, (ratio, possibility))
532544
if len(result) > count:

pip-constraints.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ defity==0.5.1
118118
defusedxml==0.7.1
119119
dunamai==1.23.0
120120
ecs-logging==2.2.0
121-
editdistancek-rs==1.0.0
122121
elastic-apm==6.23.0
123122
elastic-enterprise-search==8.11.0
124123
elastic-transport==8.17.0
@@ -148,6 +147,7 @@ pysocks==1.7.1
148147
python-geoip-geolite2-yplan==2019.1224
149148
python-geoip-yplan==1.2
150149
pytz==2024.2
150+
rapidfuzz==3.11.0
151151
redis==5.2.1
152152
regex==2024.9.11
153153
setproctitle==1.3.4

pip-dev-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ defity==0.5.1; python_version >= '3.10'
118118
defusedxml==0.7.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
119119
dunamai==1.23.0; python_version >= '3.5'
120120
ecs-logging==2.2.0; python_version >= '3.6'
121-
editdistancek-rs==1.0.0; python_version >= '3.12'
122121
elastic-apm==6.23.0; python_version >= '3.6' and python_version < '4'
123122
elastic-enterprise-search==8.11.0; python_version >= '3.6'
124123
elastic-transport==8.17.0; python_version >= '3.8'
@@ -148,6 +147,7 @@ pysocks==1.7.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2
148147
python-geoip-geolite2-yplan==2019.1224
149148
python-geoip-yplan==1.2
150149
pytz==2024.2
150+
rapidfuzz==3.11.0; python_version >= '3.9'
151151
redis[hiredis]==5.2.1; python_version >= '3.8'
152152
regex==2024.9.11; python_version >= '3.8'
153153
setproctitle==1.3.4; python_version >= '3.8'

pip-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ defusedxml==0.7.1; python_version >= '2.7' and python_version not in '3.0, 3.1,
1515
dill==0.3.9; python_version >= '3.8'
1616
dunamai==1.23.0; python_version >= '3.5'
1717
ecs-logging==2.2.0; python_version >= '3.6'
18-
editdistancek-rs==1.0.0; python_version >= '3.12'
1918
elastic-apm==6.23.0; python_version >= '3.6' and python_version < '4'
2019
elastic-enterprise-search==8.11.0; python_version >= '3.6'
2120
elastic-transport==8.17.0; python_version >= '3.8'
@@ -50,6 +49,7 @@ python-geoip-geolite2-yplan==2019.1224
5049
python-geoip-yplan==1.2
5150
pytz==2024.2
5251
pyyaml==6.0.2; python_version >= '3.8'
52+
rapidfuzz==3.11.0; python_version >= '3.9'
5353
redis[hiredis]==5.2.1; python_version >= '3.8'
5454
regex==2024.9.11; python_version >= '3.8'
5555
setproctitle==1.3.4; python_version >= '3.8'

0 commit comments

Comments
 (0)