Skip to content

Commit 134c441

Browse files
committed
fix: replace fast jieba dependency
1 parent 00fc8b2 commit 134c441

6 files changed

Lines changed: 68 additions & 14 deletions

File tree

app/agent/tools/impl/query_transfer_history.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ async def run(self, title: Optional[str] = None,
6969
async with AsyncSessionFactory() as db:
7070
# 处理标题搜索
7171
if title:
72-
# 使用 fast-jieba 分词处理标题
72+
# 使用统一分词封装处理标题,便于替换底层实现
7373
words = jieba_cut(title, HMM=False)
7474
title_search = "%".join(words)
7575
# 查询记录

app/utils/jieba.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
"""中文分词工具。"""
22

3-
from fast_jieba import cut as fast_jieba_cut
3+
from jieba_next import cut as jieba_next_cut
44

55

66
def cut(text: str, HMM: bool = True, cut_all: bool = False) -> list[str]:
    """
    Segment Chinese text with jieba-next, keeping jieba.cut's common keyword names.

    Args:
        text: The text to segment.
        HMM: Forwarded unchanged to the underlying ``cut`` as ``HMM``.
        cut_all: Forwarded unchanged to the underlying ``cut`` as ``cut_all``.

    Returns:
        The segments produced by jieba-next, materialized into a list.
    """
    segments = jieba_next_cut(text, HMM=HMM, cut_all=cut_all)
    # Callers join and test membership on the result, so hand back a real list.
    return list(segments)

jieba/__init__.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,44 @@
1+
"""jieba 兼容入口。"""
2+
3+
from collections.abc import Iterator
4+
from typing import Any
5+
6+
import jieba_next as _jieba_next
7+
from jieba_next import cut_for_search as _cut_for_search
8+
from jieba_next import lcut as _lcut
9+
from jieba_next import lcut_for_search as _lcut_for_search
10+
11+
12+
def cut(
    sentence: str,
    cut_all: bool = False,
    HMM: bool = True,
    use_paddle: bool = False,
) -> Iterator[str]:
    """
    Legacy-compatible ``jieba.cut`` entry point that delegates to jieba-next.

    Args:
        sentence: The text to segment.
        cut_all: Forwarded unchanged to ``jieba_next.cut``.
        HMM: Forwarded unchanged to ``jieba_next.cut``.
        use_paddle: Accepted so existing call sites keep working; it is not
            forwarded to jieba-next.

    Returns:
        Whatever ``jieba_next.cut`` returns for the given arguments.
    """
    forwarded = {"cut_all": cut_all, "HMM": HMM}
    return _jieba_next.cut(sentence, **forwarded)
17+
18+
19+
def lcut(
    sentence: str,
    cut_all: bool = False,
    HMM: bool = True,
    use_paddle: bool = False,
) -> list[str]:
    """
    Legacy-compatible ``jieba.lcut`` entry point for callers that expect a list.

    Args:
        sentence: The text to segment.
        cut_all: Forwarded unchanged to jieba-next's ``lcut``.
        HMM: Forwarded unchanged to jieba-next's ``lcut``.
        use_paddle: Accepted so existing call sites keep working; it is not
            forwarded to jieba-next.

    Returns:
        Whatever jieba-next's ``lcut`` returns for the given arguments.
    """
    tokens = _lcut(sentence, cut_all=cut_all, HMM=HMM)
    return tokens
24+
25+
26+
def cut_for_search(sentence: str, HMM: bool = True) -> Iterator[str]:
    """
    Legacy-compatible ``jieba.cut_for_search`` entry point (search-mode segmentation).

    Args:
        sentence: The text to segment.
        HMM: Forwarded unchanged to jieba-next's ``cut_for_search``.

    Returns:
        Whatever jieba-next's ``cut_for_search`` returns for the given arguments.
    """
    search_tokens = _cut_for_search(sentence, HMM=HMM)
    return search_tokens
31+
32+
33+
def lcut_for_search(sentence: str, HMM: bool = True) -> list[str]:
    """
    Legacy-compatible ``jieba.lcut_for_search`` entry point (search-mode token list).

    Args:
        sentence: The text to segment.
        HMM: Forwarded unchanged to jieba-next's ``lcut_for_search``.

    Returns:
        Whatever jieba-next's ``lcut_for_search`` returns for the given arguments.
    """
    search_tokens = _lcut_for_search(sentence, HMM=HMM)
    return search_tokens
38+
39+
40+
def __getattr__(name: str) -> Any:
    """
    Resolve attributes this module does not define by falling back to jieba-next.

    Python calls a module-level ``__getattr__`` only after normal module
    attribute lookup has failed, so the explicit wrappers defined in this
    module always take precedence over the fallback.

    Args:
        name: The attribute name requested on this module.

    Returns:
        The attribute of the same name from ``jieba_next``.

    Raises:
        AttributeError: If jieba-next does not provide ``name`` either. The
            error names this module, because that is the module the caller
            accessed; the underlying lookup failure is chained as the cause.
    """
    try:
        return getattr(_jieba_next, name)
    except AttributeError as exc:
        # Report the failure against this compatibility module rather than
        # leaking the name of the backing implementation to the caller.
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}"
        ) from exc

requirements.in

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ pywebpush~=2.0.3
6464
aiosqlite~=0.21.0
6565
psycopg2-binary~=2.9.10
6666
asyncpg~=0.30.0
67-
fast-jieba~=0.4.0
67+
jieba-next~=1.0.0rc1
6868
rsa~=4.9
6969
redis~=6.2.0
7070
async_timeout~=5.0.1; python_full_version < "3.11.3"

tests/test_fast_jieba_utils.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

tests/test_jieba_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import jieba
2+
3+
from app.utils.jieba import cut
4+
5+
6+
def test_cut_accepts_legacy_hmm_argument():
    """Check that the compatibility wrapper still accepts jieba.cut's legacy HMM keyword."""
    sentence = "台湾后台测试"
    tokens = cut(sentence, HMM=False)

    # Segmentation must be lossless: the tokens concatenate back to the input.
    assert "".join(tokens) == "台湾后台测试"
    # "后台" is expected to come out as a standalone token.
    assert "后台" in tokens
12+
13+
14+
def test_legacy_jieba_import_uses_compat_entrypoint():
    """Check that code importing the old ``jieba`` name can still call ``jieba.cut``."""
    sentence = "台湾后台测试"
    # jieba.cut is annotated as returning an iterator, so materialize it first.
    tokens = list(jieba.cut(sentence, HMM=False))

    # Segmentation must be lossless: the tokens concatenate back to the input.
    assert "".join(tokens) == "台湾后台测试"
    # "后台" is expected to come out as a standalone token.
    assert "后台" in tokens
assert "后台" in words

0 commit comments

Comments
 (0)