Skip to content

Commit c1da90c

Browse files
authored
Merge pull request #192 from bab2min/dev/issue191
앞쪽의 특수 문자가 형태소와 잘못 결합하여 분석되는 버그 수정
2 parents c75659c + 5933afc commit c1da90c

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

Diff for: src/KTrie.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -653,7 +653,8 @@ size_t kiwi::splitByTrie(
653653
const auto scanStart = max(endPosMap[nBeginWithMultiplier].first, (uint32_t)1), scanEnd = endPosMap[nBeginWithMultiplier].second;
654654
const bool longestMatched = scanStart < scanEnd && any_of(out.begin() + scanStart, out.begin() + scanEnd, [&](const KGraphNode& g)
655655
{
656-
return nBeginWithMultiplier == g.endPos && lastSpecialEndPos == g.endPos - (g.uform.empty() ? g.form->sizeWithoutSpace() : g.uform.size()) * posMultiplier;
656+
const auto start = g.endPos - (g.uform.empty() ? g.form->sizeWithoutSpace() : g.uform.size()) * posMultiplier;
657+
return nBeginWithMultiplier == g.endPos && (lastSpecialEndPos == start || specialStartPos == start);
657658
});
658659

659660
// insert unknown form

Diff for: test/test_cpp.cpp

+12
Original file line number | Diff line number | Diff line change
@@ -361,6 +361,18 @@ TEST(KiwiCpp, UserTag)
361361
EXPECT_EQ(tokens[7].tag, POSTag::user2);
362362
}
363363

364+
// Test case for input where a special character (a backtick) directly
// precedes a word: analyzes one sentence and checks the surface string of
// each of the first six resulting tokens.
TEST(KiwiCpp, STagPrefix)
365+
{
366+
// Uses the Kiwi instance returned by reuseKiwiInstance().
Kiwi& kiwi = reuseKiwiInstance();
367+
// Only the `.first` member of the analyze() result is inspected below.
auto res = kiwi.analyze(u"자신있는 지역은 `후분양`으로 나올듯 싶습니다.", Match::allWithNormalizing).first;
368+
EXPECT_EQ(res[0].str, u"자신");
369+
// NOTE(review): the empty u"" expectations (indices 1, 2 and 4) look like
// single-character literals that were lost when this page was extracted --
// confirm against the repository before relying on them.
EXPECT_EQ(res[1].str, u"");
370+
EXPECT_EQ(res[2].str, u"");
371+
EXPECT_EQ(res[3].str, u"지역");
372+
EXPECT_EQ(res[4].str, u"");
373+
// The sixth token is expected to be the opening backtick by itself.
EXPECT_EQ(res[5].str, u"`");
374+
}
375+
364376
TEST(KiwiCpp, HSDataset)
365377
{
366378
KiwiBuilder kw{ MODEL_PATH, 0, BuildOption::default_, };

0 commit comments

Comments
 (0)