Skip to content

Commit 8c346b3

Browse files
authored
Merge pull request #203 from bab2min/dev/fix_issue_189
숫자가 종종 잘못된 문자로 매칭되는 버그 수정
2 parents 979432f + 8a938f1 commit 8c346b3

File tree

2 files changed

+48
-9
lines changed

2 files changed

+48
-9
lines changed

Diff for: src/KTrie.cpp

-9
Original file line number | Diff line number | Diff line change
@@ -818,15 +818,6 @@ size_t kiwi::splitByTrie(
818818
if (curNode->fail())
819819
{
820820
curNode = curNode->fail();
821-
for (auto submatcher = curNode; submatcher; submatcher = submatcher->fail())
822-
{
823-
const Form* cand = submatcher->val(trie);
824-
if (!cand) break;
825-
else if (!trie.hasSubmatch(cand))
826-
{
827-
if (!insertCandidates(candidates, cand, formBase, typoPtrs, str, nonSpaces)) break;
828-
}
829-
}
830821
nextNode = curNode->template nextOpt<arch>(trie, str[n + i]);
831822
}
832823
else

Diff for: test/test_cpp.cpp

+48
Original file line number | Diff line number | Diff line change
@@ -988,6 +988,12 @@ TEST(KiwiCpp, ZCoda)
988988
TEST(KiwiCpp, ZSiot)
989989
{
990990
Kiwi& kiwi = reuseKiwiInstance();
991+
992+
auto resSplit = kiwi.analyze(u"찰랑찰랑한 머릿결과 볼륨감", Match::allWithNormalizing | Match::splitSaisiot);
993+
EXPECT_EQ(resSplit.first.size(), 8);
994+
EXPECT_EQ(resSplit.first[3].str, u"머리");
995+
EXPECT_EQ(resSplit.first[4].tag, POSTag::z_siot);
996+
EXPECT_EQ(resSplit.first[5].str, u"결");
991997

992998
for (auto s : {u"하굣길", u"만둣국", u"나뭇잎", u"세숫물", u"고춧가루", u"시곗바늘", u"사글셋방"})
993999
{
@@ -1014,6 +1020,35 @@ TEST(KiwiCpp, ZSiot)
10141020
}
10151021
}
10161022

1023+
TEST(KiwiCpp, ZSiotWithTypo)
1024+
{
1025+
Kiwi kiwi = KiwiBuilder{ MODEL_PATH, 0, BuildOption::default_, }.build(getDefaultTypoSet(DefaultTypoSet::basicTypoSetWithContinual));
1026+
1027+
for (auto s : { u"하굣길", u"만둣국", u"나뭇잎", u"세숫물", u"고춧가루", u"시곗바늘", u"사글셋방" })
1028+
{
1029+
auto resNone = kiwi.analyze(s, Match::allWithNormalizing);
1030+
auto resSplit = kiwi.analyze(s, Match::allWithNormalizing | Match::splitSaisiot);
1031+
auto resMerge = kiwi.analyze(s, Match::allWithNormalizing | Match::mergeSaisiot);
1032+
EXPECT_FALSE(std::any_of(resNone.first.begin(), resNone.first.end(), [](const TokenInfo& token) { return token.tag == POSTag::z_siot; }));
1033+
EXPECT_EQ(resSplit.first.size(), 3);
1034+
EXPECT_EQ(resSplit.first[0].tag, POSTag::nng);
1035+
EXPECT_EQ(resSplit.first[1].tag, POSTag::z_siot);
1036+
EXPECT_EQ(resSplit.first[2].tag, POSTag::nng);
1037+
EXPECT_EQ(resMerge.first.size(), 1);
1038+
EXPECT_EQ(resMerge.first[0].tag, POSTag::nng);
1039+
}
1040+
1041+
for (auto s : { u"발렛 파킹", u"미닛" })
1042+
{
1043+
auto resNone = kiwi.analyze(s, Match::allWithNormalizing);
1044+
auto resSplit = kiwi.analyze(s, Match::allWithNormalizing | Match::splitSaisiot);
1045+
auto resMerge = kiwi.analyze(s, Match::allWithNormalizing | Match::mergeSaisiot);
1046+
EXPECT_EQ(resNone.second, resSplit.second);
1047+
EXPECT_EQ(resNone.second, resMerge.second);
1048+
EXPECT_FALSE(std::any_of(resSplit.first.begin(), resSplit.first.end(), [](const TokenInfo& token) { return token.tag == POSTag::z_siot; }));
1049+
}
1050+
}
1051+
10171052
TEST(KiwiCpp, AnalyzeWithWordPosition)
10181053
{
10191054
std::u16string testSentence = u"나 정말 배불렄ㅋㅋ";
@@ -1609,3 +1644,16 @@ TEST(KiwiCpp, IssueP172_LengthError)
16091644
auto res = kiwi.analyze(text, Match::allWithNormalizing).first;
16101645
EXPECT_GT(res.size(), 0);
16111646
}
1647+
1648+
TEST(KiwiCpp, IssueP189)
1649+
{
1650+
Kiwi& kiwi = reuseKiwiInstance();
1651+
auto res = kiwi.analyze(u"담아 1팩 무료", Match::allWithNormalizing).first;
1652+
1653+
EXPECT_EQ(res.size(), 5);
1654+
EXPECT_EQ(res[0].str, u"담");
1655+
EXPECT_EQ(res[1].str, u"아");
1656+
EXPECT_EQ(res[2].str, u"1");
1657+
EXPECT_EQ(res[3].str, u"팩");
1658+
EXPECT_EQ(res[4].str, u"무료");
1659+
}

0 commit comments

Comments
 (0)