Skip to content

Commit f6a714f

Browse files
authored
Merge pull request #182 from bab2min/dev_issue_181
Fix sentence splitting bug on SSO tag
2 parents 41aae65 + e195cfc commit f6a714f

File tree

2 files changed

+15
-1
lines changed

2 files changed

+15
-1
lines changed

Diff for: src/Kiwi.cpp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -375,7 +375,8 @@ namespace kiwi
375375
(tokens[i - 1].tag == POSTag::so
376376
|| tokens[i - 1].tag == POSTag::sw
377377
|| tokens[i - 1].tag == POSTag::sp
378-
|| tokens[i - 1].tag == POSTag::se)
378+
|| tokens[i - 1].tag == POSTag::se
379+
|| tokens[i - 1].tag == POSTag::sso)
379380
&& tokens[i - 1].endPos() == tokens[i].position
380381
&& tokens[i - 1].position > tokens[i - 2].endPos();
381382
if (nestedSentEnd)

Diff for: test/test_cpp.cpp

+13
Original file line number | Diff line number | Diff line change
@@ -1124,6 +1124,19 @@ TEST(KiwiCpp, IssueP131_SentenceSplitError)
11241124
EXPECT_EQ(res[1], std::make_pair((size_t)10, (size_t)12));
11251125
}
11261126

1127+
TEST(KiwiCpp, Issue181_SentenceSplitError)
1128+
{
1129+
const char16_t* text = u"존 슈발John Schwall은 그에 꼭 들어맞는 흥미로운 사례였다. 슈발의 아버지와 할아버지는 스테이튼 아일랜드의 소방관이었다. “제 친가 쪽의 남자들은 모두 소방관이에요. 전 다 른 일을 하고 싶었죠.” 슈발이 말했다.";
1130+
Kiwi& kiwi = reuseKiwiInstance();
1131+
auto res = kiwi.splitIntoSents(text);
1132+
EXPECT_EQ(res.size(), 5);
1133+
EXPECT_EQ(res[0], std::make_pair((size_t)0, (size_t)38));
1134+
EXPECT_EQ(res[1], std::make_pair((size_t)39, (size_t)72));
1135+
EXPECT_EQ(res[2], std::make_pair((size_t)73, (size_t)97));
1136+
EXPECT_EQ(res[3], std::make_pair((size_t)98, (size_t)115));
1137+
EXPECT_EQ(res[4], std::make_pair((size_t)116, (size_t)124));
1138+
}
1139+
11271140
TEST(KiwiCpp, AddRule)
11281141
{
11291142
Kiwi& okiwi = reuseKiwiInstance();

0 commit comments

Comments
 (0)