Skip to content

Commit 87bd2b4

Browse files
authored
Merge pull request #157 from bab2min/dev_0170
Dev 0.17.0
2 parents 7c337b6 + a0795c1 commit 87bd2b4

File tree

11 files changed

+252277
-274608
lines changed

11 files changed

+252277
-274608
lines changed

Diff for: CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required(VERSION 3.12)
22

3-
project(kiwi VERSION 0.16.1 DESCRIPTION "Kiwi, Korean Intelligent Word Identifier")
3+
project(kiwi VERSION 0.17.0 DESCRIPTION "Kiwi, Korean Intelligent Word Identifier")
44

55
set ( CMAKE_CXX_STANDARD 14 )
66
set ( CMAKE_VERBOSE_MAKEFILE true )

Diff for: ModelGenerator/multi.dict

+252,228-274,597
Large diffs are not rendered by default.

Diff for: bindings/java/kr/pe/bab2min/Kiwi.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
public class Kiwi implements AutoCloseable {
1414
private long _inst;
15-
final private static String _version = "0.16.1";
15+
final private static String _version = "0.17.0";
1616

1717
public static class Match {
1818
final static public int none = 0,

Diff for: include/kiwi/Form.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* @file Form.h
33
* @author bab2min ([email protected])
44
* @brief 형태 및 형태소에 관한 정보를 담는 구조체들이 선언된 헤더
5-
* @version 0.16.1
5+
* @version 0.17.0
66
* @date 2022-09-01
77
*
88
*

Diff for: include/kiwi/Kiwi.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* @file Kiwi.h
33
* @author bab2min ([email protected])
44
* @brief Kiwi C++ API를 담고 있는 헤더 파일
5-
* @version 0.16.1
5+
* @version 0.17.0
66
* @date 2022-09-01
77
*
88
*

Diff for: include/kiwi/Types.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* @file Types.h
33
* @author bab2min ([email protected])
44
* @brief Kiwi C++ API에 쓰이는 주요 타입들을 모아놓은 헤더 파일
5-
* @version 0.16.1
5+
* @version 0.17.0
66
* @date 2022-09-01
77
*
88
*

Diff for: include/kiwi/Utils.h

+16-0
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,23 @@ namespace kiwi
160160
case u'\r':
161161
case u'\t':
162162
case u'\v':
163+
case u'\xA0':
164+
case u'\u1680':
165+
case u'\u2000':
166+
case u'\u2001':
167+
case u'\u2002':
168+
case u'\u2003':
169+
case u'\u2004':
170+
case u'\u2005':
171+
case u'\u2006':
172+
case u'\u2007':
173+
case u'\u2008':
174+
case u'\u2009':
175+
case u'\u200A':
176+
case u'\u202F':
177+
case u'\u205F':
163178
case u'\u2800':
179+
case u'\u3000':
164180
return true;
165181
}
166182
return false;

Diff for: include/kiwi/capi.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* @file capi.h
33
* @author bab2min ([email protected])
44
* @brief Kiwi C API를 담고 있는 헤더 파일
5-
* @version 0.16.1
5+
* @version 0.17.0
66
* @date 2022-09-01
77
*
88
*

Diff for: src/Combiner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ void RuleSet::loadRules(istream& istr)
779779
while (getline(istr, line))
780780
{
781781
if (line[0] == '#') continue;
782-
while (!line.empty() && isSpace(line.back())) line.pop_back();
782+
while (!line.empty() && line.back() < 0x80 && isSpace(line.back())) line.pop_back();
783783
if (line.empty()) continue;
784784

785785
auto fields = split(line, '\t');

Diff for: src/PatternMatcher.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,15 @@ size_t PatternMatcherImpl::testSerial(const char16_t* first, const char16_t* las
240240
{
241241
++b;
242242
if (b != last && *b == ' ') ++b;
243-
if (b == last || !isDigit(*b)) return b - first;
243+
if (b == last || !isDigit(*b))
244+
{
245+
if (b[-1] == ' ') --b;
246+
return b - first;
247+
}
244248
++b;
245249
while (b != last && isDigit(*b)) ++b;
246250
}
251+
if (b[-1] == ' ') --b;
247252
return b - first;
248253
}
249254

Diff for: test/test_cpp.cpp

+20-3
Original file line numberDiff line numberDiff line change
@@ -531,13 +531,13 @@ TEST(KiwiCpp, SpaceTolerant)
531531
TEST(KiwiCpp, MultiWordDictionary)
532532
{
533533
auto& kiwi = reuseKiwiInstance();
534-
const auto text = u"밀리언 달러 베이비랑 바람과 함께 사라지다랑 뭐가 더 재밌었어?";
534+
const auto text = u"밀리언 달러 베이비랑 더 웨이 백 중 뭐가 더 재밌었어?";
535535

536536
auto res = kiwi.analyze(text, Match::allWithNormalizing).first;
537537
EXPECT_EQ(res[0].str, u"밀리언 달러 베이비");
538538
EXPECT_EQ(res[0].tag, POSTag::nnp);
539539

540-
EXPECT_EQ(res[2].str, u"바람과 함께 사라지다");
540+
EXPECT_EQ(res[2].str, u"더 웨이 백");
541541
EXPECT_EQ(res[2].tag, POSTag::nnp);
542542

543543
auto kiwi2 = KiwiBuilder{ MODEL_PATH, 0, BuildOption::default_ & ~BuildOption::loadMultiDict, }.build();
@@ -547,7 +547,7 @@ TEST(KiwiCpp, MultiWordDictionary)
547547

548548
TEST(KiwiCpp, WordsWithSpaces)
549549
{
550-
KiwiBuilder kw{ MODEL_PATH, 0, BuildOption::default_, };
550+
KiwiBuilder kw{ MODEL_PATH, 0, BuildOption::default_ & ~BuildOption::loadMultiDict, };
551551
EXPECT_TRUE(kw.addWord(u"대학생 선교회", POSTag::nnp, 0.0).second);
552552
Kiwi kiwi = kw.build();
553553

@@ -643,6 +643,22 @@ TEST(KiwiCpp, WordsWithSpaces)
643643
EXPECT_EQ(res5.first[1].lineNumber, 2);
644644
}
645645

646+
TEST(KiwiCpp, MultiDict)
647+
{
648+
Kiwi& kiwi = reuseKiwiInstance();
649+
auto res = kiwi.analyze(u"프렌치카페 로스터리 크리스마스에디션 인증샷", Match::all).first;
650+
for (auto& r : res)
651+
{
652+
EXPECT_NE(r.str, u"리 크리스마스");
653+
}
654+
655+
res = kiwi.analyze(u"추첨이벤트 2018년 리빙디자인페어 행사기간", Match::all).first;
656+
for (auto& r : res)
657+
{
658+
EXPECT_NE(r.str, u"리 빙");
659+
}
660+
}
661+
646662
TEST(KiwiCpp, Pattern)
647663
{
648664
Kiwi& kiwi = reuseKiwiInstance();
@@ -716,6 +732,7 @@ TEST(KiwiCpp, Pattern)
716732
tokens = kiwi.analyze(u"2001. 01. 02. 에", Match::all).first;
717733
EXPECT_EQ(tokens.size(), 2);
718734
EXPECT_EQ(tokens[0].tag, POSTag::w_serial);
735+
EXPECT_EQ(tokens[0].str.back(), u'.');
719736

720737
tokens = kiwi.analyze(u"010-1234-5678에", Match::all).first;
721738
EXPECT_EQ(tokens.size(), 2);

0 commit comments

Comments
 (0)