Skip to content

Commit 0ad9387

Browse files
committed
Merge branch 'dev/pclm' of https://github.com/bab2min/Kiwi into dev/pclm
2 parents 3457d37 + 121d2d2 commit 0ad9387

File tree

15 files changed

+78
-31
lines changed

15 files changed

+78
-31
lines changed

Diff for: .github/workflows/arm64_centos7.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
with:
1515
submodules: true
1616
lfs: true
17-
- uses: bab2min/run-on-arch-action@multiple-step
17+
- uses: bab2min/run-on-arch-action@multiple-step-v2
1818
id: runcmd
1919
with:
2020
image: quay.io/pypa/manylinux2014_aarch64
@@ -44,7 +44,7 @@ jobs:
4444
KIWI_ARCH_TYPE=balanced ./build/kiwi-cli-* -m ./models/base -e -o test.out kowiki1000.txt
4545
KIWI_ARCH_TYPE=neon ./build/kiwi-cli-* -m ./models/base -e -o test.out kowiki1000.txt
4646
- name: Archive binaries
47-
uses: actions/upload-artifact@v3
47+
uses: actions/upload-artifact@v4
4848
with:
4949
name: Artifacts Arm64-Centos7
5050
path: |

Diff for: .github/workflows/centos7.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
./build/kiwi-evaluator -m ./models/base eval_data/*.txt --sbg -o eval_results/
4545
- run: tar -zcvf arts.tgz build/*kiwi* build/test/*kiwi* eval_results/*.txt build/bindings/java/*.jar
4646
- name: Archive binaries
47-
uses: actions/upload-artifact@v3
47+
uses: actions/upload-artifact@v4
4848
with:
4949
name: Artifacts Centos7
5050
path: arts.tgz

Diff for: .github/workflows/macos.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ jobs:
8080
KIWI_ARCH_TYPE=neon ./build/kiwi-cli-* -m ./models/base -e -o test.out --typos 6 kowiki1000.txt
8181
fi
8282
- name: Archive binaries
83-
uses: actions/upload-artifact@v3
83+
uses: actions/upload-artifact@v4
8484
with:
8585
name: Artifacts ${{ matrix.name }}
8686
path: |

Diff for: .github/workflows/ppc64le_centos7.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
with:
1515
submodules: true
1616
lfs: true
17-
- uses: bab2min/run-on-arch-action@multiple-step
17+
- uses: bab2min/run-on-arch-action@multiple-step-v2
1818
id: runcmd
1919
with:
2020
image: quay.io/pypa/manylinux2014_ppc64le
@@ -32,7 +32,7 @@ jobs:
3232
cp -r build /artifacts/
3333
cp -r eval_results /artifacts/
3434
- name: Archive binaries
35-
uses: actions/upload-artifact@v3
35+
uses: actions/upload-artifact@v4
3636
with:
3737
name: Artifacts PPC64LE-Centos7
3838
path: |

Diff for: .github/workflows/release.yml

+24-16
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
uses: bruceadams/[email protected]
1919
env:
2020
GITHUB_TOKEN: ${{ secrets.ACCESS_TOKEN }}
21-
- uses: bab2min/run-on-arch-action@multiple-step
21+
- uses: bab2min/run-on-arch-action@multiple-step-v2
2222
with:
2323
image: quay.io/pypa/manylinux2014_x86_64
2424
githubToken: ${{ github.token }}
@@ -200,7 +200,7 @@ jobs:
200200
with:
201201
submodules: true
202202
lfs: true
203-
- uses: bab2min/run-on-arch-action@use-custom-image
203+
- uses: bab2min/run-on-arch-action@multiple-step-v2
204204
id: runcmd
205205
with:
206206
image: quay.io/pypa/manylinux2014_${{ matrix.arch }}
@@ -209,20 +209,28 @@ jobs:
209209
mkdir -p "${PWD}/artifacts"
210210
dockerRunArgs: |
211211
--volume "${PWD}/artifacts:/artifacts"
212-
run: |
213-
yum install java-1.8.0-openjdk-devel -y
214-
mkdir build && pushd build && cmake -DCMAKE_BUILD_TYPE=Release -DKIWI_USE_MIMALLOC=0 -DKIWI_JAVA_BINDING=1 ..
215-
make -j2 && popd
216-
./build/test/kiwi-test
217-
mkdir eval_results && ./build/kiwi-evaluator -m ./models/base eval_data/*.txt -o eval_results/
218-
cd build
219-
mkdir include && mkdir lib && mkdir bin
220-
mv libkiwi* lib/
221-
mv kiwi-* bin/
222-
mv test/kiwi-* bin/
223-
cp -r ../include/kiwi include/
224-
tar -zcvf /artifacts/asset.tgz include/ lib/ bin/
225-
mv bindings/java/kiwi-java*.jar /artifacts/kiwi-java.jar
212+
multipleRun: |
213+
- name: Install dependencies
214+
run: |
215+
yum install java-1.8.0-openjdk-devel -y
216+
- name: Build
217+
run: |
218+
mkdir build && pushd build && cmake -DCMAKE_BUILD_TYPE=Release -DKIWI_USE_MIMALLOC=0 -DKIWI_JAVA_BINDING=1 ..
219+
make -j2 && popd
220+
- name: Test
221+
run: |
222+
./build/test/kiwi-test
223+
mkdir eval_results && ./build/kiwi-evaluator -m ./models/base eval_data/*.txt -o eval_results/
224+
- name: Release
225+
run: |
226+
cd build
227+
mkdir include && mkdir lib && mkdir bin
228+
mv libkiwi* lib/
229+
mv kiwi-* bin/
230+
mv test/kiwi-* bin/
231+
cp -r ../include/kiwi include/
232+
tar -zcvf /artifacts/asset.tgz include/ lib/ bin/
233+
mv bindings/java/kiwi-java*.jar /artifacts/kiwi-java.jar
226234
- name: Get release
227235
id: get_release
228236
uses: bruceadams/[email protected]

Diff for: .github/workflows/ubuntu.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ jobs:
7575
KIWI_ARCH_TYPE=avx512bw ./build/kiwi-cli-* -m ./models/base -e -o test.out --sbg kowiki1000.txt
7676
KIWI_ARCH_TYPE=avx512bw ./build/kiwi-cli-* -m ./models/base -e -o test.out --typos 6 kowiki1000.txt
7777
- name: Archive binaries
78-
uses: actions/upload-artifact@v3
78+
uses: actions/upload-artifact@v4
7979
with:
8080
name: Artifacts ${{ matrix.name }}
8181
path: |

Diff for: .github/workflows/windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
.\build\Release\kiwi-evaluator.exe -m .\models\base (Get-ChildItem eval_data\*.txt | Select-Object -Expand FullName) -o eval_results\
3939
.\build\Release\kiwi-evaluator.exe -m .\models\base --sbg (Get-ChildItem eval_data\*.txt | Select-Object -Expand FullName) -o eval_results\
4040
- name: Archive binaries
41-
uses: actions/upload-artifact@v3
41+
uses: actions/upload-artifact@v4
4242
with:
4343
name: Artifacts ${{ matrix.os }} ${{ matrix.arch }}bit
4444
path: |

Diff for: CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required(VERSION 3.12)
22

3-
project(kiwi VERSION 0.20.3 DESCRIPTION "Kiwi, Korean Intelligent Word Identifier")
3+
project(kiwi VERSION 0.20.4 DESCRIPTION "Kiwi, Korean Intelligent Word Identifier")
44

55
set ( CMAKE_CXX_STANDARD 17 )
66
set ( CMAKE_VERBOSE_MAKEFILE true )

Diff for: README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,9 @@ include/kiwi/capi.h 를 참조하세요.
145145
https://github.com/bab2min/Kiwi/releases 에서 Windows, Linux, macOS 버전으로 컴파일된 Library 파일과 모델 파일을 다운로드 받을 수 있습니다.
146146

147147
### C# Wrapper
148-
https://github.com/bab2min/kiwi-gui
148+
https://github.com/bab2min/kiwi-gui 에서 공식 GUI 툴에 사용되는 C# Wrapper를 찾으실 수 있습니다.
149+
150+
또한 EX3님께서 기여해주신 wrapper인 [NetKiwi](https://github.com/EX3exp/NetKiwi)가 있습니다.
149151

150152
### Python3 Wrapper
151153
또한 Python3용 API인 Kiwipiepy가 제공됩니다. 이에 대해서는 https://github.com/bab2min/kiwipiepy 를 참조하시길 바랍니다.

Diff for: bindings/java/kr/pe/bab2min/Kiwi.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
public class Kiwi implements AutoCloseable {
1414
private long _inst;
15-
final private static String _version = "0.20.3";
15+
final private static String _version = "0.20.4";
1616

1717
public static class Match {
1818
final static public int none = 0,

Diff for: include/kiwi/Form.h

+23
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,29 @@ namespace kiwi
169169

170170
/** 현재 인스턴스가 단일 형태소인지 확인한다 */
171171
bool isSingle() const { return chunks.empty() || complex || saisiot; }
172+
173+
bool hasComplex() const
174+
{
175+
if (getCombined()->complex) return true;
176+
177+
for (auto c : chunks)
178+
{
179+
if (c->complex) return true;
180+
}
181+
return false;
182+
}
183+
184+
template<class Container>
185+
bool hasMorpheme(Container&& m) const
186+
{
187+
if (m.count(getCombined())) return true;
188+
for (auto c : chunks)
189+
{
190+
if (m.count(c)) return true;
191+
}
192+
return false;
193+
}
194+
172195
};
173196

174197
/**

Diff for: include/kiwi/Macro.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55

66
#define KIWI_VERSION_MAJOR 0
77
#define KIWI_VERSION_MINOR 20
8-
#define KIWI_VERSION_PATCH 3
8+
#define KIWI_VERSION_PATCH 4
99

1010
#define KIWI_VERSION_STRING KIWI_STR(KIWI_VERSION_MAJOR) "." KIWI_STR(KIWI_VERSION_MINOR) "." KIWI_STR(KIWI_VERSION_PATCH)

Diff for: src/KiwiBuilder.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -2208,7 +2208,7 @@ Kiwi KiwiBuilder::build(const TypoTransformer& typos, float typoCostThreshold) c
22082208
estimatedNodeSize += f->first.size() - commonPrefix;
22092209
prevForm = &f->first;
22102210
}
2211-
ret.typoForms.emplace_back(0, 0, 0, hash);
2211+
ret.typoForms.emplace_back(0, 0, hash);
22122212
ret.typoPtrs.emplace_back(ret.typoPool.size());
22132213
formTrie.reserveMore(estimatedNodeSize);
22142214

Diff for: src/PathEvaluator.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -848,8 +848,8 @@ namespace kiwi
848848
validMorphCands.clear();
849849
for (auto& curMorph : cands)
850850
{
851-
if (splitComplex && curMorph->getCombined()->complex) continue;
852-
if (blocklist && blocklist->count(curMorph->getCombined())) continue;
851+
if (splitComplex && curMorph->hasComplex()) continue;
852+
if (blocklist && curMorph->hasMorpheme(*blocklist)) continue;
853853

854854
// 덧붙은 받침(zCoda)을 위한 지름길
855855
if (curMorph->tag == POSTag::z_coda)

Diff for: test/test_cpp.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,20 @@ TEST(KiwiCpp, SplitComplex)
187187
EXPECT_EQ(res2.first[0].str, u"감사");
188188
}
189189
}
190+
191+
{
192+
auto testCases = {
193+
u"집에 갔어요",
194+
u"집에 가요",
195+
};
196+
for (auto s : testCases)
197+
{
198+
auto res1 = kiwi.analyze(s, Match::allWithNormalizing);
199+
auto res2 = kiwi.analyze(s, Match::allWithNormalizing | Match::splitComplex);
200+
EXPECT_EQ(res1.first[res1.first.size() - 1].str, u"어요");
201+
EXPECT_EQ(res2.first[res2.first.size() - 1].str, u"");
202+
}
203+
}
190204
}
191205

192206
TEST(KiwiCpp, OldHangul)

0 commit comments

Comments
 (0)