diff --git a/.github/workflows/arm64_centos7.yml b/.github/workflows/arm64_centos7.yml index ad3a8514..28f93c70 100644 --- a/.github/workflows/arm64_centos7.yml +++ b/.github/workflows/arm64_centos7.yml @@ -14,7 +14,7 @@ jobs: with: submodules: true lfs: true - - uses: bab2min/run-on-arch-action@multiple-step + - uses: bab2min/run-on-arch-action@multiple-step-v2 id: runcmd with: image: quay.io/pypa/manylinux2014_aarch64 diff --git a/.github/workflows/ppc64le_centos7.yml b/.github/workflows/ppc64le_centos7.yml index a0a88a54..00fa49db 100644 --- a/.github/workflows/ppc64le_centos7.yml +++ b/.github/workflows/ppc64le_centos7.yml @@ -14,7 +14,7 @@ jobs: with: submodules: true lfs: true - - uses: bab2min/run-on-arch-action@multiple-step + - uses: bab2min/run-on-arch-action@multiple-step-v2 id: runcmd with: image: quay.io/pypa/manylinux2014_ppc64le diff --git a/include/kiwi/Form.h b/include/kiwi/Form.h index 335a3138..9ae59f55 100644 --- a/include/kiwi/Form.h +++ b/include/kiwi/Form.h @@ -166,6 +166,28 @@ namespace kiwi /** 분할된 형태소의 경우 원형 형태소를 반환한다. 그 외에는 자기 자신을 반환한다. 
*/ const Morpheme* getCombined() const { return this + combined; } + + bool hasComplex() const + { + if (getCombined()->complex) return true; + + for (auto c : chunks) + { + if (c->complex) return true; + } + return false; + } + + template<class Container> + bool hasMorpheme(Container&& m) const + { + if (m.count(getCombined())) return true; + for (auto c : chunks) + { + if (m.count(c)) return true; + } + return false; + } }; /** diff --git a/src/PathEvaluator.hpp b/src/PathEvaluator.hpp index d89ffe36..05dd1ebb 100644 --- a/src/PathEvaluator.hpp +++ b/src/PathEvaluator.hpp @@ -871,8 +871,8 @@ namespace kiwi { for (auto& curMorph : cands) { - if (splitComplex && curMorph->getCombined()->complex) continue; - if (blocklist && blocklist->count(curMorph->getCombined())) continue; + if (splitComplex && curMorph->hasComplex()) continue; + if (blocklist && curMorph->hasMorpheme(*blocklist)) continue; // 덧붙은 받침(zCoda)을 위한 지름길 if (curMorph->tag == POSTag::z_coda) diff --git a/test/test_cpp.cpp b/test/test_cpp.cpp index 09fdee5c..e8030a49 100644 --- a/test/test_cpp.cpp +++ b/test/test_cpp.cpp @@ -187,6 +187,20 @@ TEST(KiwiCpp, SplitComplex) EXPECT_EQ(res2.first[0].str, u"감사"); } } + + { + auto testCases = { + u"집에 갔어요", + u"집에 가요", + }; + for (auto s : testCases) + { + auto res1 = kiwi.analyze(s, Match::allWithNormalizing); + auto res2 = kiwi.analyze(s, Match::allWithNormalizing | Match::splitComplex); + EXPECT_EQ(res1.first[res1.first.size() - 1].str, u"어요"); + EXPECT_EQ(res2.first[res2.first.size() - 1].str, u"요"); + } + } } TEST(KiwiCpp, OldHangul)