Skip to content

Commit 8eb13c6

Browse files
committed
Fix #205
1 parent fd278ac commit 8eb13c6

File tree

3 files changed

+18
-12
lines changed

3 files changed

+18
-12
lines changed

include/kiwi/Form.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -228,17 +228,18 @@ namespace kiwi
228228
uint32_t formId = 0;
229229
float scoreHash = 0;
230230
uint32_t typoId = 0;
231+
uint16_t numSpaces = 0;
231232
CondVowel leftCond = CondVowel::none;
232233

233234
TypoForm() = default;
234235

235-
TypoForm(const std::tuple<uint32_t, float, CondVowel>& p)
236-
: formId{ std::get<0>(p) }, scoreHash{ std::get<1>(p) }, leftCond{ std::get<2>(p) }
236+
TypoForm(const std::tuple<uint32_t, float, uint16_t, CondVowel>& p)
237+
: formId{ std::get<0>(p) }, scoreHash{ std::get<1>(p) }, numSpaces{ std::get<2>(p)}, leftCond{std::get<3>(p)}
237238
{
238239
}
239240

240-
TypoForm(uint32_t _formId, float _score = 0, bool _hash = 0, uint32_t _typoId = 0, CondVowel _leftCond = CondVowel::none)
241-
: formId{ _formId }, scoreHash{ _hash ? -_score : _score }, typoId{ _typoId }, leftCond{ _leftCond }
241+
TypoForm(uint32_t _formId, float _score = 0, bool _hash = 0, uint32_t _typoId = 0, uint16_t _numSpaces = 0, CondVowel _leftCond = CondVowel::none)
242+
: formId{ _formId }, scoreHash{ _hash ? -_score : _score }, typoId{ _typoId }, numSpaces{ _numSpaces }, leftCond{ _leftCond }
242243
{
243244
}
244245

src/KTrie.cpp

+9-4
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,21 @@ namespace kiwi
5050
uint32_t start = 0;
5151
uint32_t typoId = 0;
5252
uint32_t end = 0; // only used in continual typo tolerant mode
53+
uint32_t numSpaces = 0;
5354

5455
FormCandidate(const Form* _form = nullptr,
5556
float _cost = 0,
5657
uint32_t _start = 0,
5758
uint32_t _typoId = 0,
5859
uint32_t _end = 0,
60+
uint32_t _numSpaces = 0,
5961
uint32_t = 0)
6062
: form{ _form },
6163
cost{ _cost },
6264
start{ _start },
6365
typoId{ _typoId },
64-
end{ _end }
66+
end{ _end },
67+
numSpaces{ _numSpaces }
6568
{}
6669

6770
size_t getStartPos(size_t ) const
@@ -86,7 +89,7 @@ namespace kiwi
8689

8790
size_t getFormSizeWithTypos(const size_t* typoPtrs) const
8891
{
89-
return typoPtrs[typoId + 1] - typoPtrs[typoId];
92+
return typoPtrs[typoId + 1] - typoPtrs[typoId] + numSpaces;
9093
}
9194

9295
bool operator==(const Form* f) const
@@ -100,7 +103,7 @@ namespace kiwi
100103
{
101104
const Form* form = nullptr;
102105

103-
FormCandidate(const Form* _form = nullptr, float = 0, uint32_t = 0, uint32_t = 0, uint32_t = 0, uint32_t = 0)
106+
FormCandidate(const Form* _form = nullptr, float = 0, uint32_t = 0, uint32_t = 0, uint32_t = 0, uint32_t = 0, uint32_t = 0)
104107
: form{ _form }
105108
{}
106109

@@ -146,8 +149,9 @@ namespace kiwi
146149
uint32_t _start = 0,
147150
uint32_t _typoId = 0,
148151
uint32_t _end = 0,
152+
uint32_t _numSpaces = 0,
149153
uint32_t _lengthenedSize = 0)
150-
: FormCandidate<typoTolerant, continualTypoTolerant, false>{ _form, _cost, _start, _typoId, _end, _lengthenedSize },
154+
: FormCandidate<typoTolerant, continualTypoTolerant, false>{ _form, _cost, _start, _typoId, _end, _numSpaces, _lengthenedSize },
151155
lengthenedSize{ _lengthenedSize }
152156
{}
153157

@@ -203,6 +207,7 @@ namespace kiwi
203207
startPosition ? startPosition : ((nonSpaces.size() - typoFormSize) * posMultiplier),
204208
tCand->typoId,
205209
endPosition,
210+
tCand->numSpaces,
206211
lengthenedSize);
207212
}
208213
if (tCand[0].hash() != tCand[1].hash()) break;

src/KiwiBuilder.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -2028,7 +2028,7 @@ Kiwi KiwiBuilder::build(const TypoTransformer& typos, float typoCostThreshold) c
20282028
// 오타 교정이 있는 경우 가능한 모든 오타에 대해 Trie 생성
20292029
else
20302030
{
2031-
using TypoInfo = tuple<uint32_t, float, CondVowel>;
2031+
using TypoInfo = tuple<uint32_t, float, uint16_t, CondVowel>;
20322032
UnorderedMap<KString, Vector<TypoInfo>> typoGroup;
20332033
auto ptypos = typos.prepare();
20342034
ret.continualTypoCost = ptypos.getContinualTypoCost();
@@ -2043,12 +2043,12 @@ Kiwi KiwiBuilder::build(const TypoTransformer& typos, float typoCostThreshold) c
20432043
for (auto t : ptypos._generate(f->form, typoCostThreshold))
20442044
{
20452045
if (t.leftCond != CondVowel::none && f->vowel != CondVowel::none && t.leftCond != f->vowel) continue;
2046-
typoGroup[removeSpace(t.str)].emplace_back(f - ret.forms.data(), t.cost, t.leftCond);
2046+
typoGroup[removeSpace(t.str)].emplace_back(f - ret.forms.data(), t.cost, f->numSpaces, t.leftCond);
20472047
}
20482048
}
20492049
else
20502050
{
2051-
typoGroup[removeSpace(f->form)].emplace_back(f - ret.forms.data(), 0, CondVowel::none);
2051+
typoGroup[removeSpace(f->form)].emplace_back(f - ret.forms.data(), 0, f->numSpaces, CondVowel::none);
20522052
}
20532053
}
20542054

@@ -2107,7 +2107,7 @@ Kiwi KiwiBuilder::build(const TypoTransformer& typos, float typoCostThreshold) c
21072107
estimatedNodeSize += f->first.size() - commonPrefix;
21082108
prevForm = &f->first;
21092109
}
2110-
ret.typoForms.emplace_back(0, 0, hash);
2110+
ret.typoForms.emplace_back(0, 0, 0, hash);
21112111
ret.typoPtrs.emplace_back(ret.typoPool.size());
21122112
formTrie.reserveMore(estimatedNodeSize);
21132113

0 commit comments

Comments
 (0)