Skip to content

Commit c6f2427

Browse files
committed
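Changed the evaluator's default topN from 3 to 1 & added more typo options (new --btypo switch alongside --ctypo), plus --no-multi and --repeat arguments for the evaluator tool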
1 parent e91047b commit c6f2427

File tree

2 files changed

+40
-15
lines changed

2 files changed

+40
-15
lines changed

Diff for: tools/Evaluator.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ class Evaluator
2727
std::vector<TestResult> testsets, errors;
2828
const kiwi::Kiwi* kw = nullptr;
2929
kiwi::Match matchOption;
30-
size_t topN = 3;
30+
size_t topN = 1;
3131
public:
32-
Evaluator(const std::string& testSetFile, const kiwi::Kiwi* _kw, kiwi::Match _matchOption = kiwi::Match::all, size_t topN = 3);
32+
Evaluator(const std::string& testSetFile, const kiwi::Kiwi* _kw, kiwi::Match _matchOption = kiwi::Match::all, size_t topN = 1);
3333
void run();
3434
Score evaluate();
3535
const std::vector<TestResult>& getErrors() const { return errors; }

Diff for: tools/evaluator_main.cpp

+38-13
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,28 @@ using namespace std;
1212
using namespace kiwi;
1313

1414
int doEvaluate(const string& modelPath, const string& output, const vector<string>& input,
15-
bool normCoda, bool zCoda, bool useSBG, float typoCostWeight, bool cTypo)
15+
bool normCoda, bool zCoda, bool multiDict, bool useSBG,
16+
float typoCostWeight, bool bTypo, bool cTypo,
17+
int repeat)
1618
{
1719
try
1820
{
21+
if (typoCostWeight > 0 && !bTypo && !cTypo)
22+
{
23+
bTypo = true;
24+
}
25+
else if (typoCostWeight == 0)
26+
{
27+
bTypo = false;
28+
cTypo = false;
29+
}
30+
31+
DefaultTypoSet typos[] = { DefaultTypoSet::withoutTypo, DefaultTypoSet::basicTypoSet, DefaultTypoSet::continualTypoSet, DefaultTypoSet::basicTypoSetWithContinual};
32+
1933
tutils::Timer timer;
20-
Kiwi kw = KiwiBuilder{ modelPath, 1, BuildOption::default_, useSBG }.build(
21-
typoCostWeight > 0 ? (cTypo ? DefaultTypoSet::basicTypoSetWithContinual : DefaultTypoSet::basicTypoSet) : DefaultTypoSet::withoutTypo
34+
auto option = (BuildOption::default_ & ~BuildOption::loadMultiDict) | (multiDict ? BuildOption::loadMultiDict : BuildOption::none);
35+
Kiwi kw = KiwiBuilder{ modelPath, 1, option, useSBG }.build(
36+
typos[(bTypo ? 1 : 0) + (cTypo ? 2 : 0)]
2237
);
2338
if (typoCostWeight > 0) kw.setTypoCostWeight(typoCostWeight);
2439

@@ -34,10 +49,13 @@ int doEvaluate(const string& modelPath, const string& output, const vector<strin
3449
cout << "Test file: " << tf << endl;
3550
try
3651
{
37-
Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda)};
52+
Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda) };
3853
tutils::Timer total;
39-
test.run();
40-
double tm = total.getElapsed();
54+
for (int i = 0; i < repeat; ++i)
55+
{
56+
test.run();
57+
}
58+
double tm = total.getElapsed() / repeat;
4159
auto result = test.evaluate();
4260

4361
cout << result.micro << ", " << result.macro << endl;
@@ -93,21 +111,27 @@ int main(int argc, const char* argv[])
93111

94112
ValueArg<string> model{ "m", "model", "Kiwi model path", false, "ModelGenerator", "string" };
95113
ValueArg<string> output{ "o", "output", "output dir for evaluation errors", false, "", "string" };
96-
SwitchArg withoutNormCoda{ "", "wcoda", "without normalizing coda", false };
97-
SwitchArg withoutZCoda{ "", "wzcoda", "without z-coda", false };
114+
SwitchArg noNormCoda{ "", "no-normcoda", "without normalizing coda", false };
115+
SwitchArg noZCoda{ "", "no-zcoda", "without z-coda", false };
116+
SwitchArg noMulti{ "", "no-multi", "turn off multi dict", false };
98117
SwitchArg useSBG{ "", "sbg", "use SkipBigram", false };
99-
ValueArg<float> typoTolerant{ "", "typo", "make typo-tolerant model", false, 0.f, "float"};
118+
ValueArg<float> typoWeight{ "", "typo", "typo weight", false, 0.f, "float"};
119+
SwitchArg bTypo{ "", "btypo", "make basic-typo-tolerant model", false };
100120
SwitchArg cTypo{ "", "ctypo", "make continual-typo-tolerant model", false };
121+
ValueArg<int> repeat{ "", "repeat", "repeat evaluation for benchmark", false, 1, "int" };
101122
UnlabeledMultiArg<string> files{ "files", "evaluation set files", true, "string" };
102123

103124
cmd.add(model);
104125
cmd.add(output);
105126
cmd.add(files);
106-
cmd.add(withoutNormCoda);
107-
cmd.add(withoutZCoda);
127+
cmd.add(noNormCoda);
128+
cmd.add(noZCoda);
129+
cmd.add(noMulti);
108130
cmd.add(useSBG);
109-
cmd.add(typoTolerant);
131+
cmd.add(typoWeight);
132+
cmd.add(bTypo);
110133
cmd.add(cTypo);
134+
cmd.add(repeat);
111135

112136
try
113137
{
@@ -118,6 +142,7 @@ int main(int argc, const char* argv[])
118142
cerr << "error: " << e.error() << " for arg " << e.argId() << endl;
119143
return -1;
120144
}
121-
return doEvaluate(model, output, files.getValue(), !withoutNormCoda, !withoutZCoda, useSBG, typoTolerant, cTypo);
145+
return doEvaluate(model, output, files.getValue(),
146+
!noNormCoda, !noZCoda, !noMulti, useSBG, typoWeight, bTypo, cTypo, repeat);
122147
}
123148

0 commit comments

Comments
 (0)