@@ -12,13 +12,28 @@ using namespace std;
12
12
using namespace kiwi ;
13
13
14
14
int doEvaluate (const string& modelPath, const string& output, const vector<string>& input,
15
- bool normCoda, bool zCoda, bool useSBG, float typoCostWeight, bool cTypo)
15
+ bool normCoda, bool zCoda, bool multiDict, bool useSBG,
16
+ float typoCostWeight, bool bTypo, bool cTypo,
17
+ int repeat)
16
18
{
17
19
try
18
20
{
21
+ if (typoCostWeight > 0 && !bTypo && !cTypo)
22
+ {
23
+ bTypo = true ;
24
+ }
25
+ else if (typoCostWeight == 0 )
26
+ {
27
+ bTypo = false ;
28
+ cTypo = false ;
29
+ }
30
+
31
+ DefaultTypoSet typos[] = { DefaultTypoSet::withoutTypo, DefaultTypoSet::basicTypoSet, DefaultTypoSet::continualTypoSet, DefaultTypoSet::basicTypoSetWithContinual};
32
+
19
33
tutils::Timer timer;
20
- Kiwi kw = KiwiBuilder{ modelPath, 1 , BuildOption::default_, useSBG }.build (
21
- typoCostWeight > 0 ? (cTypo ? DefaultTypoSet::basicTypoSetWithContinual : DefaultTypoSet::basicTypoSet) : DefaultTypoSet::withoutTypo
34
+ auto option = (BuildOption::default_ & ~BuildOption::loadMultiDict) | (multiDict ? BuildOption::loadMultiDict : BuildOption::none);
35
+ Kiwi kw = KiwiBuilder{ modelPath, 1 , option, useSBG }.build (
36
+ typos[(bTypo ? 1 : 0 ) + (cTypo ? 2 : 0 )]
22
37
);
23
38
if (typoCostWeight > 0 ) kw.setTypoCostWeight (typoCostWeight);
24
39
@@ -34,10 +49,13 @@ int doEvaluate(const string& modelPath, const string& output, const vector<strin
34
49
cout << " Test file: " << tf << endl;
35
50
try
36
51
{
37
- Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda)};
52
+ Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda) };
38
53
tutils::Timer total;
39
- test.run ();
40
- double tm = total.getElapsed ();
54
+ for (int i = 0 ; i < repeat; ++i)
55
+ {
56
+ test.run ();
57
+ }
58
+ double tm = total.getElapsed () / repeat;
41
59
auto result = test.evaluate ();
42
60
43
61
cout << result.micro << " , " << result.macro << endl;
@@ -93,21 +111,27 @@ int main(int argc, const char* argv[])
93
111
94
112
ValueArg<string> model{ " m" , " model" , " Kiwi model path" , false , " ModelGenerator" , " string" };
95
113
ValueArg<string> output{ " o" , " output" , " output dir for evaluation errors" , false , " " , " string" };
96
- SwitchArg withoutNormCoda{ " " , " wcoda" , " without normalizing coda" , false };
97
- SwitchArg withoutZCoda{ " " , " wzcoda" , " without z-coda" , false };
114
+ SwitchArg noNormCoda{ " " , " no-normcoda" , " without normalizing coda" , false };
115
+ SwitchArg noZCoda{ " " , " no-zcoda" , " without z-coda" , false };
116
+ SwitchArg noMulti{ " " , " no-multi" , " turn off multi dict" , false };
98
117
SwitchArg useSBG{ " " , " sbg" , " use SkipBigram" , false };
99
- ValueArg<float > typoTolerant{ " " , " typo" , " make typo-tolerant model" , false , 0 .f , " float" };
118
+ ValueArg<float > typoWeight{ " " , " typo" , " typo weight" , false , 0 .f , " float" };
119
+ SwitchArg bTypo{ " " , " btypo" , " make basic-typo-tolerant model" , false };
100
120
SwitchArg cTypo{ " " , " ctypo" , " make continual-typo-tolerant model" , false };
121
+ ValueArg<int > repeat{ " " , " repeat" , " repeat evaluation for benchmark" , false , 1 , " int" };
101
122
UnlabeledMultiArg<string> files{ " files" , " evaluation set files" , true , " string" };
102
123
103
124
cmd.add (model);
104
125
cmd.add (output);
105
126
cmd.add (files);
106
- cmd.add (withoutNormCoda);
107
- cmd.add (withoutZCoda);
127
+ cmd.add (noNormCoda);
128
+ cmd.add (noZCoda);
129
+ cmd.add (noMulti);
108
130
cmd.add (useSBG);
109
- cmd.add (typoTolerant);
131
+ cmd.add (typoWeight);
132
+ cmd.add (bTypo);
110
133
cmd.add (cTypo);
134
+ cmd.add (repeat);
111
135
112
136
try
113
137
{
@@ -118,6 +142,7 @@ int main(int argc, const char* argv[])
118
142
cerr << " error: " << e.error () << " for arg " << e.argId () << endl;
119
143
return -1 ;
120
144
}
121
- return doEvaluate (model, output, files.getValue (), !withoutNormCoda, !withoutZCoda, useSBG, typoTolerant, cTypo);
145
+ return doEvaluate (model, output, files.getValue (),
146
+ !noNormCoda, !noZCoda, !noMulti, useSBG, typoWeight, bTypo, cTypo, repeat);
122
147
}
123
148
0 commit comments