changed topN of evaluator to 1 & added more typo options

bab2min · bab2min · commit c6f24274aa95 · 2024-04-14T15:11:12.000+09:00
diff --git a/tools/Evaluator.h b/tools/Evaluator.h
@@ -27,9 +27,9 @@ class Evaluator
 	std::vector<TestResult> testsets, errors;
 	const kiwi::Kiwi* kw = nullptr;
 	kiwi::Match matchOption;
-	size_t topN = 3;
+	size_t topN = 1;
 public:
-	Evaluator(const std::string& testSetFile, const kiwi::Kiwi* _kw, kiwi::Match _matchOption = kiwi::Match::all, size_t topN = 3);
+	Evaluator(const std::string& testSetFile, const kiwi::Kiwi* _kw, kiwi::Match _matchOption = kiwi::Match::all, size_t topN = 1);
 	void run();
 	Score evaluate();
 	const std::vector<TestResult>& getErrors() const { return errors; }
diff --git a/tools/evaluator_main.cpp b/tools/evaluator_main.cpp
@@ -12,13 +12,28 @@ using namespace std;
 using namespace kiwi;
 
 int doEvaluate(const string& modelPath, const string& output, const vector<string>& input, 
-	bool normCoda, bool zCoda, bool useSBG, float typoCostWeight, bool cTypo)
+	bool normCoda, bool zCoda, bool multiDict, bool useSBG, 
+	float typoCostWeight, bool bTypo, bool cTypo,
+	int repeat)
 {
 	try
 	{
+		if (typoCostWeight > 0 && !bTypo && !cTypo)
+		{
+			bTypo = true;
+		}
+		else if (typoCostWeight == 0)
+		{
+			bTypo = false;
+			cTypo = false;
+		}
+
+		DefaultTypoSet typos[] = { DefaultTypoSet::withoutTypo, DefaultTypoSet::basicTypoSet, DefaultTypoSet::continualTypoSet, DefaultTypoSet::basicTypoSetWithContinual};
+
 		tutils::Timer timer;
-		Kiwi kw = KiwiBuilder{ modelPath, 1, BuildOption::default_, useSBG }.build(
-			typoCostWeight > 0 ? (cTypo ? DefaultTypoSet::basicTypoSetWithContinual : DefaultTypoSet::basicTypoSet) : DefaultTypoSet::withoutTypo
+		auto option = (BuildOption::default_ & ~BuildOption::loadMultiDict) | (multiDict ? BuildOption::loadMultiDict : BuildOption::none);
+		Kiwi kw = KiwiBuilder{ modelPath, 1, option, useSBG }.build(
+			typos[(bTypo ? 1 : 0) + (cTypo ? 2 : 0)]
 		);
 		if (typoCostWeight > 0) kw.setTypoCostWeight(typoCostWeight);
 		
@@ -34,10 +49,13 @@ int doEvaluate(const string& modelPath, const string& output, const vector<strin
 			cout << "Test file: " << tf << endl;
 			try
 			{
-				Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda)};
+				Evaluator test{ tf, &kw, (normCoda ? Match::allWithNormalizing : Match::all) & ~(zCoda ? Match::none : Match::zCoda) };
 				tutils::Timer total;
-				test.run();
-				double tm = total.getElapsed();
+				for (int i = 0; i < repeat; ++i)
+				{
+					test.run();
+				}
+				double tm = total.getElapsed() / repeat;
 				auto result = test.evaluate();
 
 				cout << result.micro << ", " << result.macro << endl;
@@ -93,21 +111,27 @@ int main(int argc, const char* argv[])
 
 	ValueArg<string> model{ "m", "model", "Kiwi model path", false, "ModelGenerator", "string" };
 	ValueArg<string> output{ "o", "output", "output dir for evaluation errors", false, "", "string" };
-	SwitchArg withoutNormCoda{ "", "wcoda", "without normalizing coda", false };
-	SwitchArg withoutZCoda{ "", "wzcoda", "without z-coda", false };
+	SwitchArg noNormCoda{ "", "no-normcoda", "without normalizing coda", false };
+	SwitchArg noZCoda{ "", "no-zcoda", "without z-coda", false };
+	SwitchArg noMulti{ "", "no-multi", "turn off multi dict", false };
 	SwitchArg useSBG{ "", "sbg", "use SkipBigram", false };
-	ValueArg<float> typoTolerant{ "", "typo", "make typo-tolerant model", false, 0.f, "float"};
+	ValueArg<float> typoWeight{ "", "typo", "typo weight", false, 0.f, "float"};
+	SwitchArg bTypo{ "", "btypo", "make basic-typo-tolerant model", false };
 	SwitchArg cTypo{ "", "ctypo", "make continual-typo-tolerant model", false };
+	ValueArg<int> repeat{ "", "repeat", "repeat evaluation for benchmark", false, 1, "int" };
 	UnlabeledMultiArg<string> files{ "files", "evaluation set files", true, "string" };
 
 	cmd.add(model);
 	cmd.add(output);
 	cmd.add(files);
-	cmd.add(withoutNormCoda);
-	cmd.add(withoutZCoda);
+	cmd.add(noNormCoda);
+	cmd.add(noZCoda);
+	cmd.add(noMulti);
 	cmd.add(useSBG);
-	cmd.add(typoTolerant);
+	cmd.add(typoWeight);
+	cmd.add(bTypo);
 	cmd.add(cTypo);
+	cmd.add(repeat);
 
 	try
 	{
@@ -118,6 +142,7 @@ int main(int argc, const char* argv[])
 		cerr << "error: " << e.error() << " for arg " << e.argId() << endl;
 		return -1;
 	}
-	return doEvaluate(model, output, files.getValue(), !withoutNormCoda, !withoutZCoda, useSBG, typoTolerant, cTypo);
+	return doEvaluate(model, output, files.getValue(), 
+		!noNormCoda, !noZCoda, !noMulti, useSBG, typoWeight, bTypo, cTypo, repeat);
 }