1919
2020namespace divvun {
2121
22- Normaliser::Normaliser (const hfst::HfstTransducer* normaliser_,
23- const hfst::HfstTransducer* generator_,
22+ Normaliser::Normaliser (const hfst::HfstTransducer* generator_,
2423 const hfst::HfstTransducer* sanalyser_,
25- const hfst::HfstTransducer* danalyser_, const vector<string>& tags_,
26- bool verbose_, bool trace_, bool debug_)
27- : normaliser(normaliser_)
28- , generator(generator_)
24+ const hfst::HfstTransducer* danalyser_, bool verbose_, bool trace_,
25+ bool debug_)
26+ : generator(generator_)
2927 , sanalyser(sanalyser_)
3028 , danalyser(danalyser_)
31- , tags(tags_)
3229 , verbose(verbose_)
3330 , trace(trace_)
3431 , debug(debug_) {}
3532
36- Normaliser::Normaliser (const string& normaliser_, const string& generator_,
37- const string& sanalyser_, const string& danalyser_,
38- const vector<string>& tags_, bool verbose_, bool trace_, bool debug_) {
33+ Normaliser::Normaliser (const string& generator_, const string& sanalyser_,
34+ const string& danalyser_, bool verbose_, bool trace_, bool debug_) {
3935 debug = debug_;
4036 verbose = verbose_;
4137 trace = trace_;
4238 if (verbose_) {
4339 std::cout << " Reading files: " << std::endl;
44- std::cout << " * " << normaliser_ << std::endl;
4540 if (trace_) {
4641 std::cout << " Printing traces" << std::endl;
4742 }
4843 if (debug_) {
4944 std::cout << " Printing debugs" << std::endl;
5045 }
5146 }
52- if (normaliser_ != " " ) {
53- normaliser = std::unique_ptr<const hfst::HfstTransducer>(
54- (readTransducer (normaliser_)));
55- }
5647 if (verbose_) {
5748 std::cout << " * " << generator_ << std::endl;
5849 }
@@ -74,17 +65,28 @@ Normaliser::Normaliser(const string& normaliser_, const string& generator_,
7465 danalyser = std::unique_ptr<const hfst::HfstTransducer>(
7566 (readTransducer (danalyser_)));
7667 }
77- if (verbose_) {
78- std::cout << " expanding tags: " ;
79- for (auto tag : tags_) {
80- std::cout << tag << " " ;
81- }
82- std::cout << std::endl;
83- }
84- tags = tags_;
8568 verbose = verbose_;
8669}
8770
71+ void Normaliser::addNormaliser (
72+ const std::string& tag, const hfst::HfstTransducer* nromaliser_) {
73+ if (verbose) {
74+ std::cout << " adding HFST transducer for tag " << tag << std::endl;
75+ }
76+ normalisers[tag] =
77+ std::unique_ptr<const hfst::HfstTransducer>(nromaliser_);
78+ }
79+
80+ void Normaliser::addNormaliser (
81+ const std::string& tag, const std::string& normaliser_) {
82+ if (verbose) {
83+ std::cout << " REading " << normaliser_ << " for tag " << tag
84+ << std::endl;
85+ }
86+ normalisers[tag] =
87+ std::unique_ptr<const hfst::HfstTransducer>(readTransducer (normaliser_));
88+ }
89+
8890void Normaliser::mangle_reading (CGReading& reading, std::ostream& os) {
8991 string outstring = string (reading.reading );
9092 string surf = " " ; // XXX
@@ -94,18 +96,15 @@ void Normaliser::mangle_reading(CGReading& reading, std::ostream& os) {
9496 auto tabend = outstring.find (" \" " );
9597 auto tabs = outstring.substr (tabstart, tabend);
9698 bool everythinghasfailed = true ;
97- if (tags.empty ()) {
98- everythinghasfailed = false ;
99- // os << outstring << std::endl;
100- }
101- bool expand = false ;
99+ std::string expandtag;
102100 bool expandmain = false ;
103- for (auto tag : tags ) {
104- if (outstring.find (tag ) != std::string::npos) {
101+ for (auto & normaliser : normalisers ) {
102+ if (outstring.find (normaliser. first ) != std::string::npos) {
105103 if (debug) {
106- std::cout << " Expanding because of " << tag << std::endl;
104+ std::cout << " Expanding because of " << normaliser.first
105+ << std::endl;
107106 }
108- expand = true ;
107+ expandtag = normaliser. first ;
109108 }
110109 }
111110 if (reading.subreading != nullptr ) {
@@ -153,12 +152,14 @@ void Normaliser::mangle_reading(CGReading& reading, std::ostream& os) {
153152 std::cout << " Using lemma: " << surf << std::endl;
154153 }
155154 }
156- if (expand ) {
155+ if (!expandtag. empty () ) {
157156 // 1. apply expansions from normaliser
158157 if (debug) {
159- std::cout << " 1. looking up normaliser for " << surf << std::endl;
158+ std::cout << " 1. looking up " << expandtag << " normaliser for "
159+ << surf << std::endl;
160160 }
161- const HfstPaths1L expansions (normaliser->lookup_fd (surf, -1 , 2.0 ));
161+ const HfstPaths1L expansions (
162+ normalisers[expandtag]->lookup_fd (surf, -1 , 2.0 ));
162163 if (expansions->empty ()) {
163164 if (debug) {
164165 std::cout << " Normaliser results empty." << std::endl;
@@ -258,11 +259,11 @@ void Normaliser::mangle_reading(CGReading& reading, std::ostream& os) {
258259 p = s.find (r);
259260 }
260261 }
261- for (auto tag : tags ) {
262- p = s.find (" +" + tag );
262+ for (auto & normaliser : normalisers ) {
263+ p = s.find (" +" + normaliser. first );
263264 while (p != std::string::npos) {
264- s.replace (p, tag .length () + 1 , " " );
265- p = s.find (tag );
265+ s.replace (p, normaliser. first .length () + 1 , " " );
266+ p = s.find (normaliser. first );
266267 }
267268 }
268269 regentags = s;
@@ -490,11 +491,11 @@ void Normaliser::mangle_reading(CGReading& reading, std::ostream& os) {
490491 p = s.find (r);
491492 }
492493 }
493- for (auto tag : tags ) {
494- p = s.find (" +" + tag );
494+ for (auto & normaliser : normalisers ) {
495+ p = s.find (" +" + normaliser. first );
495496 while (p != std::string::npos) {
496- s.replace (p, tag .length () + 1 , " " );
497- p = s.find (tag );
497+ s.replace (p, normaliser. first .length () + 1 , " " );
498+ p = s.find (normaliser. first );
498499 }
499500 }
500501 regentags = s;
0 commit comments