@@ -238,14 +238,13 @@ const Reading proc_subreading(const string& line, bool generate_all_readings) {
238238 if (tag == " COERROR" ) { // COERROR kept for backward-compatibility
239239 r.coerror = true ;
240240 }
241- else if (tag == " &SUGGEST" ||
242- tag ==
243- " SUGGEST" ) { // &SUGGEST kept for backward-compatibility
241+ else if (tag == " DROP-PRE-BLANK" ) {
242+ r.drop_pre_blank = true ;
243+ }
244+ else if (tag == " &SUGGEST" || tag == " SUGGEST" ) { // &SUGGEST kept for backward-compatibility
244245 r.suggest = true ;
245246 }
246- else if (tag == " &SUGGESTWF" ||
247- tag ==
248- " SUGGESTWF" ) { // &SUGGESTWF kept for backward-compatibility
247+ else if (tag == " &SUGGESTWF" || tag == " SUGGESTWF" ) { // &SUGGESTWF kept for backward-compatibility
249248 r.suggestwf = true ;
250249 }
251250 else if (result.empty ()) {
@@ -261,9 +260,7 @@ const Reading proc_subreading(const string& line, bool generate_all_readings) {
261260 else if (tag == " &ADDED-BEFORE-BLANK" ) {
262261 r.added = AddedBeforeBlank;
263262 }
264- else if (tag == " &LINK" ||
265- tag ==
266- " &COERROR" ) { // &LINK kept for backward-compatibility
263+ else if (tag == " &LINK" || tag == " &COERROR" ) { // &LINK kept for backward-compatibility
267264 r.coerror = true ;
268265 }
269266 else {
@@ -350,6 +347,7 @@ const Reading proc_reading(const hfst::HfstTransducer& generator,
350347 r.sforms .insert (r.sforms .end (), sub.sforms .begin (), sub.sforms .end ());
351348 r.wf = r.wf .empty () ? sub.wf : r.wf ;
352349 r.fixedcase |= sub.fixedcase ;
350+ r.drop_pre_blank |= sub.drop_pre_blank ;
353351 }
354352 dedupe (r.rels );
355353 if (r.suggest ) {
@@ -577,47 +575,30 @@ build_squiggle_replacement(const Reading& r, const ErrId& err_id,
577575 std::map<pair<size_t , size_t >, pair<u16string, Reading>>
578576 add; // position in text:cohort in Sentence
579577 // Loop from the leftmost to the rightmost of source and target cohorts:
580- if (verbose)
581- std::cerr << " \033 [1;31m=== err_id=\t " << toUtf8 (err_id)
582- << " ===\033 [0m" << std::endl;
583- if (verbose)
584- std::cerr << " \033 [1;33mr.id=\t " << r.id << " \033 [0m" << std::endl;
585- if (verbose)
586- std::cerr << " \033 [1;33msrc.id=\t " << src.id << " \033 [0m" << std::endl;
587- if (verbose)
588- std::cerr << " \033 [1;33mi_c=\t " << i_c << " \033 [0m" << std::endl;
589- if (verbose)
590- std::cerr << " \033 [1;33mleft=\t " << i_left << " \033 [0m" << std::endl;
591- if (verbose)
592- std::cerr << " \033 [1;33mright=\t " << i_right << " \033 [0m" << std::endl;
578+ if (verbose) std::cerr << " \033 [1;31m=== err_id=\t " << toUtf8 (err_id) << " ===\033 [0m" << std::endl;
579+ if (verbose) std::cerr << " \033 [1;33mr.id=\t " << r.id << " \033 [0m" << std::endl;
580+ if (verbose) if (r.drop_pre_blank ) std::cerr << " \033 [1;33mr.drop_pre_blank=\t " << r.drop_pre_blank << " \033 [0m" << std::endl;
581+ if (verbose) std::cerr << " \033 [1;33msrc.id=\t " << src.id << " \033 [0m" << std::endl;
582+ if (verbose) std::cerr << " \033 [1;33mi_c=\t " << i_c << " \033 [0m" << std::endl;
583+ if (verbose) std::cerr << " \033 [1;33mleft=\t " << i_left << " \033 [0m" << std::endl;
584+ if (verbose) std::cerr << " \033 [1;33mright=\t " << i_right << " \033 [0m" << std::endl;
593585 UStringVector reps = { u" " };
594- UStringVector
595- reps_suggestwf = {}; // If we're doing SUGGESTWF, we ignore reps
596586 string prev_added_before_blank = " " ;
597587 std::optional<Casing> addedcasing = std::nullopt ;
598588 for (size_t i = i_left; i <= i_right; ++i) {
599589 const auto & trg = sentence.cohorts [i];
600590 Casing casing = getCasing (toUtf8 (trg.form ));
601591
602- if (verbose)
603- std::cerr << " \033 [1;34mi=\t " << i << " \033 [0m" << std::endl;
604- if (verbose)
605- std::cerr << " \033 [1;34mtrg.form=\t '" << toUtf8 (trg.form )
606- << " '\033 [0m" << std::endl;
607- if (verbose)
608- std::cerr << " \033 [1;34mtrg.id=\t " << trg.id << " \033 [0m"
609- << std::endl;
610- if (verbose)
611- std::cerr << " \033 [1;35mtrg.raw_pre_blank=\t '" << trg.raw_pre_blank
612- << " '\033 [0m" << std::endl;
592+ if (verbose) std::cerr << " \033 [1;34mi=\t " << i << " \033 [0m" << std::endl;
593+ if (verbose) std::cerr << " \033 [1;34mtrg.form=\t '" << toUtf8 (trg.form ) << " '\033 [0m" << std::endl;
594+ if (verbose) std::cerr << " \033 [1;34mtrg.id=\t " << trg.id << " \033 [0m" << std::endl;
595+ if (verbose) std::cerr << " \033 [1;35mtrg.raw_pre_blank=\t '" << trg.raw_pre_blank << " '\033 [0m" << std::endl;
613596
614597 UStringVector rep_this_trg;
615598 const bool del = do_delete (trg, err_id, src.errtypes , deletions);
616599 if (del) {
617600 rep_this_trg.push_back (u" " );
618- if (verbose)
619- std::cerr << " \t\t\033 [1;36mdelete=\t " << toUtf8 (trg.form )
620- << " \033 [0m" << std::endl;
601+ if (verbose) std::cerr << " \t\t\033 [1;36mdelete=\t " << toUtf8 (trg.form ) << " \033 [0m" << std::endl;
621602 }
622603
623604 if (trg.added ) {
@@ -642,18 +623,12 @@ build_squiggle_replacement(const Reading& r, const ErrId& err_id,
642623 bool applies_deletion = trg.id == src.id && src_applies_deletion;
643624 size_t trg_beg = trg.pos ;
644625 size_t trg_end = trg.pos + trg.form .size ();
645- for (const Reading& tr :
646- readings_with_errtype (trg, err_id, applies_deletion)) {
647- if (verbose)
648- std::cerr << " \033 [1;32mtr.line=\t " << tr.line << " \033 [0m"
649- << std::endl;
626+ for (const Reading& tr : readings_with_errtype (trg, err_id, applies_deletion)) {
627+ if (verbose) std::cerr << " \033 [1;32mtr.line=\t " << tr.line << " \033 [0m" << std::endl;
650628 // Update beg/end:
651629 if (tr.added == AddedBeforeBlank) {
652630 if (i == 0 ) {
653- std::cerr
654- << " divvun-suggest: WARNING: Saw &ADDED-BEFORE-BLANK on "
655- " initial word, ignoring"
656- << std::endl;
631+ std::cerr << " divvun-suggest: WARNING: Saw &ADDED-BEFORE-BLANK on " " initial word, ignoring" << std::endl;
657632 continue ;
658633 }
659634 const auto & pretrg = sentence.cohorts [i - 1 ];
@@ -665,40 +640,21 @@ build_squiggle_replacement(const Reading& r, const ErrId& err_id,
665640 NotAdded) { // Don't replace existing form if Added/AddedBeforeBlank
666641 trg_end = trg_beg;
667642 }
668- if (verbose)
669- std::cerr << " \t\033 [1;35mr.wf='" << tr.wf << " '\033 [0m" ;
670- if (verbose)
671- std::cerr << " \t\033 [0;35mr.coerror=" << tr.coerror
672- << " \033 [0m" ;
673- if (verbose)
674- std::cerr << " \t\033 [0;35mr.suggestwf=" << tr.suggestwf
675- << " \033 [0m" ;
676- if (verbose)
677- std::cerr << " \t\033 [0;35mr.suggest=" << tr.suggest
678- << " \033 [0m" << " \t " << tr.line ;
643+ if (verbose) std::cerr << " \t\033 [1;35mr.wf='" << tr.wf << " '\033 [0m" ;
644+ if (verbose) std::cerr << " \t\033 [0;35mr.coerror=" << tr.coerror << " \033 [0m" ;
645+ if (verbose) std::cerr << " \t\033 [0;35mr.suggestwf=" << tr.suggestwf << " \033 [0m" ;
646+ if (verbose) std::cerr << " \t\033 [0;35mr.suggest=" << tr.suggest << " \033 [0m" << " \t " << tr.line ;
679647 // Collect SUGGEST/SUGGESTWF:
680648 if (!del)
681649 for (const auto & sf : tr.sforms ) {
682650 const auto cased_sf =
683651 fromUtf8 (withCasing (tr.fixedcase , casing, sf));
684652 rep_this_trg.push_back (cased_sf);
685- if (tr.suggestwf ) {
686- if (i == i_c) {
687- reps_suggestwf.push_back (cased_sf);
688- }
689- else {
690- std::cerr
691- << " divvun-suggest: WARNING: Saw SUGGESTWF on "
692- " non-central (co-)cohort, ignoring"
693- << std::endl;
694- }
695- }
696- if (verbose)
697- std::cerr << " \t\t\033 [1;36msform=\t '" << sf
698- << " '\033 [0m" << std::endl;
653+ if (verbose) std::cerr << " \t\t\033 [1;36msform=\t '" << sf << " '\033 [0m" << std::endl;
699654 }
700655 fixedcase |= tr.fixedcase ; // for the surface form
701656 } // end for readings of target
657+
702658 if (rep_this_trg.empty ()) {
703659 const auto cased_sf =
704660 fromUtf8 (withCasing (fixedcase, casing, toUtf8 (trg.form )));
@@ -710,10 +666,9 @@ build_squiggle_replacement(const Reading& r, const ErrId& err_id,
710666 UStringVector reps_next;
711667 for (auto & rep : reps) {
712668 // Prepend blank unless at left edge:
713- const auto pre_blank =
714- i == i_left || added_before_blank ?
715- " " :
716- clean_blank (prev_added_before_blank + trg.raw_pre_blank );
669+ const auto pre_blank = i == i_left || added_before_blank || r.drop_pre_blank ?
670+ " " :
671+ clean_blank (prev_added_before_blank + trg.raw_pre_blank );
717672 // For &ADDED, enclose in blanks (unneeded blanks will get cleaned later):
718673 const auto post_blank = trg.added ? u" " : u" " ;
719674 for (const auto & sform : rep_this_trg) {
@@ -732,13 +687,8 @@ build_squiggle_replacement(const Reading& r, const ErrId& err_id,
732687 rep.erase (1 + rep.find_last_not_of (' ' ));
733688 rep.erase (0 , rep.find_first_not_of (' ' ));
734689 }
735- if (verbose)
736- for (const auto & sf : reps) {
737- std::cerr << " \033 [1;35mreps sf=\t '" << toUtf8 (sf) << " '\033 [0m\t "
738- << beg << " ," << end << std::endl;
739- }
740- return std::make_pair (std::make_pair (beg, end),
741- reps_suggestwf.empty () ? reps : reps_suggestwf);
690+ if (verbose) for (const auto & sf : reps) {std::cerr << " \033 [1;35mreps sf=\t '" << toUtf8 (sf) << " '\033 [0m\t " << beg << " ," << end << std::endl;}
691+ return std::make_pair (std::make_pair (beg, end), reps);
742692}
743693
744694variant<Nothing, Err> Suggest::cohort_errs (const ErrId& err_id, size_t i_c,
0 commit comments