@@ -74,7 +74,7 @@ void Normalizer::Init() {
7474util::Status Normalizer::Normalize (absl::string_view input,
7575 std::string *normalized,
7676 std::vector<size_t > *norm_to_orig) const {
77- norm_to_orig->clear ();
77+ if (norm_to_orig) norm_to_orig->clear ();
7878 normalized->clear ();
7979
8080 if (input.empty ()) {
@@ -105,22 +105,20 @@ util::Status Normalizer::Normalize(absl::string_view input,
105105 // Reserves the output buffer to avoid re-allocations.
106106 const size_t kReservedSize = input.size () * 3 ;
107107 normalized->reserve (kReservedSize );
108- norm_to_orig->reserve (kReservedSize );
108+ if (norm_to_orig) norm_to_orig->reserve (kReservedSize );
109109
110110 // Replaces white space with U+2581 (LOWER ONE EIGHTH BLOCK)
111111 // if escape_whitespaces() is set (default = true).
112- const absl::string_view kSpaceSymbol = " \xe2\x96\x81 " ;
112+ const absl::string_view kSpaceSymbol =
113+ spec_->escape_whitespaces () ? " \xe2\x96\x81 " : " " ;
113114
114115 // Adds kSpaceSymbol to the current context.
115- auto add_ws = [this , &consumed, &normalized, &norm_to_orig, &kSpaceSymbol ]() {
116- if (spec_-> escape_whitespaces ()) {
117- normalized-> append ( kSpaceSymbol . data (), kSpaceSymbol . size ());
116+ auto add_ws = [&consumed, &normalized, &norm_to_orig, &kSpaceSymbol ]() {
117+ normalized-> append ( kSpaceSymbol . data (), kSpaceSymbol . size ());
118+ if (norm_to_orig) {
118119 for (size_t n = 0 ; n < kSpaceSymbol .size (); ++n) {
119120 norm_to_orig->push_back (consumed);
120121 }
121- } else {
122- normalized->append (" " );
123- norm_to_orig->push_back (consumed);
124122 }
125123 };
126124
@@ -143,15 +141,11 @@ util::Status Normalizer::Normalize(absl::string_view input,
143141 if (!sp.empty ()) {
144142 const char *data = sp.data ();
145143 for (size_t n = 0 ; n < sp.size (); ++n) {
146- if (spec_->escape_whitespaces () && data[n] == ' ' ) {
147- // replace ' ' with kSpaceSymbol.
148- normalized->append (kSpaceSymbol .data (), kSpaceSymbol .size ());
149- for (size_t m = 0 ; m < kSpaceSymbol .size (); ++m) {
150- norm_to_orig->push_back (consumed);
151- }
144+ if (data[n] == ' ' ) {
145+ add_ws ();
152146 } else {
153147 *normalized += data[n];
154- norm_to_orig->push_back (consumed);
148+ if (norm_to_orig) norm_to_orig->push_back (consumed);
155149 }
156150 }
157151 // Checks whether the last character of sp is whitespace.
@@ -167,31 +161,31 @@ util::Status Normalizer::Normalize(absl::string_view input,
167161
168162 // Ignores trailing space.
169163 if (spec_->remove_extra_whitespaces ()) {
170- const absl::string_view space =
171- spec_->escape_whitespaces () ? kSpaceSymbol : " " ;
172- while (absl::EndsWith (*normalized, space)) {
173- const int length = normalized->size () - space.size ();
164+ while (absl::EndsWith (*normalized, kSpaceSymbol )) {
165+ const int length = normalized->size () - kSpaceSymbol .size ();
174166 RET_CHECK_GE (length, 0 );
175- consumed = (*norm_to_orig)[length];
176167 normalized->resize (length);
177- norm_to_orig->resize (length);
168+ if (norm_to_orig) {
169+ consumed = (*norm_to_orig)[length];
170+ norm_to_orig->resize (length);
171+ }
178172 }
179173 }
180174
181175 // Adds a space symbol as a suffix (default is false)
182176 if (treat_whitespace_as_suffix_ && spec_->add_dummy_prefix ()) add_ws ();
183177
184- norm_to_orig->push_back (consumed);
185-
186- RET_CHECK_EQ (norm_to_orig->size (), normalized->size () + 1 );
178+ if (norm_to_orig) {
179+ norm_to_orig->push_back (consumed);
180+ RET_CHECK_EQ (norm_to_orig->size (), normalized->size () + 1 );
181+ }
187182
188183 return util::OkStatus ();
189184}
190185
191186std::string Normalizer::Normalize (absl::string_view input) const {
192- std::vector<size_t > norm_to_orig;
193187 std::string normalized;
194- Normalize (input, &normalized, &norm_to_orig ).IgnoreError ();
188+ Normalize (input, &normalized, nullptr ).IgnoreError ();
195189 return normalized;
196190}
197191
0 commit comments