Skip to content

Commit de32a1e

Browse files
committed
Merges internal changes to OSS.
1 parent 80320a3 commit de32a1e

1 file changed

Lines changed: 21 additions & 27 deletions

File tree

src/normalizer.cc

Lines changed: 21 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ void Normalizer::Init() {
7474
util::Status Normalizer::Normalize(absl::string_view input,
7575
std::string *normalized,
7676
std::vector<size_t> *norm_to_orig) const {
77-
norm_to_orig->clear();
77+
if (norm_to_orig) norm_to_orig->clear();
7878
normalized->clear();
7979

8080
if (input.empty()) {
@@ -105,22 +105,20 @@ util::Status Normalizer::Normalize(absl::string_view input,
105105
// Reserves the output buffer to avoid re-allocations.
106106
const size_t kReservedSize = input.size() * 3;
107107
normalized->reserve(kReservedSize);
108-
norm_to_orig->reserve(kReservedSize);
108+
if (norm_to_orig) norm_to_orig->reserve(kReservedSize);
109109

110110
// Replaces white space with U+2581 (LOWER ONE EIGHT BLOCK)
111111
// if escape_whitespaces() is set (default = true).
112-
const absl::string_view kSpaceSymbol = "\xe2\x96\x81";
112+
const absl::string_view kSpaceSymbol =
113+
spec_->escape_whitespaces() ? "\xe2\x96\x81" : " ";
113114

114115
// adds kSpaceSymbol to the current context.
115-
auto add_ws = [this, &consumed, &normalized, &norm_to_orig, &kSpaceSymbol]() {
116-
if (spec_->escape_whitespaces()) {
117-
normalized->append(kSpaceSymbol.data(), kSpaceSymbol.size());
116+
auto add_ws = [&consumed, &normalized, &norm_to_orig, &kSpaceSymbol]() {
117+
normalized->append(kSpaceSymbol.data(), kSpaceSymbol.size());
118+
if (norm_to_orig) {
118119
for (size_t n = 0; n < kSpaceSymbol.size(); ++n) {
119120
norm_to_orig->push_back(consumed);
120121
}
121-
} else {
122-
normalized->append(" ");
123-
norm_to_orig->push_back(consumed);
124122
}
125123
};
126124

@@ -143,15 +141,11 @@ util::Status Normalizer::Normalize(absl::string_view input,
143141
if (!sp.empty()) {
144142
const char *data = sp.data();
145143
for (size_t n = 0; n < sp.size(); ++n) {
146-
if (spec_->escape_whitespaces() && data[n] == ' ') {
147-
// replace ' ' with kSpaceSymbol.
148-
normalized->append(kSpaceSymbol.data(), kSpaceSymbol.size());
149-
for (size_t m = 0; m < kSpaceSymbol.size(); ++m) {
150-
norm_to_orig->push_back(consumed);
151-
}
144+
if (data[n] == ' ') {
145+
add_ws();
152146
} else {
153147
*normalized += data[n];
154-
norm_to_orig->push_back(consumed);
148+
if (norm_to_orig) norm_to_orig->push_back(consumed);
155149
}
156150
}
157151
// Checks whether the last character of sp is whitespace.
@@ -167,31 +161,31 @@ util::Status Normalizer::Normalize(absl::string_view input,
167161

168162
// Ignores trailing space.
169163
if (spec_->remove_extra_whitespaces()) {
170-
const absl::string_view space =
171-
spec_->escape_whitespaces() ? kSpaceSymbol : " ";
172-
while (absl::EndsWith(*normalized, space)) {
173-
const int length = normalized->size() - space.size();
164+
while (absl::EndsWith(*normalized, kSpaceSymbol)) {
165+
const int length = normalized->size() - kSpaceSymbol.size();
174166
RET_CHECK_GE(length, 0);
175-
consumed = (*norm_to_orig)[length];
176167
normalized->resize(length);
177-
norm_to_orig->resize(length);
168+
if (norm_to_orig) {
169+
consumed = (*norm_to_orig)[length];
170+
norm_to_orig->resize(length);
171+
}
178172
}
179173
}
180174

181175
// Adds a space symbol as a suffix (default is false)
182176
if (treat_whitespace_as_suffix_ && spec_->add_dummy_prefix()) add_ws();
183177

184-
norm_to_orig->push_back(consumed);
185-
186-
RET_CHECK_EQ(norm_to_orig->size(), normalized->size() + 1);
178+
if (norm_to_orig) {
179+
norm_to_orig->push_back(consumed);
180+
RET_CHECK_EQ(norm_to_orig->size(), normalized->size() + 1);
181+
}
187182

188183
return util::OkStatus();
189184
}
190185

191186
std::string Normalizer::Normalize(absl::string_view input) const {
192-
std::vector<size_t> norm_to_orig;
193187
std::string normalized;
194-
Normalize(input, &normalized, &norm_to_orig).IgnoreError();
188+
Normalize(input, &normalized, nullptr).IgnoreError();
195189
return normalized;
196190
}
197191

0 commit comments

Comments
 (0)