Skip to content

Commit 4d6bdb1

Browse files
committed
optimize urlpattern::test
1 parent 236e519 commit 4d6bdb1

File tree

2 files changed

+108
-23
lines changed

2 files changed

+108
-23
lines changed

include/ada/url_pattern-inl.h

Lines changed: 105 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,9 @@ url_pattern_component<regex_provider>::create_component_match_result(
4646
// says we should start from 1. This case is handled by the
4747
// std_regex_provider.
4848
for (size_t index = 0; index < exec_result.size(); index++) {
49-
result.groups.insert({
50-
group_name_list[index],
51-
std::move(exec_result[index]),
52-
});
49+
// Use emplace to construct the group entry in place instead of building a temporary pair for insert.
50+
result.groups.emplace(group_name_list[index],
51+
std::move(exec_result[index]));
5352
}
5453
return result;
5554
}
@@ -204,16 +203,102 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::exec(
204203

205204
template <url_pattern_regex::regex_concept regex_provider>
206205
result<bool> url_pattern<regex_provider>::test(
207-
const url_pattern_input& input, const std::string_view* base_url) {
208-
// TODO: Optimization opportunity. Rather than returning `url_pattern_result`,
209-
// implement a fast path just like `can_parse()` in ada_url.
210-
// Let result be the result of match given this's associated URL pattern,
211-
// input, and baseURL if given.
212-
// If result is null, return false.
213-
if (auto result = match(input, base_url); result.has_value()) {
214-
return result->has_value();
206+
const url_pattern_input& input, const std::string_view* base_url_string) {
207+
// Fast path: Use regex_match instead of full match() to avoid building result
208+
// objects
209+
std::string protocol{};
210+
std::string username{};
211+
std::string password{};
212+
std::string hostname{};
213+
std::string port{};
214+
std::string pathname{};
215+
std::string search{};
216+
std::string hash{};
217+
218+
// If input is a URLPatternInit then:
219+
if (std::holds_alternative<url_pattern_init>(input)) {
220+
// If baseURLString was given, throw a TypeError.
221+
if (base_url_string) {
222+
return tl::unexpected(errors::type_error);
223+
}
224+
225+
auto apply_result = url_pattern_init::process(
226+
std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
227+
protocol, username, password, hostname, port, pathname, search, hash);
228+
229+
if (!apply_result.has_value()) {
230+
return false;
231+
}
232+
233+
ADA_ASSERT_TRUE(apply_result->protocol.has_value());
234+
protocol = std::move(apply_result->protocol.value());
235+
ADA_ASSERT_TRUE(apply_result->username.has_value());
236+
username = std::move(apply_result->username.value());
237+
ADA_ASSERT_TRUE(apply_result->password.has_value());
238+
password = std::move(apply_result->password.value());
239+
ADA_ASSERT_TRUE(apply_result->hostname.has_value());
240+
hostname = std::move(apply_result->hostname.value());
241+
ADA_ASSERT_TRUE(apply_result->port.has_value());
242+
port = std::move(apply_result->port.value());
243+
ADA_ASSERT_TRUE(apply_result->pathname.has_value());
244+
pathname = std::move(apply_result->pathname.value());
245+
ADA_ASSERT_TRUE(apply_result->search.has_value());
246+
if (apply_result->search->starts_with("?")) {
247+
search = apply_result->search->substr(1);
248+
} else {
249+
search = std::move(apply_result->search.value());
250+
}
251+
ADA_ASSERT_TRUE(apply_result->hash.has_value());
252+
ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#"));
253+
hash = std::move(apply_result->hash.value());
254+
} else {
255+
ADA_ASSERT_TRUE(std::holds_alternative<std::string_view>(input));
256+
257+
result<url_aggregator> base_url;
258+
259+
if (base_url_string) {
260+
base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
261+
if (!base_url) {
262+
return false;
263+
}
264+
}
265+
266+
url_aggregator* base_url_value =
267+
base_url.has_value() ? &*base_url : nullptr;
268+
269+
auto url = ada::parse<url_aggregator>(std::get<std::string_view>(input),
270+
base_url_value);
271+
272+
if (!url) {
273+
return false;
274+
}
275+
276+
protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
277+
username = url->get_username();
278+
password = url->get_password();
279+
hostname = url->get_hostname();
280+
port = url->get_port();
281+
pathname = url->get_pathname();
282+
if (url->has_search()) {
283+
auto view = url->get_search();
284+
search = view.starts_with("?") ? url->get_search().substr(1) : view;
285+
}
286+
if (url->has_hash()) {
287+
auto view = url->get_hash();
288+
hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
289+
}
215290
}
216-
return tl::unexpected(errors::type_error);
291+
292+
// Fast path: Just use regex_match for boolean testing, no need to extract
293+
// groups
294+
return regex_provider::regex_match(protocol, protocol_component.regexp) &&
295+
regex_provider::regex_match(username, username_component.regexp) &&
296+
regex_provider::regex_match(password, password_component.regexp) &&
297+
regex_provider::regex_match(hostname, hostname_component.regexp) &&
298+
regex_provider::regex_match(port, port_component.regexp) &&
299+
regex_provider::regex_match(pathname, pathname_component.regexp) &&
300+
regex_provider::regex_match(search, search_component.regexp) &&
301+
regex_provider::regex_match(hash, hash_component.regexp);
217302
}
218303

219304
template <url_pattern_regex::regex_concept regex_provider>
@@ -331,7 +416,8 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
331416
// IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
332417
// is removed. Similar work was done on workerd:
333418
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
334-
protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
419+
auto protocol_view = url->get_protocol();
420+
protocol.assign(protocol_view.data(), protocol_view.size() - 1);
335421
// Set username to url's username.
336422
username = url->get_username();
337423
// Set password to url's password.
@@ -349,16 +435,17 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
349435
// is removed. Similar work was done on workerd:
350436
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
351437
if (url->has_search()) {
352-
auto view = url->get_search();
353-
search = view.starts_with("?") ? url->get_search().substr(1) : view;
438+
auto search_view = url->get_search();
439+
search =
440+
search_view.starts_with("?") ? search_view.substr(1) : search_view;
354441
}
355442
// Set hash to url's fragment or the empty string if the value is null.
356443
// IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
357444
// removed. Similar work was done on workerd:
358445
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
359446
if (url->has_hash()) {
360-
auto view = url->get_hash();
361-
hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
447+
auto hash_view = url->get_hash();
448+
hash = hash_view.starts_with("#") ? hash_view.substr(1) : hash_view;
362449
}
363450
}
364451

src/url_pattern_regex.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,9 @@ std::optional<std::regex> std_regex_provider::create_instance(
2525
std::optional<std::vector<std::optional<std::string>>>
2626
std_regex_provider::regex_search(std::string_view input,
2727
const std::regex& pattern) {
28-
std::string input_str(
29-
input.begin(),
30-
input.end()); // Convert string_view to string for regex_search
31-
std::smatch match_result;
32-
if (!std::regex_search(input_str, match_result, pattern,
28+
// Use iterator-based regex_search to avoid string allocation
29+
std::match_results<std::string_view::const_iterator> match_result;
30+
if (!std::regex_search(input.begin(), input.end(), match_result, pattern,
3331
std::regex_constants::match_any)) {
3432
return std::nullopt;
3533
}

0 commit comments

Comments
 (0)