Skip to content
Open
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f8cfddc
optimize url::can_parse method
anonrig Mar 29, 2026
bae2393
update clang-tools to 22
anonrig Mar 29, 2026
e7b4b56
create AGENTS.md
anonrig Mar 29, 2026
e65faad
remove unused methods
anonrig Mar 29, 2026
2c56335
update comments & abi-check
anonrig Mar 29, 2026
0df4e41
bump SOVERSION to 5 for intentional ABI break
anonrig Mar 29, 2026
6d137d3
fix clang-tidy-22 warnings: noexcept-escape, unchecked-optional-acces…
anonrig Mar 30, 2026
4b29e68
address fuzzing issues
anonrig Mar 30, 2026
9d32bd6
fix throwing-static-init false positive and add clang-tidy to run-cla…
anonrig Mar 30, 2026
c7cb8f8
fix docker clang-tidy: generate compile_commands.json on host, run ti…
anonrig Mar 30, 2026
7592b19
fix gen_compile_commands: drop -stdlib=libc++ when using host GCC
anonrig Mar 30, 2026
457813a
wipe stale cmake cache before gen_compile_commands to drop old CXX_FLAGS
anonrig Mar 30, 2026
c68b315
fix docker clang-tidy: install cmake+ninja in container, use clang++-…
anonrig Mar 30, 2026
95d69d7
wipe build-clang-tidy before docker cmake to avoid generator mismatch
anonrig Mar 30, 2026
b77f4dd
install clang-22 and libc++-22-dev in docker tidy container
anonrig Mar 30, 2026
e9ca11a
reduce apt-get verbosity with -qq flag
anonrig Mar 30, 2026
ce6b89b
add git to docker deps for CPM to clone gtest
anonrig Mar 30, 2026
a9a7322
suppress apt/docker verbosity, fix SSL certs, cache CPM downloads on …
anonrig Mar 30, 2026
a086e30
exclude vendored gtest from clang-tidy and update ExcludeHeaderFilter…
anonrig Mar 30, 2026
5756e68
scope clang-tidy to src/ only, fix git safe.directory, simplify docke…
anonrig Mar 30, 2026
0cc0b23
fix all clang-tidy issues: scope to ada.cpp, NOLINT false positives, …
anonrig Mar 30, 2026
827cc3b
remove .cpm-cache from repo, add to .gitignore
anonrig Mar 30, 2026
c2df2c2
add regression tests for extra-slash fuzzer crashes (ws:///..., ws://…
anonrig Mar 30, 2026
dc4bace
fix % in host: return nullopt to defer to full parser; add regression…
anonrig Mar 30, 2026
9e8d166
fix port leading-zeros: strip zeros before pl>5 check; add regression…
anonrig Mar 30, 2026
32d4de4
fix IPv4 fast path bypassing port validation; add regression test
anonrig Mar 30, 2026
ad5bcad
Update CMakeLists.txt
anonrig Mar 30, 2026
44b0892
Update CMakeLists.txt
anonrig Mar 30, 2026
6522713
Update CMakeLists.txt
anonrig Mar 30, 2026
baa061d
add shortcuts for can_parse slow path
anonrig Mar 31, 2026
5376065
optimize even further (#1111)
anonrig Mar 31, 2026
77c6d5f
Fix error in optimized can_parse (#1118)
CarlosEduR Apr 3, 2026
d287dff
Improve consistency in optimized can_parse (#1119)
CarlosEduR Apr 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ Checks: >
-bugprone-narrowing-conversions,
-bugprone-suspicious-include,
-bugprone-unhandled-exception-at-new,
-bugprone-throwing-static-initialization,
clang-analyzer-*,
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
# Turn all the warnings from the checks above into errors.
WarningsAsErrors: '*'
# Check first-party (non-system, non-vendored) headers.
HeaderFilterRegex: '.*'
ExcludeHeaderFilterRegex: 'build/_deps/'
ExcludeHeaderFilterRegex: '(build[^/]*/_deps/|\.cpm-cache/|/expected\.h$)'
SystemHeaders: false
17 changes: 15 additions & 2 deletions .github/workflows/abi-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,21 @@ jobs:
- name: Find latest release tag
id: baseline
run: |
# Find the most recent vX.Y.Z tag reachable from the current commit's history
LATEST_TAG=$(git tag --list 'v*.*.*' --sort=-version:refname | head -1)
# Find the most recent vX.Y.Z tag that does NOT point to the current HEAD.
# Excluding HEAD ensures we compare against a previous release even when a
# new release tag was just pushed to main alongside this workflow run.
CURRENT_SHA=$(git rev-parse HEAD)
LATEST_TAG=$(git tag --merged HEAD --list 'v*.*.*' --sort=-version:refname | while IFS= read -r tag; do
TAG_SHA=$(git rev-parse "${tag}^{}" 2>/dev/null)
if [ "$TAG_SHA" != "$CURRENT_SHA" ]; then
echo "$tag"
break
fi
done)
if [ -z "$LATEST_TAG" ]; then
echo "No previous release tag found — cannot establish a baseline."
exit 1
fi
echo "Latest release tag: $LATEST_TAG"
echo "tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"

Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/lint_and_format_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- name: Run clang-format
uses: jidicula/clang-format-action@6cd220de46c89139a0365edae93eee8eb30ca8fe # v4.16.0
with:
clang-format-version: '17'
clang-format-version: '22'
fallback-style: 'Google'

- uses: chartboost/ruff-action@e18ae971ccee1b2d7bbef113930f00c670b78da4 # v1.0.0
Expand All @@ -38,11 +38,15 @@ jobs:
version: 0.6.0

- name: Install clang-tidy and libc++
run: sudo apt-get update && sudo apt-get install -y clang-tidy-20 libc++-20-dev libc++abi-20-dev
run: |
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-22 main" | sudo tee /etc/apt/sources.list.d/llvm.list
sudo apt-get update
sudo apt-get install -y clang-tidy-22 libc++-22-dev libc++abi-22-dev

- name: Run clang-tidy
run: >
cmake -B build -DADA_TESTING=ON -DADA_USE_UNSAFE_STD_REGEX_PROVIDER=ON -DCMAKE_CXX_CLANG_TIDY=clang-tidy-20 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS="-stdlib=libc++" &&
cmake -B build -DADA_TESTING=ON -DADA_USE_UNSAFE_STD_REGEX_PROVIDER=ON -DCMAKE_CXX_CLANG_TIDY=clang-tidy-22 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS="-stdlib=libc++" &&
cmake --build build -j=4
env:
CXX: clang++-20
CXX: clang++-22
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ build-*/
*-build-*
_fuzz_check/

# CPM package cache (used by the Docker fallback in tools/run-clangcldocker.sh)
.cpm-cache/

# Python cache
__pycache__
venv
Expand Down
13 changes: 13 additions & 0 deletions CLAUDE.md → AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@

This guide provides instructions for building, testing, and benchmarking the Ada URL parser library using CMake.

## Pre-commit Checklist

Always run the combined clang-format and clang-tidy script before committing:

```bash
bash tools/run-clangcldocker.sh
```

This runs clang-format on all tracked source files and clang-tidy on `src/ada.cpp`
(the single translation unit that includes all first-party code). The script uses
the locally installed LLVM 22 toolchain when available; otherwise, it automatically
falls back to the `xianpengshen/clang-tools:22` Docker image.

## Quick Reference

```bash
Expand Down
10 changes: 5 additions & 5 deletions benchmarks/bench_protocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include "counters/bench.h"

template <class Function1, class Function2>
counters::event_aggregate shuffle_bench(Function1 &&function1,
Function2 &&function2,
counters::event_aggregate shuffle_bench(Function1&& function1,
Function2&& function2,
size_t min_repeat = 300,
size_t min_time_ns = 400'000'000,
size_t max_repeat = 1000000,
Expand Down Expand Up @@ -77,7 +77,7 @@ constexpr uint64_t scheme_keys[] = {

// branchless load of up to 5 characters into a uint64_t, padding with zeros if
// n < 5
inline uint64_t branchless_load5(const char *p, size_t n) {
inline uint64_t branchless_load5(const char* p, size_t n) {
uint64_t input = (uint8_t)p[0];
input |= ((uint64_t)(uint8_t)p[n > 1] << 8) & (0 - (uint64_t)(n > 1));
input |= ((uint64_t)(uint8_t)p[(n > 2) * 2] << 16) & (0 - (uint64_t)(n > 2));
Expand Down Expand Up @@ -131,7 +131,7 @@ std::optional<SchemeType> get_scheme_type(std::string_view scheme) noexcept {
return std::nullopt;
}

double pretty_print(const std::string &name, size_t num_values,
double pretty_print(const std::string& name, size_t num_values,
counters::event_aggregate agg) {
printf("%-50s : ", name.c_str());
printf(" %5.3f ns ", agg.fastest_elapsed_ns() / double(num_values));
Expand Down Expand Up @@ -269,7 +269,7 @@ void collect_benchmark_results(size_t number_strings) {
gen.seed(42); // reset seed to ensure same shuffle for all benchmarks
}

int main(int argc, char **argv) {
int main(int argc, char** argv) {
if (!counters::has_performance_counters()) {
printf(
"Performance counters not available, you may need to run with sudo.\n");
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/competitors/servo-url/servo_url.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ struct Url;

extern "C" {

Url *parse_url(const char *raw_input, size_t raw_input_length);
Url* parse_url(const char* raw_input, size_t raw_input_length);

void free_url(Url *raw);
void free_url(Url* raw);

const char *parse_url_to_href(const char *raw_input, size_t raw_input_length);
const char* parse_url_to_href(const char* raw_input, size_t raw_input_length);

void free_string(const char *);
void free_string(const char*);
} // extern "C"

} // namespace servo_url
Expand Down
2 changes: 1 addition & 1 deletion fuzz/can_parse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "ada.cpp"
#include "ada.h"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
FuzzedDataProvider fdp(data, size);
std::string source = fdp.ConsumeRandomLengthString(256);
std::string base_source = fdp.ConsumeRandomLengthString(256);
Expand Down
2 changes: 1 addition & 1 deletion fuzz/idna.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "ada.cpp"
#include "ada.h"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
FuzzedDataProvider fdp(data, size);
std::string source = fdp.ConsumeRandomLengthString(256);
std::string source2 = fdp.ConsumeRandomLengthString(64);
Expand Down
10 changes: 5 additions & 5 deletions fuzz/parse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#include "ada.cpp"
#include "ada.h"

bool is_valid_utf8_string(const char *buf, size_t len) {
const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
bool is_valid_utf8_string(const char* buf, size_t len) {
const uint8_t* data = reinterpret_cast<const uint8_t*>(buf);
uint64_t pos = 0;
uint32_t code_point = 0;
while (pos < len) {
Expand Down Expand Up @@ -93,7 +93,7 @@ bool is_valid_utf8_string(const char *buf, size_t len) {
}

// Exercise all getters and boolean predicates on ada::url
static void exercise_url_predicates(const ada::url &u) {
static void exercise_url_predicates(const ada::url& u) {
volatile size_t length = 0;
length += u.get_href().size();
length += u.get_origin().size();
Expand All @@ -119,7 +119,7 @@ static void exercise_url_predicates(const ada::url &u) {
}

// Exercise all getters and boolean predicates on ada::url_aggregator
static void exercise_aggregator_predicates(const ada::url_aggregator &u) {
static void exercise_aggregator_predicates(const ada::url_aggregator& u) {
volatile size_t length = 0;
length += u.get_href().size();
length += u.get_origin().size();
Expand Down Expand Up @@ -150,7 +150,7 @@ static void exercise_aggregator_predicates(const ada::url_aggregator &u) {
(void)u.to_diagram();
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
FuzzedDataProvider fdp(data, size);
std::string source = fdp.ConsumeRandomLengthString(256);
std::string base = fdp.ConsumeRandomLengthString(256);
Expand Down
14 changes: 7 additions & 7 deletions fuzz/url_search_params.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "ada.cpp"
#include "ada.h"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
FuzzedDataProvider fdp(data, size);
std::string source = fdp.ConsumeRandomLengthString(256);
std::string base_source = fdp.ConsumeRandomLengthString(256);
Expand All @@ -33,7 +33,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
length += val->size();
}
auto all_vals = initialized.get_all(source);
for (const auto &v : all_vals) {
for (const auto& v : all_vals) {
length += v.size();
}
}
Expand All @@ -42,7 +42,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
initialized.sort();

// Test C++ range-for iteration; also verify has(k) and has(k,v) consistency.
for (const auto &pair : initialized) {
for (const auto& pair : initialized) {
length += pair.first.size();
length += pair.second.size();

Expand Down Expand Up @@ -114,7 +114,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
{
auto all_vals = search_params.get_all(source);
length += all_vals.size();
for (const auto &v : all_vals) {
for (const auto& v : all_vals) {
length += v.size();
}
}
Expand Down Expand Up @@ -170,7 +170,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
}

// Test C++ range-for on the mutated params
for (const auto &pair : search_params) {
for (const auto& pair : search_params) {
length += pair.first.size();
length += pair.second.size();
}
Expand All @@ -181,7 +181,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
length += search_params.size();

// Test that reset() followed by iteration doesn't crash
for (const auto &pair : search_params) {
for (const auto& pair : search_params) {
length += pair.first.size();
length += pair.second.size();
}
Expand Down Expand Up @@ -270,7 +270,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

// Size and iteration must not crash.
length += sp_from_url.size();
for (const auto &pair : sp_from_url) {
for (const auto& pair : sp_from_url) {
length += pair.first.size();
length += pair.second.size();
}
Expand Down
6 changes: 3 additions & 3 deletions include/ada/common_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@
#endif

// Align to N-byte boundary
#define ADA_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
#define ADA_ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
#define ADA_ROUNDUP_N(a, n) (((a) + ((n) - 1)) & ~((n) - 1))
#define ADA_ROUNDDOWN_N(a, n) ((a) & ~((n) - 1))

#define ADA_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
#define ADA_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)

#if defined(ADA_REGULAR_VISUAL_STUDIO)

Expand Down
Loading
Loading