Skip to content

Commit 6051898

Browse files
bartlomieju and claude committed
feat: expose simdutf bindings behind simdutf cargo feature
V8 bundles simdutf (SIMD-accelerated Unicode validation/transcoding), but consumers like deno_core can't use a separate simdutf Rust crate alongside rusty_v8 due to C++ symbol clashes. This exposes the bundled simdutf API through rusty_v8 behind a `simdutf` cargo feature flag.

Changes:
- Cargo.toml: add `simdutf` feature
- BUILD.gn: conditionally link simdutf dep and define RUSTY_V8_ENABLE_SIMDUTF
- build.rs: wire cargo feature to GN arg, add to prebuilt suffix
- binding.cc: add ~250 lines of extern "C" wrappers for simdutf functions (validation, conversion, length calculation, base64), gated by preprocessor
- simdutf.rs: safe Rust API wrapping all exposed simdutf operations
- lib.rs: register simdutf module behind cfg(feature)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9eaafb5 commit 6051898

File tree

6 files changed

+1141
-1
lines changed

6 files changed

+1141
-1
lines changed

BUILD.gn

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Copyright 2018-2019 the Deno authors. All rights reserved. MIT license.
22
import("//build/config/host_byteorder.gni")
33

4+
declare_args() {
5+
# Off by default. When true, the rusty_v8 static library additionally depends
# on //third_party/simdutf:simdutf and rusty_v8_config gains the
# RUSTY_V8_ENABLE_SIMDUTF define.
rusty_v8_enable_simdutf = false
6+
}
7+
48
static_library("rusty_v8") {
59
complete_static_lib = true
610
sources = [ "src/binding.cc" ]
@@ -10,6 +14,9 @@ static_library("rusty_v8") {
1014
"//v8:v8_libbase",
1115
"//v8:v8_libplatform",
1216
]
17+
if (rusty_v8_enable_simdutf) {
18+
deps += [ "//third_party/simdutf:simdutf" ]
19+
}
1320
configs -= [
1421
"//build/config/compiler:default_init_stack_vars",
1522
"//build/config/compiler:thin_archive",
@@ -34,11 +41,17 @@ config("rusty_v8_config") {
3441
# internal V8 headers.
3542
include_dirs = [
3643
"v8",
44+
".",
3745
"$target_gen_dir/v8",
3846
]
3947

48+
defines = []
4049
if (is_debug) {
41-
defines = [ "DEBUG" ]
50+
defines += [ "DEBUG" ]
51+
}
52+
53+
if (rusty_v8_enable_simdutf) {
54+
defines += [ "RUSTY_V8_ENABLE_SIMDUTF" ]
4255
}
4356

4457
if (is_clang) {

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ opt-level = 1
102102
[features]
103103
default = ["use_custom_libcxx"]
104104
use_custom_libcxx = []
105+
simdutf = []
105106
v8_enable_pointer_compression = []
106107
v8_enable_sandbox = ["v8_enable_pointer_compression"]
107108
v8_enable_v8_checks = []

build.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,11 @@ fn build_v8(is_asan: bool) {
311311
env::var("CARGO_FEATURE_V8_ENABLE_V8_CHECKS").is_ok()
312312
));
313313

314+
gn_args.push(format!(
315+
"rusty_v8_enable_simdutf={}",
316+
env::var("CARGO_FEATURE_SIMDUTF").is_ok()
317+
));
318+
314319
// Fix GN's host_cpu detection when using x86_64 bins on Apple Silicon
315320
if cfg!(target_os = "macos") && cfg!(target_arch = "aarch64") {
316321
gn_args.push("host_cpu=\"arm64\"".to_string());
@@ -538,6 +543,9 @@ fn prebuilt_features_suffix() -> String {
538543
if env::var("CARGO_FEATURE_V8_ENABLE_SANDBOX").is_ok() {
539544
features.push_str("_sandbox");
540545
}
546+
if env::var("CARGO_FEATURE_SIMDUTF").is_ok() {
547+
features.push_str("_simdutf");
548+
}
541549
features
542550
}
543551

src/binding.cc

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4409,4 +4409,249 @@ RustObj* cppgc__WeakPersistent__Get(cppgc::WeakPersistent<RustObj>* self) {
44094409
return self->Get();
44104410
}
44114411

4412+
// =============================================================================
// simdutf bindings (gated behind RUSTY_V8_ENABLE_SIMDUTF)
// =============================================================================

#ifdef RUSTY_V8_ENABLE_SIMDUTF
// This section is still inside the file's enclosing `extern "C"` block. A C++
// header must not be parsed under C linkage (overloaded functions and
// templates are ill-formed there), so the include is wrapped in an explicit
// C++ linkage specification. The wrappers that follow keep their C linkage.
extern "C++" {
#include "third_party/simdutf/simdutf.h"
}
4418+
4419+
// Plain record returned by the *_with_errors wrappers and by
// simdutf__base64_to_binary; it is filled from a simdutf::result by
// to_ffi_result().
struct simdutf__result {
4420+
// simdutf::result::error converted with static_cast<int>.
int error;
4421+
// Copied unchanged from simdutf::result::count.
size_t count;
4422+
};
4423+
4424+
// Copies a simdutf::result into the FFI record: the error code is cast to int
// and the count is carried over as-is.
static simdutf__result to_ffi_result(simdutf::result r) {
  simdutf__result ffi;
  ffi.error = static_cast<int>(r.error);
  ffi.count = r.count;
  return ffi;
}
4427+
4428+
// --- Validation ---
4429+
4430+
bool simdutf__validate_utf8(const char* buf, size_t len) {
4431+
return simdutf::validate_utf8(buf, len);
4432+
}
4433+
4434+
// Forwards (buf, len) to simdutf::validate_utf8_with_errors and converts the
// returned simdutf::result with to_ffi_result().
simdutf__result simdutf__validate_utf8_with_errors(const char* buf,
                                                   size_t len) {
  const simdutf::result status = simdutf::validate_utf8_with_errors(buf, len);
  return to_ffi_result(status);
}
4438+
4439+
bool simdutf__validate_ascii(const char* buf, size_t len) {
4440+
return simdutf::validate_ascii(buf, len);
4441+
}
4442+
4443+
// Forwards (buf, len) to simdutf::validate_ascii_with_errors and converts the
// returned simdutf::result with to_ffi_result().
simdutf__result simdutf__validate_ascii_with_errors(const char* buf,
                                                    size_t len) {
  const simdutf::result status = simdutf::validate_ascii_with_errors(buf, len);
  return to_ffi_result(status);
}
4447+
4448+
bool simdutf__validate_utf16le(const char16_t* buf, size_t len) {
4449+
return simdutf::validate_utf16le(buf, len);
4450+
}
4451+
4452+
// Forwards (buf, len) to simdutf::validate_utf16le_with_errors and converts
// the returned simdutf::result with to_ffi_result().
simdutf__result simdutf__validate_utf16le_with_errors(const char16_t* buf,
                                                      size_t len) {
  const simdutf::result status =
      simdutf::validate_utf16le_with_errors(buf, len);
  return to_ffi_result(status);
}
4456+
4457+
bool simdutf__validate_utf16be(const char16_t* buf, size_t len) {
4458+
return simdutf::validate_utf16be(buf, len);
4459+
}
4460+
4461+
// Forwards (buf, len) to simdutf::validate_utf16be_with_errors and converts
// the returned simdutf::result with to_ffi_result().
simdutf__result simdutf__validate_utf16be_with_errors(const char16_t* buf,
                                                      size_t len) {
  const simdutf::result status =
      simdutf::validate_utf16be_with_errors(buf, len);
  return to_ffi_result(status);
}
4465+
4466+
bool simdutf__validate_utf32(const char32_t* buf, size_t len) {
4467+
return simdutf::validate_utf32(buf, len);
4468+
}
4469+
4470+
// Forwards (buf, len) to simdutf::validate_utf32_with_errors and converts the
// returned simdutf::result with to_ffi_result().
simdutf__result simdutf__validate_utf32_with_errors(const char32_t* buf,
                                                    size_t len) {
  const simdutf::result status = simdutf::validate_utf32_with_errors(buf, len);
  return to_ffi_result(status);
}
4474+
4475+
// --- Conversion: UTF-8 <-> UTF-16LE ---
4476+
4477+
size_t simdutf__convert_utf8_to_utf16le(const char* input, size_t length,
4478+
char16_t* output) {
4479+
return simdutf::convert_utf8_to_utf16le(input, length, output);
4480+
}
4481+
4482+
// Forwards (input, length, output) to
// simdutf::convert_utf8_to_utf16le_with_errors and converts the returned
// simdutf::result with to_ffi_result().
simdutf__result simdutf__convert_utf8_to_utf16le_with_errors(const char* input,
                                                             size_t length,
                                                             char16_t* output) {
  const simdutf::result status =
      simdutf::convert_utf8_to_utf16le_with_errors(input, length, output);
  return to_ffi_result(status);
}
4488+
4489+
size_t simdutf__convert_valid_utf8_to_utf16le(const char* input, size_t length,
4490+
char16_t* output) {
4491+
return simdutf::convert_valid_utf8_to_utf16le(input, length, output);
4492+
}
4493+
4494+
size_t simdutf__convert_utf16le_to_utf8(const char16_t* input, size_t length,
4495+
char* output) {
4496+
return simdutf::convert_utf16le_to_utf8(input, length, output);
4497+
}
4498+
4499+
// Forwards (input, length, output) to
// simdutf::convert_utf16le_to_utf8_with_errors and converts the returned
// simdutf::result with to_ffi_result().
simdutf__result simdutf__convert_utf16le_to_utf8_with_errors(
    const char16_t* input, size_t length, char* output) {
  const simdutf::result status =
      simdutf::convert_utf16le_to_utf8_with_errors(input, length, output);
  return to_ffi_result(status);
}
4504+
4505+
size_t simdutf__convert_valid_utf16le_to_utf8(const char16_t* input,
4506+
size_t length, char* output) {
4507+
return simdutf::convert_valid_utf16le_to_utf8(input, length, output);
4508+
}
4509+
4510+
// --- Conversion: UTF-8 <-> UTF-16BE ---
4511+
4512+
size_t simdutf__convert_utf8_to_utf16be(const char* input, size_t length,
4513+
char16_t* output) {
4514+
return simdutf::convert_utf8_to_utf16be(input, length, output);
4515+
}
4516+
4517+
size_t simdutf__convert_utf16be_to_utf8(const char16_t* input, size_t length,
4518+
char* output) {
4519+
return simdutf::convert_utf16be_to_utf8(input, length, output);
4520+
}
4521+
4522+
// --- Conversion: UTF-8 <-> Latin-1 ---
4523+
4524+
size_t simdutf__convert_utf8_to_latin1(const char* input, size_t length,
4525+
char* output) {
4526+
return simdutf::convert_utf8_to_latin1(input, length, output);
4527+
}
4528+
4529+
// Forwards (input, length, output) to
// simdutf::convert_utf8_to_latin1_with_errors and converts the returned
// simdutf::result with to_ffi_result().
simdutf__result simdutf__convert_utf8_to_latin1_with_errors(const char* input,
                                                            size_t length,
                                                            char* output) {
  const simdutf::result status =
      simdutf::convert_utf8_to_latin1_with_errors(input, length, output);
  return to_ffi_result(status);
}
4535+
4536+
size_t simdutf__convert_valid_utf8_to_latin1(const char* input, size_t length,
4537+
char* output) {
4538+
return simdutf::convert_valid_utf8_to_latin1(input, length, output);
4539+
}
4540+
4541+
size_t simdutf__convert_latin1_to_utf8(const char* input, size_t length,
4542+
char* output) {
4543+
return simdutf::convert_latin1_to_utf8(input, length, output);
4544+
}
4545+
4546+
// --- Conversion: Latin-1 <-> UTF-16LE ---
4547+
4548+
size_t simdutf__convert_latin1_to_utf16le(const char* input, size_t length,
4549+
char16_t* output) {
4550+
return simdutf::convert_latin1_to_utf16le(input, length, output);
4551+
}
4552+
4553+
size_t simdutf__convert_utf16le_to_latin1(const char16_t* input, size_t length,
4554+
char* output) {
4555+
return simdutf::convert_utf16le_to_latin1(input, length, output);
4556+
}
4557+
4558+
// --- Conversion: UTF-8 <-> UTF-32 ---
4559+
4560+
size_t simdutf__convert_utf8_to_utf32(const char* input, size_t length,
4561+
char32_t* output) {
4562+
return simdutf::convert_utf8_to_utf32(input, length, output);
4563+
}
4564+
4565+
size_t simdutf__convert_utf32_to_utf8(const char32_t* input, size_t length,
4566+
char* output) {
4567+
return simdutf::convert_utf32_to_utf8(input, length, output);
4568+
}
4569+
4570+
// --- Length calculation ---
4571+
4572+
size_t simdutf__utf8_length_from_utf16le(const char16_t* input, size_t length) {
4573+
return simdutf::utf8_length_from_utf16le(input, length);
4574+
}
4575+
4576+
size_t simdutf__utf8_length_from_utf16be(const char16_t* input, size_t length) {
4577+
return simdutf::utf8_length_from_utf16be(input, length);
4578+
}
4579+
4580+
size_t simdutf__utf16_length_from_utf8(const char* input, size_t length) {
4581+
return simdutf::utf16_length_from_utf8(input, length);
4582+
}
4583+
4584+
// Forwards only `length` to simdutf::utf8_length_from_latin1; no input pointer
// is passed.
// NOTE(review): upstream simdutf documents this function as
// utf8_length_from_latin1(const char* input, size_t length), since the result
// depends on the byte values. Confirm this one-argument call compiles against
// the bundled simdutf version and matches the Rust-side declaration.
size_t simdutf__utf8_length_from_latin1(size_t length) {
4585+
return simdutf::utf8_length_from_latin1(length);
4586+
}
4587+
4588+
size_t simdutf__latin1_length_from_utf8(const char* input, size_t length) {
4589+
return simdutf::latin1_length_from_utf8(input, length);
4590+
}
4591+
4592+
size_t simdutf__utf32_length_from_utf8(const char* input, size_t length) {
4593+
return simdutf::utf32_length_from_utf8(input, length);
4594+
}
4595+
4596+
size_t simdutf__utf8_length_from_utf32(const char32_t* input, size_t length) {
4597+
return simdutf::utf8_length_from_utf32(input, length);
4598+
}
4599+
4600+
size_t simdutf__utf16_length_from_utf32(const char32_t* input, size_t length) {
4601+
return simdutf::utf16_length_from_utf32(input, length);
4602+
}
4603+
4604+
size_t simdutf__utf32_length_from_utf16le(const char16_t* input,
4605+
size_t length) {
4606+
return simdutf::utf32_length_from_utf16le(input, length);
4607+
}
4608+
4609+
// --- Counting ---
4610+
4611+
size_t simdutf__count_utf8(const char* input, size_t length) {
4612+
return simdutf::count_utf8(input, length);
4613+
}
4614+
4615+
size_t simdutf__count_utf16le(const char16_t* input, size_t length) {
4616+
return simdutf::count_utf16le(input, length);
4617+
}
4618+
4619+
size_t simdutf__count_utf16be(const char16_t* input, size_t length) {
4620+
return simdutf::count_utf16be(input, length);
4621+
}
4622+
4623+
// --- Encoding detection ---
4624+
4625+
int simdutf__detect_encodings(const char* input, size_t length) {
4626+
return simdutf::detect_encodings(input, length);
4627+
}
4628+
4629+
// --- Base64 ---
4630+
4631+
size_t simdutf__maximal_binary_length_from_base64(const char* input,
4632+
size_t length) {
4633+
return simdutf::maximal_binary_length_from_base64(input, length);
4634+
}
4635+
4636+
// Casts the two raw uint64_t option values to simdutf::base64_options and
// simdutf::last_chunk_handling_options, calls simdutf::base64_to_binary with
// (input, length, output) plus those options, and converts the returned
// simdutf::result with to_ffi_result().
simdutf__result simdutf__base64_to_binary(const char* input, size_t length,
                                          char* output, uint64_t options,
                                          uint64_t last_chunk_options) {
  const auto opts = static_cast<simdutf::base64_options>(options);
  const auto last_chunk =
      static_cast<simdutf::last_chunk_handling_options>(last_chunk_options);
  const simdutf::result status =
      simdutf::base64_to_binary(input, length, output, opts, last_chunk);
  return to_ffi_result(status);
}
4643+
4644+
size_t simdutf__base64_length_from_binary(size_t length, uint64_t options) {
4645+
return simdutf::base64_length_from_binary(
4646+
length, static_cast<simdutf::base64_options>(options));
4647+
}
4648+
4649+
size_t simdutf__binary_to_base64(const char* input, size_t length, char* output,
4650+
uint64_t options) {
4651+
return simdutf::binary_to_base64(
4652+
input, length, output, static_cast<simdutf::base64_options>(options));
4653+
}
4654+
4655+
#endif // RUSTY_V8_ENABLE_SIMDUTF
4656+
44124657
} // extern "C"

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ mod wasm;
8686
pub mod inspector;
8787
pub mod json;
8888
pub mod script_compiler;
89+
#[cfg(feature = "simdutf")]
90+
pub mod simdutf;
8991
// This module is intentionally named "V8" rather than "v8" to match the
9092
// C++ namespace "v8::V8".
9193
#[allow(non_snake_case)]

0 commit comments

Comments
 (0)