Skip to content

buffer: add fast api for isAscii & isUtf8 #58058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1190,6 +1190,40 @@ void Swap64(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(args[0]);
}

// V8 fast-API counterpart of the "isUtf8" binding method.
//
// Parameters:
//   (unnamed) - first fast-call argument; not used by this function.
//   buffer    - value whose bytes are validated; read through
//               ArrayBufferViewContents.
//   options   - fast-call options; options.isolate is used for handle
//               creation and for throwing.
//
// Returns the result of simdutf::validate_utf8 over the viewed bytes. If the
// contents report a detached backing store, an ERR_INVALID_STATE exception is
// scheduled on the isolate and false is returned.
bool FastIsUtf8(v8::Local<v8::Value>,
                Local<Value> buffer,
                // NOLINTNEXTLINE(runtime/references) This is V8 api.
                FastApiCallbackOptions& options) {
  TRACK_V8_FAST_API_CALL("buffer.isUtf8");
  // Both reading the view contents and building the error object below may
  // create V8 handles, so open a scope for them before doing either.
  v8::HandleScope scope(options.isolate);
  ArrayBufferViewContents<uint8_t> view(buffer);
  if (view.WasDetached()) {
    node::THROW_ERR_INVALID_STATE(options.isolate,
                                  "Cannot validate on a detached buffer");
    // The return value is irrelevant once an exception is pending.
    return false;
  }
  return simdutf::validate_utf8(reinterpret_cast<const char*>(view.data()),
                                view.length());
}

// Fast-call descriptor for FastIsUtf8. It is handed to
// SetFastMethodNoSideEffect() for "isUtf8" and its type info is registered in
// RegisterExternalReferences().
static v8::CFunction fast_is_utf8(v8::CFunction::Make(FastIsUtf8));

// V8 fast-API counterpart of the "isAscii" binding method.
//
// Parameters:
//   (unnamed) - first fast-call argument; not used by this function.
//   buffer    - value whose bytes are validated; read through
//               ArrayBufferViewContents.
//   options   - fast-call options; options.isolate is used for handle
//               creation and for throwing.
//
// Returns the result of simdutf::validate_ascii over the viewed bytes. If the
// contents report a detached backing store, an ERR_INVALID_STATE exception is
// scheduled on the isolate and false is returned.
bool FastIsAscii(v8::Local<v8::Value>,
                 Local<Value> buffer,
                 // NOLINTNEXTLINE(runtime/references) This is V8 api.
                 FastApiCallbackOptions& options) {
  TRACK_V8_FAST_API_CALL("buffer.isAscii");
  // Both reading the view contents and building the error object below may
  // create V8 handles, so open a scope for them before doing either.
  v8::HandleScope scope(options.isolate);
  ArrayBufferViewContents<uint8_t> view(buffer);
  if (view.WasDetached()) {
    node::THROW_ERR_INVALID_STATE(options.isolate,
                                  "Cannot validate on a detached buffer");
    // The return value is irrelevant once an exception is pending.
    return false;
  }
  return simdutf::validate_ascii(reinterpret_cast<const char*>(view.data()),
                                 view.length());
}

// Fast-call descriptor for FastIsAscii. It is handed to
// SetFastMethodNoSideEffect() for "isAscii" and its type info is registered in
// RegisterExternalReferences().
static v8::CFunction fast_is_ascii(v8::CFunction::Make(FastIsAscii));

static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_EQ(args.Length(), 1);
Expand Down Expand Up @@ -1567,8 +1601,9 @@ void Initialize(Local<Object> target,
SetMethod(context, target, "swap32", Swap32);
SetMethod(context, target, "swap64", Swap64);

SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8);
SetMethodNoSideEffect(context, target, "isAscii", IsAscii);
SetFastMethodNoSideEffect(context, target, "isUtf8", IsUtf8, &fast_is_utf8);
SetFastMethodNoSideEffect(
context, target, "isAscii", IsAscii, &fast_is_ascii);

target
->Set(context,
Expand Down Expand Up @@ -1674,6 +1709,11 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {

registry->Register(Atob);
registry->Register(Btoa);

registry->Register(FastIsUtf8);
registry->Register(fast_is_utf8.GetTypeInfo());
registry->Register(FastIsAscii);
registry->Register(fast_is_ascii.GetTypeInfo());
}

} // namespace Buffer
Expand Down
71 changes: 71 additions & 0 deletions test/parallel/test-buffer-isutf8-isascii-fast-api.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Flags: --expose-internals --no-warnings --allow-natives-syntax
'use strict';

const common = require('../common');
const assert = require('assert');
const { internalBinding } = require('internal/test/binding');

// Get direct access to the buffer validation methods
const buffer = require('buffer');

// Create test buffers: one valid and one invalid input per validator.
// The same four buffers are reused by testFastPaths() further down.
const utf8Buffer = Buffer.from('Hello, 世界!'); // Valid UTF-8 with actual Unicode characters
const asciiBuffer = Buffer.from('Hello, World!'); // Valid ASCII
// 0xFF is never a legal byte in a UTF-8 sequence.
const nonUtf8Buffer = Buffer.from([0xFF, 0xFF, 0xFF]); // Invalid UTF-8
// Every byte has the high bit set, so none fits in 7-bit ASCII.
const nonAsciiBuffer = Buffer.from([0x80, 0x90, 0xA0]); // Invalid ASCII

// Test basic functionality for isUtf8 (runs before any optimization is
// requested below).
assert.strictEqual(buffer.isUtf8(utf8Buffer), true);
assert.strictEqual(buffer.isUtf8(nonUtf8Buffer), false);

// Test basic functionality for isAscii (runs before any optimization is
// requested below).
assert.strictEqual(buffer.isAscii(asciiBuffer), true);
assert.strictEqual(buffer.isAscii(nonAsciiBuffer), false);

// Test detached buffers.
// ArrayBuffer has no detach() method; transferring the buffer with
// structuredClone() is what actually detaches it.
const detachedBuffer = new ArrayBuffer(10);
structuredClone(detachedBuffer, { transfer: [detachedBuffer] });

// A detached ArrayBuffer reports a byteLength of 0. Assert it so this section
// can never silently turn into a no-op.
assert.strictEqual(detachedBuffer.byteLength, 0);

// Pass the detached ArrayBuffer itself: constructing a TypedArray over an
// already-detached buffer would throw a TypeError before the validators run.
assert.throws(() => {
  buffer.isUtf8(detachedBuffer);
}, {
  name: 'Error',
  code: 'ERR_INVALID_STATE'
});

assert.throws(() => {
  buffer.isAscii(detachedBuffer);
}, {
  name: 'Error',
  code: 'ERR_INVALID_STATE'
});

// Test optimization and fast API paths
/**
 * Calls buffer.isUtf8() and buffer.isAscii() twice each: once with an input
 * that validates and once with an input that does not. Return values are
 * deliberately ignored; correctness is asserted separately above. The call
 * counts asserted at the end of this file depend on exactly two calls per
 * validator being made here.
 */
function testFastPaths() {
// Test both valid and invalid cases to ensure both paths are optimized
buffer.isUtf8(utf8Buffer);
buffer.isUtf8(nonUtf8Buffer);
buffer.isAscii(asciiBuffer);
buffer.isAscii(nonAsciiBuffer);
}

// Since we want to optimize the C++ methods, we need to prepare them
// through their JS wrappers
// The %-prefixed intrinsics require --allow-natives-syntax (see the Flags
// line at the top of this file). NOTE(review): they are presumably wrapped in
// eval() so that the file still parses for tools unaware of that syntax.
eval('%PrepareFunctionForOptimization(buffer.isUtf8)');
eval('%PrepareFunctionForOptimization(buffer.isAscii)');
// First run: executes the wrappers after they were prepared for optimization.
testFastPaths();
eval('%OptimizeFunctionOnNextCall(buffer.isUtf8)');
eval('%OptimizeFunctionOnNextCall(buffer.isAscii)');
// Second run: the run whose calls are expected to be counted as fast API
// calls below.
testFastPaths();

// Verify fast API calls were made if running in debug mode
// The expected count of 2 per validator matches the two calls per validator
// made by a single testFastPaths() run.
if (common.isDebug) {
const { getV8FastApiCallCount } = internalBinding('debug');
assert.strictEqual(getV8FastApiCallCount('buffer.isUtf8'), 2); // Called twice in testFastPaths
assert.strictEqual(getV8FastApiCallCount('buffer.isAscii'), 2); // Called twice in testFastPaths
}
Loading