From 80ec3559ab813ed786ee7dc3b386e1dc1d9c1955 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sun, 27 Apr 2025 21:23:05 -0400 Subject: [PATCH 1/4] buffer: add fast api for isAscii & isUtf8 --- src/node_buffer.cc | 32 +++++++- src/node_external_reference.h | 3 + .../test-buffer-isutf8-isascii-fast-api.js | 74 +++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 test/parallel/test-buffer-isutf8-isascii-fast-api.js diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 19f343a7f174e4..3a4af2a4ac2dc0 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1190,6 +1190,28 @@ void Swap64(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(args[0]); } +bool FastIsUtf8(v8::Local, + const v8::FastApiTypedArray& buffer) { + uint8_t* buffer_data; + CHECK(buffer.getStorageIfAligned(&buffer_data)); + TRACK_V8_FAST_API_CALL("buffer.isUtf8"); + return simdutf::validate_utf8(reinterpret_cast(buffer_data), + buffer.length()); +} + +static v8::CFunction fast_is_utf8(v8::CFunction::Make(FastIsUtf8)); + +bool FastIsAscii(v8::Local, + const v8::FastApiTypedArray& buffer) { + uint8_t* buffer_data; + CHECK(buffer.getStorageIfAligned(&buffer_data)); + TRACK_V8_FAST_API_CALL("buffer.isAscii"); + return simdutf::validate_ascii(reinterpret_cast(buffer_data), + buffer.length()); +} + +static v8::CFunction fast_is_ascii(v8::CFunction::Make(FastIsAscii)); + static void IsUtf8(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_EQ(args.Length(), 1); @@ -1567,8 +1589,9 @@ void Initialize(Local target, SetMethod(context, target, "swap32", Swap32); SetMethod(context, target, "swap64", Swap64); - SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8); - SetMethodNoSideEffect(context, target, "isAscii", IsAscii); + SetFastMethodNoSideEffect(context, target, "isUtf8", IsUtf8, &fast_is_utf8); + SetFastMethodNoSideEffect( + context, target, "isAscii", IsAscii, &fast_is_ascii); target ->Set(context, @@ 
-1674,6 +1697,11 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(Atob); registry->Register(Btoa); + + registry->Register(FastIsUtf8); + registry->Register(fast_is_utf8.GetTypeInfo()); + registry->Register(FastIsAscii); + registry->Register(fast_is_ascii.GetTypeInfo()); } } // namespace Buffer diff --git a/src/node_external_reference.h b/src/node_external_reference.h index 1ade653aeb4150..595158e0040050 100644 --- a/src/node_external_reference.h +++ b/src/node_external_reference.h @@ -46,6 +46,8 @@ using CFunctionCallbackWithInt64 = void (*)(v8::Local unused, using CFunctionCallbackWithBool = void (*)(v8::Local unused, v8::Local receiver, bool); +using CFunctionFastIsUtf8 = bool (*)( + v8::Local, const v8::FastApiTypedArray& buffer); using CFunctionCallbackWithString = bool (*)(v8::Local, const v8::FastOneByteString& input); using CFunctionCallbackWithStrings = @@ -111,6 +113,7 @@ class ExternalReferenceRegistry { V(CFunctionCallbackValueReturnDoubleUnusedReceiver) \ V(CFunctionCallbackWithInt64) \ V(CFunctionCallbackWithBool) \ + V(CFunctionFastIsUtf8) \ V(CFunctionCallbackWithString) \ V(CFunctionCallbackWithStrings) \ V(CFunctionCallbackWithTwoUint8Arrays) \ diff --git a/test/parallel/test-buffer-isutf8-isascii-fast-api.js b/test/parallel/test-buffer-isutf8-isascii-fast-api.js new file mode 100644 index 00000000000000..9a31ef4e504c32 --- /dev/null +++ b/test/parallel/test-buffer-isutf8-isascii-fast-api.js @@ -0,0 +1,74 @@ +// Flags: --expose-internals --no-warnings --allow-natives-syntax +'use strict'; + +const common = require('../common'); +const assert = require('assert'); +const { internalBinding } = require('internal/test/binding'); + +// Get direct access to the buffer validation methods +const buffer = require('buffer'); + +// Create test buffers +const utf8Buffer = Buffer.from('Hello, 世界!'); // Valid UTF-8 with actual Unicode characters +const asciiBuffer = Buffer.from('Hello, World!'); // Valid ASCII +const 
nonUtf8Buffer = Buffer.from([0xFF, 0xFF, 0xFF]); // Invalid UTF-8 +const nonAsciiBuffer = Buffer.from([0x80, 0x90, 0xA0]); // Invalid ASCII + +// Test basic functionality for isUtf8 +assert.strictEqual(buffer.isUtf8(utf8Buffer), true); +assert.strictEqual(buffer.isUtf8(nonUtf8Buffer), false); + +// Test basic functionality for isAscii +assert.strictEqual(buffer.isAscii(asciiBuffer), true); +assert.strictEqual(buffer.isAscii(nonAsciiBuffer), false); + +// Test detached buffers +const detachedBuffer = new ArrayBuffer(10); +try { + detachedBuffer.detach(); +} catch (e) { + console.log('Skipping detached buffer tests - detach not supported'); +} + +if (detachedBuffer.detached) { + const typedArray = new Uint8Array(detachedBuffer); + + assert.throws(() => { + buffer.isUtf8(typedArray); + }, { + name: 'Error', + code: 'ERR_INVALID_STATE' + }); + + assert.throws(() => { + buffer.isAscii(typedArray); + }, { + name: 'Error', + code: 'ERR_INVALID_STATE' + }); +} + +// Test optimization and fast API paths +function testFastPaths() { + // Test both valid and invalid cases to ensure both paths are optimized + buffer.isUtf8(utf8Buffer); + buffer.isUtf8(nonUtf8Buffer); + buffer.isAscii(asciiBuffer); + buffer.isAscii(nonAsciiBuffer); +} + +// Since we want to optimize the C++ methods, we need to prepare them +// through their JS wrappers +eval('%PrepareFunctionForOptimization(buffer.isUtf8)'); +eval('%PrepareFunctionForOptimization(buffer.isAscii)'); +testFastPaths(); +eval('%OptimizeFunctionOnNextCall(buffer.isUtf8)'); +eval('%OptimizeFunctionOnNextCall(buffer.isAscii)'); +testFastPaths(); + +// Verify fast API calls were made if running in debug mode +if (common.isDebug) { + const { getV8FastApiCallCount } = internalBinding('debug'); + assert.strictEqual(getV8FastApiCallCount('buffer.isUtf8'), 2); // Called twice in testFastPaths + assert.strictEqual(getV8FastApiCallCount('buffer.isAscii'), 2); // Called twice in testFastPaths +} From 62dea031ddb222770df1f52e949465476d140854 Mon 
Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sun, 27 Apr 2025 21:29:41 -0400 Subject: [PATCH 2/4] fixup! buffer: add fast api for isAscii & isUtf8 --- test/parallel/test-buffer-isutf8-isascii-fast-api.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parallel/test-buffer-isutf8-isascii-fast-api.js b/test/parallel/test-buffer-isutf8-isascii-fast-api.js index 9a31ef4e504c32..b052e509861b23 100644 --- a/test/parallel/test-buffer-isutf8-isascii-fast-api.js +++ b/test/parallel/test-buffer-isutf8-isascii-fast-api.js @@ -26,7 +26,7 @@ assert.strictEqual(buffer.isAscii(nonAsciiBuffer), false); const detachedBuffer = new ArrayBuffer(10); try { detachedBuffer.detach(); -} catch (e) { +} catch (_e) { console.log('Skipping detached buffer tests - detach not supported'); } From 7e84063d6cbc6c6deea2eef58432a906a0c594d5 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sun, 27 Apr 2025 21:33:29 -0400 Subject: [PATCH 3/4] fixup! buffer: add fast api for isAscii & isUtf8 --- src/node_buffer.cc | 30 ++++++++++++------- src/node_external_reference.h | 3 -- .../test-buffer-isutf8-isascii-fast-api.js | 7 ++--- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 3a4af2a4ac2dc0..3719a381394643 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1191,23 +1191,33 @@ void Swap64(const FunctionCallbackInfo& args) { } bool FastIsUtf8(v8::Local, - const v8::FastApiTypedArray& buffer) { - uint8_t* buffer_data; - CHECK(buffer.getStorageIfAligned(&buffer_data)); + Local buffer, + FastApiCallbackOptions& options) { TRACK_V8_FAST_API_CALL("buffer.isUtf8"); - return simdutf::validate_utf8(reinterpret_cast(buffer_data), - buffer.length()); + ArrayBufferViewContents view(buffer); + if (view.WasDetached()) { + node::THROW_ERR_INVALID_STATE(options.isolate, + "Cannot validate on a detached buffer"); + return false; + } + return simdutf::validate_utf8(reinterpret_cast(view.data()), + view.length()); } static 
v8::CFunction fast_is_utf8(v8::CFunction::Make(FastIsUtf8)); bool FastIsAscii(v8::Local, - const v8::FastApiTypedArray& buffer) { - uint8_t* buffer_data; - CHECK(buffer.getStorageIfAligned(&buffer_data)); + Local buffer, + FastApiCallbackOptions& options) { TRACK_V8_FAST_API_CALL("buffer.isAscii"); - return simdutf::validate_ascii(reinterpret_cast(buffer_data), - buffer.length()); + ArrayBufferViewContents view(buffer); + if (view.WasDetached()) { + node::THROW_ERR_INVALID_STATE(options.isolate, + "Cannot validate on a detached buffer"); + return false; + } + return simdutf::validate_ascii(reinterpret_cast(view.data()), + view.length()); } static v8::CFunction fast_is_ascii(v8::CFunction::Make(FastIsAscii)); diff --git a/src/node_external_reference.h b/src/node_external_reference.h index 595158e0040050..1ade653aeb4150 100644 --- a/src/node_external_reference.h +++ b/src/node_external_reference.h @@ -46,8 +46,6 @@ using CFunctionCallbackWithInt64 = void (*)(v8::Local unused, using CFunctionCallbackWithBool = void (*)(v8::Local unused, v8::Local receiver, bool); -using CFunctionFastIsUtf8 = bool (*)( - v8::Local, const v8::FastApiTypedArray& buffer); using CFunctionCallbackWithString = bool (*)(v8::Local, const v8::FastOneByteString& input); using CFunctionCallbackWithStrings = @@ -113,7 +111,6 @@ class ExternalReferenceRegistry { V(CFunctionCallbackValueReturnDoubleUnusedReceiver) \ V(CFunctionCallbackWithInt64) \ V(CFunctionCallbackWithBool) \ - V(CFunctionFastIsUtf8) \ V(CFunctionCallbackWithString) \ V(CFunctionCallbackWithStrings) \ V(CFunctionCallbackWithTwoUint8Arrays) \ diff --git a/test/parallel/test-buffer-isutf8-isascii-fast-api.js b/test/parallel/test-buffer-isutf8-isascii-fast-api.js index b052e509861b23..2b9667d9d19939 100644 --- a/test/parallel/test-buffer-isutf8-isascii-fast-api.js +++ b/test/parallel/test-buffer-isutf8-isascii-fast-api.js @@ -24,11 +24,8 @@ assert.strictEqual(buffer.isAscii(nonAsciiBuffer), false); // Test detached buffers const 
detachedBuffer = new ArrayBuffer(10); -try { - detachedBuffer.detach(); -} catch (_e) { - console.log('Skipping detached buffer tests - detach not supported'); -} +// Let's detach the buffer if it's supported +detachedBuffer.detach?.(); if (detachedBuffer.detached) { const typedArray = new Uint8Array(detachedBuffer); From 6b11f13b408ddcb0da20744b6226d142b2ceb95a Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sat, 3 May 2025 14:57:50 -0400 Subject: [PATCH 4/4] fixup! fixup! buffer: add fast api for isAscii & isUtf8 --- src/node_buffer.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 3719a381394643..55c392ff07446d 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -1192,6 +1192,7 @@ void Swap64(const FunctionCallbackInfo& args) { bool FastIsUtf8(v8::Local, Local buffer, + // NOLINTNEXTLINE(runtime/references) This is V8 api. FastApiCallbackOptions& options) { TRACK_V8_FAST_API_CALL("buffer.isUtf8"); ArrayBufferViewContents view(buffer); @@ -1208,6 +1209,7 @@ static v8::CFunction fast_is_utf8(v8::CFunction::Make(FastIsUtf8)); bool FastIsAscii(v8::Local, Local buffer, + // NOLINTNEXTLINE(runtime/references) This is V8 api. FastApiCallbackOptions& options) { TRACK_V8_FAST_API_CALL("buffer.isAscii"); ArrayBufferViewContents view(buffer);