@@ -996,106 +996,111 @@ impl String {
996996 }
997997 }
998998
999- /// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
1000- /// provided buffer.
999+ /// Converts a [`crate::String`] to either an owned [`std::string::String`],
1000+ /// or a borrowed [`str`], depending on whether it fits into the provided
1001+ /// buffer.
1002+ ///
1003+ /// Uses [`ValueView`] internally for direct access to the string's
1004+ /// contents, eliminating the `utf8_length` pre-scan that the previous
1005+ /// implementation required.
10011006 pub fn to_rust_cow_lossy < ' a , const N : usize > (
10021007 & self ,
10031008 scope : & mut Isolate ,
10041009 buffer : & ' a mut [ MaybeUninit < u8 > ; N ] ,
10051010 ) -> Cow < ' a , str > {
1006- let len_utf16 = self . length ( ) ;
1007-
1008- // No need to allocate or do any work for zero-length strings
1009- if len_utf16 == 0 {
1011+ let len = self . length ( ) ;
1012+ if len == 0 {
10101013 return "" . into ( ) ;
10111014 }
10121015
1013- // TODO(mmastrac): Ideally we should be able to access the string's internal representation
1014- let len_utf8 = self . utf8_length ( scope) ;
1016+ // SAFETY: `self` is a valid V8 string reachable from a handle scope.
1017+ // The ValueView is dropped before we return, so the
1018+ // DisallowGarbageCollection scope it holds is properly scoped.
1019+ let view = unsafe { ValueView :: new_from_ref ( scope, self ) } ;
10151020
1016- // If len_utf8 == len_utf16 and the string is one-byte, we can take the fast memcpy path. This is true iff the
1017- // string is 100% 7-bit ASCII.
1018- if self . is_onebyte ( ) && len_utf8 == len_utf16 {
1019- if len_utf16 <= N {
1020- self . write_one_byte_uninit_v2 ( scope, 0 , buffer, WriteFlags :: empty ( ) ) ;
1021- unsafe {
1022- // Get a slice of &[u8] of what we know is initialized now
1023- let buffer = & mut buffer[ ..len_utf16] ;
1024- let buffer = & mut * ( buffer as * mut [ _ ] as * mut [ u8 ] ) ;
1025-
1026- // We know it's valid UTF-8, so make a string
1027- return Cow :: Borrowed ( std:: str:: from_utf8_unchecked ( buffer) ) ;
1021+ match view. data ( ) {
1022+ ValueViewData :: OneByte ( bytes) => {
1023+ if bytes. is_ascii ( ) {
1024+ // ASCII: direct memcpy, no transcoding needed.
1025+ if bytes. len ( ) <= N {
1026+ unsafe {
1027+ std:: ptr:: copy_nonoverlapping (
1028+ bytes. as_ptr ( ) ,
1029+ buffer. as_mut_ptr ( ) as * mut u8 ,
1030+ bytes. len ( ) ,
1031+ ) ;
1032+ let buf = & mut buffer[ ..bytes. len ( ) ] ;
1033+ let buf = & mut * ( buf as * mut [ _ ] as * mut [ u8 ] ) ;
1034+ Cow :: Borrowed ( std:: str:: from_utf8_unchecked ( buf) )
1035+ }
1036+ } else {
1037+ // SAFETY: ASCII bytes are valid UTF-8.
1038+ unsafe {
1039+ Cow :: Owned ( std:: string:: String :: from_utf8_unchecked (
1040+ bytes. to_vec ( ) ,
1041+ ) )
1042+ }
1043+ }
1044+ } else {
1045+ // Latin-1 non-ASCII: each byte can expand to at most 2 UTF-8
1046+ // bytes. Use conservative size check.
1047+ let max_utf8_len = bytes. len ( ) * 2 ;
1048+ if max_utf8_len <= N {
1049+ let written = unsafe {
1050+ latin1_to_utf8 (
1051+ bytes. len ( ) ,
1052+ bytes. as_ptr ( ) ,
1053+ buffer. as_mut_ptr ( ) as * mut u8 ,
1054+ )
1055+ } ;
1056+ unsafe {
1057+ let buf = & mut buffer[ ..written] ;
1058+ let buf = & mut * ( buf as * mut [ _ ] as * mut [ u8 ] ) ;
1059+ Cow :: Borrowed ( std:: str:: from_utf8_unchecked ( buf) )
1060+ }
1061+ } else {
1062+ let mut buf = Vec :: with_capacity ( max_utf8_len) ;
1063+ unsafe {
1064+ let written =
1065+ latin1_to_utf8 ( bytes. len ( ) , bytes. as_ptr ( ) , buf. as_mut_ptr ( ) ) ;
1066+ buf. set_len ( written) ;
1067+ Cow :: Owned ( std:: string:: String :: from_utf8_unchecked ( buf) )
1068+ }
1069+ }
10281070 }
10291071 }
1030-
1031- unsafe {
1032- // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
1033- // accidentally creating a slice of u8 which would be invalid.
1034- let layout = std:: alloc:: Layout :: from_size_align ( len_utf16, 1 ) . unwrap ( ) ;
1035- let data = std:: alloc:: alloc ( layout) as * mut MaybeUninit < u8 > ;
1036- let buffer = std:: ptr:: slice_from_raw_parts_mut ( data, len_utf16) ;
1037-
1038- // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
1039- self . write_one_byte_uninit_v2 (
1040- scope,
1041- 0 ,
1042- & mut * buffer,
1043- WriteFlags :: kReplaceInvalidUtf8,
1044- ) ;
1045-
1046- // Return an owned string from this guaranteed now-initialized data
1047- let buffer = data as * mut u8 ;
1048- return Cow :: Owned ( std:: string:: String :: from_raw_parts (
1049- buffer, len_utf16, len_utf16,
1050- ) ) ;
1051- }
1052- }
1053-
1054- if len_utf8 <= N {
1055- // No malloc path
1056- let length = self . write_utf8_uninit_v2 (
1057- scope,
1058- buffer,
1059- WriteFlags :: kReplaceInvalidUtf8,
1060- None ,
1061- ) ;
1062- debug_assert ! ( length == len_utf8) ;
1063-
1064- // SAFETY: We know that we wrote `length` UTF-8 bytes. See `slice_assume_init_mut` for additional guarantee information.
1065- unsafe {
1066- // Get a slice of &[u8] of what we know is initialized now
1067- let buffer = & mut buffer[ ..length] ;
1068- let buffer = & mut * ( buffer as * mut [ _ ] as * mut [ u8 ] ) ;
1069-
1070- // We know it's valid UTF-8, so make a string
1071- return Cow :: Borrowed ( std:: str:: from_utf8_unchecked ( buffer) ) ;
1072+ ValueViewData :: TwoByte ( units) => {
1073+ // Transcode UTF-16 directly into the stack buffer when possible.
1074+ let mut pos = 0 ;
1075+ let mut tmp = [ 0u8 ; 4 ] ;
1076+ let mut all_fit = true ;
1077+ for result in std:: char:: decode_utf16 ( units. iter ( ) . copied ( ) ) {
1078+ let c = result. unwrap_or ( '\u{FFFD}' ) ;
1079+ let encoded = c. encode_utf8 ( & mut tmp) ;
1080+ if pos + encoded. len ( ) > N {
1081+ all_fit = false ;
1082+ break ;
1083+ }
1084+ unsafe {
1085+ std:: ptr:: copy_nonoverlapping (
1086+ encoded. as_ptr ( ) ,
1087+ ( buffer. as_mut_ptr ( ) as * mut u8 ) . add ( pos) ,
1088+ encoded. len ( ) ,
1089+ ) ;
1090+ }
1091+ pos += encoded. len ( ) ;
1092+ }
1093+ if all_fit {
1094+ unsafe {
1095+ let buf = & mut buffer[ ..pos] ;
1096+ let buf = & mut * ( buf as * mut [ _ ] as * mut [ u8 ] ) ;
1097+ Cow :: Borrowed ( std:: str:: from_utf8_unchecked ( buf) )
1098+ }
1099+ } else {
1100+ Cow :: Owned ( std:: string:: String :: from_utf16_lossy ( units) )
1101+ }
10721102 }
10731103 }
1074-
1075- // SAFETY: This allocates a buffer manually using the default allocator using the string's capacity.
1076- // We have a large number of invariants to uphold, so please check changes to this code carefully
1077- unsafe {
1078- // Create an uninitialized buffer of `capacity` bytes. We need to be careful here to avoid
1079- // accidentally creating a slice of u8 which would be invalid.
1080- let layout = std:: alloc:: Layout :: from_size_align ( len_utf8, 1 ) . unwrap ( ) ;
1081- let data = std:: alloc:: alloc ( layout) as * mut MaybeUninit < u8 > ;
1082- let buffer = std:: ptr:: slice_from_raw_parts_mut ( data, len_utf8) ;
1083-
1084- // Write to this MaybeUninit buffer, assuming we're going to fill this entire buffer
1085- let length = self . write_utf8_uninit_v2 (
1086- scope,
1087- & mut * buffer,
1088- WriteFlags :: kReplaceInvalidUtf8,
1089- None ,
1090- ) ;
1091- debug_assert ! ( length == len_utf8) ;
1092-
1093- // Return an owned string from this guaranteed now-initialized data
1094- let buffer = data as * mut u8 ;
1095- Cow :: Owned ( std:: string:: String :: from_raw_parts (
1096- buffer, length, len_utf8,
1097- ) )
1098- }
10991104 }
11001105}
11011106
@@ -1132,12 +1137,29 @@ pub struct ValueView<'s>(
11321137impl < ' s > ValueView < ' s > {
11331138 #[ inline( always) ]
11341139 pub fn new ( isolate : & mut Isolate , string : Local < ' s , String > ) -> Self {
1140+ // SAFETY: Local<'s, String> derefs to &String; delegate to new_from_ref.
1141+ unsafe { Self :: new_from_ref ( isolate, & * string) }
1142+ }
1143+
1144+ /// Constructs a `ValueView` from a raw string reference.
1145+ ///
1146+ /// # Safety
1147+ ///
1148+ /// The caller must ensure that `string` is a valid V8 string that
1149+ /// remains alive for at least `'s`. In practice this means the
1150+ /// string must be reachable from a handle scope that outlives the
1151+ /// returned `ValueView`.
1152+ #[ inline( always) ]
1153+ pub ( crate ) unsafe fn new_from_ref (
1154+ isolate : & mut Isolate ,
1155+ string : & ' s String ,
1156+ ) -> Self {
11351157 let mut v = std:: mem:: MaybeUninit :: uninit ( ) ;
11361158 unsafe {
11371159 v8__String__ValueView__CONSTRUCT (
11381160 v. as_mut_ptr ( ) ,
11391161 isolate. as_real_ptr ( ) ,
1140- & * string,
1162+ string,
11411163 ) ;
11421164 v. assume_init ( )
11431165 }
0 commit comments