Skip to content

Commit 626e10e

Browse files
authored
Split SequenceString into two types for Latin1 and Utf16 (#4571)
Slice still has `is_latin1` which will be fixed in the next PR.
1 parent 3b5651f commit 626e10e

File tree

8 files changed

+302
-215
lines changed

8 files changed

+302
-215
lines changed

core/string/src/builder.rs

Lines changed: 58 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::vtable::sequence::DATA_OFFSET;
1+
use crate::r#type::{InternalStringType, Latin1, Utf16};
22
use crate::{JsStr, JsStrVariant, JsString, SequenceString, alloc_overflow};
33
use std::{
44
alloc::{Layout, alloc, dealloc, realloc},
@@ -8,23 +8,25 @@ use std::{
88
str::{self},
99
};
1010

11-
/// A mutable builder to create instance of `JsString`.
11+
/// A mutable builder to create instances of `JsString`.
1212
#[derive(Debug)]
13-
pub struct JsStringBuilder<D: Copy> {
13+
#[allow(private_bounds)]
14+
pub struct JsStringBuilder<D: InternalStringType> {
1415
cap: usize,
1516
len: usize,
16-
inner: NonNull<SequenceString>,
17+
inner: NonNull<SequenceString<D>>,
1718
phantom_data: PhantomData<D>,
1819
}
1920

20-
impl<D: Copy> Default for JsStringBuilder<D> {
21+
impl<D: InternalStringType> Default for JsStringBuilder<D> {
2122
fn default() -> Self {
2223
Self::new()
2324
}
2425
}
2526

26-
impl<D: Copy> JsStringBuilder<D> {
27-
const DATA_SIZE: usize = size_of::<D>();
27+
#[allow(private_bounds)]
28+
impl<D: InternalStringType> JsStringBuilder<D> {
29+
const DATA_SIZE: usize = size_of::<D::Byte>();
2830
const MIN_NON_ZERO_CAP: usize = 8 / Self::DATA_SIZE;
2931

3032
/// Create a new `JsStringBuilder` with capacity of zero.
@@ -75,7 +77,7 @@ impl<D: Copy> JsStringBuilder<D> {
7577
/// Returns the capacity calculated from given layout.
7678
#[must_use]
7779
const fn capacity_from_layout(layout: Layout) -> usize {
78-
(layout.size() - DATA_OFFSET) / Self::DATA_SIZE
80+
(layout.size() - D::DATA_OFFSET) / Self::DATA_SIZE
7981
}
8082

8183
/// Create a new `JsStringBuilder` with specific capacity
@@ -109,7 +111,7 @@ impl<D: Copy> JsStringBuilder<D> {
109111
self.inner != NonNull::dangling()
110112
}
111113

112-
/// Returns the inner `RawJsString`'s layout.
114+
/// Returns the inner sequence string's layout.
113115
///
114116
/// # Safety
115117
///
@@ -120,7 +122,7 @@ impl<D: Copy> JsStringBuilder<D> {
120122
// Caller should ensure that the inner is allocated.
121123
unsafe {
122124
Layout::for_value(self.inner.as_ref())
123-
.extend(Layout::array::<D>(self.capacity()).unwrap_unchecked())
125+
.extend(Layout::array::<D::Byte>(self.capacity()).unwrap_unchecked())
124126
.unwrap_unchecked()
125127
.0
126128
.pad_to_align()
@@ -133,10 +135,10 @@ impl<D: Copy> JsStringBuilder<D> {
133135
///
134136
/// Caller should ensure that the inner is allocated.
135137
#[must_use]
136-
const unsafe fn data(&self) -> *mut D {
137-
let seq_ptr = self.inner.as_ptr().cast::<u8>();
138+
const unsafe fn data(&self) -> *mut D::Byte {
139+
let seq_ptr: *mut D::Byte = self.inner.as_ptr().cast();
138140
// SAFETY: Caller should ensure that the inner is allocated.
139-
unsafe { seq_ptr.add(DATA_OFFSET).cast() }
141+
unsafe { seq_ptr.byte_add(D::DATA_OFFSET) }
140142
}
141143

142144
/// Allocates when there is not sufficient capacity.
@@ -160,16 +162,17 @@ impl<D: Copy> JsStringBuilder<D> {
160162
let old_layout = unsafe { self.current_layout() };
161163
// SAFETY:
162164
// Valid pointer is required by `realloc` and pointer is checked above to be valid.
163-
// The layout size of `RawJsString` is never zero, since it has to store
165+
// The layout size of the sequence string is never zero, since it has to store
164166
// the length of the string and the reference count.
165167
unsafe { realloc(old_ptr.cast(), old_layout, new_layout.size()) }
166168
} else {
167169
// SAFETY:
168-
// The layout size of `RawJsString` is never zero, since it has to store
170+
// The layout size of the sequence string is never zero, since it has to store
169171
// the length of the string and the reference count.
170172
unsafe { alloc(new_layout) }
171173
};
172-
let Some(new_ptr) = NonNull::new(new_ptr.cast::<SequenceString>()) else {
174+
175+
let Some(new_ptr) = NonNull::new(new_ptr.cast::<SequenceString<D>>()) else {
173176
std::alloc::handle_alloc_error(new_layout)
174177
};
175178
self.inner = new_ptr;
@@ -178,7 +181,7 @@ impl<D: Copy> JsStringBuilder<D> {
178181

179182
/// Appends an element to the inner `RawJsString` of `JsStringBuilder`.
180183
#[inline]
181-
pub fn push(&mut self, v: D) {
184+
pub fn push(&mut self, v: D::Byte) {
182185
let required_cap = self.len() + 1;
183186
self.allocate_if_needed(required_cap);
184187
// SAFETY:
@@ -198,7 +201,7 @@ impl<D: Copy> JsStringBuilder<D> {
198201
///
199202
/// Caller should ensure the capacity is large enough to hold elements.
200203
#[inline]
201-
pub const unsafe fn extend_from_slice_unchecked(&mut self, v: &[D]) {
204+
pub const unsafe fn extend_from_slice_unchecked(&mut self, v: &[D::Byte]) {
202205
// SAFETY: Caller should ensure the capacity is large enough to hold elements.
203206
unsafe {
204207
ptr::copy_nonoverlapping(v.as_ptr(), self.data().add(self.len()), v.len());
@@ -208,7 +211,7 @@ impl<D: Copy> JsStringBuilder<D> {
208211

209212
/// Pushes elements from slice to `JsStringBuilder`.
210213
#[inline]
211-
pub fn extend_from_slice(&mut self, v: &[D]) {
214+
pub fn extend_from_slice(&mut self, v: &[D::Byte]) {
212215
let required_cap = self.len() + v.len();
213216
self.allocate_if_needed(required_cap);
214217
// SAFETY:
@@ -219,13 +222,13 @@ impl<D: Copy> JsStringBuilder<D> {
219222
}
220223

221224
fn new_layout(cap: usize) -> Layout {
222-
let new_layout = Layout::array::<D>(cap)
223-
.and_then(|arr| Layout::new::<SequenceString>().extend(arr))
225+
let new_layout = Layout::array::<D::Byte>(cap)
226+
.and_then(|arr| Layout::new::<SequenceString<D>>().extend(arr))
224227
.map(|(layout, offset)| (layout.pad_to_align(), offset))
225228
.map_err(|_| None);
226229
match new_layout {
227230
Ok((new_layout, offset)) => {
228-
debug_assert_eq!(offset, DATA_OFFSET);
231+
debug_assert_eq!(offset, D::DATA_OFFSET);
229232
new_layout
230233
}
231234
Err(None) => alloc_overflow(),
@@ -287,7 +290,7 @@ impl<D: Copy> JsStringBuilder<D> {
287290
///
288291
/// Caller should ensure the capacity is large enough to hold elements.
289292
#[inline]
290-
pub const unsafe fn push_unchecked(&mut self, v: D) {
293+
pub const unsafe fn push_unchecked(&mut self, v: D::Byte) {
291294
// SAFETY: Caller should ensure the capacity is large enough to hold elements.
292295
unsafe {
293296
self.data().add(self.len()).write(v);
@@ -318,7 +321,7 @@ impl<D: Copy> JsStringBuilder<D> {
318321
/// Extracts a slice containing the elements in the inner `RawJsString`.
319322
#[inline]
320323
#[must_use]
321-
pub fn as_slice(&self) -> &[D] {
324+
pub fn as_slice(&self) -> &[D::Byte] {
322325
if self.is_allocated() {
323326
// SAFETY:
324327
// The inner `RawJsString` is allocated which means it is not null.
@@ -335,7 +338,7 @@ impl<D: Copy> JsStringBuilder<D> {
335338
/// Use of a builder whose contents are not valid encoding is undefined behavior.
336339
#[inline]
337340
#[must_use]
338-
pub unsafe fn as_mut_slice(&mut self) -> &mut [D] {
341+
pub unsafe fn as_mut_slice(&mut self) -> &mut [D::Byte] {
339342
if self.is_allocated() {
340343
// SAFETY:
341344
// The inner `RawJsString` is allocated which means it is not null.
@@ -348,7 +351,7 @@ impl<D: Copy> JsStringBuilder<D> {
348351
/// Builds `JsString` from `JsStringBuilder`
349352
#[inline]
350353
#[must_use]
351-
fn build_inner(mut self, latin1: bool) -> JsString {
354+
fn build_inner(mut self) -> JsString {
352355
if self.is_empty() {
353356
return JsString::default();
354357
}
@@ -366,18 +369,18 @@ impl<D: Copy> JsStringBuilder<D> {
366369
// `NonNull` verified for us that the pointer returned by `alloc` is valid,
367370
// meaning we can write to its pointed memory.
368371
unsafe {
369-
inner.as_ptr().write(SequenceString::new(len, latin1));
372+
inner.as_ptr().write(SequenceString::<D>::new(len));
370373
}
371374

372375
// Tell the compiler not to call the destructor of `JsStringBuilder`,
373-
// because we move inner `RawJsString` to `JsString`.
376+
// because we move inner sequence string to `JsString`.
374377
std::mem::forget(self);
375378

376379
JsString { ptr: inner.cast() }
377380
}
378381
}
379382

380-
impl<D: Copy> Drop for JsStringBuilder<D> {
383+
impl<D: InternalStringType> Drop for JsStringBuilder<D> {
381384
/// Set cold since [`JsStringBuilder`] should be created to build `JsString`
382385
#[cold]
383386
#[inline]
@@ -397,21 +400,21 @@ impl<D: Copy> Drop for JsStringBuilder<D> {
397400
}
398401
}
399402

400-
impl<D: Copy> AddAssign<&JsStringBuilder<D>> for JsStringBuilder<D> {
403+
impl<D: InternalStringType> AddAssign<&JsStringBuilder<D>> for JsStringBuilder<D> {
401404
#[inline]
402405
fn add_assign(&mut self, rhs: &JsStringBuilder<D>) {
403406
self.extend_from_slice(rhs.as_slice());
404407
}
405408
}
406409

407-
impl<D: Copy> AddAssign<&[D]> for JsStringBuilder<D> {
410+
impl<D: InternalStringType> AddAssign<&[D::Byte]> for JsStringBuilder<D> {
408411
#[inline]
409-
fn add_assign(&mut self, rhs: &[D]) {
412+
fn add_assign(&mut self, rhs: &[D::Byte]) {
410413
self.extend_from_slice(rhs);
411414
}
412415
}
413416

414-
impl<D: Copy> Add<&JsStringBuilder<D>> for JsStringBuilder<D> {
417+
impl<D: InternalStringType> Add<&JsStringBuilder<D>> for JsStringBuilder<D> {
415418
type Output = Self;
416419

417420
#[inline]
@@ -421,19 +424,19 @@ impl<D: Copy> Add<&JsStringBuilder<D>> for JsStringBuilder<D> {
421424
}
422425
}
423426

424-
impl<D: Copy> Add<&[D]> for JsStringBuilder<D> {
427+
impl<D: InternalStringType> Add<&[D::Byte]> for JsStringBuilder<D> {
425428
type Output = Self;
426429

427430
#[inline]
428-
fn add(mut self, rhs: &[D]) -> Self::Output {
431+
fn add(mut self, rhs: &[D::Byte]) -> Self::Output {
429432
self.extend_from_slice(rhs);
430433
self
431434
}
432435
}
433436

434-
impl<D: Copy> Extend<D> for JsStringBuilder<D> {
437+
impl<D: InternalStringType> Extend<D::Byte> for JsStringBuilder<D> {
435438
#[inline]
436-
fn extend<I: IntoIterator<Item = D>>(&mut self, iter: I) {
439+
fn extend<I: IntoIterator<Item = D::Byte>>(&mut self, iter: I) {
437440
let iterator = iter.into_iter();
438441
let (lower_bound, _) = iterator.size_hint();
439442
let require_cap = self.len() + lower_bound;
@@ -442,33 +445,38 @@ impl<D: Copy> Extend<D> for JsStringBuilder<D> {
442445
}
443446
}
444447

445-
impl<D: Copy> FromIterator<D> for JsStringBuilder<D> {
448+
impl<D: InternalStringType> FromIterator<D::Byte> for JsStringBuilder<D> {
446449
#[inline]
447-
fn from_iter<T: IntoIterator<Item = D>>(iter: T) -> Self {
450+
fn from_iter<T: IntoIterator<Item = D::Byte>>(iter: T) -> Self {
448451
let mut builder = Self::new();
449452
builder.extend(iter);
450453
builder
451454
}
452455
}
453456

454-
impl<D: Copy> From<&[D]> for JsStringBuilder<D> {
457+
impl<D: InternalStringType> From<&[D::Byte]> for JsStringBuilder<D> {
455458
#[inline]
456-
fn from(value: &[D]) -> Self {
459+
fn from(value: &[D::Byte]) -> Self {
457460
let mut builder = Self::with_capacity(value.len());
458461
// SAFETY: The capacity is large enough to hold elements.
459462
unsafe { builder.extend_from_slice_unchecked(value) };
460463
builder
461464
}
462465
}
463466

464-
impl<D: Copy + Eq + PartialEq> PartialEq for JsStringBuilder<D> {
467+
impl<D: InternalStringType> PartialEq for JsStringBuilder<D>
468+
where
469+
D::Byte: Eq + PartialEq,
470+
{
465471
#[inline]
466472
fn eq(&self, other: &Self) -> bool {
467-
self.as_slice().eq(other.as_slice())
473+
let slice: &[D::Byte] = self.as_slice();
474+
let other_slice: &[D::Byte] = other.as_slice();
475+
slice.eq(other_slice)
468476
}
469477
}
470478

471-
impl<D: Copy> Clone for JsStringBuilder<D> {
479+
impl<D: InternalStringType> Clone for JsStringBuilder<D> {
472480
#[inline]
473481
fn clone(&self) -> Self {
474482
if self.is_allocated() {
@@ -491,7 +499,7 @@ impl<D: Copy> Clone for JsStringBuilder<D> {
491499
if source_len > self.capacity() {
492500
self.allocate(source_len);
493501
} else {
494-
// At this point, inner `RawJsString` of self or source can be not allocated,
502+
// At this point, inner sequence string of self or source can be not allocated,
495503
// returns earlier to avoid copying from/to `null`.
496504
if source_len == 0 {
497505
// SAFETY: 0 is always less or equal to self's capacity.
@@ -528,7 +536,7 @@ impl<D: Copy> Clone for JsStringBuilder<D> {
528536
/// s.extend([b'1', b'2', b'3']);
529537
/// let js_string = s.build();
530538
/// ```
531-
pub type Latin1JsStringBuilder = JsStringBuilder<u8>;
539+
pub type Latin1JsStringBuilder = JsStringBuilder<Latin1>;
532540

533541
impl Latin1JsStringBuilder {
534542
/// Builds a `JsString` if the current instance is strictly `ASCII`.
@@ -544,7 +552,7 @@ impl Latin1JsStringBuilder {
544552
#[must_use]
545553
pub fn build(self) -> Option<JsString> {
546554
if self.is_ascii() {
547-
Some(self.build_inner(true))
555+
Some(self.build_inner())
548556
} else {
549557
None
550558
}
@@ -562,7 +570,7 @@ impl Latin1JsStringBuilder {
562570
#[inline]
563571
#[must_use]
564572
pub unsafe fn build_as_latin1(self) -> JsString {
565-
self.build_inner(true)
573+
self.build_inner()
566574
}
567575
}
568576

@@ -577,14 +585,14 @@ impl Latin1JsStringBuilder {
577585
/// s.extend([0xD83C, 0xDFB9, 0xD83C, 0xDFB6, 0xD83C, 0xDFB5]); // 🎹🎶🎵
578586
/// let js_string = s.build();
579587
/// ```
580-
pub type Utf16JsStringBuilder = JsStringBuilder<u16>;
588+
pub type Utf16JsStringBuilder = JsStringBuilder<Utf16>;
581589

582590
impl Utf16JsStringBuilder {
583591
/// Builds `JsString` from `Utf16JsStringBuilder`
584592
#[inline]
585593
#[must_use]
586594
pub fn build(self) -> JsString {
587-
self.build_inner(false)
595+
self.build_inner()
588596
}
589597
}
590598

core/string/src/display.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ impl fmt::Debug for JsStringDebugInfo<'_> {
9090

9191
// Show kind specific fields from string.
9292
match self.inner.kind() {
93-
JsStringKind::Sequence => {
93+
JsStringKind::Latin1Sequence | JsStringKind::Utf16Sequence => {
9494
if let Some(rc) = self.inner.refcount() {
9595
dbg.borrow_mut().field("refcount", &rc);
9696
}

0 commit comments

Comments
 (0)