Skip to content

Commit a06ff98

Browse files
committed
Merge remote-tracking branch 'origin/latin1-and-utf16-string-2' into latin1-and-utf16-string-3
2 parents 846cfbf + 81a52d8 commit a06ff98

File tree

3 files changed

+99
-97
lines changed

3 files changed

+99
-97
lines changed

core/string/src/lib.rs

Lines changed: 10 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ mod tests;
2626

2727
use self::{iter::Windows, str::JsSliceIndex};
2828
use crate::display::{JsStrDisplayEscaped, JsStrDisplayLossy, JsStringDebugInfo};
29-
use crate::r#type::{Latin1, Latin1SequenceString, StringType, Utf16, Utf16SequenceString};
29+
use crate::r#type::{Latin1, Utf16};
3030
pub use crate::vtable::StaticString;
3131
use crate::vtable::{SequenceString, SliceString};
3232
#[doc(inline)]
@@ -38,14 +38,13 @@ pub use crate::{
3838
str::{JsStr, JsStrVariant},
3939
};
4040
use std::marker::PhantomData;
41+
use std::{borrow::Cow, mem::ManuallyDrop};
4142
use std::{
42-
alloc::{Layout, alloc},
4343
convert::Infallible,
4444
hash::{Hash, Hasher},
4545
ptr::{self, NonNull},
4646
str::FromStr,
4747
};
48-
use std::{borrow::Cow, mem::ManuallyDrop};
4948
use vtable::JsStringVTable;
5049

5150
fn alloc_overflow() -> ! {
@@ -103,10 +102,10 @@ pub struct RawJsString {
103102
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
104103
#[repr(u8)]
105104
pub(crate) enum JsStringKind {
106-
/// A sequential memory slice of Latin1 bytes. See [`Latin1SequenceString`].
105+
/// A sequential memory slice of Latin1 bytes. See [`SequenceString`].
107106
Latin1Sequence = 0,
108107

109-
/// A sequential memory slice of UTF-16 code units. See [`Utf16SequenceString`].
108+
/// A sequential memory slice of UTF-16 code units. See [`SequenceString`].
110109
Utf16Sequence = 1,
111110

112111
/// A slice of an existing string. See [`SliceString`].
@@ -565,11 +564,11 @@ impl JsString {
565564
}
566565

567566
let (ptr, data_offset) = if latin1_encoding {
568-
let p = Self::allocate_seq::<Latin1>(full_count);
569-
(p.cast::<u8>(), size_of::<Latin1SequenceString>())
567+
let p = SequenceString::<Latin1>::allocate(full_count);
568+
(p.cast::<u8>(), size_of::<SequenceString<Latin1>>())
570569
} else {
571-
let p = Self::allocate_seq::<Utf16>(full_count);
572-
(p.cast::<u8>(), size_of::<Utf16SequenceString>())
570+
let p = SequenceString::<Utf16>::allocate(full_count);
571+
(p.cast::<u8>(), size_of::<SequenceString<Utf16>>())
573572
};
574573

575574
let string = {
@@ -623,78 +622,6 @@ impl JsString {
623622
StaticJsStrings::get_string(&string.as_str()).unwrap_or(string)
624623
}
625624

626-
/// Allocates a new [`Latin1SequenceString`] with an internal capacity of `str_len` bytes.
627-
///
628-
/// # Panics
629-
///
630-
/// Panics if `try_allocate_seq` returns `Err`.
631-
fn allocate_seq<T: StringType>(str_len: usize) -> NonNull<SequenceString<T>> {
632-
match Self::try_allocate_seq::<T>(str_len) {
633-
Ok(v) => v,
634-
Err(None) => alloc_overflow(),
635-
Err(Some(layout)) => std::alloc::handle_alloc_error(layout),
636-
}
637-
}
638-
639-
// This is marked as safe because it is always valid to call this function to request any number
640-
// of bytes, since this function ought to fail on an OOM error.
641-
/// Allocates a new [`SequenceString`] with an internal capacity of `str_len` bytes.
642-
///
643-
/// # Errors
644-
///
645-
/// Returns `Err(None)` on integer overflows `usize::MAX`.
646-
/// Returns `Err(Some(Layout))` on allocation error.
647-
fn try_allocate_seq<T: StringType>(
648-
str_len: usize,
649-
) -> Result<NonNull<SequenceString<T>>, Option<Layout>> {
650-
let (layout, offset) = Layout::array::<T::Byte>(str_len)
651-
.and_then(|arr| T::base_layout().extend(arr))
652-
.map(|(layout, offset)| (layout.pad_to_align(), offset))
653-
.map_err(|_| None)?;
654-
655-
debug_assert_eq!(offset, T::DATA_OFFSET);
656-
debug_assert_eq!(layout.align(), align_of::<SequenceString<T>>());
657-
658-
#[allow(clippy::cast_ptr_alignment)]
659-
// SAFETY:
660-
// The layout size of `SequenceString` is never zero, since it has to store
661-
// the length of the string and the reference count.
662-
let inner = unsafe { alloc(layout).cast::<SequenceString<T>>() };
663-
664-
// We need to verify that the pointer returned by `alloc` is not null, otherwise
665-
// we should abort, since an allocation error is pretty unrecoverable for us
666-
// right now.
667-
let inner = NonNull::new(inner).ok_or(Some(layout))?;
668-
669-
// SAFETY:
670-
// `NonNull` verified for us that the pointer returned by `alloc` is valid,
671-
// meaning we can write to its pointed memory.
672-
unsafe {
673-
// Write the first part, the `SequenceString`.
674-
inner.as_ptr().write(SequenceString::<T>::new(str_len));
675-
}
676-
677-
debug_assert!({
678-
let inner = inner.as_ptr();
679-
// SAFETY:
680-
// - `inner` must be a valid pointer, since it comes from a `NonNull`,
681-
// meaning we can safely dereference it to `SequenceString`.
682-
// - `offset` should point us to the beginning of the array,
683-
// and since we requested a `SequenceString` layout with a trailing
684-
// `[T::Byte; str_len]`, the memory of the array must be in the `usize`
685-
// range for the allocation to succeed.
686-
unsafe {
687-
// This is `<u8>` as the offset is in bytes.
688-
ptr::eq(
689-
inner.cast::<u8>().add(offset).cast(),
690-
(*inner).data().cast_mut(),
691-
)
692-
}
693-
});
694-
695-
Ok(inner)
696-
}
697-
698625
/// Creates a new [`JsString`] from `data`, without checking if the string is in the interner.
699626
fn from_slice_skip_interning(string: JsStr<'_>) -> Self {
700627
let count = string.len();
@@ -712,14 +639,14 @@ impl JsString {
712639
#[allow(clippy::cast_ptr_alignment)]
713640
match string.variant() {
714641
JsStrVariant::Latin1(s) => {
715-
let ptr = Self::allocate_seq::<Latin1>(count);
642+
let ptr = SequenceString::<Latin1>::allocate(count);
716643
let data = (&raw mut (*ptr.as_ptr()).data)
717644
.cast::<<Latin1 as r#type::sealed::InternalStringType>::Byte>();
718645
ptr::copy_nonoverlapping(s.as_ptr(), data, count);
719646
Self { ptr: ptr.cast() }
720647
}
721648
JsStrVariant::Utf16(s) => {
722-
let ptr = Self::allocate_seq::<Utf16>(count);
649+
let ptr = SequenceString::<Utf16>::allocate(count);
723650
let data = (&raw mut (*ptr.as_ptr()).data)
724651
.cast::<<Utf16 as r#type::sealed::InternalStringType>::Byte>();
725652
ptr::copy_nonoverlapping(s.as_ptr(), data, count);

core/string/src/type.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ pub trait StringType: InternalStringType + Sealed {
3737
type Char: Copy + Eq + 'static;
3838
}
3939

40+
// It is good defensive programming to have [`Latin1`] `!Copy`, as it should
41+
// not be used as a value anyway.
4042
#[allow(missing_copy_implementations)]
4143
#[derive(Debug)]
42-
pub struct Latin1;
44+
pub enum Latin1 {}
4345

4446
impl Sealed for Latin1 {}
4547
impl StringType for Latin1 {
@@ -48,7 +50,7 @@ impl StringType for Latin1 {
4850

4951
#[allow(private_interfaces)]
5052
impl InternalStringType for Latin1 {
51-
const DATA_OFFSET: usize = size_of::<Latin1SequenceString>();
53+
const DATA_OFFSET: usize = size_of::<SequenceString<Self>>();
5254
const KIND: JsStringKind = JsStringKind::Latin1Sequence;
5355
type Byte = u8;
5456

@@ -61,9 +63,11 @@ impl InternalStringType for Latin1 {
6163
}
6264
}
6365

66+
// It is good defensive programming to have [`Utf16`] `!Copy`, as it should
67+
// not be used as a value anyway.
6468
#[allow(missing_copy_implementations)]
6569
#[derive(Debug)]
66-
pub struct Utf16;
70+
pub enum Utf16 {}
6771

6872
impl Sealed for Utf16 {}
6973
impl StringType for Utf16 {
@@ -72,7 +76,7 @@ impl StringType for Utf16 {
7276

7377
#[allow(private_interfaces)]
7478
impl InternalStringType for Utf16 {
75-
const DATA_OFFSET: usize = size_of::<Utf16SequenceString>();
79+
const DATA_OFFSET: usize = size_of::<SequenceString<Self>>();
7680
const KIND: JsStringKind = JsStringKind::Utf16Sequence;
7781
type Byte = u16;
7882

@@ -84,6 +88,3 @@ impl InternalStringType for Utf16 {
8488
JsStr::utf16(slice)
8589
}
8690
}
87-
88-
pub(crate) type Latin1SequenceString = SequenceString<Latin1>;
89-
pub(crate) type Utf16SequenceString = SequenceString<Utf16>;

core/string/src/vtable/sequence.rs

Lines changed: 81 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,32 @@
1-
//! `VTable` implementations for [`Latin1SequenceString`] and [`Utf16SequenceString`].
1+
//! `VTable` implementations for [`SequenceString`].
22
use crate::r#type::StringType;
33
use crate::vtable::JsStringVTable;
4-
use crate::{JsStr, JsString};
5-
use std::alloc::{Layout, dealloc};
4+
use crate::{JsStr, JsString, alloc_overflow};
5+
use std::alloc::{Layout, alloc, dealloc};
66
use std::cell::Cell;
77
use std::marker::PhantomData;
88
use std::process::abort;
9+
use std::ptr;
910
use std::ptr::NonNull;
1011

11-
/// A sequential memory array of Latin1 bytes.
12+
/// A sequential memory array of `T::Char` elements.
13+
///
14+
/// # Notes
15+
/// A [`SequenceString`] is `!Sync` (using [`Cell`]) and invariant over `T` (strings
16+
/// of various types cannot be used interchangeably). The string, however, could be
17+
/// `Send`, although within Boa this does not make sense.
1218
#[repr(C)]
1319
pub(crate) struct SequenceString<T: StringType> {
1420
/// Embedded `VTable` - must be the first field for vtable dispatch.
1521
vtable: JsStringVTable,
1622
refcount: Cell<usize>,
17-
// Invariant, `!Send` and `!Sync`.
18-
_marker: PhantomData<*mut T>,
23+
// Forces invariant contract.
24+
_marker: PhantomData<fn() -> T>,
1925
pub(crate) data: [u8; 0],
2026
}
2127

2228
impl<T: StringType> SequenceString<T> {
23-
/// Creates a dummy [`Latin1SequenceString`]. This should only be used to write to
29+
/// Creates a [`SequenceString`] without data. This should only be used to write to
2430
/// an allocation which contains all the information.
2531
#[inline]
2632
#[must_use]
@@ -40,6 +46,74 @@ impl<T: StringType> SequenceString<T> {
4046
}
4147
}
4248

49+
/// Allocates a new [`SequenceString`] with an internal capacity of `len` characters.
50+
///
51+
/// # Panics
52+
///
53+
/// Panics if `try_allocate_seq` returns `Err`.
54+
pub(crate) fn allocate(len: usize) -> NonNull<SequenceString<T>> {
55+
match Self::try_allocate(len) {
56+
Ok(v) => v,
57+
Err(None) => alloc_overflow(),
58+
Err(Some(layout)) => std::alloc::handle_alloc_error(layout),
59+
}
60+
}
61+
62+
/// Allocates a new [`SequenceString`] with an internal capacity of `len` characters.
63+
///
64+
/// # Errors
65+
///
66+
/// Returns `Err(None)` on integer overflows `usize::MAX`.
67+
/// Returns `Err(Some(Layout))` on allocation error.
68+
pub(crate) fn try_allocate(len: usize) -> Result<NonNull<Self>, Option<Layout>> {
69+
let (layout, offset) = Layout::array::<T::Byte>(len)
70+
.and_then(|arr| T::base_layout().extend(arr))
71+
.map(|(layout, offset)| (layout.pad_to_align(), offset))
72+
.map_err(|_| None)?;
73+
74+
debug_assert_eq!(offset, T::DATA_OFFSET);
75+
debug_assert_eq!(layout.align(), align_of::<Self>());
76+
77+
#[allow(clippy::cast_ptr_alignment)]
78+
// SAFETY:
79+
// The layout size of `SequenceString` is never zero, since it has to store
80+
// the length of the string and the reference count.
81+
let inner = unsafe { alloc(layout).cast::<Self>() };
82+
83+
// We need to verify that the pointer returned by `alloc` is not null, otherwise
84+
// we should abort, since an allocation error is pretty unrecoverable for us
85+
// right now.
86+
let inner = NonNull::new(inner).ok_or(Some(layout))?;
87+
88+
// SAFETY:
89+
// `NonNull` verified for us that the pointer returned by `alloc` is valid,
90+
// meaning we can write to its pointed memory.
91+
unsafe {
92+
// Write the first part, the `SequenceString`.
93+
inner.as_ptr().write(Self::new(len));
94+
}
95+
96+
debug_assert!({
97+
let inner = inner.as_ptr();
98+
// SAFETY:
99+
// - `inner` must be a valid pointer, since it comes from a `NonNull`,
100+
// meaning we can safely dereference it to `SequenceString`.
101+
// - `offset` should point us to the beginning of the array,
102+
// and since we requested a `SequenceString` layout with a trailing
103+
// `[T::Byte; str_len]`, the memory of the array must be in the `usize`
104+
// range for the allocation to succeed.
105+
unsafe {
106+
// This is `<u8>` as the offset is in bytes.
107+
ptr::eq(
108+
inner.cast::<u8>().add(offset).cast(),
109+
(*inner).data().cast_mut(),
110+
)
111+
}
112+
});
113+
114+
Ok(inner)
115+
}
116+
43117
/// Returns the pointer to the data.
44118
#[inline]
45119
#[must_use]

0 commit comments

Comments
 (0)