Skip to content

Fastcontains improvements #187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Sources/WebURL/Parser/Parser+Host.swift
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ extension ParsedHost {
let result: Optional<ParsedHost>
let needsPercentDecoding =
hostname.withContiguousStorageIfAvailable {
$0.boundsChecked.uncheckedFastContains(ASCII.percentSign.codePoint)
$0.boundsChecked.fastContains(ASCII.percentSign.codePoint)
} ?? true
if needsPercentDecoding {
result = ParsedHost._parseSpecialHostname(
Expand Down
6 changes: 3 additions & 3 deletions Sources/WebURL/PercentEncoding.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1506,7 +1506,7 @@ public struct NoSubstitutions: SubstitutionMap {
@inlinable @inline(__always)
public func _canSkipDecoding(_ source: UnsafeBufferPointer<UInt8>) -> Bool {
source.count <= _percentDecodingFastPathThreshold
&& !source.boundsChecked.uncheckedFastContains(ASCII.percentSign.codePoint)
&& !source.boundsChecked.fastContains(ASCII.percentSign.codePoint)
}
}

Expand Down Expand Up @@ -2130,8 +2130,8 @@ extension URLEncodeSet {
@inlinable @inline(__always)
public func _canSkipDecoding(_ source: UnsafeBufferPointer<UInt8>) -> Bool {
source.count <= _percentDecodingFastPathThreshold
&& !(source.boundsChecked.uncheckedFastContains(ASCII.percentSign.codePoint)
|| source.boundsChecked.uncheckedFastContains(ASCII.plus.codePoint))
&& !(source.boundsChecked.fastContains(ASCII.percentSign.codePoint)
|| source.boundsChecked.fastContains(ASCII.plus.codePoint))
}
}

Expand Down
2 changes: 1 addition & 1 deletion Sources/WebURL/Util/ASCII+LazyTextTransformations.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ extension ASCII {
if !trimmedSlice.isEmpty {
let hasInternalNewlinesOrTabs =
trimmedSlice.withContiguousStorageIfAvailable {
$0.boundsChecked.uncheckedFastContainsTabOrCROrLF()
$0.boundsChecked.fastContainsTabOrCROrLF()
} ?? trimmedSlice.contains(where: { isNewlineOrTab($0) })
if hasInternalNewlinesOrTabs {
return .left(ASCII.NewlineAndTabFiltered(unchecked: trimmedSlice))
Expand Down
81 changes: 33 additions & 48 deletions Sources/WebURL/Util/BitTwiddling.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,49 +18,34 @@ extension UInt64 {
///
@inlinable @inline(__always)
internal init(repeatingByte byte: UInt8) {
self = 0
withUnsafeMutableBytes(of: &self) {
$0[0] = byte
$0[1] = byte
$0[2] = byte
$0[3] = byte
$0[4] = byte
$0[5] = byte
$0[6] = byte
$0[7] = byte
}
self = 0x01010101_01010101 &* UInt64(byte)
}
}

extension UnsafeBoundsCheckedBufferPointer where Element == UInt8 {

/// Whether or not the buffer contains the given byte.
///
/// This implementation is able to search chunks of 8 bytes at a time, using only 5 instructions per chunk.
///
/// > Important:
/// > This function is **not** bounds-checked (since 8-byte chunks are loaded directly from the `baseAddress`,
/// > rather than via the Collection interface), although of course it only reads data within the buffer's bounds.
/// > The reason it lives on `UnsafeBoundsCheckedBufferPointer` is because unsigned indexes allow for
/// > better performance and code-size.
///
@inlinable @inline(__always) // mask must be constant-folded.
internal func uncheckedFastContains(_ element: UInt8) -> Bool {
let mask = UInt64(repeatingByte: element)
return _uncheckedFastContains(element: element, mask: mask)
}

@inlinable
internal func _uncheckedFastContains(element: UInt8, mask: UInt64) -> Bool {
internal func fastContains(_ element: UInt8) -> Bool {

var i = startIndex
while distance(from: i, to: endIndex) >= 8 {

// - UnsafeBoundsCheckedBufferPointer does not enforce that its startIndex is in-bounds
// by construction; it only checks indexes which are actually read from.
// We need to check it here since we'll be reading using 'loadUnaligned'.
//
// - Since our index type is UInt, 'i <= endIndex' and 'endIndex <= Int.max' SHOULD be enough
// for the compiler to know that (i + 8) cannot overflow. Unfortunately it doesn't,
// so the precondition is only for the benefit of humans. https://github.com/apple/swift/issues/71919
precondition(i <= endIndex && endIndex <= Int.max)

while i &+ 8 <= endIndex {
// Load 8 bytes from the source.
var eightBytes = UnsafeRawPointer(
self.baseAddress.unsafelyUnwrapped.advanced(by: Int(bitPattern: i))
).loadUnaligned(as: UInt64.self)
var eightBytes = self.loadUnaligned_unchecked(fromByteOffset: i, as: UInt64.self)
// XOR every byte with the element we're searching for.
// If there are any matches, we'll get a zero byte in that position.
eightBytes ^= mask
eightBytes ^= UInt64(repeatingByte: element)
// Use bit-twiddling to detect if any bytes were zero.
// https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
let found = (eightBytes &- 0x0101_0101_0101_0101) & (~eightBytes & 0x8080_8080_8080_8080)
Expand All @@ -80,22 +65,23 @@ extension UnsafeBoundsCheckedBufferPointer where Element == UInt8 {
/// Whether or not the buffer contains an ASCII horizontal tab (0x09), line feed (0x0A),
/// or carriage return (0x0D) code-unit.
///
/// This implementation is able to search chunks of 8 bytes at a time, using only 5 instructions per chunk.
///
/// > Important:
/// > This function is **not** bounds-checked (since 8-byte chunks are loaded directly from the `baseAddress`,
/// > rather than via the Collection interface), although of course it only reads data within the buffer's bounds.
/// > The reason it lives on `UnsafeBoundsCheckedBufferPointer` is because unsigned indexes allow for
/// > better performance and code-size.
///
@inlinable
internal func uncheckedFastContainsTabOrCROrLF() -> Bool {
internal func fastContainsTabOrCROrLF() -> Bool {

var i = startIndex
while distance(from: i, to: endIndex) >= 8 {

// - UnsafeBoundsCheckedBufferPointer does not enforce that its startIndex is in-bounds
// by construction; it only checks indexes which are actually read from.
// We need to check it here since we'll be reading using 'loadUnaligned'.
//
// - Since our index type is UInt, 'i <= endIndex' and 'endIndex <= Int.max' SHOULD be enough
// for the compiler to know that (i + 8) cannot overflow. Unfortunately it doesn't,
// so the precondition is only for the benefit of humans. https://github.com/apple/swift/issues/71919
precondition(i <= endIndex && endIndex <= Int.max)

while i &+ 8 <= endIndex {
// Load 8 bytes from the source.
let eightBytes = UnsafeRawPointer(
self.baseAddress.unsafelyUnwrapped.advanced(by: Int(bitPattern: i))
).loadUnaligned(as: UInt64.self)
var eightBytes = self.loadUnaligned_unchecked(fromByteOffset: i, as: UInt64.self)

// Check for line feeds first; we're more likely to find one than a tab or carriage return.
var bytesForLF = eightBytes
Expand All @@ -105,10 +91,9 @@ extension UnsafeBoundsCheckedBufferPointer where Element == UInt8 {

// Check for tabs (0x09, 0b0000_1001) and carriage returns (0x0D, 0b0000_1101).
// These differ by one bit, so mask it out (turns carriage returns into tabs), then look for tabs.
var bytesForTCR = eightBytes
bytesForTCR &= UInt64(repeatingByte: 0b1111_1011)
bytesForTCR ^= UInt64(repeatingByte: 0b0000_1001)
found = (bytesForTCR &- 0x0101_0101_0101_0101) & (~bytesForTCR & 0x8080_8080_8080_8080)
eightBytes &= UInt64(repeatingByte: 0b1111_1011)
eightBytes ^= UInt64(repeatingByte: 0b0000_1001)
found = (eightBytes &- 0x0101_0101_0101_0101) & (~eightBytes & 0x8080_8080_8080_8080)
if found != 0 { return true }

i &+= 8
Expand Down
109 changes: 96 additions & 13 deletions Sources/WebURL/Util/Pointers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,93 @@
// --------------------------------------------


extension UnsafeRawPointer {
#if swift(<5.9)
  extension UnsafeRawPointer {

    /// Returns a new instance of the given type, constructed from the raw memory at the specified offset.
    ///
    /// The memory at this pointer plus `offset` must be initialized to `T` or another type
    /// that is layout compatible with `T`. It does not need to be aligned for access to `T`.
    ///
    /// - Parameters:
    ///   - offset: The distance, in bytes, from this pointer to the first byte to read. Defaults to zero.
    ///   - as: The integer type to construct from the raw bytes.
    /// - Returns: A `T` built from the `T.bitWidth / 8` bytes starting at `self + offset`.
    ///
    @inlinable @inline(__always)
    internal func loadUnaligned<T>(fromByteOffset offset: Int = 0, as: T.Type) -> T where T: FixedWidthInteger {
      assert(_isPOD(T.self))
      var val: T = 0
      withUnsafeMutableBytes(of: &val) {
        // Copy from 'self + offset' (not 'self'), so that the 'fromByteOffset' argument is honored.
        // A byte-wise copy places no alignment requirement on the source address.
        $0.copyMemory(from: UnsafeRawBufferPointer(start: self + offset, count: T.bitWidth / 8))
      }
      return val
    }
  }
#endif

extension UnsafeBoundsCheckedBufferPointer where Element == UInt8 {

/// Returns a new instance of the given type, constructed from the raw memory at the specified offset.
///
/// The memory at this pointer plus offset must be initialized to `T` or another type
/// that is layout compatible with `T`. It does not need to be aligned for access to `T`.
///
@inlinable @inline(__always)
internal func loadUnaligned<T>(
fromByteOffset offset: UInt = 0,
as: T.Type
) -> T where T: FixedWidthInteger {

// As far as memory safety is concerned, we only need to check the final byte against endIndex.
precondition(_isPOD(T.self))
precondition(offset + UInt(MemoryLayout<T>.size) <= endIndex)
return loadUnaligned_unchecked(fromByteOffset: offset, as: T.self)
}

/// Returns a new instance of the given type, constructed from the raw memory at the specified offset.
///
/// The memory at this pointer plus offset must be initialized to `T` or another type that is layout compatible with `T`.
/// It does not need to be aligned for access to `T`.
/// The memory at this pointer plus offset must be initialized to `T` or another type
/// that is layout compatible with `T`. It does not need to be aligned for access to `T`.
///
/// > Important:
/// >
/// > This function does not bounds-check the load operation in release builds.
/// > Callers of this function must perform their own reasoning about bounds-checking
/// > to ensure that an out-of-bounds read never occurs.
/// >
/// > If you need to perform a single unaligned load,
/// > use the ``loadUnaligned`` function instead (without the `_unchecked` suffix),
/// > as it includes bounds-checking.
///
@inlinable @inline(__always)
internal func loadUnaligned<T>(fromByteOffset offset: Int = 0, as: T.Type) -> T where T: FixedWidthInteger {
internal func loadUnaligned_unchecked<T>(
fromByteOffset offset: UInt = 0, as: T.Type
) -> T where T: FixedWidthInteger {

assert(_isPOD(T.self))
var val: T = 0
withUnsafeMutableBytes(of: &val) {
$0.copyMemory(from: UnsafeRawBufferPointer(start: self, count: T.bitWidth / 8))
assert(offset >= startIndex)
assert(offset + UInt(MemoryLayout<T>.size) <= endIndex)

// Given that we may assume the load is in-bounds,
// by ruling out zero-sized loads we may infer that `self.count > 0`,
// and hence that `self.baseAddress != nil`.
//
// But this way of handling nil baseAddresses
// is more likely to be constant-folded.
guard MemoryLayout<T>.size > 0 else {
return unsafeBitCast((), to: T.self)
}
return val

#if swift(>=5.9)
return UnsafeRawPointer(self.baseAddress.unsafelyUnwrapped)
.loadUnaligned(fromByteOffset: Int(bitPattern: offset), as: T.self)
#else
var val: T = 0
withUnsafeMutableBytes(of: &val) { dest in
dest.copyMemory(
from: UnsafeRawBufferPointer(
start: self.baseAddress.unsafelyUnwrapped + Int(bitPattern: offset),
count: MemoryLayout<T>.size
))
}
return val
#endif
}
}

Expand Down Expand Up @@ -464,12 +536,23 @@ extension UnsafeBoundsCheckedBufferPointer: RandomAccessCollection {
}

@inlinable
internal func index(_ i: UInt, offsetBy n: Int, limitedBy limit: UInt) -> UInt? {
let l = distance(from: i, to: limit)
if n > 0 ? l >= 0 && l < n : l <= 0 && n < l {
return nil
internal func index(_ i: UInt, offsetBy distance: Int, limitedBy limit: UInt) -> UInt? {
// Note that we are taking some liberties here:
// If (i, distance, limit) are not in order, Collection requires the limit to have no effect.
// We return 'nil' instead.
// Details at: https://forums.swift.org/t/allow-index-limitedby-to-return-nil-if-limit-is-invalid/70578

if distance >= 0 {
// All valid 'i' are <= Int.max, so this will not overflow.
// An invalid 'i' is allowed to return a nonsense result.
let j = i &+ UInt(distance)
return j <= limit ? j : nil

} else {
// All valid 'i' are >= 0 and <= Int.max, so this will not underflow.
let j = Int(bitPattern: i) &+ distance
return j >= limit ? UInt(bitPattern: j) : nil
}
return UInt(bitPattern: Int(bitPattern: i) &+ n)
}

@inlinable
Expand Down
10 changes: 5 additions & 5 deletions Sources/WebURL/WebURL+Scheme.swift
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ extension WebURL.SchemeKind {
@inlinable
internal init<UTF8Bytes>(parsing schemeContent: UTF8Bytes) where UTF8Bytes: Sequence, UTF8Bytes.Element == UInt8 {

if let contiguouslyParsed = schemeContent.withContiguousStorageIfAvailable({ buffer -> Self in
guard let count = UInt8(exactly: buffer.count), count > 0 else { return .other }
return WebURL.SchemeKind(ptr: UnsafeRawPointer(buffer.baseAddress.unsafelyUnwrapped), count: count)
}) {
func parseContiguous(_ buffer: UnsafeBufferPointer<UInt8>) -> Self {
buffer.baseAddress.map { WebURL.SchemeKind(ptr: UnsafeRawPointer($0), count: buffer.count) } ?? .other
}
if let contiguouslyParsed = schemeContent.withContiguousStorageIfAvailable(parseContiguous) {
self = contiguouslyParsed
return
}
Expand Down Expand Up @@ -93,7 +93,7 @@ extension WebURL.SchemeKind {
// Note: 'count' is a separate parameter because UnsafeRawBufferPointer.count includes a force-unwrap,
// which can have a significant performance impact: https://bugs.swift.org/browse/SR-14422
@inlinable
internal init(ptr: UnsafeRawPointer, count: UInt8) {
internal init(ptr: UnsafeRawPointer, count: Int) {
// Setting the 6th bit of each byte (i.e. OR-ing with 00100000) normalizes the code-unit to lowercase ASCII.
switch count {
case 2:
Expand Down
Loading
Loading