Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 51 additions & 51 deletions Sources/FoundationEssentials/URL/URL_Parsing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -567,63 +567,57 @@ extension URL {
shouldEncode = true
}

if flags.contains(.hasHost) {
let hostRange = impl.pointee.hostRange
@inline(__always)
func checkHost() -> Bool {
guard flags.contains(.hasHost) else { return true }
let host = span.extracting(impl.pointee.hostRange)
if flags.contains(.isIPLiteral) {
// Ignore the leading and trailing brackets
var i = hostRange.startIndex + 1
let endBracketIndex = hostRange.endIndex - 1
while i < endBracketIndex && URLComponentAllowedSet.hostIPvFuture.contains(span[i]) {
i += 1
assert(host.count >= 2)
let innerHost = host.extracting(1..<(host.count - 1))
guard let isValid = validateIPLiteral(
innerHost: innerHost,
useModernParsing: useModernParsing
) else {
return false
}
if i < endBracketIndex {
// We found a character that's not allowed in .hostIPvFuture
// Only a zone ID (starting at "%") can be percent-encoded
guard span[i] == UInt8(ascii: "%") else {
// The IP portion contained an invalid character that was
// not the start of a zone ID, so return false.
return false
}
// "%25" is the correctly-encoded zone ID delimiter for a URL
let isValidZoneID = (
i + 2 < endBracketIndex
&& span[i + 1] == UInt8(ascii: "2")
&& span[i + 2] == UInt8(ascii: "5")
&& validate(
span: span.extracting((i + 3)..<endBracketIndex),
component: .hostZoneID
)
)
if !isValidZoneID {
// We have an invalid zone ID, but we can encode it
guard allowEncoding else { return false }
flags.insert(.shouldEncodeHost)
shouldEncode = true
}
if !isValid {
guard allowEncoding else { return false }
flags.insert(.shouldEncodeHost)
shouldEncode = true
}
} else if !validate(span: span.extracting(hostRange), component: .host) {
} else if !validate(span: host, component: .host) {
guard allowEncoding else { return false }
guard span[hostRange.startIndex] != UInt8(ascii: "[") else { return false }
if useModernParsing && flags.contains(.hasSpecialScheme) {
// We will IDNA-encode the host, which doesn't encode ASCII
// characters. If there's an invalid ASCII character in the
// host, it would remain in the final string, so fail now.
if containsInvalidASCII(host: span.extracting(hostRange)) {
return false
if useModernParsing {
if flags.contains(.hasSpecialScheme) {
// We will IDNA-encode the host, which doesn't encode ASCII
// characters. If there's an invalid ASCII character in the
// host, it would remain in the final string, so fail now.
if containsInvalidASCII(host: host) { return false }
} else {
// Don't allow an unterminated IP-literal for any scheme
assert(!host.isEmpty) // Validation failed, so host is non-empty
if host[0] == UInt8(ascii: "[") { return false }
}
}
flags.insert(.shouldEncodeHost)
shouldEncode = true
}
return true
}

guard checkHost() else {
return false
}

if flags.contains(.hasPort) {
let port = span.extracting(impl.pointee.portRange)
if useModernParsing {
// Validate the port only contains digits
var isValid = true
for i in impl.pointee.portRange {
for i in port.indices {
// Checks for ASCII digits "0" through "9" for a generic UnsignedInteger
if (span[unchecked: i] &- 0x30) > 9 {
if (port[i] &- 0x30) > 9 {
isValid = false
break
}
Expand All @@ -637,24 +631,23 @@ extension URL {
return false
}
}
} else if !validate(span: span.extracting(impl.pointee.portRange), component: .anyValid) {
} else if !validate(span: port, component: .anyValid) {
// Allow any valid URL character in the port for compatibility
guard allowEncoding else { return false }
shouldEncode = true
}
}

// Path always exists
if !validate(span: span.extracting(impl.pointee.pathRange), component: .path) {
let path = span.extracting(impl.pointee.pathRange)
if !validate(span: path, component: .path) {
guard allowEncoding else { return false }
if flags.isDisjoint(with: [.hasScheme, .hasHost]) {
if useModernParsing && flags.isDisjoint(with: [.hasScheme, .hasHost]) {
// A relative-ref must not contain a ":" before the first "/"
let path = span.extracting(impl.pointee.pathRange)
for i in path.indices {
let v = path[i]
if v == UInt8(ascii: "/") {
if path[i] == UInt8(ascii: "/") {
break
} else if v == UInt8(ascii: ":") {
} else if path[i] == UInt8(ascii: ":") {
return false
}
}
Expand Down Expand Up @@ -1089,11 +1082,18 @@ extension URL {
let newHostStart = hostRange.startIndex + extraBytesAdded
if flags.contains(.shouldEncodeHost) {
if flags.contains(.isIPLiteral) {
// We can only be encoding a zone ID, find the start
while copyEnd < hostRange.endIndex - 1 && span[copyEnd] != UInt8(ascii: "%") {
copyEnd += 1
copyEnd += 1 // Include leading "["
let endBracketIndex = hostRange.endIndex - 1
if updateRanges {
// We can only be encoding a zone ID, find the start
while copyEnd < endBracketIndex && span[copyEnd] != UInt8(ascii: "%") {
copyEnd += 1
}
guard encodeZoneID(range: copyEnd..<endBracketIndex) else { return false }
} else {
// For CFURL, encode everything between the brackets
guard encode(range: copyEnd..<endBracketIndex, component: .hostIPvFuture) else { return false }
}
guard encodeZoneID(range: copyEnd..<(hostRange.endIndex - 1)) else { return false }
copyEnd += 1 // Include trailing "]"
} else if updateRanges && flags.contains(.hasSpecialScheme) && _uidnaHook() != nil {
// Support IDNA-encoding for NSURL (updateRanges: true)
Expand Down
38 changes: 38 additions & 0 deletions Sources/FoundationEssentials/URL/URL_Validation.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,44 @@ internal func containsInvalidASCII<T: UnsignedInteger & FixedWidthInteger>(
return false
}

// Validates the IP literal host portion inside the "[" and "]"
// A return value of false can be encoded, nil means reject entirely
@inline(__always)
internal func validateIPLiteral<T: UnsignedInteger & FixedWidthInteger>(
innerHost: borrowing Span<T>,
useModernParsing: Bool
) -> Bool? {
guard useModernParsing else {
// For CFURL, allow arbitrary percent-encoding
return validate(span: innerHost, component: .hostIPvFuture)
}
var i = 0
while i < innerHost.count && URLComponentAllowedSet.hostIPvFuture.contains(innerHost[i]) {
i += 1
}
if i == innerHost.count {
return true
}
// We found a character that's not allowed in .hostIPvFuture
// Only a zone ID (starting at "%") can be percent-encoded
guard innerHost[i] == UInt8(ascii: "%") else {
// The IP portion contained an invalid character that was
// not the start of a zone ID, so return nil.
return nil
}
// "%25" is the correctly-encoded zone ID delimiter for a URL
let isValidZoneID = (
i + 2 < innerHost.count
&& innerHost[i + 1] == UInt8(ascii: "2")
&& innerHost[i + 2] == UInt8(ascii: "5")
&& validate(
span: innerHost.extracting((i + 3)...),
component: .hostZoneID
)
)
return isValidZoneID
}

// Don't allow percent-escape sequences when validating certain paths
@inline(__always)
internal func strictValidate(path: borrowing Span<UInt8>) -> Bool {
Expand Down
Loading