forked from swiftlang/swift-foundation
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathString+Encoding+Names.swift
More file actions
155 lines (135 loc) · 4.61 KB
/
String+Encoding+Names.swift
File metadata and controls
155 lines (135 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// MARK: - Private extensions for parsing encoding names
private extension UTF8.CodeUnit {
    /// Returns whether this code unit equals `other` when letter case is ignored.
    ///
    /// The receiver matches when it is identical to `other`, or to either of
    /// the case-converted forms of `other` produced by `_uppercased` and
    /// `_lowercased` (helpers declared outside this file).
    ///
    /// - Parameter other: The code unit to compare against.
    /// - Returns: `true` if the two code units are considered equal.
    func _isASCIICaseInsensitivelyEqual(to other: UTF8.CodeUnit) -> Bool {
        if self == other {
            return true
        }
        return self == other._uppercased || self == other._lowercased
    }
}
private extension String {
    /// Returns whether this string equals `other` when their UTF-8 code units
    /// are compared pairwise with
    /// `UTF8.CodeUnit._isASCIICaseInsensitivelyEqual(to:)`.
    ///
    /// Strings whose UTF-8 views have different lengths never compare equal.
    ///
    /// - Parameter other: The string to compare against.
    /// - Returns: `true` if every pair of code units matches and both views
    ///   end at the same position.
    func _isASCIICaseInsensitivelyEqual(to other: String) -> Bool {
        return utf8.elementsEqual(other.utf8) { mine, theirs in
            mine._isASCIICaseInsensitivelyEqual(to: theirs)
        }
    }
}
// MARK: - IANA Charset Names
/// Describes one entry of the IANA "charset" registry.
internal struct IANACharset {
    /// The preferred MIME name, if this charset has one.
    let preferredMIMEName: String?

    /// The name of this charset.
    let name: String

    /// The aliases of this charset.
    let aliases: [String]

    /// The name used to represent this charset: the preferred MIME name when
    /// present, otherwise `name`.
    var representativeName: String {
        if let preferredMIMEName {
            return preferredMIMEName
        }
        return name
    }

    /// Creates a charset description.
    ///
    /// - Parameters:
    ///   - preferredMIMEName: The preferred MIME name, or `nil` if there is none.
    ///   - name: The name of the charset.
    ///   - aliases: Other names the charset is known by.
    init(preferredMIMEName: String?, name: String, aliases: [String]) {
        self.preferredMIMEName = preferredMIMEName
        self.name = name
        self.aliases = aliases
    }

    /// Returns whether `string` names this charset.
    ///
    /// The preferred MIME name (when present), the name, and every alias are
    /// each compared with `string` using `_isASCIICaseInsensitivelyEqual(to:)`.
    ///
    /// - Parameter string: The candidate charset name.
    /// - Returns: `true` if any known name of this charset matches.
    func matches(_ string: String) -> Bool {
        let isMatch: (String) -> Bool = { candidate in
            candidate._isASCIICaseInsensitivelyEqual(to: string)
        }
        if let preferredMIMEName, isMatch(preferredMIMEName) {
            return true
        }
        return isMatch(name) || aliases.contains(where: isMatch)
    }
}
// MARK: - `String.Encoding` Names
extension String.Encoding {
    /// The IANA charset entry corresponding to this encoding, or `nil` when
    /// the encoding is not one of the cases listed below.
    private var _ianaCharset: IANACharset? {
        switch self {
        case .utf8: .utf8
        case .ascii: .usASCII
        case .japaneseEUC: .eucJP
        case .isoLatin1: .iso8859_1
        case .shiftJIS: .shiftJIS
        case .isoLatin2: .iso8859_2
        case .unicode: .utf16
        case .windowsCP1251: .windows1251
        case .windowsCP1252: .windows1252
        case .windowsCP1253: .windows1253
        case .windowsCP1254: .windows1254
        case .windowsCP1250: .windows1250
        case .iso2022JP: .iso2022JP
        case .macOSRoman: .macintosh
        case .utf16BigEndian: .utf16BE
        case .utf16LittleEndian: .utf16LE
        case .utf32: .utf32
        case .utf32BigEndian: .utf32BE
        case .utf32LittleEndian: .utf32LE
        default: nil
        }
    }

    /// The name of this encoding as registered in the IANA "charset" registry.
    ///
    /// The value is the representative name of the matching charset (its
    /// preferred MIME name when it has one, otherwise its name), or `nil` if
    /// this encoding has no corresponding charset entry.
    @available(FoundationPreview 6.3, *)
    public var ianaName: String? {
        return _ianaCharset?.representativeName
    }

    /// Creates an instance from the name of the IANA registry "charset".
    ///
    /// - Note: The given name is compared to each IANA "charset" name
    ///         with ASCII case-insensitive collation
    ///         to determine which encoding is suitable.
    ///
    /// - Parameter charsetName: A charset name, preferred MIME name, or alias.
    /// - Returns: `nil` if no supported encoding matches `charsetName`.
    @available(FoundationPreview 6.3, *)
    public init?(ianaName charsetName: String) {
        // Candidates are tried in this order and the first match wins.
        // Every entry must also have a case in the `_ianaCharset` switch.
        let possibilities: [String.Encoding] = [
            .utf8,
            .ascii,
            .japaneseEUC,
            .isoLatin1,
            .shiftJIS,
            .isoLatin2,
            .unicode, // .utf16
            .windowsCP1251,
            .windowsCP1252,
            .windowsCP1253,
            .windowsCP1254,
            .windowsCP1250,
            .iso2022JP,
            .macOSRoman,
            .utf16BigEndian,
            .utf16LittleEndian,
            .utf32,
            .utf32BigEndian,
            .utf32LittleEndian,
        ]
        for encoding in possibilities {
            // Avoid a force unwrap: if the list above and the `_ianaCharset`
            // switch ever drift apart, flag it in debug builds and skip the
            // entry instead of crashing in release builds.
            guard let charset = encoding._ianaCharset else {
                assertionFailure("An encoding listed in `possibilities` has no IANA charset mapping.")
                continue
            }
            if charset.matches(charsetName) {
                self = encoding
                return
            }
        }
        return nil
    }
}