forked from swiftlang/swift-foundation
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathParserState.swift
More file actions
1756 lines (1562 loc) · 83.1 KB
/
ParserState.swift
File metadata and controls
1756 lines (1562 loc) · 83.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2026 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#if canImport(FoundationEssentials)
import FoundationEssentials
#elseif FOUNDATION_FRAMEWORK
import Foundation
#endif
#if canImport(Darwin)
import Darwin
#elseif canImport(Bionic)
import Bionic
#elseif canImport(Glibc)
import Glibc
#elseif canImport(Musl)
import Musl
#elseif canImport(ucrt)
import ucrt
#elseif canImport(WASILibc)
import WASILibc
#endif
#if canImport(CollectionsInternal)
internal import CollectionsInternal
#elseif canImport(BasicContainers)
internal import BasicContainers
#endif
extension JSONParserDecoder {
@usableFromInline
internal struct ParserState: ~Escapable {
/// Cursor over the JSON document bytes being parsed.
@usableFromInline
var reader: DocumentReader
/// Depth counter carried with the parser state; starts at 0.
/// NOTE(review): presumably the current container nesting depth — the code visible here only copies it, confirm where it is maintained.
@usableFromInline
var depth: Int = 0
/// Borrowed decoder options; read with `options[]`.
@usableFromInline
let options: _Borrow<NewJSONDecoder.Options>
/// Pointer to the coding-path node for the value currently being parsed; `codingPath` is read from its pointee.
@usableFromInline
var currentTopCodingPathNode: UnsafeMutablePointer<CodingPathNode>
/// Creates parser state over raw bytes that have not been validated as UTF-8.
/// - Parameters:
///   - span: The document bytes; the reader starts at offset 0.
///   - options: Borrowed decoder options.
///   - topCodingPathNode: The coding-path node to start from.
@inlinable
@_lifetime(copy span, copy options)
init(
    unvalidatedUTF8Span span: RawSpan,
    options: _Borrow<NewJSONDecoder.Options>,
    topCodingPathNode: UnsafeMutablePointer<CodingPathNode>
) {
    self.options = options
    self.currentTopCodingPathNode = topCodingPathNode
    self.reader = DocumentReader(bytes: span)
}
/// Creates parser state over an already validated UTF-8 span.
/// - Parameters:
///   - utf8: The document text; the reader starts at offset 0.
///   - options: Borrowed decoder options.
///   - topCodingPathNode: The coding-path node to start from.
@inlinable
@_lifetime(copy utf8, copy options)
init(
    utf8: UTF8Span,
    options: _Borrow<NewJSONDecoder.Options>,
    topCodingPathNode: UnsafeMutablePointer<CodingPathNode>
) {
    self.options = options
    self.currentTopCodingPathNode = topCodingPathNode
    self.reader = DocumentReader(utf8: utf8)
}
/// The coding path stored on the current top coding-path node.
@usableFromInline
var codingPath: CodingPath {
    get {
        return currentTopCodingPathNode.pointee.path
    }
}
/// Adopts the mutable progress of another parser state: its read offset, current coding-path node and depth.
/// The reader's bytes and the options of `self` are left as they are.
/// - Parameter state: The state to copy progress from.
@usableFromInline
@inline(__always)
@_lifetime(self: copy self)
mutating func copyRelevantState(from state: ParserState) {
    depth = state.depth
    currentTopCodingPathNode = state.currentTopCodingPathNode
    reader.readOffset = state.reader.readOffset
}
/// Skips a JSON string, with the reader positioned on its opening quote.
/// - Throws: Any `JSONError` raised while scanning for the closing unescaped quote.
@inlinable
@_lifetime(self: copy self)
mutating func skipString() throws(JSONError) {
    // Step over the opening quote.
    self.reader.moveReaderIndex(forwardBy: 1)
    // Advance to the closing (unescaped) quote…
    try self.reader.skipUTF8StringTillNextUnescapedQuote()
    // …and step over it as well.
    self.reader.moveReaderIndex(forwardBy: 1)
}
// Fixed-width integer entry points. Each overload only pins the result type and forwards to the generic `decode()`.

@_lifetime(self: copy self)
mutating func decode(_ type: Int.Type) throws(CodingError.Decoding) -> Int { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: Int8.Type) throws(CodingError.Decoding) -> Int8 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: Int16.Type) throws(CodingError.Decoding) -> Int16 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: Int32.Type) throws(CodingError.Decoding) -> Int32 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: Int64.Type) throws(CodingError.Decoding) -> Int64 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: Int128.Type) throws(CodingError.Decoding) -> Int128 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: UInt.Type) throws(CodingError.Decoding) -> UInt { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: UInt8.Type) throws(CodingError.Decoding) -> UInt8 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: UInt16.Type) throws(CodingError.Decoding) -> UInt16 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: UInt32.Type) throws(CodingError.Decoding) -> UInt32 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: UInt64.Type) throws(CodingError.Decoding) -> UInt64 { try self.decode() }
@_lifetime(self: copy self)
mutating func decode(_ type: UInt128.Type) throws(CodingError.Decoding) -> UInt128 { try self.decode() }
/// String visitor used when a floating-point value is encoded as a JSON string.
///
/// Depending on the non-conforming float strategy, the string is either rejected or mapped to
/// `+infinity`, `-infinity` or `NaN` when it equals one of the configured spellings.
@usableFromInline
internal struct FloatingPointNonConformingStringValueVisitor<T: BinaryFloatingPoint & PrevalidatedJSONNumberBufferConvertible>: DecodingStringVisitor {
    @usableFromInline
    typealias DecodedValue = T

    /// The strategy that decides how string-encoded floats are handled.
    @usableFromInline
    let policy: Options.NonConformingFloatDecodingStrategy

    @usableFromInline
    init(policy: Options.NonConformingFloatDecodingStrategy) {
        self.policy = policy
    }

    /// Maps `string` to a non-finite value of `T`.
    /// - Throws: A type-mismatch error when the strategy is `.throw` or `string` matches none of the configured spellings.
    @usableFromInline
    func visitString(_ string: String) throws(CodingError.Decoding) -> T {
        guard case .convertFromString(let positiveInfinity, let negativeInfinity, let nan) = policy else {
            throw CodingError.typeMismatch(expectedType: T.self, actualValue: string)
        }
        // Checked in the same order as the configured spellings are listed.
        if string == positiveInfinity {
            return T.infinity
        }
        if string == negativeInfinity {
            return -T.infinity
        }
        if string == nan {
            return T.nan
        }
        throw CodingError.typeMismatch(expectedType: T.self, actualValue: string)
    }

    /// Same as `visitString(_:)`, after copying the UTF-8 bytes into a `String`.
    @usableFromInline
    func visitUTF8Bytes(_ buffer: UTF8Span) throws(CodingError.Decoding) -> T {
        // TODO: Inefficient.
        let copied = String(copying: buffer)
        return try visitString(copied)
    }
}
/// Decodes a floating-point value of type `T` at the current read position.
///
/// A value starting with a quote is decoded as a string and interpreted with the non-conforming float
/// strategy; a value starting with `-` or a digit is parsed as a JSON number; anything else is a decoding error.
/// - Throws: `CodingError.Decoding`; JSON-level errors are attached to the current `codingPath`.
@inline(__always)
@_lifetime(self: copy self)
mutating func decodeFloatingPoint<T: BinaryFloatingPoint & PrevalidatedJSONNumberBufferConvertible>(_ t: T.Type) throws(CodingError.Decoding) -> T {
    do throws(_JSONDecodingError) {
        guard let firstByte = reader.peek() else {
            throw .json(JSONError.unexpectedEndOfFile)
        }
        switch firstByte {
        case ._quote:
            // Decode through a temporary decoder, then adopt how far it advanced.
            let visitor = FloatingPointNonConformingStringValueVisitor<T>(
                policy: self.options[].nonConformingFloatDecodingStrategy
            )
            var stringDecoder = JSONParserDecoder(state: self)
            let value = try stringDecoder.decodeString(visitor) ^^ .decodingError
            self.copyRelevantState(from: stringDecoder.state)
            return value
        case ._minus, _asciiNumbers:
            return try reader.parseFloatingPoint(as: t) ^^ .jsonError
        default:
            throw .decoding(decodingError(expectedTypeDescription: "floating point number"))
        }
    } catch {
        switch error {
        case .json(let jsonError):
            throw jsonError.at(self.codingPath)
        case .decoding(let decodingFailure):
            throw decodingFailure
        }
    }
}
// Floating-point entry points; both forward to `decodeFloatingPoint(_:)`.

@_lifetime(self: copy self)
mutating func decode(_ t: Float.Type) throws(CodingError.Decoding) -> Float {
    return try decodeFloatingPoint(Float.self)
}
@_lifetime(self: copy self)
mutating func decode(_ t: Double.Type) throws(CodingError.Decoding) -> Double {
    return try decodeFloatingPoint(Double.self)
}
/// Decodes a fixed-width integer of type `T` at the current read position.
///
/// When the integer parser asks for a floating-point retry, the value is parsed as a `Double` and accepted
/// only if it converts exactly to `T` and its magnitude is below 2^53.
/// - Throws: `CodingError.Decoding`; JSON-level errors are attached to the current `codingPath`.
@inlinable
@inline(__always)
mutating func decode<T: FixedWidthInteger>() throws(CodingError.Decoding) -> T {
    do throws(_JSONDecodingError) {
        let parsed = try reader.parseInteger(as: T.self) ^^ .jsonError
        switch parsed {
        case .pureInteger(let value):
            return value
        case .notANumber:
            throw .decoding(decodingError(expectedTypeDescription: "integer number"))
        case .retryAsFloatingPoint:
            // TODO: Slowpath? Lots of inlined code here.
            let double = try reader.parseFloatingPoint(as: Double.self) ^^ .jsonError
            // Double only has 53 bits of significand, so values with magnitude >= 2^53 may have been
            // rounded; they are rejected along with values that do not convert exactly to `T`.
            // TODO: Classic JSONDecoder would retry Decimal -> integer parsing for these values.
            let firstInexactMagnitude = Double(sign: .plus, exponent: Double.significandBitCount + 1, significand: 1)
            guard double.magnitude < firstInexactMagnitude, let value = T(exactly: double) else {
                // TODO: Include the parsed string? Explain we're trying to represent as an integer?
                throw .json(JSONError.numberIsNotRepresentableInSwift(parsed: String(double)))
            }
            return value
        }
    } catch {
        switch error {
        case .json(let jsonError):
            throw jsonError.at(self.codingPath)
        case .decoding(let decodingFailure):
            throw decodingFailure
        }
    }
}
/// Decodes a number for a visitor that gave no type hint.
///
/// Visitors that prefer arbitrary-precision numbers receive the raw number text as a `UTF8Span`;
/// all others go through `decodeUnhintedNumberCommon(_:isNegative:)`.
/// - Parameters:
///   - visitor: The visitor that receives the number.
///   - isNegative: Forwarded to the common path.
@_lifetime(self: copy self)
mutating func decodeUnhintedNumber<V: JSONDecodingVisitor & ~Copyable & ~Escapable>(_ visitor: borrowing V, isNegative: Bool) throws(CodingError.Decoding) -> V.DecodedValue {
    guard visitor.prefersArbitraryPrecisionNumbers else {
        return try decodeUnhintedNumberCommon(visitor, isNegative: isNegative)
    }
    // Bracket the number text by the read offset before and after skipping it.
    let numberStart = reader.readOffset
    _ = reader.skipNumber()
    let numberEnd = reader.readOffset
    let numberBytes = reader.bytes.extracting(unchecked: numberStart..<numberEnd)
    // We're asserting here that `skipNumber` stops before any invalid JSON number bytes, which guarantees that we have ASCII.
    // TODO: 0 length spans?
    let numberText = UTF8Span(unchecked: .init(_bytes: numberBytes), isKnownASCII: true)
    return try visitor.visitArbitraryPrecisionNumber(numberText)
}
/// Decodes an unhinted number, preferring a 64-bit integer and falling back to `Double`.
///
/// Negative numbers are tried as `Int64`, all others as `UInt64`. If the digits are not a pure integer,
/// the value is parsed as a `Double` instead.
/// - Throws: `CodingError.Decoding`; JSON-level errors are attached to the current `codingPath`.
@_lifetime(self: copy self)
mutating func decodeUnhintedNumberCommon<V: DecodingNumberVisitor & ~Copyable & ~Escapable>(_ visitor: borrowing V, isNegative: Bool) throws(CodingError.Decoding) -> V.DecodedValue {
    do throws(_JSONDecodingError) {
        // TODO: Consider constraining the visited integer type to the smallest that will fit it. Default visitor implementations would promote back to the largest implemented visitor.
        if !isNegative {
            let unsignedOutcome = try reader._parseIntegerDigits(isNegative: false) as DocumentReader.IntegerParseResult<UInt64> ^^ .jsonError
            if case .pureInteger(let value) = unsignedOutcome {
                return try visitor.visit(value) ^^ .decodingError
            }
        } else {
            // Consume '-' before handing the digits to the integer parser.
            reader.moveReaderIndex(forwardBy: 1)
            let signedOutcome = try reader._parseIntegerDigits(isNegative: true) as DocumentReader.IntegerParseResult<Int64> ^^ .jsonError
            if case .pureInteger(let value) = signedOutcome {
                return try visitor.visit(value) ^^ .decodingError
            }
            // Not a pure integer: put the '-' back so the floating-point parser sees it.
            reader.moveReaderIndex(forwardBy: -1)
        }
        let floatingValue = try reader.parseFloatingPoint(as: Double.self) ^^ .jsonError
        return try visitor.visit(floatingValue) ^^ .decodingError
    } catch {
        switch error {
        case .json(let jsonError):
            throw jsonError.at(self.codingPath)
        case .decoding(let decodingFailure):
            throw decodingFailure
        }
    }
}
/// Skips the next JSON value without producing a decoded result.
///
/// Leading whitespace is consumed first; the first byte of the value then selects how it is skipped.
/// Strings, `true`/`false`, `null` and numbers are skipped directly on `reader`. Objects and arrays are
/// walked with a `BlackHoleVisitor` through a temporary container decoder, after which this state is
/// replaced by the decoder's parser state.
/// - Throws: `CodingError.Decoding`; JSON-level errors are attached to the current `codingPath`.
mutating func skipValue() throws(CodingError.Decoding) {
do throws(_JSONDecodingError) {
let byte = try reader.consumeWhitespaceAndPeek() ^^ .jsonError
switch byte {
case ._quote:
try skipString() ^^ .jsonError
case ._openbrace:
// Temporary dictionary coding-path node, stored in a local InlineArray, whose parent is the current node.
var dictionaryNode: InlineArray = [
JSONParserDecoder.CodingPathNode.newDictionaryNode(withParent: self.currentTopCodingPathNode)
]
var nodeSpan = dictionaryNode.mutableSpan
// Make the temporary node the current top node while the object is walked.
self.currentTopCodingPathNode = nodeSpan.withUnsafeMutableBufferPointer {
$0.baseAddress!
}
// Runs on every exit from this case, including throws.
// NOTE(review): withExtendedLifetime presumably keeps nodeSpan alive until the unwind has run — confirm.
defer {
withExtendedLifetime(nodeSpan) {
self.currentTopCodingPathNode.unwindToParent()
}
}
var decoder: JSONParserDecoder.StructDecoder
do throws(JSONError) { decoder = try JSONParserDecoder.StructDecoder(parserState: self, midContainer: false) } catch { throw .json(error) }
_ = try BlackHoleVisitor().visit(decoder: &decoder) ^^ .decodingError
try decoder._finish() ^^ .decodingError
// Adopt the state the container decoder ended with.
self = decoder.parserState
case ._openbracket:
// Same scheme as for objects, with an array coding-path node.
var arrayNode: InlineArray = [
JSONParserDecoder.CodingPathNode.newArrayNode(withParent: self.currentTopCodingPathNode)
]
var nodeSpan = arrayNode.mutableSpan
self.currentTopCodingPathNode = nodeSpan.withUnsafeMutableBufferPointer {
$0.baseAddress!
}
defer {
withExtendedLifetime(nodeSpan) {
self.currentTopCodingPathNode.unwindToParent()
}
}
var decoder: JSONParserDecoder.ArrayDecoder
do throws(JSONError) { decoder = try JSONParserDecoder.ArrayDecoder(parserState: self, midContainer: false) } catch { throw .json(error) }
_ = try BlackHoleVisitor().visit(decoder: &decoder) ^^ .decodingError
try decoder._finish() ^^ .decodingError
// The array decoder exposes its state through `innerParser`.
self = decoder.innerParser.state
case UInt8(ascii: "f"), UInt8(ascii: "t"):
_ = try reader.readBool() ^^ .jsonError
case UInt8(ascii: "n"):
try reader.readNull() ^^ .jsonError
case UInt8(ascii: "-"), _asciiNumbers:
// The result of skipNumber() is not needed here.
reader.skipNumber()
case ._space, ._return, ._newline, ._tab:
// Debug-only check: consumeWhitespaceAndPeek() above never returns a whitespace byte.
assertionFailure("Expected that all white space is consumed")
default:
throw .json(JSONError.unexpectedCharacter(ascii: byte, location: reader.sourceLocation))
}
} catch {
switch error {
case .json(let error):
// JSON-level errors get the coding path attached.
throw error.at(self.codingPath)
case .decoding(let error):
throw error
}
}
}
@frozen
public struct DocumentReader: ~Escapable {
/// The raw bytes of the document being read.
@usableFromInline
let bytes: RawSpan
/// `true` when the reader was created from a `UTF8Span`, `false` when created from raw bytes.
@usableFromInline
let utf8Validated: Bool
/// Byte offset into `bytes` of the next byte to read; starts at 0.
@usableFromInline
internal var readOffset : Int
/// The offset one past the last byte of the document, i.e. the byte count of `bytes`.
@inlinable
var endOffset: Int {
    get {
        return bytes.byteCount
    }
}
/// Reports whether at least `count` bytes remain between the read offset and the end of the document.
@inlinable
@inline(__always)
func checkRemainingBytes(_ count: Int) -> Bool {
    let remaining = endOffset - readOffset
    return remaining >= count
}
/// Ensures that at least `count` bytes remain.
/// - Throws: `JSONError.unexpectedEndOfFile` when fewer than `count` bytes remain.
@inlinable
@inline(__always)
func requireRemainingBytes(_ count: Int) throws(JSONError) {
    if !checkRemainingBytes(count) {
        throw JSONError.unexpectedEndOfFile
    }
}
/// The source location of the current read offset.
@usableFromInline
var sourceLocation : JSONError.SourceLocation {
    return sourceLocation(atOffset: 0)
}
/// The source location `offset` bytes past the current read offset, found by counting lines and columns in `bytes`.
@usableFromInline
func sourceLocation(atOffset offset: Int) -> JSONError.SourceLocation {
    let absoluteOffset = readOffset + offset
    return JSONError.SourceLocation.countingLinesAndColumns(upTo: absoluteOffset, in: bytes)
}
/// `true` when the read offset equals the end offset.
@inlinable
@inline(__always)
var isEOF: Bool {
    return readOffset == endOffset
}
/// Creates a reader at offset 0 over raw bytes; the reader is marked as not UTF-8 validated.
@inlinable
@_lifetime(copy bytes)
init(bytes: RawSpan) {
    self.utf8Validated = false
    self.readOffset = 0
    self.bytes = bytes
}
/// Creates a reader at offset 0 over the bytes of a `UTF8Span`; the reader is marked as UTF-8 validated.
@inlinable
@_lifetime(copy utf8)
init(utf8: UTF8Span) {
    self.utf8Validated = true
    self.readOffset = 0
    self.bytes = utf8.span.bytes
}
/// Returns the byte at the read offset and advances past it, or returns `nil` at end of input.
@inlinable
@inline(__always)
mutating func read() -> UInt8? {
    if isEOF {
        return nil
    }
    let byte = bytes._loadByteUnchecked(readOffset)
    readOffset &+= 1
    return byte
}
/// Returns the byte `offset` positions past the read offset without advancing, or `nil` if that position is at or past the end.
/// - Parameter offset: A non-negative distance from the read offset (checked with `assert`).
@inlinable
@inline(__always)
func peek(offset: Int = 0) -> UInt8? {
    assert(offset >= 0)
    assert(0 <= readOffset)
    let target = readOffset &+ offset
    return target < endOffset ? bytes._loadByteUnchecked(target) : nil
}
/// Loads a value of type `T` from the read offset (unaligned) without advancing, or returns `nil`
/// when fewer than `MemoryLayout<T>.size` bytes remain.
@inlinable
@inline(__always)
func peek<T: BitwiseCopyable>(as type: T.Type) -> T? {
    assert(0 <= readOffset)
    if !checkRemainingBytes(MemoryLayout<T>.size) {
        return nil
    }
    return bytes.unsafeLoadUnaligned(fromUncheckedByteOffset: readOffset, as: type)
}
/// Moves the read offset by `offset` bytes (negative values move backwards). No bounds check is performed.
@inlinable
@inline(__always)
@_lifetime(self: copy self)
mutating func moveReaderIndex(forwardBy offset: Int) {
    readOffset = readOffset &+ offset
}
/// Returns the offset that lies `offset` bytes from the current read offset, without moving the reader.
@inlinable
@inline(__always)
func index(offsetBy offset: Int) -> Int {
    return readOffset &+ offset
}
/// Returns the number of bytes from `start` to `end` (negative when `end` precedes `start`).
@inlinable
@inline(__always)
func distance(from start: Int, to end: Int) -> Int {
    return end - start
}
/// Returns the part of `bytes` from the read offset to the end.
@inlinable
@inline(__always)
@_lifetime(copy self)
func remainingBytes() -> RawSpan {
    return bytes.extracting(readOffset...)
}
/// A 64-bit set with one bit per whitespace byte value: bit `b` is set for space, carriage return, newline and tab.
@inlinable
static var whitespaceBitmap: UInt64 {
    var bitmap: UInt64 = 0
    bitmap |= (1 as UInt64) << UInt8._space
    bitmap |= (1 as UInt64) << UInt8._return
    bitmap |= (1 as UInt64) << UInt8._newline
    bitmap |= (1 as UInt64) << UInt8._tab
    return bitmap
}
/// Counts the consecutive JSON whitespace bytes (space, CR, LF, tab) at the low-order end of `u32`.
///
/// Byte 0 is the least-significant byte, which is the first byte in memory when the word was loaded
/// on a little-endian host. The result is in `0...4`.
///
/// The per-byte test is exact. The previous `(x ^ pattern) &- 0x01…` form flagged bytes with the high
/// bit set and bytes following a match (borrow propagation), so e.g. `" !"` counted as two whitespace bytes.
@_effects(readnone)
@inlinable
internal static func u32LeadingWhitespaceBytes(_ u32: UInt32) -> Int {
    let ones: UInt32 = 0x01010101
    let low7: UInt32 = 0x7F7F7F7F
    let high: UInt32 = 0x80808080
    // XOR with a broadcast of each whitespace value: a byte becomes 0 exactly where the input matches.
    let spaceDiff = u32 ^ (UInt32(UInt8(ascii: " ")) &* ones)
    let returnDiff = u32 ^ (UInt32(UInt8(ascii: "\r")) &* ones)
    let newlineDiff = u32 ^ (UInt32(UInt8(ascii: "\n")) &* ones)
    let tabDiff = u32 ^ (UInt32(UInt8(ascii: "\t")) &* ones)
    // Exact zero-byte detector: yields 0x80 in every byte that is 0 and 0x00 elsewhere.
    // `(d & 0x7F) + 0x7F` cannot carry into the neighbouring byte.
    let spaceHits = ~(((spaceDiff & low7) &+ low7) | spaceDiff | low7)
    let returnHits = ~(((returnDiff & low7) &+ low7) | returnDiff | low7)
    let newlineHits = ~(((newlineDiff & low7) &+ low7) | newlineDiff | low7)
    let tabHits = ~(((tabDiff & low7) &+ low7) | tabDiff | low7)
    let whitespace = spaceHits | returnHits | newlineHits | tabHits
    // 0x80 marks every non-whitespace byte; the lowest such bit locates the first one.
    // When all bytes are whitespace the value is 0 and `trailingZeroBitCount` is the bit width.
    let nonWhitespace = ~whitespace & high
    return nonWhitespace.trailingZeroBitCount >> 3 // /8
}
/// Counts the consecutive JSON whitespace bytes (space, CR, LF, tab) at the low-order end of `u64`.
///
/// Byte 0 is the least-significant byte, which is the first byte in memory when the word was loaded
/// on a little-endian host. The result is in `0...8`.
///
/// The per-byte test is exact. The previous `(x ^ pattern) &- 0x01…` form flagged bytes with the high
/// bit set and bytes following a match (borrow propagation), so e.g. `" !"` counted as two whitespace bytes.
@_effects(readnone)
@inlinable
internal static func u64LeadingWhitespaceBytes(_ u64: UInt64) -> Int {
    let ones: UInt64 = 0x0101010101010101
    let low7: UInt64 = 0x7F7F7F7F7F7F7F7F
    let high: UInt64 = 0x8080808080808080
    // XOR with a broadcast of each whitespace value: a byte becomes 0 exactly where the input matches.
    let spaceDiff = u64 ^ (UInt64(UInt8(ascii: " ")) &* ones)
    let returnDiff = u64 ^ (UInt64(UInt8(ascii: "\r")) &* ones)
    let newlineDiff = u64 ^ (UInt64(UInt8(ascii: "\n")) &* ones)
    let tabDiff = u64 ^ (UInt64(UInt8(ascii: "\t")) &* ones)
    // Exact zero-byte detector: yields 0x80 in every byte that is 0 and 0x00 elsewhere.
    // `(d & 0x7F) + 0x7F` cannot carry into the neighbouring byte.
    let spaceHits = ~(((spaceDiff & low7) &+ low7) | spaceDiff | low7)
    let returnHits = ~(((returnDiff & low7) &+ low7) | returnDiff | low7)
    let newlineHits = ~(((newlineDiff & low7) &+ low7) | newlineDiff | low7)
    let tabHits = ~(((tabDiff & low7) &+ low7) | tabDiff | low7)
    let whitespace = spaceHits | returnHits | newlineHits | tabHits
    // 0x80 marks every non-whitespace byte; the lowest such bit locates the first one.
    // When all bytes are whitespace the value is 0 and `trailingZeroBitCount` is the bit width.
    let nonWhitespace = ~whitespace & high
    return nonWhitespace.trailingZeroBitCount >> 3 // /8
}
/// Counts the consecutive JSON whitespace bytes (space, CR, LF, tab) at the low-order end of `u128`.
///
/// Byte 0 is the least-significant byte, which is the first byte in memory when the word was loaded
/// on a little-endian host. The result is in `0...16`.
///
/// The per-byte test is exact. The previous `(x ^ pattern) &- 0x01…` form flagged bytes with the high
/// bit set and bytes following a match (borrow propagation), so e.g. `" !"` counted as two whitespace bytes.
@_effects(readnone)
@inlinable
internal static func u128LeadingWhitespaceBytes(_ u128: UInt128) -> Int {
    let ones: UInt128 = 0x01010101010101010101010101010101
    let low7: UInt128 = 0x7F7F7F7F7F7F7F7F7F7F7F7F7F7F7F7F
    let high: UInt128 = 0x80808080808080808080808080808080
    // XOR with a broadcast of each whitespace value: a byte becomes 0 exactly where the input matches.
    let spaceDiff = u128 ^ (UInt128(UInt8(ascii: " ")) &* ones)
    let returnDiff = u128 ^ (UInt128(UInt8(ascii: "\r")) &* ones)
    let newlineDiff = u128 ^ (UInt128(UInt8(ascii: "\n")) &* ones)
    let tabDiff = u128 ^ (UInt128(UInt8(ascii: "\t")) &* ones)
    // Exact zero-byte detector: yields 0x80 in every byte that is 0 and 0x00 elsewhere.
    // `(d & 0x7F) + 0x7F` cannot carry into the neighbouring byte.
    let spaceHits = ~(((spaceDiff & low7) &+ low7) | spaceDiff | low7)
    let returnHits = ~(((returnDiff & low7) &+ low7) | returnDiff | low7)
    let newlineHits = ~(((newlineDiff & low7) &+ low7) | newlineDiff | low7)
    let tabHits = ~(((tabDiff & low7) &+ low7) | tabDiff | low7)
    let whitespace = spaceHits | returnHits | newlineHits | tabHits
    // 0x80 marks every non-whitespace byte; the lowest such bit locates the first one.
    // When all bytes are whitespace the value is 0 and `trailingZeroBitCount` is the bit width.
    let nonWhitespace = ~whitespace & high
    return nonWhitespace.trailingZeroBitCount >> 3 // /8
}
/// Builds a per-byte whitespace mask for `u64`.
///
/// The result has `0x80` in every byte position whose input byte is a JSON whitespace byte
/// (space, CR, LF, tab) and `0x00` in all other positions, so a consumer can test byte `i`
/// with `mask & (0x80 << (i * 8))`.
///
/// The per-byte test is exact. The previous `(x ^ pattern) &- 0x01…` form set the flag bit for bytes
/// with the high bit set and for bytes following a match (borrow propagation), and left unspecified
/// low bits in the result.
@_effects(readnone)
@inlinable
internal static func makeU64WhitespaceBitmap(_ u64: UInt64) -> UInt64 {
    let ones: UInt64 = 0x0101010101010101
    let low7: UInt64 = 0x7F7F7F7F7F7F7F7F
    // XOR with a broadcast of each whitespace value: a byte becomes 0 exactly where the input matches.
    let spaceDiff = u64 ^ (UInt64(UInt8(ascii: " ")) &* ones)
    let returnDiff = u64 ^ (UInt64(UInt8(ascii: "\r")) &* ones)
    let newlineDiff = u64 ^ (UInt64(UInt8(ascii: "\n")) &* ones)
    let tabDiff = u64 ^ (UInt64(UInt8(ascii: "\t")) &* ones)
    // Exact zero-byte detector: yields 0x80 in every byte that is 0 and 0x00 elsewhere.
    // `(d & 0x7F) + 0x7F` cannot carry into the neighbouring byte.
    let spaceHits = ~(((spaceDiff & low7) &+ low7) | spaceDiff | low7)
    let returnHits = ~(((returnDiff & low7) &+ low7) | returnDiff | low7)
    let newlineHits = ~(((newlineDiff & low7) &+ low7) | newlineDiff | low7)
    let tabHits = ~(((tabDiff & low7) &+ low7) | tabDiff | low7)
    return spaceHits | returnHits | newlineHits | tabHits
}
/// Skips JSON whitespace (space, CR, LF, tab) and returns the first non-whitespace byte without consuming it.
///
/// On return the read offset points at the returned byte; if only whitespace remained, it points at the end.
/// - Throws: `JSONError.unexpectedEndOfFile` when the input ends before a non-whitespace byte is found.
///
/// Earlier word-at-a-time experiments (a cached whitespace bitmap, noted as working but too expensive,
/// and 8-, 16- and 4-byte loads through the `u*LeadingWhitespaceBytes` helpers) are disabled;
/// the byte-wise loop below is the active implementation.
@usableFromInline
@inline(__always)
@discardableResult
mutating func consumeWhitespaceAndPeek() throws(JSONError) -> UInt8 {
    // Scan with a local cursor and publish it to `readOffset` on every exit.
    var cursor = readOffset
    while cursor < endOffset {
        let byte = bytes._loadByteUnchecked(cursor)
        let isWhitespace = byte == UInt8(ascii: " ")
            || byte == UInt8(ascii: "\r")
            || byte == UInt8(ascii: "\n")
            || byte == UInt8(ascii: "\t")
        guard isWhitespace else {
            readOffset = cursor
            return byte
        }
        cursor &+= 1
    }
    readOffset = cursor
    throw JSONError.unexpectedEndOfFile
}
/// Skips JSON whitespace and returns the first non-whitespace byte without consuming it.
/// - Parameter allowingEOF: When `true`, reaching the end of input returns `nil` instead of throwing.
/// - Returns: The next non-whitespace byte, or `nil` at end of input when `allowingEOF` is `true`.
/// - Throws: `JSONError.unexpectedEndOfFile` at end of input when `allowingEOF` is `false`.
@inlinable
@inline(__always)
@discardableResult
@_lifetime(self: copy self)
mutating func consumeWhitespaceAndPeek(allowingEOF: Bool) throws(JSONError) -> UInt8? {
    while readOffset < endOffset {
        let byte = bytes._loadByteUnchecked(readOffset)
        let isWhitespace = byte == ._space || byte == ._return || byte == ._newline || byte == ._tab
        if !isWhitespace {
            return byte
        }
        readOffset &+= 1
    }
    if allowingEOF {
        return nil
    }
    throw JSONError.unexpectedEndOfFile
}
/// Builds the error for input that failed to match the expected literal `str`.
///
/// The reader is advanced to the first byte that differs from `str`, so the reported character and
/// location are those of the mismatch.
/// - Parameters:
///   - str: The literal text that was expected at the read offset.
///   - typeDescriptor: Name of the value kind, interpolated into the error context.
@usableFromInline
@inline(never)
@_lifetime(self: copy self)
mutating func errorForUnmatchedCharacter(in str: StaticString, typeDescriptor: String) -> JSONError {
    // Figure out the exact character that is wrong.
    let mismatchOffset: Int = str.withUTF8Buffer { expected in
        let actual = bytes.extracting(readOffset..<endOffset)
        let comparableCount = min(expected.count, actual.byteCount)
        var position = 0
        while position < comparableCount {
            if expected[position] != actual._loadByteUnchecked(position) {
                return position
            }
            position += 1
        }
        return 0 // should be unreachable
    }
    self.moveReaderIndex(forwardBy: mismatchOffset)
    return JSONError.unexpectedCharacter(context: "in expected \(typeDescriptor) value", ascii: self.peek()!, location: sourceLocation)
}
/// Compares the bytes at the read offset with `str` and, on a match, advances past them.
/// - Parameter str: The literal text to match.
/// - Returns: `true` when the next `str.utf8CodeUnitCount` bytes equal `str` (the reader is advanced);
///   `false` when they differ (the reader is not moved).
/// - Throws: `JSONError.unexpectedEndOfFile` when fewer bytes remain than `str` contains.
@inlinable
@inline(__always)
@_lifetime(self: copy self)
mutating func matchExpectedString(_ str: StaticString) throws(JSONError) -> Bool {
do {
let cmp = try bytes.extracting(unchecked: readOffset..<endOffset).withUnsafeBytes { buff in
if buff.count < str.utf8CodeUnitCount { throw JSONError.unexpectedEndOfFile }
// NOTE(review): an empty `str` with no bytes remaining could reach this force-unwrap with a nil base address — confirm callers never pass an empty literal.
return memcmp(buff.baseAddress!, str.utf8Start, str.utf8CodeUnitCount)
}
guard cmp == 0 else {
return false
}
// If all looks good, advance past the string.
self.moveReaderIndex(forwardBy: str.utf8CodeUnitCount)
return true
} catch {
// Only JSONError is thrown inside the `do` block; the cast restores the typed error.
// TODO: Remove unsavory workaround
throw error as! JSONError
}
}
/// Requires the bytes at the read offset to equal `str` and advances past them.
/// - Parameters:
///   - str: The literal text that must follow.
///   - typeDescriptor: Name of the value kind, used in the mismatch error.
/// - Throws: `JSONError.unexpectedEndOfFile` when fewer bytes remain than `str` contains, or the error
///   built by `errorForUnmatchedCharacter(in:typeDescriptor:)` when the bytes differ.
@inlinable
@inline(__always)
@_lifetime(self: copy self)
mutating func readExpectedString(_ str: StaticString, typeDescriptor: String) throws(JSONError) {
do {
let cmp = try bytes.extracting(unchecked: readOffset..<endOffset).withUnsafeBytes { buff in
if buff.count < str.utf8CodeUnitCount { throw JSONError.unexpectedEndOfFile }
// NOTE(review): an empty `str` with no bytes remaining could reach this force-unwrap with a nil base address — confirm callers never pass an empty literal.
return memcmp(buff.baseAddress!, str.utf8Start, str.utf8CodeUnitCount)
}
guard cmp == 0 else {
throw errorForUnmatchedCharacter(in: str, typeDescriptor: typeDescriptor)
}
// If all looks good, advance past the string.
self.moveReaderIndex(forwardBy: str.utf8CodeUnitCount)
} catch {
// Only JSONError is thrown inside the `do` block; the cast restores the typed error.
// TODO: Remove unsavory workaround
throw error as! JSONError
}
}
/// Reads a `true` or `false` literal starting at the read offset.
///
/// The caller must already have established that the next byte is `t` or `f`; anything else is a
/// precondition failure.
/// - Throws: A `JSONError` when the rest of the literal does not match.
@inlinable
@inline(__always)
mutating func readBool() throws(JSONError) -> Bool {
    let firstByte = self.read()
    if firstByte == UInt8(ascii: "t") {
        try readExpectedString("rue", typeDescriptor: "boolean")
        return true
    }
    if firstByte == UInt8(ascii: "f") {
        try readExpectedString("alse", typeDescriptor: "boolean")
        return false
    }
    preconditionFailure("Expected to have `t` or `f` as first character")
}
/// Reads the complete `null` literal at the read offset and advances past it.
/// - Throws: A `JSONError` when the input does not match `null`.
@inlinable
@inline(__always)
mutating func readNull() throws(JSONError) {
    try self.readExpectedString("null", typeDescriptor: "null")
}
// MARK: - Private Methods -
// MARK: String
/// Parses exactly `totalDigits` hexadecimal digits at the read offset into an integer of type `Result`.
///
/// - Parameters:
///   - totalDigits: The number of hex digits to consume.
///   - isNegative: When `true`, digits are accumulated by subtraction so the result is negative.
/// - Returns: The parsed value.
/// - Throws: `.invalidHexDigitSequence` when a non-hex byte is read or the value overflows `Result`
///   (located at the start of the sequence), or `.unexpectedEndOfFile` when the input ends before
///   `totalDigits` digits were read.
///
/// The text reported in `.invalidHexDigitSequence` is the bytes consumed so far. Those bytes are not
/// necessarily valid UTF-8 — the offending byte may be the first byte of a multi-byte character, or the
/// input may be unvalidated — so the text falls back to lossy decoding instead of force-unwrapping.
@inlinable
@_lifetime(self: copy self)
internal mutating func _parseHexIntegerDigits<Result: FixedWidthInteger>(totalDigits: Int, isNegative: Bool) throws(JSONError) -> Result {
    let startOffset = self.readOffset
    // ASCII constants, named for clarity:
    let _0 = 48 as UInt8, _A = 65 as UInt8, _a = 97 as UInt8
    let numericalUpperBound = _0 &+ 10
    let uppercaseUpperBound = _A &+ 6
    let lowercaseUpperBound = _a &+ 6
    let multiplicand: Result = 16
    var remainingDigits = totalDigits
    var result = 0 as Result
    // Set when a non-hex byte or an overflow is met; the error is built once, after the loop.
    var sequenceIsInvalid = false
    while remainingDigits > 0, let digit = read() {
        remainingDigits -= 1
        let digitValue: Result
        if _fastPath(digit >= _0 && digit < numericalUpperBound) {
            digitValue = Result(truncatingIfNeeded: digit &- _0)
        } else if _fastPath(digit >= _A && digit < uppercaseUpperBound) {
            digitValue = Result(truncatingIfNeeded: digit &- _A &+ 10)
        } else if _fastPath(digit >= _a && digit < lowercaseUpperBound) {
            digitValue = Result(truncatingIfNeeded: digit &- _a &+ 10)
        } else {
            sequenceIsInvalid = true
            break
        }
        let overflow1: Bool
        (result, overflow1) = result.multipliedReportingOverflow(by: multiplicand)
        let overflow2: Bool
        (result, overflow2) = isNegative
            ? result.subtractingReportingOverflow(digitValue)
            : result.addingReportingOverflow(digitValue)
        guard _fastPath(!overflow1 && !overflow2) else {
            sequenceIsInvalid = true
            break
        }
    }
    if sequenceIsInvalid {
        // Everything consumed so far, including the offending byte.
        let consumed = self.bytes.extracting(unchecked: startOffset ..< self.readOffset)
        let hexString: String
        if let validText = String._tryFromUTF8(consumed) {
            hexString = validText
        } else {
            // Invalid UTF-8: repair with U+FFFD rather than crash on untrusted input.
            hexString = consumed.withUnsafeBytes { String(decoding: $0, as: UTF8.self) }
        }
        throw .invalidHexDigitSequence(hexString, location: .countingLinesAndColumns(upTo: startOffset, in: self.bytes))
    }
    if remainingDigits > 0 {
        throw .unexpectedEndOfFile
    }
    return result
}
/// Parses the four hex digits of a `\u` escape into a UTF-16 code unit.
/// - Parameter allowNulls: When `false`, a value of 0 is rejected.
/// - Throws: `.invalidEscapedNullValue` (located at the start of the digits) for a disallowed 0, or any
///   error from `_parseHexIntegerDigits`.
@inlinable
@_lifetime(self: copy self)
internal mutating func _parseUnicodeHexSequence(allowNulls: Bool = true) throws(JSONError) -> UInt16 {
    let digitsStart = self.readOffset
    let codeUnit: UInt16 = try _parseHexIntegerDigits(totalDigits: 4, isNegative: false)
    if codeUnit == 0 && !allowNulls {
        throw .invalidEscapedNullValue(location: .countingLinesAndColumns(upTo: digitsStart, in: bytes))
    }
    return codeUnit
}
/// Parses the hex digits of a `\u` escape (and a following `\uXXXX` low surrogate when the first code
/// unit is a high surrogate) and appends the resulting scalar to `string` as UTF-8.
///
/// Shared with JSON5, which requires `allowNulls = false` for compatibility.
/// - Parameters:
///   - string: The UTF-8 buffer to append to.
///   - allowNulls: Whether the first code unit may be 0.
/// - Throws: `.expectedLowSurrogateUTF8SequenceAfterHighSurrogate` when a high surrogate is not followed
///   by a `\u` low surrogate, `.couldNotCreateUnicodeScalarFromUInt32` when a non-lead code unit is not
///   a valid scalar, or any error from parsing the hex digits.
@_lifetime(self: copy self)
internal mutating func _parseUnicodeSequence(into string: inout UniqueArray<UInt8>, allowNulls: Bool = true) throws(JSONError) {
    // we build this for utf8 only for now.
    let sequenceStart = readOffset
    let firstUnit = try _parseUnicodeHexSequence(allowNulls: allowNulls)
    let scalar: Unicode.Scalar
    if UTF16.isLeadSurrogate(firstUnit) {
        // A lead surrogate must be followed by `\u` and a trailing surrogate.
        let trailStart = readOffset
        guard read() == ._backslash, read() == UInt8(ascii: "u") else {
            throw .expectedLowSurrogateUTF8SequenceAfterHighSurrogate(location: .countingLinesAndColumns(upTo: trailStart, in: bytes))
        }
        let secondUnit = try _parseUnicodeHexSequence(allowNulls: true)
        guard UTF16.isTrailSurrogate(secondUnit) else {
            throw .expectedLowSurrogateUTF8SequenceAfterHighSurrogate(location: .countingLinesAndColumns(upTo: trailStart, in: bytes))
        }
        let surrogatePair = UTF16.EncodedScalar([firstUnit, secondUnit])
        scalar = UTF16.decode(surrogatePair)
    } else {
        guard let singleUnitScalar = Unicode.Scalar(firstUnit) else {
            throw .couldNotCreateUnicodeScalarFromUInt32(location: .countingLinesAndColumns(upTo: sequenceStart, in: bytes), unicodeScalarValue: UInt32(firstUnit))
        }
        scalar = singleUnitScalar
    }
    // Both paths end by appending the scalar's UTF-8 encoding.
    UTF8.encode(scalar) { codeUnit in
        string.append(codeUnit)
    }
}
// Reads the byte that follows a backslash and appends the character it denotes to `string`.
//
// Single-character escapes append exactly one byte; `u` hands off to `_parseUnicodeSequence`.
// Throws `unexpectedEndOfFile` when no byte is available, and `unexpectedEscapedCharacter`
// for any byte that is not a recognised escape.
@_lifetime(self: copy self)
internal mutating func _parseEscapeSequence(into string: inout UniqueArray<UInt8>) throws(JSONError) {
    guard let escaped = read() else {
        throw .unexpectedEndOfFile
    }

    let unescaped: UInt8
    switch escaped {
    case UInt8(ascii:"\""), UInt8(ascii:"\\"), UInt8(ascii:"/"):
        // These escapes stand for the very same byte.
        unescaped = escaped
    case UInt8(ascii:"b"):
        unescaped = 0x08 // \b
    case UInt8(ascii:"f"):
        unescaped = 0x0C // \f
    case UInt8(ascii:"n"):
        unescaped = 0x0A // \n
    case UInt8(ascii:"r"):
        unescaped = 0x0D // \r
    case UInt8(ascii:"t"):
        unescaped = 0x09 // \t
    case UInt8(ascii:"u"):
        try _parseUnicodeSequence(into: &string)
        return
    default:
        // TODO: This doesn't work any more, since the offsets don't translate.
        throw .unexpectedEscapedCharacter(ascii: escaped, location: .countingLinesAndColumns(upTo: readOffset, in: bytes))
    }
    string.append(unescaped)
}
// Slow path for string parsing: keeps scanning from the current read offset, taking escape
// sequences and control characters into account, and appends the string's content to `output`.
//
// Runs of bytes between stops are copied verbatim; each backslash is followed by a call to
// `_parseEscapeSequence`, which appends the decoded escape. Returns once the closing quote
// has been consumed.
//
// Throws `unescapedControlCharacterInString` when the scan stops on a byte that is neither a
// backslash nor a quote, and propagates errors from the scanner and from `_parseEscapeSequence`.
@_lifetime(self: copy self)
internal mutating func _slowpath_continueParsingString(into output: inout UniqueArray<UInt8>) throws(JSONError) {
    // Offset of the first byte that has not yet been copied into `output`.
    var chunkStart = readOffset
    while true {
        let stopByte = try skipUTF8StringTillQuoteOrBackslashOrInvalidCharacter()
        guard stopByte == ._backslash || stopByte == ._quote else {
            // All Unicode characters may be placed within the quotation marks, except for the characters that must be escaped: quotation mark, reverse solidus, and the control characters (U+0000 through U+001F).
            // TODO: This doesn't work any more, since the offsets don't translate.
            throw JSONError.unescapedControlCharacterInString(ascii: stopByte, location: .countingLinesAndColumns(upTo: readOffset, in: bytes))
        }

        // Flush the bytes scanned since the last stop, if there are any.
        if readOffset > chunkStart {
            let pending = bytes.extracting(unchecked: chunkStart..<readOffset)
            pending.withUnsafeBytes {
                output.append(copying: $0)
            }
        }

        // Step over the backslash or quote itself.
        moveReaderIndex(forwardBy: 1)

        // Only a backslash or a quote can reach this point; a quote ends the string.
        guard stopByte == ._backslash else {
            return
        }
        try _parseEscapeSequence(into: &output)
        // The next verbatim run begins right after the escape sequence.
        chunkStart = readOffset
    }
    // No statement follows the loop: it has no `break`, so it is only left by `return` or `throw`.
}
// The result type of `parsedStringContentAndTrailingQuote()`: the parsed content of a string,
// carried either as a span alone or as a `String` paired with a span.
// NOTE(review): presumably `.span` views the input bytes directly while `.string` holds content
// that had to be rebuilt (e.g. after unescaping) — confirm against the code that constructs these cases.
@frozen
@usableFromInline
enum ParsedString: ~Escapable {
// A `String` together with a `UTF8Span`.
// NOTE(review): presumably the span views the string's own UTF-8 storage — confirm.
case string(String, UTF8Span)
// A `UTF8Span` with no accompanying `String`.
case span(UTF8Span)
// The raw bytes of whichever span this value carries (both cases bind their `UTF8Span`).
// NOTE(review): the pointer is returned out of the `withUnsafeBytes` closure, so its validity
// relies on the underlying storage outliving every use of the result — confirm at call sites.
var buffer: UnsafeRawBufferPointer {
switch self {
case .string(_, let span), .span(let span):
span.span.bytes.withUnsafeBytes {
$0
}
}
}
}
// Because this returns a ~Escapable type, throws _JSONDecodingError to improve ergonomics at the callsite.
@usableFromInline
@_lifetime(copy self)
mutating func parsedStringContentAndTrailingQuote() throws(_JSONDecodingError) -> ParsedString {
// Assume easy path first -- no escapes, no characters requiring escapes.
let startIndex = self.readOffset
var foundEndOfString = false
var foundBackslash = false
ReadLoop:
while true {
let byte = try skipUTF8StringTillQuoteOrBackslashOrInvalidCharacter() ^^ .jsonError
guard _fastPath(byte & 0xe0 != 0) else {
// TODO: Wrong index.
// TODO: This doesn't work any more, since the offsets don't translate.
throw .json(.unescapedControlCharacterInString(ascii: byte, location: .countingLinesAndColumns(upTo: readOffset, in: bytes)))
}
switch byte {
case ._backslash:
moveReaderIndex(forwardBy: 1)
foundBackslash = true
break ReadLoop
case ._quote:
moveReaderIndex(forwardBy: 1)
foundEndOfString = true
break ReadLoop
default: break
}
}
let firstSectionSubspan = bytes.extracting(unchecked: startIndex..<readOffset-1)
let firstSectionUTF8Span: UTF8Span
do {
if utf8Validated {
firstSectionUTF8Span = UTF8Span(unchecked: .init(_bytes: firstSectionSubspan), isKnownASCII: false)
} else {
firstSectionUTF8Span = try UTF8Span(validating: .init(_bytes: firstSectionSubspan))
}
} catch {
// TODO: This source location doesn't work any more.
throw .json(.cannotConvertInputStringDataToUTF8(location: .countingLinesAndColumns(upTo: startIndex, in: bytes)))
}