@@ -537,6 +537,8 @@ go_fast !bs da@(ConsumeTag64Canonical k) =
-- Fast path for 'ConsumeInteger'.
--
-- * A fully decoded big integer is handed straight to the continuation and
--   decoding continues on the fast path after dropping the token.
-- * 'BigNIntNeedBytes' / 'BigUIntNeedBytes' mean the bignum body is an
--   indefinite-length byte string: drop the bytes consumed so far and let
--   'decodeBytesIndefLen' collect the chunks on the slow path.  The
--   continuation is wrapped with the same strict 'adjustContBig*IntNeedBody'
--   helpers used by the definite-length slow path, so the 'Integer' is forced
--   before @k@ runs instead of being passed as a thunk holding the bytes.
-- * Anything else is delegated to 'go_fast_end' with the input untouched.
go_fast !bs da@(ConsumeInteger k) =
    case tryConsumeInteger (BS.unsafeHead bs) bs of
      DecodedToken sz (BigIntToken _ n) -> k n >>= go_fast (BS.unsafeDrop sz bs)
      DecodedToken sz BigNIntNeedBytes  ->
        go_fast_end (BS.unsafeDrop sz bs)
                    (decodeBytesIndefLen (adjustContBigNIntNeedBody k) [])
      DecodedToken sz BigUIntNeedBytes  ->
        go_fast_end (BS.unsafeDrop sz bs)
                    (decodeBytesIndefLen (adjustContBigUIntNeedBody k) [])
      _                                 -> go_fast_end bs da
541543
542544go_fast ! bs da@ (ConsumeFloat k) =
@@ -593,6 +595,8 @@ go_fast !bs da@(ConsumeSimple k) =
-- Fast path for 'ConsumeIntegerCanonical'.
--
-- Only a big integer flagged as canonical is accepted here.  A bignum whose
-- body is an indefinite-length byte string ('BigUIntNeedBytes' /
-- 'BigNIntNeedBytes') is rejected immediately as non-canonical.  Every other
-- outcome is delegated to 'go_fast_end' with the input untouched.
go_fast !bs da@(ConsumeIntegerCanonical k) =
    case tryConsumeInteger (BS.unsafeHead bs) bs of
      DecodedToken sz (BigIntToken True n) -> k n >>= go_fast (BS.unsafeDrop sz bs)
      DecodedToken _ BigUIntNeedBytes      -> nonCanonical
      DecodedToken _ BigNIntNeedBytes      -> nonCanonical
      _                                    -> go_fast_end bs da
  where
    -- Shared failure for both indefinite-length cases; the message carries no
    -- leading whitespace, and '$!' matches the other slow-path returns.
    nonCanonical = return $! SlowFail bs "non-canonical integer encoding"
597601
598602
@@ -1037,6 +1041,9 @@ go_fast_end !bs (ConsumeInteger k) =
10371041 DecodedToken sz (BigNIntNeedBody _ len) -> return $! SlowConsumeTokenBytes (BS. unsafeDrop sz bs) (adjustContBigNIntNeedBody k) len
10381042 DecodedToken sz BigUIntNeedHeader -> return $! SlowDecodeAction (BS. unsafeDrop sz bs) (adjustContBigUIntNeedHeader k)
10391043 DecodedToken sz BigNIntNeedHeader -> return $! SlowDecodeAction (BS. unsafeDrop sz bs) (adjustContBigNIntNeedHeader k)
1044+ DecodedToken sz BigUIntNeedBytes -> go_fast_end (BS. unsafeDrop sz bs) (decodeBytesIndefLen (k . uintegerFromBytes) [] )
1045+ DecodedToken sz BigNIntNeedBytes -> go_fast_end (BS. unsafeDrop sz bs) (decodeBytesIndefLen (k . nintegerFromBytes) [] )
1046+
10401047
10411048go_fast_end ! bs (ConsumeFloat k) =
10421049 case tryConsumeFloat (BS. unsafeHead bs) bs of
@@ -2696,12 +2703,26 @@ readBytes64 bs = case word64ToInt (eatTailWord64 bs) of
26962703-- Note that canonicity information is calculated lazily. This way we don't need
26972704-- to concern ourselves with two distinct paths, while according to benchmarks
26982705-- it doesn't affect performance in the non-canonical case.
2706+ --
2707+ -- According to the CBOR specification, big integers can be encoded as
2708+ -- *indefinite-length* byte strings. To support this representation, we return
2709+ -- the 'BigUIntNeedBytes' or 'BigNIntNeedBytes' constructors, which indicate that
2710+ -- multiple further tokens are required before the integer can be fully reconstructed.
2711+ -- In this case we fall back to the slow path.
2712+ --
2713+ -- Note that this is a non-canonical encoding of big integers. The canonical
2714+ -- representation uses a definite-length byte string. Therefore, when canonical
2715+ -- decoding is requested, indefinite-length values are rejected immediately.
2716+
26992717
-- Result of trying to decode a big integer in one step.  Constructors, as
-- they are used by the integer decoding paths:
--
-- * 'BigIntToken': the integer was decoded completely; the 'Bool' records
--   whether the encoding was canonical.
-- * 'BigUIntNeedBody' / 'BigNIntNeedBody': carry a canonicity flag and an
--   'Int'; for 'BigNIntNeedBody' the slow path passes that 'Int' on as the
--   length argument of 'SlowConsumeTokenBytes' (presumably the same for the
--   unsigned variant -- confirm at its use site).
-- * 'BigUIntNeedHeader' / 'BigNIntNeedHeader': the slow path resumes with a
--   'SlowDecodeAction' built by the matching @adjustCont...NeedHeader@ helper.
-- * 'BigUIntNeedBytes' / 'BigNIntNeedBytes': returned by 'readBigUInt' /
--   'readBigNInt' when the body header satisfies 'isInfiniteBytes', i.e. the
--   body is an indefinite-length byte string whose chunks still have to be
--   consumed (see 'decodeBytesIndefLen'); rejected when canonical decoding
--   is requested.
27002718data BigIntToken a = BigIntToken Bool {- canonical? -} Integer
27012719 | BigUIntNeedBody Bool {- canonical? -} Int
27022720 | BigNIntNeedBody Bool {- canonical? -} Int
27032721 | BigUIntNeedHeader
27042722 | BigNIntNeedHeader
2723+ | BigUIntNeedBytes
2724+ | BigNIntNeedBytes
2725+
27052726
27062727-- So when we have to break out because we can't read the whole bytes body
27072728-- in one go then we need to use SlowConsumeTokenBytes but we can adjust the
@@ -2714,7 +2735,6 @@ adjustContBigUIntNeedBody, adjustContBigNIntNeedBody
27142735
-- Turn a continuation over 'Integer' into one over the raw body bytes: the
-- bytes are converted with 'uintegerFromBytes' (unsigned) or
-- 'nintegerFromBytes' (negative) and the result is forced with '$!' before
-- it is handed to @k@, so no unevaluated conversion is passed along.
27152736adjustContBigUIntNeedBody k = \ bs -> k $! uintegerFromBytes bs
27162737adjustContBigNIntNeedBody k = \ bs -> k $! nintegerFromBytes bs
2717-
27182738adjustContCanonicalBigUIntNeedBody, adjustContCanonicalBigNIntNeedBody
27192739 :: (Integer -> ST s (DecodeAction s a ))
27202740 -> (ByteString -> ST s (DecodeAction s a ))
@@ -2770,7 +2790,9 @@ readBigUInt bs
27702790 , not (BS. null bs')
27712791 , let ! hdr = BS. unsafeHead bs'
27722792 , BS. length bs' >= tokenSize hdr
2773- = case tryConsumeBytes hdr bs' of
2793+ = if isInfiniteBytes hdr
2794+ then DecodedToken 2 BigUIntNeedBytes
2795+ else case tryConsumeBytes hdr bs' of
27742796 DecodeFailure -> DecodeFailure
27752797 DecodedToken sz (Fits canonical bstr) -> DecodedToken (1 + sz)
27762798 (BigIntToken (canonical && isBigIntRepCanonical bstr)
@@ -2788,7 +2810,9 @@ readBigNInt bs
27882810 , not (BS. null bs')
27892811 , let ! hdr = BS. unsafeHead bs'
27902812 , BS. length bs' >= tokenSize hdr
2791- = case tryConsumeBytes hdr bs' of
2813+ = if isInfiniteBytes hdr
2814+ then DecodedToken 2 BigNIntNeedBytes
2815+ else case tryConsumeBytes hdr bs' of
27922816 DecodeFailure -> DecodeFailure
27932817 DecodedToken sz (Fits canonical bstr) -> DecodedToken (1 + sz)
27942818 (BigIntToken (canonical && isBigIntRepCanonical bstr)
@@ -2805,3 +2829,15 @@ readBigNInt bs
28052829-- representation for the number in question).
isBigIntRepCanonical :: ByteString -> Bool
isBigIntRepCanonical bstr
    -- Eight bytes or fewer (including the empty string) is never canonical;
    -- this guard also protects the 'BS.unsafeHead' call below.
  | BS.length bstr <= 8 = False
    -- Otherwise the representation is canonical iff it has no leading zero.
  | otherwise           = BS.unsafeHead bstr /= 0x00
2832+
-- | Check whether the given header byte introduces an /indefinite-length/
-- byte string, i.e. whether it is exactly @0x5f@ (CBOR major type 2 with
-- additional information 31).  Every other header byte yields 'False'.
isInfiniteBytes :: Word8 -> Bool
isInfiniteBytes hdr = hdr == 0x5f
2838+
-- | Build a 'DecodeAction' that consumes the chunks of an indefinite-length
-- byte string and passes their concatenation to the continuation.
--
-- The action first asks whether the next token is a break ('ConsumeBreakOr'):
--
-- * on a break, the chunks collected so far are concatenated in the order
--   they were read and handed to @k@;
-- * otherwise one more byte-string chunk is requested ('ConsumeBytes') and
--   the loop continues with that chunk added to the accumulator.
--
-- @acc@ holds the chunks read so far, most recent first; callers start with
-- an empty list.
decodeBytesIndefLen :: (BS.ByteString -> ST s (DecodeAction s a)) -> [BS.ByteString] -> DecodeAction s a
decodeBytesIndefLen k acc = ConsumeBreakOr $ \isBreak ->
    if isBreak
      -- The accumulator is in reverse reading order; 'BS.concat' builds the
      -- strict result directly, without a lazy-ByteString round trip.
      then k (BS.concat (reverse acc))
      else return $! ConsumeBytes $ \chunk ->
             return (decodeBytesIndefLen k (chunk : acc))
0 commit comments