Skip to content

Commit 4c1a0a3

Browse files
committed
Support indefinite length bytestring
We support indefinite-length byte strings in the Read module. In order to do so, we add two extra variants for bigint deserialization; both put us on the slow path to start consuming the chunks. This serialization is considered non-canonical, thus the canonical read rejects it. While the specification is silent about it, the cbor.me implementation always converts a bigint into a fixed-size byte blob. Fixes #263.
1 parent 72a0e73 commit 4c1a0a3

File tree

5 files changed

+86
-3
lines changed

5 files changed

+86
-3
lines changed

cborg/cborg.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ test-suite cborg-tests
135135
Tests.Regress
136136
Tests.Regress.Issue160
137137
Tests.Regress.Issue162
138+
Tests.Regress.Issue263
138139
Tests.Regress.FlatTerm
139140
Tests.Reference
140141
Tests.Reference.Implementation

cborg/src/Codec/CBOR/Read.hs

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,8 @@ go_fast !bs da@(ConsumeTag64Canonical k) =
537537
go_fast !bs da@(ConsumeInteger k) =
538538
case tryConsumeInteger (BS.unsafeHead bs) bs of
539539
DecodedToken sz (BigIntToken _ n) -> k n >>= go_fast (BS.unsafeDrop sz bs)
540+
DecodedToken sz BigNIntNeedBytes -> go_fast_end (BS.unsafeDrop sz bs) (decodeBytesIndefLen (k . nintegerFromBytes) [])
541+
DecodedToken sz BigUIntNeedBytes -> go_fast_end (BS.unsafeDrop sz bs) (decodeBytesIndefLen (k . uintegerFromBytes) [])
540542
_ -> go_fast_end bs da
541543

542544
go_fast !bs da@(ConsumeFloat k) =
@@ -593,6 +595,8 @@ go_fast !bs da@(ConsumeSimple k) =
593595
go_fast !bs da@(ConsumeIntegerCanonical k) =
594596
case tryConsumeInteger (BS.unsafeHead bs) bs of
595597
DecodedToken sz (BigIntToken True n) -> k n >>= go_fast (BS.unsafeDrop sz bs)
598+
DecodedToken _sz BigUIntNeedBytes -> return $ SlowFail bs "non-canonical integer encoding"
599+
DecodedToken _sz BigNIntNeedBytes -> return $ SlowFail bs "non-canonical integer encoding"
596600
_ -> go_fast_end bs da
597601

598602

@@ -1037,6 +1041,9 @@ go_fast_end !bs (ConsumeInteger k) =
10371041
DecodedToken sz (BigNIntNeedBody _ len) -> return $! SlowConsumeTokenBytes (BS.unsafeDrop sz bs) (adjustContBigNIntNeedBody k) len
10381042
DecodedToken sz BigUIntNeedHeader -> return $! SlowDecodeAction (BS.unsafeDrop sz bs) (adjustContBigUIntNeedHeader k)
10391043
DecodedToken sz BigNIntNeedHeader -> return $! SlowDecodeAction (BS.unsafeDrop sz bs) (adjustContBigNIntNeedHeader k)
1044+
DecodedToken sz BigUIntNeedBytes -> go_fast_end (BS.unsafeDrop sz bs) (decodeBytesIndefLen (k . uintegerFromBytes) [])
1045+
DecodedToken sz BigNIntNeedBytes -> go_fast_end (BS.unsafeDrop sz bs) (decodeBytesIndefLen (k . nintegerFromBytes) [])
1046+
10401047

10411048
go_fast_end !bs (ConsumeFloat k) =
10421049
case tryConsumeFloat (BS.unsafeHead bs) bs of
@@ -2696,12 +2703,26 @@ readBytes64 bs = case word64ToInt (eatTailWord64 bs) of
26962703
-- Note that canonicity information is calculated lazily. This way we don't need
26972704
-- to concern ourselves with two distinct paths, while according to benchmarks
26982705
-- it doesn't affect performance in the non-canonical case.
2706+
--
2707+
-- According to the CBOR specification, big integers can be encoded as
2708+
-- *indefinite-length* byte strings. To support this representation, we return
2709+
-- the 'BigUIntNeedBytes' or 'BigNIntNeedBytes' constructors, which indicate that
2710+
-- multiple tokens are required before the integer can be fully reconstructed.
2711+
-- In this case we fall back to the slow path.
2712+
--
2713+
-- Note that this is a non-canonical encoding of big integers. The canonical
2714+
-- representation uses a definite-length byte string. Therefore, when canonical
2715+
-- decoding is requested, indefinite-length values are rejected immediately.
2716+
26992717

27002718
data BigIntToken a
    -- The integer was decoded in one go; the flag records whether the
    -- encoding was canonical.
  = BigIntToken     Bool {- canonical? -} Integer
    -- The byte-string body of a positive bigint could not be read in one go;
    -- the 'Int' is the body length still to be consumed.
  | BigUIntNeedBody Bool {- canonical? -} Int
    -- Same as 'BigUIntNeedBody', for a negative bigint.
  | BigNIntNeedBody Bool {- canonical? -} Int
    -- The byte-string header of a positive bigint is not available yet.
  | BigUIntNeedHeader
    -- The byte-string header of a negative bigint is not available yet.
  | BigNIntNeedHeader
    -- The positive bigint payload is an indefinite-length byte string, so its
    -- chunks have to be consumed one by one.
  | BigUIntNeedBytes
    -- Same as 'BigUIntNeedBytes', for a negative bigint.
  | BigNIntNeedBytes
2725+
27052726

27062727
-- So when we have to break out because we can't read the whole bytes body
27072728
-- in one go then we need to use SlowConsumeTokenBytes but we can adjust the
@@ -2714,7 +2735,6 @@ adjustContBigUIntNeedBody, adjustContBigNIntNeedBody
27142735

27152736
adjustContBigUIntNeedBody k = \bs -> k $! uintegerFromBytes bs
27162737
adjustContBigNIntNeedBody k = \bs -> k $! nintegerFromBytes bs
2717-
27182738
adjustContCanonicalBigUIntNeedBody, adjustContCanonicalBigNIntNeedBody
27192739
:: (Integer -> ST s (DecodeAction s a))
27202740
-> (ByteString -> ST s (DecodeAction s a))
@@ -2770,7 +2790,9 @@ readBigUInt bs
27702790
, not (BS.null bs')
27712791
, let !hdr = BS.unsafeHead bs'
27722792
, BS.length bs' >= tokenSize hdr
2773-
= case tryConsumeBytes hdr bs' of
2793+
= if isInfiniteBytes hdr
2794+
then DecodedToken 2 BigUIntNeedBytes
2795+
else case tryConsumeBytes hdr bs' of
27742796
DecodeFailure -> DecodeFailure
27752797
DecodedToken sz (Fits canonical bstr) -> DecodedToken (1+sz)
27762798
(BigIntToken (canonical && isBigIntRepCanonical bstr)
@@ -2788,7 +2810,9 @@ readBigNInt bs
27882810
, not (BS.null bs')
27892811
, let !hdr = BS.unsafeHead bs'
27902812
, BS.length bs' >= tokenSize hdr
2791-
= case tryConsumeBytes hdr bs' of
2813+
= if isInfiniteBytes hdr
2814+
then DecodedToken 2 BigNIntNeedBytes
2815+
else case tryConsumeBytes hdr bs' of
27922816
DecodeFailure -> DecodeFailure
27932817
DecodedToken sz (Fits canonical bstr) -> DecodedToken (1+sz)
27942818
(BigIntToken (canonical && isBigIntRepCanonical bstr)
@@ -2805,3 +2829,15 @@ readBigNInt bs
28052829
-- representation for the number in question).
28062830
isBigIntRepCanonical :: ByteString -> Bool
isBigIntRepCanonical bstr
    -- Anything of 8 bytes or fewer is rejected outright; this also keeps
    -- 'BS.unsafeHead' away from an empty string.
  | BS.length bstr <= 8 = False
    -- Longer strings qualify only when they carry no leading zero byte.
  | otherwise           = BS.unsafeHead bstr /= 0x00
2832+
2833+
-- | Tell whether a byte-string header byte announces an indefinite-length
-- byte string, i.e. whether it is exactly @0x5f@.
isInfiniteBytes :: Word8 -> Bool
isInfiniteBytes hdr = word8ToWord hdr == 0x5f
2838+
2839+
-- | Build a decode action that collects the chunks of an indefinite-length
-- byte string.
--
-- The action first asks whether the next token is a break. On a break, the
-- chunks gathered so far are concatenated in arrival order and handed to the
-- continuation @k@. Otherwise one more byte-string chunk is consumed and
-- pushed onto the accumulator, which is kept in reverse order.
decodeBytesIndefLen
  :: (BS.ByteString -> ST s (DecodeAction s a))
  -> [BS.ByteString]
  -> DecodeAction s a
decodeBytesIndefLen k acc = ConsumeBreakOr step
  where
    -- Break reached: finish with everything collected so far.
    step True  = k assembled
    -- No break yet: read another chunk and loop.
    step False = return $! ConsumeBytes $ \chunk ->
                   return (decodeBytesIndefLen k (chunk : acc))

    -- The accumulator is newest-first, so restore arrival order first.
    assembled = LBS.toStrict $! LBS.fromChunks $! reverse acc

cborg/tests/Tests/Reference/Implementation.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ module Tests.Reference.Implementation (
6565
prop_word32ToFromNet,
6666
prop_word64ToFromNet,
6767
prop_halfToFromFloat,
68+
69+
-- helper functions
70+
integerToBytes,
6871
) where
6972

7073

cborg/tests/Tests/Regress.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import Test.Tasty
66

77
import qualified Tests.Regress.Issue160 as Issue160
88
import qualified Tests.Regress.Issue162 as Issue162
9+
import qualified Tests.Regress.Issue263 as Issue263
910
import qualified Tests.Regress.FlatTerm as FlatTerm
1011

1112
--------------------------------------------------------------------------------
@@ -16,4 +17,5 @@ testTree = testGroup "Regression tests"
1617
[ FlatTerm.testTree
1718
, Issue160.testTree
1819
, Issue162.testTree
20+
, Issue263.testTree
1921
]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
module Tests.Regress.Issue263 ( testTree ) where
2+
3+
import Data.Word
4+
import qualified Data.ByteString.Lazy as LBS
5+
import Codec.CBOR.Read
6+
import Codec.CBOR.Term (Term(..), decodeTerm)
7+
import Test.Tasty
8+
import Test.Tasty.HUnit
9+
import qualified Tests.Reference.Implementation as Reference
10+
11+
-- | Encode an 'Integer' as a CBOR bigint whose payload is an
-- indefinite-length byte string made of a single chunk.
--
-- The produced bytes are, in order: the bigint tag (@0xc2@ for non-negative
-- values, @0xc3@ for negative ones), the indefinite-length byte string
-- header @0x5f@, one definite-length byte string chunk holding the magnitude,
-- and the break byte @0xff@.
mkRepr :: Integer -> [Word8]
mkRepr int =
    [tag, 0x5f] ++ chunk ++ [0xff]
  where
    -- The tag and the payload must agree on the sign. Zero counts as
    -- non-negative in both places; previously the payload used @int > 0@
    -- and so paired the positive tag with the negative-branch payload.
    nonNegative = int >= 0

    -- Tag(2) marks a positive bigint, Tag(3) a negative one.
    tag
      | nonNegative = 0xc2
      | otherwise   = 0xc3

    -- A negative bigint carries @-1 - n@ rather than @n@ itself.
    magnitude
      | nonNegative = Reference.integerToBytes int
      | otherwise   = Reference.integerToBytes (-(int + 1))

    -- The single definite-length chunk inside the indefinite-length string.
    chunk =
      Reference.encodeToken
        (Reference.MT2_ByteString (Reference.lengthUInt magnitude) magnitude)
24+
25+
-- | Assert that the encoding produced by 'mkRepr' for the given integer
-- decodes, via 'decodeTerm', to exactly @TInteger int@ with no input left
-- over.
shouldDecode :: Integer -> IO ()
shouldDecode int =
    either onFailure onSuccess
           (deserialiseFromBytes decodeTerm (LBS.pack encoded))
  where
    encoded = mkRepr int

    -- Decoding was rejected: report the offending bytes and the error.
    onFailure err =
      fail ("Deserialisation failed for " ++ (show encoded) ++ ": " ++ show err)

    -- Decoding succeeded: the whole input must have been consumed.
    onSuccess (rest, term)
      | LBS.null rest = (TInteger int) @=? term
      | otherwise     = fail "Trailing bytes"
32+
33+
34+
-- | Regression tests for issue 263: bigints encoded with an
-- indefinite-length byte string must decode to the expected integer.
testTree :: TestTree
testTree =
    testGroup "Issue 263 - bigint with indefinite length"
      [ testCase label (shouldDecode value) | (label, value) <- cases ]
  where
    -- Test name paired with the integer to round-trip.
    cases :: [(String, Integer)]
    cases =
      [ ("small bigint",          1231)
      , ("big bigint",            123123123123123123123123123)
      , ("small negative bigint", -123)
      , ("big negative bigint",   -12312312311231231231231234)
      ]

0 commit comments

Comments
 (0)