Skip to content

Support encoding indefinite containers #256

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions cbor2/_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ class CBOREncoder:
"string_referencing",
"string_namespacing",
"_string_references",
"indefinite_containers",
)

_fp: IO[bytes]
Expand All @@ -138,6 +139,7 @@ def __init__(
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
indefinite_containers: bool = False,
):
"""
:param fp:
Expand Down Expand Up @@ -168,6 +170,8 @@ def __init__(
:param string_referencing:
set to ``True`` to allow more efficient serializing of repeated string
values
:param indefinite_containers:
encode containers as indefinite (use stop code instead of specifying length)

"""
self.fp = fp
Expand All @@ -177,6 +181,7 @@ def __init__(
self.value_sharing = value_sharing
self.string_referencing = string_referencing
self.string_namespacing = string_referencing
self.indefinite_containers = indefinite_containers
self.default = default
self._canonical = canonical
self._shared_containers: dict[
Expand Down Expand Up @@ -395,9 +400,11 @@ def _stringref(self, value: str | bytes) -> bool:

return False

def encode_length(self, major_tag: int, length: int) -> None:
def encode_length(self, major_tag: int, length: int | None) -> None:
major_tag <<= 5
if length < 24:
if length is None: # Indefinite
self._fp_write(struct.pack(">B", major_tag | 31))
elif length < 24:
self._fp_write(struct.pack(">B", major_tag | length))
elif length < 256:
self._fp_write(struct.pack(">BB", major_tag | 24, length))
Expand All @@ -408,6 +415,10 @@ def encode_length(self, major_tag: int, length: int) -> None:
else:
self._fp_write(struct.pack(">BQ", major_tag | 27, length))

def encode_break(self) -> None:
    """Write the "break" stop code used for indefinite containers."""
    # Major type 7 combined with additional information 31 is the single
    # byte 0xFF, i.e. the value of struct.pack(">B", (7 << 5) | 31).
    self._fp_write(b"\xff")

def encode_int(self, value: int) -> None:
# Big integers (2 ** 64 and over)
if value >= 18446744073709551616 or value < -18446744073709551616:
Expand Down Expand Up @@ -446,17 +457,23 @@ def encode_string(self, value: str) -> None:

@container_encoder
def encode_array(self, value: Sequence[Any]) -> None:
self.encode_length(4, len(value))
self.encode_length(4, len(value) if not self.indefinite_containers else None)
for item in value:
self.encode(item)

if self.indefinite_containers:
self.encode_break()

@container_encoder
def encode_map(self, value: Mapping[Any, Any]) -> None:
self.encode_length(5, len(value))
self.encode_length(5, len(value) if not self.indefinite_containers else None)
for key, val in value.items():
self.encode(key)
self.encode(val)

if self.indefinite_containers:
self.encode_break()

def encode_sortable_key(self, value: Any) -> tuple[int, bytes]:
"""
Takes a key and calculates the length of its optimal byte
Expand All @@ -471,7 +488,7 @@ def encode_sortable_key(self, value: Any) -> tuple[int, bytes]:
def encode_canonical_map(self, value: Mapping[Any, Any]) -> None:
"""Reorder keys according to Canonical CBOR specification"""
keyed_keys = ((self.encode_sortable_key(key), key, value) for key, value in value.items())
self.encode_length(5, len(value))
self.encode_length(5, len(value) if not self.indefinite_containers else None)
for sortkey, realkey, value in sorted(keyed_keys):
if self.string_referencing:
# String referencing requires that the order encoded is
Expand All @@ -482,6 +499,9 @@ def encode_canonical_map(self, value: Mapping[Any, Any]) -> None:
self._fp_write(sortkey[1])
self.encode(value)

if self.indefinite_containers:
self.encode_break()

def encode_semantic(self, value: CBORTag) -> None:
# Nested string reference domains are distinct
old_string_referencing = self.string_referencing
Expand Down Expand Up @@ -699,6 +719,7 @@ def dumps(
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
indefinite_containers: bool = False,
) -> bytes:
"""
Serialize an object to a bytestring.
Expand Down Expand Up @@ -730,6 +751,8 @@ def dumps(
the default behavior in previous releases (cbor2 <= 4.1.2).
:param string_referencing:
set to ``True`` to allow more efficient serializing of repeated string values
:param indefinite_containers:
encode containers as indefinite (use stop code instead of specifying length)
:return: the serialized output

"""
Expand All @@ -743,6 +766,7 @@ def dumps(
canonical=canonical,
date_as_datetime=date_as_datetime,
string_referencing=string_referencing,
indefinite_containers=indefinite_containers,
).encode(obj)
return fp.getvalue()

Expand All @@ -757,6 +781,7 @@ def dump(
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
indefinite_containers: bool = False,
) -> None:
"""
Serialize an object to a file.
Expand Down Expand Up @@ -788,6 +813,8 @@ def dump(
:param date_as_datetime:
set to ``True`` to serialize date objects as datetimes (CBOR tag 0), which was
the default behavior in previous releases (cbor2 <= 4.1.2).
:param indefinite_containers:
encode containers as indefinite (use stop code instead of specifying length)
:param string_referencing:
set to ``True`` to allow more efficient serializing of repeated string values

Expand All @@ -801,4 +828,5 @@ def dump(
canonical=canonical,
date_as_datetime=date_as_datetime,
string_referencing=string_referencing,
indefinite_containers=indefinite_containers,
).encode(obj)
1 change: 1 addition & 0 deletions docs/versionhistory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ This library adheres to `Semantic Versioning <https://semver.org/>`_.

- Dropped support for Python 3.8
(`#247 <https://github.com/agronholm/cbor2/pull/247>`_; PR by @hugovk)
- Added support for encoding indefinite containers (PR by @CZDanol)

**5.6.5** (2024-10-09)

Expand Down
107 changes: 92 additions & 15 deletions source/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ CBOREncoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
self->shared_handler = NULL;
self->string_referencing = false;
self->string_namespacing = false;
self->indefinite_containers = false;
}
return (PyObject *) self;
}
Expand All @@ -126,16 +127,16 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
{
static char *keywords[] = {
"fp", "datetime_as_timestamp", "timezone", "value_sharing", "default",
"canonical", "date_as_datetime", "string_referencing", NULL
"canonical", "date_as_datetime", "string_referencing", "indefinite_containers", NULL
};
PyObject *tmp, *fp = NULL, *default_handler = NULL, *tz = NULL;
int value_sharing = 0, timestamp_format = 0, enc_style = 0,
date_as_datetime = 0, string_referencing = 0;
date_as_datetime = 0, string_referencing = 0, indefinite_containers = 0;

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOppp", keywords,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOpppp", keywords,
&fp, &timestamp_format, &tz, &value_sharing,
&default_handler, &enc_style, &date_as_datetime,
&string_referencing))
&string_referencing, &indefinite_containers))
return -1;
// Predicate values are returned as ints, but need to be stored as bool or ubyte
if (timestamp_format == 1)
Expand All @@ -150,6 +151,8 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
self->string_referencing = true;
self->string_namespacing = true;
}
if (indefinite_containers == 1)
self->indefinite_containers = true;


if (_CBOREncoder_set_fp(self, fp, NULL) == -1)
Expand Down Expand Up @@ -345,17 +348,19 @@ CBOREncoder_write(CBOREncoderObject *self, PyObject *data)
Py_RETURN_NONE;
}


static int
encode_length(CBOREncoderObject *self, const uint8_t major_tag,
const uint64_t length)
encode_length_possibly_indefinite(CBOREncoderObject *self, const uint8_t major_tag,
const uint64_t length, const bool indefinite)
{
LeadByte *lead;
char buf[sizeof(LeadByte) + sizeof(uint64_t)];

lead = (LeadByte*)buf;
lead->major = major_tag;
if (length < 24) {
if (indefinite) {
lead->subtype = 31;
return fp_write(self, buf, 1);
} else if (length < 24) {
lead->subtype = (uint8_t) length;
return fp_write(self, buf, 1);
} else if (length <= UCHAR_MAX) {
Expand All @@ -377,18 +382,75 @@ encode_length(CBOREncoderObject *self, const uint8_t major_tag,
}
}

// Write a definite-length header for *major_tag*: a thin wrapper that
// forwards to encode_length_possibly_indefinite() with the indefinite flag
// cleared and returns that call's result unchanged.
static int
encode_length(CBOREncoderObject *self, const uint8_t major_tag,
              const uint64_t length)
{
    const bool indefinite = false;

    return encode_length_possibly_indefinite(self, major_tag, length, indefinite);
}

// Output slot filled by the uint64_or_none() argument converter: holds either
// an unsigned 64-bit value or a marker that Python's None was passed instead.
typedef struct {
// Parsed integer; the converter stores 0 here when is_none is true.
uint64_t value;
// True when the argument was None, false when it was an int.
bool is_none;
} UInt64OrNone;

// Converter for the "O&" format of PyArg_ParseTuple: accepts either None or
// an int and stores the outcome in the UInt64OrNone that *param* points to.
//
// Returns 1 on success. Returns 0 with a Python exception set when *obj* is
// neither None nor an int (TypeError), or when the int cannot be converted
// (the exception raised by the conversion is left in place).
static int uint64_or_none(PyObject *obj, void *param) {
    UInt64OrNone *out = (UInt64OrNone *) param;

    if (obj == Py_None) {
        out->value = 0;
        out->is_none = true;
        return 1;
    }

    if (!PyLong_Check(obj)) {
        PyErr_SetString(PyExc_TypeError, "must be int or None");
        return 0;
    }

    // Convert through unsigned long long rather than unsigned long: the
    // latter is only 32 bits wide on LLP64 platforms (e.g. 64-bit Windows),
    // which would reject valid lengths of 2**32 and above.
    const unsigned long long val = PyLong_AsUnsignedLongLong(obj);
    // (unsigned long long)-1 is the error sentinel but also a legal value,
    // so the pending-exception check is what actually decides failure.
    if (val == (unsigned long long) -1 && PyErr_Occurred()) {
        return 0;
    }

    out->value = (uint64_t) val;
    out->is_none = false;
    return 1;
}

// CBOREncoder.encode_length(self, major_tag, length)
static PyObject *
CBOREncoder_encode_length(CBOREncoderObject *self, PyObject *args)
{
uint8_t major_tag;
uint64_t length;
UInt64OrNone length;

if (!PyArg_ParseTuple(args, "BK", &major_tag, &length))
if (!PyArg_ParseTuple(args, "BO&", &major_tag, &uint64_or_none, &length))
return NULL;
if (encode_length_possibly_indefinite(self, major_tag, length.value, length.is_none) == -1)
return NULL;
if (encode_length(self, major_tag, length) == -1)
Py_RETURN_NONE;
}

// Emit the one-byte "break" marker (major 7, subtype 31) through fp_write()
// and hand back fp_write()'s result unchanged.
static int
encode_break(CBOREncoderObject *self)
{
    LeadByte stop;

    stop.subtype = 31;
    stop.major = 7;
    return fp_write(self, (const char *) &stop, 1);
}

// CBOREncoder.encode_break(self)
// Method wrapper: writes the break stop code and returns None, or returns
// NULL when the underlying encode_break() reports failure (-1).
static PyObject *
CBOREncoder_encode_break(CBOREncoderObject *self)
{
    const int status = encode_break(self);

    if (status == -1)
        return NULL;
    Py_RETURN_NONE;
}

Expand Down Expand Up @@ -761,7 +823,7 @@ encode_array(CBOREncoderObject *self, PyObject *value)
if (fast) {
length = PySequence_Fast_GET_SIZE(fast);
items = PySequence_Fast_ITEMS(fast);
if (encode_length(self, 4, length) == 0) {
if (encode_length_possibly_indefinite(self, 4, length, self->indefinite_containers) == 0) {
while (length) {
ret = CBOREncoder_encode(self, *items);
if (ret)
Expand All @@ -774,6 +836,9 @@ encode_array(CBOREncoderObject *self, PyObject *value)
Py_INCREF(Py_None);
ret = Py_None;
}
if (self->indefinite_containers && encode_break(self) == -1) {
goto error;
}
error:
Py_DECREF(fast);
}
Expand All @@ -796,7 +861,7 @@ encode_dict(CBOREncoderObject *self, PyObject *value)
PyObject *key, *val, *ret;
Py_ssize_t pos = 0;

if (encode_length(self, 5, PyDict_Size(value)) == 0) {
if (encode_length_possibly_indefinite(self, 5, PyDict_Size(value), self->indefinite_containers) == 0) {
while (PyDict_Next(value, &pos, &key, &val)) {
Py_INCREF(key);
ret = CBOREncoder_encode(self, key);
Expand All @@ -813,7 +878,11 @@ encode_dict(CBOREncoderObject *self, PyObject *value)
else
return NULL;
}
if (self->indefinite_containers && encode_break(self) == -1) {
return NULL;
}
}

Py_RETURN_NONE;
}

Expand All @@ -830,7 +899,7 @@ encode_mapping(CBOREncoderObject *self, PyObject *value)
if (fast) {
length = PySequence_Fast_GET_SIZE(fast);
items = PySequence_Fast_ITEMS(fast);
if (encode_length(self, 5, length) == 0) {
if (encode_length_possibly_indefinite(self, 5, length, self->indefinite_containers) == 0) {
while (length) {
ret = CBOREncoder_encode(self, PyTuple_GET_ITEM(*items, 0));
if (ret)
Expand All @@ -845,6 +914,9 @@ encode_mapping(CBOREncoderObject *self, PyObject *value)
items++;
length--;
}
if (self->indefinite_containers && encode_break(self) == -1) {
goto error;
}
ret = Py_None;
Py_INCREF(ret);
}
Expand Down Expand Up @@ -1728,7 +1800,7 @@ encode_canonical_map_list(CBOREncoderObject *self, PyObject *list)

if (PyList_Sort(list) == -1)
return NULL;
if (encode_length(self, 5, PyList_GET_SIZE(list)) == -1)
if (encode_length_possibly_indefinite(self, 5, PyList_GET_SIZE(list), self->indefinite_containers) == -1)
return NULL;
for (index = 0; index < PyList_GET_SIZE(list); ++index) {
// If we are encoding string references, the order of the keys
Expand All @@ -1753,6 +1825,9 @@ encode_canonical_map_list(CBOREncoderObject *self, PyObject *list)
else
return NULL;
}
if (self->indefinite_containers && encode_break(self) == -1) {
return NULL;
}
Py_RETURN_NONE;
}

Expand Down Expand Up @@ -2114,6 +2189,8 @@ static PyMethodDef CBOREncoder_methods[] = {
{"encode_length", (PyCFunction) CBOREncoder_encode_length, METH_VARARGS,
"encode the specified *major_tag* with the specified *length* to "
"the output"},
{"encode_break", (PyCFunction) CBOREncoder_encode_break, METH_NOARGS,
"encode break stop code for indefinite containers"},
{"encode_int", (PyCFunction) CBOREncoder_encode_int, METH_O,
"encode the specified integer *value* to the output"},
{"encode_float", (PyCFunction) CBOREncoder_encode_float, METH_O,
Expand Down
1 change: 1 addition & 0 deletions source/encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef struct {
bool value_sharing;
bool string_referencing;
bool string_namespacing;
bool indefinite_containers;
} CBOREncoderObject;

extern PyTypeObject CBOREncoderType;
Expand Down
Loading
Loading