-
Notifications
You must be signed in to change notification settings - Fork 63
Support encoding indefinite containers #256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b8b0d89
e9c75ea
dd32d18
4051948
bd4c7c1
b875dcb
0604190
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,6 +113,7 @@ CBOREncoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) | |
self->shared_handler = NULL; | ||
self->string_referencing = false; | ||
self->string_namespacing = false; | ||
self->indefinite_containers = false; | ||
} | ||
return (PyObject *) self; | ||
} | ||
|
@@ -126,16 +127,16 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs) | |
{ | ||
static char *keywords[] = { | ||
"fp", "datetime_as_timestamp", "timezone", "value_sharing", "default", | ||
"canonical", "date_as_datetime", "string_referencing", NULL | ||
"canonical", "date_as_datetime", "string_referencing", "indefinite_containers", NULL | ||
}; | ||
PyObject *tmp, *fp = NULL, *default_handler = NULL, *tz = NULL; | ||
int value_sharing = 0, timestamp_format = 0, enc_style = 0, | ||
date_as_datetime = 0, string_referencing = 0; | ||
date_as_datetime = 0, string_referencing = 0, indefinite_containers = 0; | ||
|
||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOppp", keywords, | ||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOpppp", keywords, | ||
&fp, &timestamp_format, &tz, &value_sharing, | ||
&default_handler, &enc_style, &date_as_datetime, | ||
&string_referencing)) | ||
&string_referencing, &indefinite_containers)) | ||
return -1; | ||
// Predicate values are returned as ints, but need to be stored as bool or ubyte | ||
if (timestamp_format == 1) | ||
|
@@ -150,6 +151,8 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs) | |
self->string_referencing = true; | ||
self->string_namespacing = true; | ||
} | ||
if (indefinite_containers == 1) | ||
self->indefinite_containers = true; | ||
|
||
|
||
if (_CBOREncoder_set_fp(self, fp, NULL) == -1) | ||
|
@@ -345,17 +348,19 @@ CBOREncoder_write(CBOREncoderObject *self, PyObject *data) | |
Py_RETURN_NONE; | ||
} | ||
|
||
|
||
static int | ||
encode_length(CBOREncoderObject *self, const uint8_t major_tag, | ||
const uint64_t length) | ||
encode_length_possibly_indefinite(CBOREncoderObject *self, const uint8_t major_tag, | ||
const uint64_t length, const bool indefinite) | ||
{ | ||
LeadByte *lead; | ||
char buf[sizeof(LeadByte) + sizeof(uint64_t)]; | ||
|
||
lead = (LeadByte*)buf; | ||
lead->major = major_tag; | ||
if (length < 24) { | ||
if (indefinite) { | ||
lead->subtype = 31; | ||
return fp_write(self, buf, 1); | ||
} else if (length < 24) { | ||
lead->subtype = (uint8_t) length; | ||
return fp_write(self, buf, 1); | ||
} else if (length <= UCHAR_MAX) { | ||
|
@@ -377,21 +382,62 @@ encode_length(CBOREncoderObject *self, const uint8_t major_tag, | |
} | ||
} | ||
|
||
static int | ||
encode_length(CBOREncoderObject *self, const uint8_t major_tag, | ||
const uint64_t length) { | ||
return encode_length_possibly_indefinite(self, major_tag, length, false); | ||
} | ||
|
||
int uint64_or_none(PyObject *obj, void *param) { | ||
if (obj == Py_None) { | ||
return 1; | ||
} else if (PyLong_Check(obj)) { | ||
const uint64_t val = PyLong_AsUnsignedLong(obj); | ||
if (PyErr_Occurred()) { | ||
return 0; | ||
} | ||
|
||
*((uint64_t*)param) = val; | ||
return 1; | ||
} else { | ||
PyErr_SetString(PyExc_TypeError, "must be int or None"); | ||
return 0; | ||
} | ||
} | ||
|
||
// CBOREncoder.encode_length(self, major_tag, length) | ||
static PyObject * | ||
CBOREncoder_encode_length(CBOREncoderObject *self, PyObject *args) | ||
{ | ||
uint8_t major_tag; | ||
uint64_t length; | ||
uint64_t length = -1; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, -1 becomes a very large integer. I did this because I was too lazy to set up a custom structure for the `O&` converter. Should I remake it? |
||
|
||
if (!PyArg_ParseTuple(args, "BK", &major_tag, &length)) | ||
if (!PyArg_ParseTuple(args, "BO&", &major_tag, &uint64_or_none, &length)) | ||
return NULL; | ||
if (encode_length(self, major_tag, length) == -1) | ||
if (encode_length_possibly_indefinite(self, major_tag, length, length == -1) == -1) | ||
return NULL; | ||
Py_RETURN_NONE; | ||
} | ||
|
||
static int | ||
encode_break(CBOREncoderObject *self) | ||
{ | ||
LeadByte lead; | ||
lead.major = 7; | ||
lead.subtype = 31; | ||
return fp_write(self, (const char*) &lead, 1); | ||
} | ||
|
||
// CBOREncoder.encode_break(self) | ||
static PyObject * | ||
CBOREncoder_encode_break(CBOREncoderObject *self) | ||
{ | ||
if (encode_break(self) == -1) { | ||
return NULL; | ||
} | ||
Py_RETURN_NONE; | ||
} | ||
|
||
|
||
// Given a deferred type tuple (module-name, type-name), find the specified | ||
// module in sys.modules, get the specified type from within it and return it | ||
|
@@ -761,7 +807,7 @@ encode_array(CBOREncoderObject *self, PyObject *value) | |
if (fast) { | ||
length = PySequence_Fast_GET_SIZE(fast); | ||
items = PySequence_Fast_ITEMS(fast); | ||
if (encode_length(self, 4, length) == 0) { | ||
if (encode_length_possibly_indefinite(self, 4, length, self->indefinite_containers) == 0) { | ||
while (length) { | ||
ret = CBOREncoder_encode(self, *items); | ||
if (ret) | ||
|
@@ -774,6 +820,9 @@ encode_array(CBOREncoderObject *self, PyObject *value) | |
Py_INCREF(Py_None); | ||
ret = Py_None; | ||
} | ||
if (self->indefinite_containers && encode_break(self) == -1) { | ||
goto error; | ||
} | ||
error: | ||
Py_DECREF(fast); | ||
} | ||
|
@@ -796,7 +845,7 @@ encode_dict(CBOREncoderObject *self, PyObject *value) | |
PyObject *key, *val, *ret; | ||
Py_ssize_t pos = 0; | ||
|
||
if (encode_length(self, 5, PyDict_Size(value)) == 0) { | ||
if (encode_length_possibly_indefinite(self, 5, PyDict_Size(value), self->indefinite_containers) == 0) { | ||
while (PyDict_Next(value, &pos, &key, &val)) { | ||
Py_INCREF(key); | ||
ret = CBOREncoder_encode(self, key); | ||
|
@@ -813,7 +862,11 @@ encode_dict(CBOREncoderObject *self, PyObject *value) | |
else | ||
return NULL; | ||
} | ||
if (self->indefinite_containers && encode_break(self) == -1) { | ||
return NULL; | ||
} | ||
} | ||
|
||
Py_RETURN_NONE; | ||
} | ||
|
||
|
@@ -830,7 +883,7 @@ encode_mapping(CBOREncoderObject *self, PyObject *value) | |
if (fast) { | ||
length = PySequence_Fast_GET_SIZE(fast); | ||
items = PySequence_Fast_ITEMS(fast); | ||
if (encode_length(self, 5, length) == 0) { | ||
if (encode_length_possibly_indefinite(self, 5, length, self->indefinite_containers) == 0) { | ||
while (length) { | ||
ret = CBOREncoder_encode(self, PyTuple_GET_ITEM(*items, 0)); | ||
if (ret) | ||
|
@@ -845,6 +898,9 @@ encode_mapping(CBOREncoderObject *self, PyObject *value) | |
items++; | ||
length--; | ||
} | ||
if (self->indefinite_containers && encode_break(self) == -1) { | ||
goto error; | ||
} | ||
ret = Py_None; | ||
Py_INCREF(ret); | ||
} | ||
|
@@ -1728,7 +1784,7 @@ encode_canonical_map_list(CBOREncoderObject *self, PyObject *list) | |
|
||
if (PyList_Sort(list) == -1) | ||
return NULL; | ||
if (encode_length(self, 5, PyList_GET_SIZE(list)) == -1) | ||
if (encode_length_possibly_indefinite(self, 5, PyList_GET_SIZE(list), self->indefinite_containers) == -1) | ||
return NULL; | ||
for (index = 0; index < PyList_GET_SIZE(list); ++index) { | ||
// If we are encoding string references, the order of the keys | ||
|
@@ -1753,6 +1809,9 @@ encode_canonical_map_list(CBOREncoderObject *self, PyObject *list) | |
else | ||
return NULL; | ||
} | ||
if (self->indefinite_containers && encode_break(self) == -1) { | ||
return NULL; | ||
} | ||
Py_RETURN_NONE; | ||
} | ||
|
||
|
@@ -2114,6 +2173,8 @@ static PyMethodDef CBOREncoder_methods[] = { | |
{"encode_length", (PyCFunction) CBOREncoder_encode_length, METH_VARARGS, | ||
"encode the specified *major_tag* with the specified *length* to " | ||
"the output"}, | ||
{"encode_break", (PyCFunction) CBOREncoder_encode_break, METH_NOARGS, | ||
"encode break stop code for indefinite containers"}, | ||
{"encode_int", (PyCFunction) CBOREncoder_encode_int, METH_O, | ||
"encode the specified integer *value* to the output"}, | ||
{"encode_float", (PyCFunction) CBOREncoder_encode_float, METH_O, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain why you had to split this function into two parts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`encode_length_possibly_indefinite`. Most of the use cases don't care about indefinite encoding, so it didn't make sense to drag this function there - so we have two now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I took a better look at this, and it doesn't make a whole lot of sense. `uint64_t` cannot possibly be -1 since it's unsigned. The actual value would then become a very large integer (18446744073709551615) instead, yes? While I doubt anyone will really try to actually encode such enormous structures, it does raise other questions, like if you felt this was an acceptable sentinel value, why then did you have to split `encode_length()` instead of just using -1 as the sentinel for indefinite length? And why can't `encode_length()` just directly look at `self->indefinite_containers` and, if it's true, ignore the length parameter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`encode_length` cannot directly use `indefinite_containers`, because it is also used in other places outside of encoding array/map length.