Skip to content

Commit b45c9e5

Browse files
committed
Merge branch 'int_from_bytes' into OWL_master_over_v1.26.1
2 parents a45ee04 + 2579a1a commit b45c9e5

File tree

8 files changed: +306 −39 lines changed

py/mpz.c

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -850,7 +850,7 @@ size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigne
850850
return cur - str;
851851
}
852852

853-
void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) {
853+
void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool is_signed, size_t len, const byte *buf) {
854854
int delta = 1;
855855
if (big_endian) {
856856
buf += len - 1;
@@ -862,6 +862,9 @@ void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf)
862862
mpz_dig_t d = 0;
863863
int num_bits = 0;
864864
z->neg = 0;
865+
if ((is_signed) && (buf[len - 1] & 0x80)) {
866+
z->neg = 1;
867+
}
865868
z->len = 0;
866869
while (len) {
867870
while (len && num_bits < DIG_SIZE) {
@@ -879,7 +882,14 @@ void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf)
879882
#endif
880883
num_bits -= DIG_SIZE;
881884
}
882-
885+
if (z->neg) {
886+
// sign extend
887+
while (num_bits < DIG_SIZE) {
888+
d |= DIG_MSB << num_bits;
889+
num_bits += DIG_SIZE;
890+
}
891+
z->dig[z->len++] = d & DIG_MASK;
892+
}
883893
z->len = mpn_remove_trailing_zeros(z->dig, z->dig + z->len);
884894
}
885895

py/mpz.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed);
114114
void mpz_set_from_float(mpz_t *z, mp_float_t src);
115115
#endif
116116
size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base);
117-
void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf);
117+
void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool is_signed, size_t len, const byte *buf);
118118

119119
static inline bool mpz_is_zero(const mpz_t *z) {
120120
return z->len == 0;

py/objint.c

Lines changed: 72 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -394,37 +394,86 @@ mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp
394394
return MP_OBJ_NULL; // op not supported
395395
}
396396

397-
// this is a classmethod
398-
static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
399-
// TODO: Support signed param (assumes signed=False at the moment)
400-
401-
// get the buffer info
402-
mp_buffer_info_t bufinfo;
403-
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
397+
void *reverce_memcpy(void *dest, const void *src, size_t len) {
398+
char *d = (char *)dest + len - 1;
399+
const char *s = src;
400+
while (len--) {
401+
*d-- = *s++;
402+
}
403+
return dest;
404+
}
404405

405-
const byte *buf = (const byte *)bufinfo.buf;
406-
int delta = 1;
407-
bool big_endian = n_args < 3 || args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
408-
if (!big_endian) {
409-
buf += bufinfo.len - 1;
410-
delta = -1;
406+
mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf) {
407+
if (len > sizeof(mp_int_t)) {
408+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
409+
// Result will overflow a small-int size so construct a big-int
410+
return mp_obj_int_from_bytes_impl(big_endian, is_signed, len, buf);
411+
#else
412+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
413+
#endif
414+
}
415+
union {
416+
mp_int_t value;
417+
mp_uint_t uvalue;
418+
byte buf[sizeof(mp_int_t)];
419+
} result = {0};
420+
// #if sizeof(mp_int_t) != sizeof(mp_uint_t)
421+
// #error "sizeof(mp_int_t) != sizeof(mp_uint_t)"
422+
// #endif
423+
424+
if (big_endian) {
425+
reverce_memcpy(&result, buf, len);
426+
} else { // little-endian
427+
memcpy(&result, buf, len);
411428
}
412429

413-
mp_uint_t value = 0;
414-
size_t len = bufinfo.len;
415-
for (; len--; buf += delta) {
430+
if ((is_signed) && (sizeof(result) > len) && (result.buf[len - 1] & 0x80)) {
431+
// Sign propagation in little-endian
432+
// x = 2
433+
// x.to_bytes(1, 'little', True) -> b'\x02'
434+
// x.to_bytes(4, 'little', True) -> b'\x02\x00\x00\x00'
435+
// x = -2
436+
// x.to_bytes(1, 'little', True) -> b'\xFE'
437+
// x.to_bytes(4, 'little', True) -> b'\xFE\xFF\xFF\xFF'
438+
memset(result.buf + len, 0xFF, sizeof(result) - len);
439+
}
440+
if (((!is_signed) && (result.uvalue > MP_SMALL_INT_MAX)) || (is_signed && ((result.value < MP_SMALL_INT_MIN) || (result.value > MP_SMALL_INT_MAX)))) {
441+
// Result will overflow a small-int so construct a big-int
416442
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
417-
if (value > (MP_SMALL_INT_MAX >> 8)) {
418-
// Result will overflow a small-int so construct a big-int
419-
return mp_obj_int_from_bytes_impl(big_endian, bufinfo.len, bufinfo.buf);
420-
}
443+
return mp_obj_int_from_bytes_impl(big_endian, is_signed, len, buf);
444+
#else
445+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
421446
#endif
422-
value = (value << 8) | *buf;
423447
}
424-
return mp_obj_new_int_from_uint(value);
448+
return mp_obj_new_int(result.value);
425449
}
426450

427-
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 2, 4, int_from_bytes);
451+
// this is a classmethod
452+
// result = int.from_bytes(bytearray(), [[length=,] byteorder='big',] signed=False)
453+
static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
454+
enum { ARG_length, ARG_byteorder, ARG_signed };
455+
static const mp_arg_t allowed_args[] = {
456+
{ MP_QSTR_length, MP_ARG_INT, { .u_int = 0 } },
457+
{ MP_QSTR_byteorder, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_QSTR(MP_QSTR_big) } },
458+
{ MP_QSTR_signed, MP_ARG_BOOL, {.u_bool = false} },
459+
};
460+
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
461+
mp_arg_parse_all(n_args - 2, pos_args + 2, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
462+
463+
// get the buffer info
464+
mp_buffer_info_t bufinfo;
465+
mp_get_buffer_raise(pos_args[1], &bufinfo, MP_BUFFER_READ);
466+
467+
size_t len = args[ARG_length].u_int;
468+
bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little);
469+
bool is_signed = args[ARG_signed].u_bool;
470+
471+
if ((len <= 0) || (len > bufinfo.len)) {
472+
len = bufinfo.len;
473+
}
474+
return mp_obj_integer_from_bytes_impl(big_endian, is_signed, len, bufinfo.buf);
475+
}
476+
static MP_DEFINE_CONST_FUN_OBJ_KW(int_from_bytes_fun_obj, 2, int_from_bytes);
428477
static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));
429478

430479
static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {

py/objint.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,13 +54,15 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
5454
char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
5555
int base, const char *prefix, char base_char, char comma);
5656
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
57-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
57+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf);
58+
mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf);
5859
// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
5960
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
6061
int mp_obj_int_sign(mp_obj_t self_in);
6162
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
6263
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
6364
mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
6465
mp_obj_t mp_obj_int_pow3(mp_obj_t base, mp_obj_t exponent, mp_obj_t modulus);
66+
void *reverce_memcpy(void *dest, const void *src, size_t len);
6567

6668
#endif // MICROPY_INCLUDED_PY_OBJINT_H

py/objint_longlong.c

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -48,18 +48,32 @@ static void raise_long_long_overflow(void) {
4848
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage"));
4949
}
5050

51-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
52-
int delta = 1;
53-
if (!big_endian) {
54-
buf += len - 1;
55-
delta = -1;
51+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf) {
52+
if (len > sizeof(mp_longint_impl_t)) {
53+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("big-int overflow"));
5654
}
55+
union {
56+
mp_longint_impl_t value;
57+
byte buf[sizeof(mp_longint_impl_t)];
58+
} result = {0};
5759

58-
mp_longint_impl_t value = 0;
59-
for (; len--; buf += delta) {
60-
value = (value << 8) | *buf;
60+
if (big_endian) {
61+
reverce_memcpy(&result, buf, len);
62+
} else { // little-endian
63+
memcpy(&result, buf, len);
6164
}
62-
return mp_obj_new_int_from_ll(value);
65+
66+
if ((is_signed) && (sizeof(result) > len) && (result.buf[len - 1] & 0x80)) {
67+
// Sign propagation in little-endian
68+
// x = 2
69+
// x.to_bytes(1, 'little', True) -> b'\x02'
70+
// x.to_bytes(4, 'little', True) -> b'\x02\x00\x00\x00'
71+
// x = -2
72+
// x.to_bytes(1, 'little', True) -> b'\xFE'
73+
// x.to_bytes(4, 'little', True) -> b'\xFE\xFF\xFF\xFF'
74+
memset(result.buf + len, 0xFF, sizeof(result) - len);
75+
}
76+
return mp_obj_new_int_from_ll(result.value);
6377
}
6478

6579
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {

py/objint_mpz.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,9 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
106106
return str;
107107
}
108108

109-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
109+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf) {
110110
mp_obj_int_t *o = mp_obj_int_new_mpz();
111-
mpz_set_from_bytes(&o->mpz, big_endian, len, buf);
111+
mpz_set_from_bytes(&o->mpz, big_endian, is_signed, len, buf);
112112
return MP_OBJ_FROM_PTR(o);
113113
}
114114

tests/basics/int_bytes.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
# check that extra zero bytes don't change the internal int value
1111
print(int.from_bytes(bytes(20), "little") == 0)
12-
print(int.from_bytes(b"\x01" + bytes(20), "little") == 1)
12+
print(int.from_bytes(b"\x01" + bytes(7), "little") == 1)
1313

1414
# big-endian conversion
1515
print((10).to_bytes(1, "big"))

0 commit comments

Comments
 (0)