From 5541688fffb8c26aef317a4cfdae88c99da04d20 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Sun, 29 Dec 2024 18:39:34 -0500 Subject: [PATCH 01/11] WIP: bigint, try number two --- compiler.py | 8 ++- compiler_tests.py | 14 ++++ runtime.c | 158 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 170 insertions(+), 10 deletions(-) diff --git a/compiler.py b/compiler.py index 81a9be32..08b0248c 100644 --- a/compiler.py +++ b/compiler.py @@ -329,7 +329,12 @@ def _emit_const(self, exp: Object) -> str: return "hole()" if isinstance(exp, Int): # TODO(max): Bignum - return f"_mksmallint({exp.value})" + # TODO(max): assert not too big. but what we should do is + # mknum_fromstring("") or literally encode the heap object as a + # constant with digits + if exp.value > 0x3fffffffffffffff or exp.value < -0x4000000000000000: + raise NotImplementedError("too big :(") + return f"_mksmallint({exp.value}ULL)" if isinstance(exp, List): items = [self._emit_const(item) for item in exp.items] result = "empty_list()" @@ -496,7 +501,6 @@ def compile_to_string(program: Object, debug: bool) -> str: dirname = os.path.dirname(__file__) with open(os.path.join(dirname, "runtime.c"), "r") as runtime: print(runtime.read(), file=f) - print("#define OBJECT_HANDLE(name, exp) GC_HANDLE(struct object*, name, exp)", file=f) if compiler.record_keys: print("const char* record_keys[] = {", file=f) for key in compiler.record_keys: diff --git a/compiler_tests.py b/compiler_tests.py index 76521cc8..b4cbcff5 100644 --- a/compiler_tests.py +++ b/compiler_tests.py @@ -42,6 +42,20 @@ def _run(self, code: str) -> str: def test_int(self) -> None: self.assertEqual(self._run("1"), "1\n") + def test_int_small_int_max(self) -> None: + self.assertEqual(self._run("4611686018427387903"), "4611686018427387903\n") + + def test_int_small_int_too_big(self) -> None: + with self.assertRaisesRegex(NotImplementedError, "too big"): + self._run("4611686018427387904") + + def test_int_small_int_min(self) -> None: + 
self.assertEqual(self._run("-4611686018427387904"), "-4611686018427387904\n") + + def test_int_small_int_too_small(self) -> None: + with self.assertRaisesRegex(NotImplementedError, "too big"): + self._run("-4611686018427387905") + def test_small_string(self) -> None: self.assertEqual(self._run('"hello"'), '"hello"\n') diff --git a/runtime.c b/runtime.c index 7b471b96..9ec88124 100644 --- a/runtime.c +++ b/runtime.c @@ -17,6 +17,11 @@ const int kPointerSize = sizeof(void*); typedef intptr_t word; typedef uintptr_t uword; typedef unsigned char byte; +typedef uint64_t large_int_digit; +const int kLargeintDigitSize = sizeof(large_int_digit); +const word kMinWord = INTPTR_MIN; +const word kMaxWord = INTPTR_MAX; +const uword kMaxUword = UINTPTR_MAX; // Garbage collector core by Andy Wingo . @@ -360,7 +365,8 @@ static ALWAYS_INLINE ALLOCATOR struct object* allocate(struct gc_heap* heap, TAG(TAG_CLOSURE) \ TAG(TAG_RECORD) \ TAG(TAG_STRING) \ - TAG(TAG_VARIANT) + TAG(TAG_VARIANT) \ + TAG(TAG_LARGEINT) enum { // All odd becase of the kNotForwardedBit @@ -411,6 +417,11 @@ struct variant { struct object* value; } HEAP_ALIGNED; +struct large_int { + struct gc_obj HEAD; + large_int_digit digits[]; +}; // Not HEAP_ALIGNED; digits is variable size + size_t heap_object_size(struct gc_obj* obj) { size_t result = obj->tag >> kBitsPerByte; assert(is_size_aligned(result)); @@ -458,12 +469,79 @@ struct object* mksmallint(word value) { return _mksmallint(value); } +static ALWAYS_INLINE bool is_large_int(struct object* obj) { + return is_heap_object(obj) && obj_has_tag(as_heap_object(obj), TAG_LARGEINT); +} + +static ALWAYS_INLINE struct large_int* as_large_int(struct object* obj) { + assert(is_large_int(obj)); + return (struct large_int*)as_heap_object(obj); +} + +uword large_int_num_digits(struct object* obj) { + assert(is_large_int(obj)); + size_t size = heap_object_size(as_heap_object(obj)) - sizeof(struct gc_obj); + return size / kLargeintDigitSize; +} + +uword num_digits(struct 
object* obj) { + if (is_small_int(obj)) { + return 1; + } + assert(is_large_int(obj)); + return large_int_num_digits(obj); +} + +large_int_digit large_int_digit_at(struct object* obj, uword index) { + assert(is_large_int(obj)); + assert(index < large_int_num_digits(obj)); + return as_large_int(obj)->digits[index]; +} + +void large_int_digit_at_put(struct object* obj, uword index, large_int_digit digit) { + assert(is_large_int(obj)); + assert(index < large_int_num_digits(obj)); + as_large_int(obj)->digits[index] = digit; +} + +word small_int_value(struct object* obj) { + assert(is_small_int(obj)); + return ((word)obj) >> kSmallIntTagBits; // sign extend +} + +uword digit_at(struct object* obj, uword index) { + if (is_small_int(obj)) { + assert(index == 0); + return small_int_value(obj); + } + assert(is_large_int(obj)); + return large_int_digit_at(obj, index); +} + +struct object* _mklarge_int_uninit_private(struct gc_heap* heap, uword num_digits) { + uword digits_size = num_digits * kLargeintDigitSize; + uword size = align_size(sizeof(struct large_int) + digits_size); + return allocate(heap, TAG_LARGEINT, size); +} + +struct object* _mklarge_int(struct gc_heap* heap, uword num_digits, + large_int_digit* digits) { + struct object* result = _mklarge_int_uninit_private(heap, num_digits); + uword digits_size = num_digits * kLargeintDigitSize; + memcpy(as_large_int(result)->digits, digits, digits_size); + return result; +} + struct object* mknum(struct gc_heap* heap, word value) { - (void)heap; - return mksmallint(value); + if (smallint_is_valid(value)) { + return _mksmallint(value); + } + assert(sizeof(word) == sizeof(large_int_digit)); + large_int_digit digits[] = {value}; + return _mklarge_int(heap, 1, digits); } -bool is_num(struct object* obj) { return is_small_int(obj); } +bool is_num(struct object* obj) { return is_small_int(obj) || is_large_int(obj); } bool is_num_equal_word(struct object* obj, word value) { assert(smallint_is_valid(value)); @@ -472,7 +550,12 @@ 
bool is_num_equal_word(struct object* obj, word value) { word num_value(struct object* obj) { assert(is_num(obj)); - return ((word)obj) >> 1; // sign extend + if (is_small_int(obj)) { + return small_int_value(obj); + } + assert(is_large_int(obj)); + assert(large_int_num_digits(obj) == 1); + return large_int_digit_at(obj, 0); } bool is_list(struct object* obj) { @@ -688,6 +771,7 @@ void pop_handles(void* local_handles) { #define GC_HANDLE(type, name, val) \ type name = val; \ GC_PROTECT(name) +#define OBJECT_HANDLE(name, exp) GC_HANDLE(struct object*, name, exp) void trace_roots(struct gc_heap* heap, VisitFn visit) { for (struct object*** h = handle_stack; h != handles; h++) { @@ -698,9 +782,67 @@ void trace_roots(struct gc_heap* heap, VisitFn visit) { struct gc_heap heap_object; struct gc_heap* heap = &heap_object; -struct object* num_add(struct object* a, struct object* b) { - // NB: doesn't use pointers after allocating - return mknum(heap, num_value(a) + num_value(b)); +static uword add_with_carry(uword x, uword y, uword carry_in, uword* carry_out) { + assert(carry_in <= 1 && "carry must be 0 or 1"); + uword sum; + uword carry0 = __builtin_add_overflow(x, y, &sum); + uword carry1 = __builtin_add_overflow(sum, carry_in, &sum); + *carry_out = carry0 | carry1; + return sum; +} + +struct object* normalize_large_int(struct gc_heap *, struct object *obj) { + return obj; +} + +bool small_int_is_negative(struct object* obj) { + return small_int_value(obj) < 0; +} + +bool large_int_is_negative(struct object* obj) { + return (word)large_int_digit_at(obj, large_int_num_digits(obj) - 1) < 0; +} + +bool is_negative(struct object* obj) { + if (is_small_int(obj)) { + return small_int_is_negative(obj); + } + return large_int_is_negative(obj); +} + +struct object* num_add(struct object* left, struct object* right) { + if (is_small_int(left) && is_small_int(right)) { + // Take a shortcut because we know the result fits in a word. 
+ word result = num_value(left) + num_value(right); + return mknum(heap, result); + } + HANDLES(); + uword left_digits = num_digits(left); + uword right_digits = num_digits(right); + GC_PROTECT(left); + GC_PROTECT(right); + OBJECT_HANDLE(longer, left_digits > right_digits ? left : right); + OBJECT_HANDLE(shorter, left_digits > right_digits ? right : left); + uword shorter_digits = num_digits(shorter); + uword longer_digits = num_digits(longer); + uword result_digits = longer_digits + 1; + OBJECT_HANDLE(result, _mklarge_int_uninit_private(heap, result_digits)); + uword carry = 0; + for (uword i = 0; i < shorter_digits; i++) { + uword sum = add_with_carry( + digit_at(longer, i), digit_at(shorter, i), carry, &carry); + large_int_digit_at_put(result, i, sum); + } + uword shorter_sign_extension = is_negative(shorter) ? kMaxUword : 0; + for (uword i = shorter_digits; i < longer_digits; i++) { + uword sum = add_with_carry( + digit_at(longer, i), shorter_sign_extension, carry, &carry); + large_int_digit_at_put(result, i, sum); + } + uword longer_sign_extension = is_negative(longer) ? 
kMaxUword : 0; + uword high_digit = longer_sign_extension + shorter_sign_extension + carry; + large_int_digit_at_put(result, result_digits - 1, high_digit); + return normalize_large_int(heap, result); } struct object* num_sub(struct object* a, struct object* b) { From 135096eb818077474d5b91625b77ecbde28fa3b0 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 10:47:09 -0500 Subject: [PATCH 02/11] Add cases in trace/print --- runtime.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/runtime.c b/runtime.c index 9ec88124..20740e4e 100644 --- a/runtime.c +++ b/runtime.c @@ -446,6 +446,7 @@ size_t trace_heap_object(struct gc_obj* obj, struct gc_heap* heap, } break; case TAG_STRING: + case TAG_LARGEINT: break; case TAG_VARIANT: visit(&((struct variant*)obj)->value, heap); @@ -935,8 +936,19 @@ extern const char* record_keys[]; extern const char* variant_names[]; struct object* print(struct object* obj) { - if (is_num(obj)) { + if (is_small_int(obj)) { printf("%ld", num_value(obj)); + } else if (is_large_int(obj)) { + printf("largeint%d(", kLargeintDigitSize * kPointerSize); + uword num_digits = large_int_num_digits(obj); + for (uword i = 0; i < num_digits; i++) { + if (i > 0) { + fprintf(stdout, ", "); + } + fprintf(stdout, "%lx", large_int_digit_at(obj, num_digits - i - 1)); + } + printf(")"); + return obj; } else if (is_list(obj)) { putchar('['); while (!is_empty_list(obj)) { From d89aef571b501547d5e63e69d56c3532d4a56937 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 10:47:18 -0500 Subject: [PATCH 03/11] Run clang-format --- runtime.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/runtime.c b/runtime.c index 20740e4e..e651200b 100644 --- a/runtime.c +++ b/runtime.c @@ -499,7 +499,8 @@ large_int_digit large_int_digit_at(struct object* obj, uword index) { return as_large_int(obj)->digits[index]; } -void large_int_digit_at_put(struct object* obj, uword 
index, large_int_digit digit) { +void large_int_digit_at_put(struct object* obj, uword index, + large_int_digit digit) { assert(is_large_int(obj)); assert(index < large_int_num_digits(obj)); as_large_int(obj)->digits[index] = digit; @@ -519,14 +520,15 @@ uword digit_at(struct object* obj, uword index) { return large_int_digit_at(obj, index); } -struct object* _mklarge_int_uninit_private(struct gc_heap* heap, uword num_digits) { +struct object* _mklarge_int_uninit_private(struct gc_heap* heap, + uword num_digits) { uword digits_size = num_digits * kLargeintDigitSize; uword size = align_size(sizeof(struct large_int) + digits_size); return allocate(heap, TAG_LARGEINT, size); } struct object* _mklarge_int(struct gc_heap* heap, uword num_digits, - large_int_digit* digits) { + large_int_digit* digits) { struct object* result = _mklarge_int_uninit_private(heap, num_digits); uword digits_size = num_digits * kLargeintDigitSize; memcpy(as_large_int(result)->digits, digits, digits_size); @@ -542,7 +544,9 @@ struct object* mknum(struct gc_heap* heap, word value) { return _mklarge_int(heap, 1, digits); } -bool is_num(struct object* obj) { return is_small_int(obj) || is_large_int(obj); } +bool is_num(struct object* obj) { + return is_small_int(obj) || is_large_int(obj); +} bool is_num_equal_word(struct object* obj, word value) { assert(smallint_is_valid(value)); @@ -783,7 +787,8 @@ void trace_roots(struct gc_heap* heap, VisitFn visit) { struct gc_heap heap_object; struct gc_heap* heap = &heap_object; -static uword add_with_carry(uword x, uword y, uword carry_in, uword* carry_out) { +static uword add_with_carry(uword x, uword y, uword carry_in, + uword* carry_out) { assert(carry_in <= 1 && "carry must be 0 or 1"); uword sum; uword carry0 = __builtin_add_overflow(x, y, &sum); @@ -792,7 +797,7 @@ static uword add_with_carry(uword x, uword y, uword carry_in, uword* carry_out) return sum; } -struct object* normalize_large_int(struct gc_heap *, struct object *obj) { +struct object* 
normalize_large_int(struct gc_heap*, struct object* obj) { return obj; } @@ -830,14 +835,14 @@ struct object* num_add(struct object* left, struct object* right) { OBJECT_HANDLE(result, _mklarge_int_uninit_private(heap, result_digits)); uword carry = 0; for (uword i = 0; i < shorter_digits; i++) { - uword sum = add_with_carry( - digit_at(longer, i), digit_at(shorter, i), carry, &carry); + uword sum = add_with_carry(digit_at(longer, i), digit_at(shorter, i), carry, + &carry); large_int_digit_at_put(result, i, sum); } uword shorter_sign_extension = is_negative(shorter) ? kMaxUword : 0; for (uword i = shorter_digits; i < longer_digits; i++) { - uword sum = add_with_carry( - digit_at(longer, i), shorter_sign_extension, carry, &carry); + uword sum = add_with_carry(digit_at(longer, i), shorter_sign_extension, + carry, &carry); large_int_digit_at_put(result, i, sum); } uword longer_sign_extension = is_negative(longer) ? kMaxUword : 0; From 875fbea184b427484a12cec40eb9984400d0e1d0 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 10:49:13 -0500 Subject: [PATCH 04/11] . 
--- runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime.c b/runtime.c index e651200b..51660f9b 100644 --- a/runtime.c +++ b/runtime.c @@ -950,7 +950,7 @@ struct object* print(struct object* obj) { if (i > 0) { fprintf(stdout, ", "); } - fprintf(stdout, "%lx", large_int_digit_at(obj, num_digits - i - 1)); + fprintf(stdout, "0x%lx", large_int_digit_at(obj, num_digits - i - 1)); } printf(")"); return obj; From 0247109160f2bf6e656163f3d1cad1b0f7ad1b48 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 10:54:03 -0500 Subject: [PATCH 05/11] Normalize --- runtime.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/runtime.c b/runtime.c index 51660f9b..c4ff58d1 100644 --- a/runtime.c +++ b/runtime.c @@ -798,7 +798,30 @@ static uword add_with_carry(uword x, uword y, uword carry_in, } struct object* normalize_large_int(struct gc_heap*, struct object* obj) { - return obj; + word num_digits = large_int_num_digits(obj); + word shrink_to_digits = num_digits; + for (word digit = large_int_digit_at(obj, shrink_to_digits - 1), next_digit; + shrink_to_digits > 1; shrink_to_digits--, digit = next_digit) { + next_digit = large_int_digit_at(obj, shrink_to_digits - 2); + // break if we have neither a redundant sign-extension nor a redundant + // zero-extension. 
+ if ((digit != -1 || next_digit >= 0) && (digit != 0 || next_digit < 0)) { + break; + } + } + if (shrink_to_digits == 1 && smallint_is_valid(large_int_digit_at(obj, 0))) { + return mksmallint(large_int_digit_at(obj, 0)); + } + if (shrink_to_digits == num_digits) { + return obj; + } + HANDLES(); + GC_PROTECT(obj); + OBJECT_HANDLE(result, _mklarge_int_uninit_private(heap, shrink_to_digits)); + for (word i = 0; i < shrink_to_digits; i++) { + large_int_digit_at_put(result, i, large_int_digit_at(obj, i)); + } + return result; } bool small_int_is_negative(struct object* obj) { From cfff769516d5446b3bbeed945b32d8c2e0f6d02d Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 10:58:55 -0500 Subject: [PATCH 06/11] Add tests --- compiler_tests.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compiler_tests.py b/compiler_tests.py index b4cbcff5..115a6d4a 100644 --- a/compiler_tests.py +++ b/compiler_tests.py @@ -56,6 +56,14 @@ def test_int_small_int_too_small(self) -> None: with self.assertRaisesRegex(NotImplementedError, "too big"): self._run("-4611686018427387905") + def test_int_add_to_large_int(self) -> None: + self.assertEqual(self._run("4611686018427387903 + 1"), "largeint64(0x4000000000000000)\n") + + def test_int_add_to_large_int_two_digits(self) -> None: + program = "4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903" + self.assertEqual(hex(eval(program)), "0x1bffffffffffffff9") + self.assertEqual(self._run(program), "largeint64(0x1, 0xbffffffffffffff9)\n") + def test_small_string(self) -> None: self.assertEqual(self._run('"hello"'), '"hello"\n') From 1e72471165c3975759d10b29cd2e481f3d40061b Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 11:22:54 -0500 Subject: [PATCH 07/11] Support const positive large int --- compiler.py | 20 +++++++++++++++++--- compiler_tests.py | 10 +++++----- runtime.c | 3 ++- 3 files changed, 24 
insertions(+), 9 deletions(-) diff --git a/compiler.py b/compiler.py index 08b0248c..5481713c 100644 --- a/compiler.py +++ b/compiler.py @@ -304,7 +304,11 @@ def _is_const(self, exp: Object) -> bool: return True return False + def _make_tag(self, tag: str, size_bytes: str) -> str: + return f"((({size_bytes}) << kBitsPerByte) | {tag})" + def _const_obj(self, type: str, tag: str, contents: str) -> str: + # TODO(max): Emulate make_tag here to encode size result = self.gensym(f"const_{type}") self.const_heap.append(f"CONST_HEAP struct {type} {result} = {{.HEAD.tag={tag}, {contents} }};") return f"ptrto({result})" @@ -332,9 +336,19 @@ def _emit_const(self, exp: Object) -> str: # TODO(max): assert not too big. but what we should do is # mknum_fromstring("") or literally encode the heap object as a # constant with digits - if exp.value > 0x3fffffffffffffff or exp.value < -0x4000000000000000: - raise NotImplementedError("too big :(") - return f"_mksmallint({exp.value}ULL)" + if -0x4000000000000000 <= exp.value <= 0x3fffffffffffffff: + return f"_mksmallint({exp.value}ULL)" + # Divide number into 64-bit digits + if exp.value < 0: + raise NotImplementedError(f"negative largeint64({exp.value})") + value = exp.value + digits = [] + while value: + digits.append(value & 0xffffffffffffffff) + value >>= 64 + tag = self._make_tag("TAG_LARGEINT", f"sizeof(struct large_int)+{len(digits)}ULL*kLargeintDigitSize") + parts = ", ".join(f"{digit}ULL" for digit in digits) + return self._const_obj("large_int", tag, f".digits={{ {parts} }}") if isinstance(exp, List): items = [self._emit_const(item) for item in exp.items] result = "empty_list()" diff --git a/compiler_tests.py b/compiler_tests.py index 115a6d4a..e111f123 100644 --- a/compiler_tests.py +++ b/compiler_tests.py @@ -45,25 +45,25 @@ def test_int(self) -> None: def test_int_small_int_max(self) -> None: self.assertEqual(self._run("4611686018427387903"), "4611686018427387903\n") - def test_int_small_int_too_big(self) -> None: - with 
self.assertRaisesRegex(NotImplementedError, "too big"): - self._run("4611686018427387904") - def test_int_small_int_min(self) -> None: self.assertEqual(self._run("-4611686018427387904"), "-4611686018427387904\n") def test_int_small_int_too_small(self) -> None: - with self.assertRaisesRegex(NotImplementedError, "too big"): + with self.assertRaisesRegex(NotImplementedError, "negative largeint"): self._run("-4611686018427387905") def test_int_add_to_large_int(self) -> None: self.assertEqual(self._run("4611686018427387903 + 1"), "largeint64(0x4000000000000000)\n") + self.assertEqual(self._run("4611686018427387904"), "largeint64(0x4000000000000000)\n") def test_int_add_to_large_int_two_digits(self) -> None: program = "4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903 + 4611686018427387903" self.assertEqual(hex(eval(program)), "0x1bffffffffffffff9") self.assertEqual(self._run(program), "largeint64(0x1, 0xbffffffffffffff9)\n") + def test_literal_positive_large_int(self) -> None: + self.assertEqual(self._run("340282366920938463463374607431768211456"), "largeint64(0x1, 0x0, 0x0)\n") + def test_small_string(self) -> None: self.assertEqual(self._run('"hello"'), '"hello"\n') diff --git a/runtime.c b/runtime.c index c4ff58d1..f0d3d1b5 100644 --- a/runtime.c +++ b/runtime.c @@ -424,7 +424,8 @@ struct large_int { size_t heap_object_size(struct gc_obj* obj) { size_t result = obj->tag >> kBitsPerByte; - assert(is_size_aligned(result)); + // Size need not be aligned if the object is in the constant heap. + assert(in_const_heap(obj) || is_size_aligned(result)); return result; } From 89f14f18afc924cd6fca22e26a4b29dfccb804e4 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 11:23:55 -0500 Subject: [PATCH 08/11] . 
--- compiler.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/compiler.py b/compiler.py index 5481713c..6371d6f8 100644 --- a/compiler.py +++ b/compiler.py @@ -332,14 +332,11 @@ def _emit_const(self, exp: Object) -> str: if isinstance(exp, Hole): return "hole()" if isinstance(exp, Int): - # TODO(max): Bignum - # TODO(max): assert not too big. but what we should do is - # mknum_fromstring("") or literally encode the heap object as a - # constant with digits if -0x4000000000000000 <= exp.value <= 0x3fffffffffffffff: return f"_mksmallint({exp.value}ULL)" # Divide number into 64-bit digits if exp.value < 0: + # TODO(max): Handle negative largeint raise NotImplementedError(f"negative largeint64({exp.value})") value = exp.value digits = [] From 49665138711f0c2f192d33d0f801138ebc18d3d4 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 11:46:14 -0500 Subject: [PATCH 09/11] Support tcc Needed to add __builtin_uaddl_overflow implementation and make kLargeintDigitSize a preprocessor macro and name a parameter. 
--- compiler.py | 7 ++++--- runtime.c | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/compiler.py b/compiler.py index 6371d6f8..390fa083 100644 --- a/compiler.py +++ b/compiler.py @@ -332,7 +332,7 @@ def _emit_const(self, exp: Object) -> str: if isinstance(exp, Hole): return "hole()" if isinstance(exp, Int): - if -0x4000000000000000 <= exp.value <= 0x3fffffffffffffff: + if -0x4000000000000000 <= exp.value <= 0x3FFFFFFFFFFFFFFF: return f"_mksmallint({exp.value}ULL)" # Divide number into 64-bit digits if exp.value < 0: @@ -341,9 +341,9 @@ def _emit_const(self, exp: Object) -> str: value = exp.value digits = [] while value: - digits.append(value & 0xffffffffffffffff) + digits.append(value & 0xFFFFFFFFFFFFFFFF) value >>= 64 - tag = self._make_tag("TAG_LARGEINT", f"sizeof(struct large_int)+{len(digits)}ULL*kLargeintDigitSize") + tag = self._make_tag("TAG_LARGEINT", f"sizeof(struct large_int)+{len(digits)}ULL*kLargeIntDigitSize") parts = ", ".join(f"{digit}ULL" for digit in digits) return self._const_obj("large_int", tag, f".digits={{ {parts} }}") if isinstance(exp, List): @@ -485,6 +485,7 @@ def compile_to_string(program: Object, debug: bool) -> str: ("uword", "kPrimaryTagMask", "(1ULL << kPrimaryTagBits) - 1"), ("uword", "kImmediateTagMask", "(1ULL << kImmediateTagBits) - 1"), ("uword", "kWordSize", "sizeof(word)"), + ("uword", "kLargeIntDigitSize", "sizeof(large_int_digit)"), ("uword", "kMaxSmallStringLength", "kWordSize - 1"), ("uword", "kBitsPerByte", 8), # Up to the five least significant bits are used to tag the object's layout. 
diff --git a/runtime.c b/runtime.c index f0d3d1b5..51a5f458 100644 --- a/runtime.c +++ b/runtime.c @@ -18,7 +18,6 @@ typedef intptr_t word; typedef uintptr_t uword; typedef unsigned char byte; typedef uint64_t large_int_digit; -const int kLargeintDigitSize = sizeof(large_int_digit); const word kMinWord = INTPTR_MIN; const word kMaxWord = INTPTR_MAX; const uword kMaxUword = UINTPTR_MAX; @@ -483,7 +482,7 @@ static ALWAYS_INLINE struct large_int* as_large_int(struct object* obj) { uword large_int_num_digits(struct object* obj) { assert(is_large_int(obj)); size_t size = heap_object_size(as_heap_object(obj)) - sizeof(struct gc_obj); - return size / kLargeintDigitSize; + return size / kLargeIntDigitSize; } uword num_digits(struct object* obj) { @@ -523,7 +522,7 @@ uword digit_at(struct object* obj, uword index) { struct object* _mklarge_int_uninit_private(struct gc_heap* heap, uword num_digits) { - uword digits_size = num_digits * kLargeintDigitSize; + uword digits_size = num_digits * kLargeIntDigitSize; uword size = align_size(sizeof(struct large_int) + digits_size); return allocate(heap, TAG_LARGEINT, size); } @@ -531,7 +530,7 @@ struct object* _mklarge_int_uninit_private(struct gc_heap* heap, struct object* _mklarge_int(struct gc_heap* heap, uword num_digits, large_int_digit* digits) { struct object* result = _mklarge_int_uninit_private(heap, num_digits); - uword digits_size = num_digits * kLargeintDigitSize; + uword digits_size = num_digits * kLargeIntDigitSize; memcpy(as_large_int(result)->digits, digits, digits_size); return result; } @@ -788,17 +787,25 @@ void trace_roots(struct gc_heap* heap, VisitFn visit) { struct gc_heap heap_object; struct gc_heap* heap = &heap_object; +#if !__has_builtin(__builtin_uaddl_overflow) +bool __builtin_uaddl_overflow(uword left, uword right, uword* result) { + *result = left + right; + return *result < left; +} +#endif + static uword add_with_carry(uword x, uword y, uword carry_in, uword* carry_out) { assert(carry_in <= 1 && "carry 
must be 0 or 1"); uword sum; - uword carry0 = __builtin_add_overflow(x, y, &sum); - uword carry1 = __builtin_add_overflow(sum, carry_in, &sum); + uword carry0 = __builtin_uaddl_overflow(x, y, &sum); + uword carry1 = __builtin_uaddl_overflow(sum, carry_in, &sum); *carry_out = carry0 | carry1; return sum; } -struct object* normalize_large_int(struct gc_heap*, struct object* obj) { +struct object* normalize_large_int(struct gc_heap* heap, struct object* obj) { + (void)heap; word num_digits = large_int_num_digits(obj); word shrink_to_digits = num_digits; for (word digit = large_int_digit_at(obj, shrink_to_digits - 1), next_digit; @@ -968,7 +975,7 @@ struct object* print(struct object* obj) { if (is_small_int(obj)) { printf("%ld", num_value(obj)); } else if (is_large_int(obj)) { - printf("largeint%d(", kLargeintDigitSize * kPointerSize); + printf("largeint%d(", kLargeIntDigitSize * kPointerSize); uword num_digits = large_int_num_digits(obj); for (uword i = 0; i < num_digits; i++) { if (i > 0) { From 314fa966546e0485db51f4ef1997192701a3b642 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 11:47:53 -0500 Subject: [PATCH 10/11] Add TODOs --- runtime.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runtime.c b/runtime.c index 51a5f458..98f0f496 100644 --- a/runtime.c +++ b/runtime.c @@ -883,11 +883,13 @@ struct object* num_add(struct object* left, struct object* right) { } struct object* num_sub(struct object* a, struct object* b) { + // TODO(max): Implement large_int subtraction // NB: doesn't use pointers after allocating return mknum(heap, num_value(a) - num_value(b)); } struct object* num_mul(struct object* a, struct object* b) { + // TODO(max): Implement large_int multiplication // NB: doesn't use pointers after allocating return mknum(heap, num_value(a) * num_value(b)); } From fc017058d9bb632219a0465c6f80f8118717f313 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 30 Dec 2024 12:02:45 -0500 Subject: [PATCH 11/11] Support older TCC --- 
runtime.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/runtime.c b/runtime.c index 98f0f496..975138bb 100644 --- a/runtime.c +++ b/runtime.c @@ -787,7 +787,13 @@ void trace_roots(struct gc_heap* heap, VisitFn visit) { struct gc_heap heap_object; struct gc_heap* heap = &heap_object; +#ifndef __has_builtin +// Some versions of TCC don't have __has_builtin. +#define __has_builtin(x) 0 +#endif + #if !__has_builtin(__builtin_uaddl_overflow) +// No version of TCC has __builtin_uaddl_overflow. bool __builtin_uaddl_overflow(uword left, uword right, uword* result) { *result = left + right; return *result < left;