@@ -8571,19 +8571,37 @@ pub const FuncGen = struct {
8571
8571
}
8572
8572
if (scalar_ty.isSignedInt(zcu)) {
8573
8573
const inst_llvm_ty = try o.lowerType(inst_ty);
8574
- const bit_size_minus_one = try o.builder.splatValue(inst_llvm_ty, try o.builder.intConst(
8574
+
8575
+ const ExpectedContents = [std.math.big.int.calcTwosCompLimbCount(256)]std.math.big.Limb;
8576
+ var stack align(@max(
8577
+ @alignOf(std.heap.StackFallbackAllocator(0)),
8578
+ @alignOf(ExpectedContents),
8579
+ )) = std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
8580
+ const allocator = stack.get();
8581
+
8582
+ const scalar_bits = inst_llvm_ty.scalarBits(&o.builder);
8583
+ var smin_big_int: std.math.big.int.Mutable = .{
8584
+ .limbs = try allocator.alloc(
8585
+ std.math.big.Limb,
8586
+ std.math.big.int.calcTwosCompLimbCount(scalar_bits),
8587
+ ),
8588
+ .len = undefined,
8589
+ .positive = undefined,
8590
+ };
8591
+ defer allocator.free(smin_big_int.limbs);
8592
+ smin_big_int.setTwosCompIntLimit(.min, .signed, scalar_bits);
8593
+ const smin = try o.builder.splatValue(inst_llvm_ty, try o.builder.bigIntConst(
8575
8594
inst_llvm_ty.scalarType(&o.builder),
8576
- inst_llvm_ty.scalarBits(&o.builder) - 1 ,
8595
+ smin_big_int.toConst() ,
8577
8596
));
8578
8597
8579
- const div = try self.wip.bin(.sdiv, lhs, rhs, "");
8580
- const rem = try self.wip.bin(.srem, lhs, rhs, "");
8581
- const div_sign = try self.wip.bin(.xor, lhs, rhs, "");
8582
- const div_sign_mask = try self.wip.bin(.ashr, div_sign, bit_size_minus_one, "");
8583
- const zero = try o.builder.zeroInitValue(inst_llvm_ty);
8584
- const rem_nonzero = try self.wip.icmp(.ne, rem, zero, "");
8585
- const correction = try self.wip.select(.normal, rem_nonzero, div_sign_mask, zero, "");
8586
- return self.wip.bin(.@"add nsw", div, correction, "");
8598
+ const div = try self.wip.bin(.sdiv, lhs, rhs, "divFloor.div");
8599
+ const rem = try self.wip.bin(.srem, lhs, rhs, "divFloor.rem");
8600
+ const rhs_sign = try self.wip.bin(.@"and", rhs, smin, "divFloor.rhs_sign");
8601
+ const rem_xor_rhs_sign = try self.wip.bin(.xor, rem, rhs_sign, "divFloor.rem_xor_rhs_sign");
8602
+ const need_correction = try self.wip.icmp(.ugt, rem_xor_rhs_sign, smin, "divFloor.need_correction");
8603
+ const correction = try self.wip.cast(.sext, need_correction, inst_llvm_ty, "divFloor.correction");
8604
+ return self.wip.bin(.@"add nsw", div, correction, "divFloor");
8587
8605
}
8588
8606
return self.wip.bin(.udiv, lhs, rhs, "");
8589
8607
}
@@ -8642,19 +8660,36 @@ pub const FuncGen = struct {
8642
8660
return self.wip.select(fast, ltz, c, a, "");
8643
8661
}
8644
8662
if (scalar_ty.isSignedInt(zcu)) {
8645
- const bit_size_minus_one = try o.builder.splatValue(inst_llvm_ty, try o.builder.intConst(
8663
+ const ExpectedContents = [std.math.big.int.calcTwosCompLimbCount(256)]std.math.big.Limb;
8664
+ var stack align(@max(
8665
+ @alignOf(std.heap.StackFallbackAllocator(0)),
8666
+ @alignOf(ExpectedContents),
8667
+ )) = std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
8668
+ const allocator = stack.get();
8669
+
8670
+ const scalar_bits = inst_llvm_ty.scalarBits(&o.builder);
8671
+ var smin_big_int: std.math.big.int.Mutable = .{
8672
+ .limbs = try allocator.alloc(
8673
+ std.math.big.Limb,
8674
+ std.math.big.int.calcTwosCompLimbCount(scalar_bits),
8675
+ ),
8676
+ .len = undefined,
8677
+ .positive = undefined,
8678
+ };
8679
+ defer allocator.free(smin_big_int.limbs);
8680
+ smin_big_int.setTwosCompIntLimit(.min, .signed, scalar_bits);
8681
+ const smin = try o.builder.splatValue(inst_llvm_ty, try o.builder.bigIntConst(
8646
8682
inst_llvm_ty.scalarType(&o.builder),
8647
- inst_llvm_ty.scalarBits(&o.builder) - 1 ,
8683
+ smin_big_int.toConst() ,
8648
8684
));
8649
8685
8650
- const rem = try self.wip.bin(.srem, lhs, rhs, "");
8651
- const div_sign = try self.wip.bin(.xor, lhs, rhs , "");
8652
- const div_sign_mask = try self.wip.bin(.ashr, div_sign, bit_size_minus_one , "");
8653
- const rhs_masked = try self.wip.bin(.@"and", rhs, div_sign_mask , "");
8686
+ const rem = try self.wip.bin(.srem, lhs, rhs, "mod.rem ");
8687
+ const rhs_sign = try self.wip.bin(.@"and", rhs, smin , "mod.rhs_sign ");
8688
+ const rem_xor_rhs_sign = try self.wip.bin(.xor, rem, rhs_sign , "mod.rem_xor_rhs_sign ");
8689
+ const need_correction = try self.wip.icmp(.ugt, rem_xor_rhs_sign, smin , "mod.need_correction ");
8654
8690
const zero = try o.builder.zeroInitValue(inst_llvm_ty);
8655
- const rem_nonzero = try self.wip.icmp(.ne, rem, zero, "");
8656
- const correction = try self.wip.select(.normal, rem_nonzero, rhs_masked, zero, "");
8657
- return self.wip.bin(.@"add nsw", rem, correction, "");
8691
+ const correction = try self.wip.select(.normal, need_correction, rhs, zero, "mod.correction");
8692
+ return self.wip.bin(.@"add nsw", correction, rem, "mod");
8658
8693
}
8659
8694
return self.wip.bin(.urem, lhs, rhs, "");
8660
8695
}
0 commit comments