Open
Description
Background and motivation
ADCX
and ADOX
have been introduced in combination with MULX
to maintain independent carry chains so that large integer multiplication can be parallelised (see Intel: New Instructions Supporting Large Integer Arithmetic and Intel: Large Integer Squaring). These instructions cannot easily be expressed in C# directly (third column):
API Proposal
namespace System.Runtime.Intrinsics.X86
{
/// <summary>
/// Proposed intrinsic surface for the x86 ADX instructions (ADCX / ADOX), together with
/// helpers that clear, set and read the carry (CF) and overflow (OF) flags.
/// </summary>
/// <remarks>
/// Every member body below simply calls itself. NOTE(review): presumably, as with the other
/// System.Runtime.Intrinsics.X86 classes, the JIT is expected to replace each call with the
/// instruction(s) named in the comment above the member - confirm against the runtime.
/// The methods take no explicit flag argument: the carry-in / carry-out travels through the
/// CPU flags between consecutive calls rather than through parameters or return values.
/// </remarks>
public abstract class Adx
{
/// <summary>Gets whether the 32-bit members of this class can be used on the current hardware.</summary>
public static bool IsSupported { get => IsSupported; }
/// <summary>64-bit operand variants of the add-with-flag operations.</summary>
public abstract class X64
{
/// <summary>Gets whether the 64-bit members of this class can be used on the current hardware.</summary>
public static bool IsSupported { get => IsSupported; }
/// <summary>64-bit add of <paramref name="left"/> and <paramref name="right"/> using the ADC instruction.</summary>
// ADC r64a, reg/m64
public static ulong AddWithCarry(ulong left, ulong right) => AddWithCarry(left, right);
/// <summary>64-bit add using ADCX, i.e. the carry chain that lives in the carry flag only.</summary>
// ADCX r64a, reg/m64
public static ulong AddWithCarryFlagOnly(ulong left, ulong right) => AddWithCarryFlagOnly(left, right);
/// <summary>64-bit add using ADOX, i.e. the carry chain that lives in the overflow flag only.</summary>
// ADOX r64a, reg/m64
public static ulong AddWithOverflowFlagOnly(ulong left, ulong right) => AddWithOverflowFlagOnly(left, right);
}
/// <summary>
/// Resets the flags by XOR-ing a register with itself. Unlike the *Only helpers below, this
/// member is not annotated as preserving the remaining flags.
/// </summary>
// XOR r32a, r32a
public static void ClearFlags() => ClearFlags();
/// <summary>Clears the carry flag by adding zero to zero with ADCX.</summary>
// Preserves other flags
// MOV r32a, 0x0
// ADCX r32a, r32a
public static void ClearCarryFlagOnly() => ClearCarryFlagOnly();
/// <summary>Clears the overflow flag by adding zero to zero with ADOX.</summary>
// Preserves other flags
// MOV r32a, 0x0
// ADOX r32a, r32a
public static void ClearOverflowFlagOnly() => ClearOverflowFlagOnly();
/// <summary>Returns the current state of the carry flag.</summary>
// SETC r/m8
public static bool ReadCarryFlag() => ReadCarryFlag();
/// <summary>Returns the current state of the overflow flag.</summary>
// SETO r/m8
public static bool ReadOverflowFlag() => ReadOverflowFlag();
/// <summary>Sets both the overflow and the carry flag by adding 0x8000_0000 to itself with ADC.</summary>
// MOV r32a, 0x8000_0000
// ADC r32a, r32a
public static void SetOverflowAndCarryFlag() => SetOverflowAndCarryFlag();
/// <summary>Sets the carry flag by adding 0xFFFF_FFFF to itself with ADCX.</summary>
// Preserves other flags
// MOV r32a, 0xFFFF_FFFF
// ADCX r32a, r32a
public static void SetCarryFlagOnly() => SetCarryFlagOnly();
/// <summary>Sets the overflow flag by adding 0xFFFF_FFFF to itself with ADOX.</summary>
// Preserves other flags
// MOV r32a, 0xFFFF_FFFF
// ADOX r32a, r32a
public static void SetOverflowFlagOnly() => SetOverflowFlagOnly();
/// <summary>32-bit add of <paramref name="left"/> and <paramref name="right"/> using the ADC instruction.</summary>
// ADC r32a, reg/m32
public static uint AddWithCarry(uint left, uint right) => AddWithCarry(left, right);
/// <summary>32-bit add using ADCX, i.e. the carry chain that lives in the carry flag only.</summary>
// ADCX r32a, reg/m32
public static uint AddWithCarryFlagOnly(uint left, uint right) => AddWithCarryFlagOnly(left, right);
/// <summary>32-bit add using ADOX, i.e. the carry chain that lives in the overflow flag only.</summary>
// ADOX r32a, reg/m32
public static uint AddWithOverflowFlagOnly(uint left, uint right) => AddWithOverflowFlagOnly(left, right);
}
}
API Usage
/// <summary>
/// Usage sketch for the proposed Adx API: multiplies <paramref name="x"/> by <paramref name="y"/>
/// one 64-bit limb at a time and stores the four accumulated limbs (u0..u3) in <paramref name="res"/>.
/// </summary>
/// <remarks>
/// The statements in the accelerated path communicate through the CPU carry/overflow flags, so
/// their relative order is significant. NOTE(review): this assumes no flag-modifying instruction
/// is emitted between consecutive Adx calls - confirm that the JIT can guarantee this.
/// </remarks>
public unsafe static void Multiply(in UInt256 x, in UInt256 y, out UInt256 res)
{
// Accelerated path needs both the 64-bit ADX adds and the 64-bit BMI2 multiply.
if (Adx.X64.IsSupported && Bmi2.X64.IsSupported)
{
// NOTE(review): xv and yv are not referenced again in the visible code - possibly leftovers.
var xv = Unsafe.As<ulong, Vector128<ulong>>(ref Unsafe.AsRef(in x.u0));
var yv = Unsafe.As<ulong, Vector128<ulong>>(ref Unsafe.AsRef(in y.u0));
// u0..u3 accumulate the result limbs; lower/higher hold the two halves of each 64x64 product.
ulong u0, u1, u2, u3, lower, higher;
// Initialise first row of results (every limb of x times y.u0).
// MultiplyNoFlags returns the high half of the product and writes the low half through the pointer.
u1 = Bmi2.X64.MultiplyNoFlags(x.u0, y.u0, &u0);
u2 = Bmi2.X64.MultiplyNoFlags(x.u1, y.u0, &lower);
// NOTE(review): the AddWithCarry calls below presumably consume the carry left behind by this addition - confirm.
u1 += lower;
u3 = Bmi2.X64.MultiplyNoFlags(x.u2, y.u0, &lower);
u2 = Adx.X64.AddWithCarry(u2, lower);
// NOTE(review): this value of higher is overwritten at the top of the loop before it is read.
higher = Bmi2.X64.MultiplyNoFlags(x.u3, y.u0, &lower);
u3 = Adx.X64.AddWithCarry(u3, lower);
// Next rows: view the three ulongs starting at y.u1 as a span
// (presumably y.u1, y.u2, y.u3 laid out contiguously - verify against UInt256's layout).
var span = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in y.u1), 3);
for (var i = 0; i < span.Length; i++)
{
var u = span[i];
// Reset the flags so both carry chains start clean for this row.
Adx.ClearFlags();
// Low halves are accumulated on the carry-flag chain, high halves on the overflow-flag chain.
higher = Bmi2.X64.MultiplyNoFlags(x.u0, u, &lower);
u0 = Adx.X64.AddWithCarryFlagOnly(u0, lower);
u1 = Adx.X64.AddWithOverflowFlagOnly(u1, higher);
higher = Bmi2.X64.MultiplyNoFlags(x.u1, u, &lower);
u1 = Adx.X64.AddWithCarryFlagOnly(u1, lower);
u2 = Adx.X64.AddWithOverflowFlagOnly(u2, higher);
higher = Bmi2.X64.MultiplyNoFlags(x.u2, u, &lower);
u2 = Adx.X64.AddWithCarryFlagOnly(u2, lower);
u3 = Adx.X64.AddWithOverflowFlagOnly(u3, higher);
// NOTE(review): the high half of this last product is not added to any limb.
higher = Bmi2.X64.MultiplyNoFlags(x.u3, u, &lower);
u3 = Adx.X64.AddWithCarryFlagOnly(u3, lower);
}
// Mark res as initialized so we can use it as the left side of the ref assignment below.
Unsafe.SkipInit(out res);
// Write all four limbs to res as a single Vector256 store.
Unsafe.As<UInt256, Vector256<ulong>>(ref res) = Vector256.Create(u0, u1, u2, u3);
}
else
{
// Fallback path is elided in the proposal.
...
}
}
Alternative Designs
Return the CF and OF flags from the methods, as the equivalent C/C++ intrinsic does:
/* Intel C/C++ intrinsic: adds src1 + src2 + c_in, stores the 64-bit sum in *sum_out
   and returns the carry-out, so the flag is threaded explicitly through values. */
unsigned char _addcarryx_u64(
unsigned char c_in,
unsigned __int64 src1,
unsigned __int64 src2,
unsigned __int64 *sum_out);
However, this complicates both the usage and the optimization that the JIT needs to perform to eliminate the temporary variables, which are only passed on to the next function call.
Additional methods have been added to extract the flag if necessary.
Risks
No response