Skip to content

[API Proposal]: Add ADCX and ADOX intrinsics #80674

Open
@benaadams

Description

@benaadams

Background and motivation

ADCX and ADOX were introduced, in combination with MULX, to maintain independent carry chains so that large-integer multiplication can be parallelised (see Intel: New Instructions Supporting Large Integer Arithmetic and Intel: Large Integer Squaring). Code that uses these independent carry chains cannot easily be written in C# directly (third column):

image

API Proposal

namespace System.Runtime.Intrinsics.X86
{
    /// <summary>
    /// Proposed API surface exposing the ADC/ADCX/ADOX add-with-flag instructions
    /// plus helpers that clear, set and read the carry and overflow flags.
    /// </summary>
    /// <remarks>
    /// Every member body in this sketch simply refers to itself; the bodies are
    /// placeholders that show the shape of the API, not implementations.
    /// NOTE(review): presumably the JIT would substitute the instruction named in
    /// each member's comment, as it does for existing hardware intrinsics - confirm.
    /// None of the add methods take or return a flag: the flag input and output
    /// are implicit CPU state rather than parameters.
    /// </remarks>
    public abstract class Adx
    {
        /// <summary>Proposed support check for the 32-bit members of this class.</summary>
        public static bool IsSupported { get => IsSupported; }

        /// <summary>Proposed 64-bit variants of the add-with-flag operations.</summary>
        public abstract class X64
        {
            /// <summary>Proposed support check for the 64-bit members of this class.</summary>
            public static bool IsSupported { get => IsSupported; }

            /// <summary>Proposed 64-bit add mapped to the ADC instruction.</summary>
            /// <param name="left">First 64-bit operand.</param>
            /// <param name="right">Second 64-bit operand.</param>
            /// <returns>The 64-bit result of the instruction.</returns>
            // ADC r64a, reg/m64
            public static ulong AddWithCarry(ulong left, ulong right) => AddWithCarry(left, right);

            /// <summary>Proposed 64-bit add mapped to the ADCX instruction.</summary>
            /// <param name="left">First 64-bit operand.</param>
            /// <param name="right">Second 64-bit operand.</param>
            /// <returns>The 64-bit result of the instruction.</returns>
            // ADCX r64a, reg/m64
            public static ulong AddWithCarryFlagOnly(ulong left, ulong right) => AddWithCarryFlagOnly(left, right);

            /// <summary>Proposed 64-bit add mapped to the ADOX instruction.</summary>
            /// <param name="left">First 64-bit operand.</param>
            /// <param name="right">Second 64-bit operand.</param>
            /// <returns>The 64-bit result of the instruction.</returns>
            // ADOX r64a, reg/m64
            public static ulong AddWithOverflowFlagOnly(ulong left, ulong right) => AddWithOverflowFlagOnly(left, right);
        }

        /// <summary>Proposed helper that clears the flags by XOR-ing a register with itself.</summary>
        //  XOR r32a, r32a
        public static void ClearFlags() => ClearFlags();

        /// <summary>Proposed helper that clears only the carry flag, using the sequence below.</summary>
        // Preserves other flags
        // MOV r32a, 0x0
        // ADCX r32a, r32a
        public static void ClearCarryFlagOnly() => ClearCarryFlagOnly();

        /// <summary>Proposed helper that clears only the overflow flag, using the sequence below.</summary>
        // Preserves other flags
        // MOV r32a, 0x0
        // ADOX r32a, r32a
        public static void ClearOverflowFlagOnly() => ClearOverflowFlagOnly();

        /// <summary>Proposed helper that reads the carry flag via SETC.</summary>
        /// <returns>The carry flag as a <see cref="bool"/>.</returns>
        // SETC r/m8
        public static bool ReadCarryFlag() => ReadCarryFlag();

        /// <summary>Proposed helper that reads the overflow flag via SETO.</summary>
        /// <returns>The overflow flag as a <see cref="bool"/>.</returns>
        // SETO r/m8
        public static bool ReadOverflowFlag() => ReadOverflowFlag();

        /// <summary>Proposed helper that sets both the overflow and carry flags, using the sequence below.</summary>
        // MOV r32a, 0x8000_0000
        // ADC r32a, r32a 
        public static void SetOverflowAndCarryFlag() => SetOverflowAndCarryFlag();

        /// <summary>Proposed helper that sets only the carry flag, using the sequence below.</summary>
        // Preserves other flags
        // MOV r32a, 0xFFFF_FFFF
        // ADCX r32a, r32a 
        public static void SetCarryFlagOnly() => SetCarryFlagOnly();

        /// <summary>Proposed helper that sets only the overflow flag, using the sequence below.</summary>
        // Preserves other flags
        // MOV r32a, 0xFFFF_FFFF
        // ADOX r32a, r32a 
        public static void SetOverflowFlagOnly() => SetOverflowFlagOnly();

        /// <summary>Proposed 32-bit add mapped to the ADC instruction.</summary>
        /// <param name="left">First 32-bit operand.</param>
        /// <param name="right">Second 32-bit operand.</param>
        /// <returns>The 32-bit result of the instruction.</returns>
        // ADC r32a, reg/m32
        public static uint AddWithCarry(uint left, uint right) => AddWithCarry(left, right);

        /// <summary>Proposed 32-bit add mapped to the ADCX instruction.</summary>
        /// <param name="left">First 32-bit operand.</param>
        /// <param name="right">Second 32-bit operand.</param>
        /// <returns>The 32-bit result of the instruction.</returns>
        // ADCX r32a, reg/m32
        public static uint AddWithCarryFlagOnly(uint left, uint right) => AddWithCarryFlagOnly(left, right);

        /// <summary>Proposed 32-bit add mapped to the ADOX instruction.</summary>
        /// <param name="left">First 32-bit operand.</param>
        /// <param name="right">Second 32-bit operand.</param>
        /// <returns>The 32-bit result of the instruction.</returns>
        // ADOX r32a, reg/m32
        public static uint AddWithOverflowFlagOnly(uint left, uint right) => AddWithOverflowFlagOnly(left, right);

    }
}

API Usage

/// <summary>
/// Usage sketch for the proposed <c>Adx</c> API: multiplies the 64-bit limbs of
/// <paramref name="x"/> and <paramref name="y"/> and writes four 64-bit limbs to
/// <paramref name="res"/>.
/// </summary>
/// <param name="x">Left operand; its limbs are read as <c>u0</c>..<c>u3</c>.</param>
/// <param name="y">Right operand; <c>u0</c> is used for the first row and the three
/// limbs starting at <c>u1</c> are iterated in the loop.</param>
/// <param name="res">Receives <c>u0</c>..<c>u3</c> packed through a <c>Vector256&lt;ulong&gt;</c>.</param>
/// <remarks>
/// The statement order matters: the <c>Adx</c> add calls take no explicit flag
/// argument, so each one depends on flag state left by the preceding statements.
/// The fallback branch is elided ("...") in the original.
/// </remarks>
public unsafe static void Multiply(in UInt256 x, in UInt256 y, out UInt256 res)
{
    if (Adx.X64.IsSupported && Bmi2.X64.IsSupported)
    {
        var xv = Unsafe.As<ulong, Vector128<ulong>>(ref Unsafe.AsRef(in x.u0));
        var yv = Unsafe.As<ulong, Vector128<ulong>>(ref Unsafe.AsRef(in y.u0));
        // NOTE(review): xv and yv are not referenced again in this method as shown.

        ulong u0, u1, u2, u3, lower, higher;
        // Initialise the first row of results (x * y.u0).
        // MultiplyNoFlags returns the high half and stores the low half through the pointer.
        u1 = Bmi2.X64.MultiplyNoFlags(x.u0, y.u0, &u0);
        u2 = Bmi2.X64.MultiplyNoFlags(x.u1, y.u0, &lower);
        // NOTE(review): presumably the carry out of this plain addition is expected
        // to feed the AddWithCarry calls that follow - confirm.
        u1 += lower;
                
        u3 = Bmi2.X64.MultiplyNoFlags(x.u2, y.u0, &lower);
        u2 = Adx.X64.AddWithCarry(u2, lower);

        // The high half of x.u3 * y.u0 is stored in 'higher' but overwritten in the loop before use.
        higher = Bmi2.X64.MultiplyNoFlags(x.u3, y.u0, &lower);
        u3 = Adx.X64.AddWithCarry(u3, lower);

        // Next rows: iterate over y.u1 and the two limbs that follow it.
        var span = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(in y.u1), 3);
        for (var i = 0; i < span.Length; i++) 
        {
            var u = span[i];

            // Reset the flags, then interleave two chains: low halves are added with
            // AddWithCarryFlagOnly (ADCX) and high halves with AddWithOverflowFlagOnly (ADOX).
            // NOTE(review): every iteration accumulates into the same u0..u3 limbs;
            // no per-row shift by i is visible - confirm this is intended for the illustration.
            Adx.ClearFlags();
            higher = Bmi2.X64.MultiplyNoFlags(x.u0, u, &lower);
            u0 = Adx.X64.AddWithCarryFlagOnly(u0, lower);
            u1 = Adx.X64.AddWithOverflowFlagOnly(u1, higher);

            higher = Bmi2.X64.MultiplyNoFlags(x.u1, u, &lower);
            u1 = Adx.X64.AddWithCarryFlagOnly(u1, lower);
            u2 = Adx.X64.AddWithOverflowFlagOnly(u2, higher);

            higher = Bmi2.X64.MultiplyNoFlags(x.u2, u, &lower);
            u2 = Adx.X64.AddWithCarryFlagOnly(u2, lower);
            u3 = Adx.X64.AddWithOverflowFlagOnly(u3, higher);

            // The high half of the last product is not added anywhere.
            higher = Bmi2.X64.MultiplyNoFlags(x.u3, u, &lower);
            u3 = Adx.X64.AddWithCarryFlagOnly(u3, lower);
        }

        // Mark res as initialized so we can use it as the left side of the ref assignment.
        Unsafe.SkipInit(out res);
        Unsafe.As<UInt256, Vector256<ulong>>(ref res) = Vector256.Create(u0, u1, u2, u3);
    }
    else
    {
       ...
    }
}

Alternative Designs

Return the CF/OF flag from each method, as the equivalent C/C++ intrinsic does:

// C/C++ intrinsic quoted for comparison: the flag is passed in explicitly as c_in
// and the sum is written through sum_out.
// NOTE(review): per the surrounding text the return value is the resulting
// CF/OF flag - confirm against the intrinsic's reference documentation.
unsigned char _addcarryx_u64(
   unsigned char c_in,
   unsigned __int64 src1,
   unsigned __int64 src2, 
   unsigned __int64 *sum_out);

However, this just complicates usage, and it adds optimization work for the JIT, which would have to eliminate the temporary flag variables that are merely passed on to the next call.

Additional methods have been added to extract the flag if necessary.

Risks

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    api-suggestion (Early API idea and discussion, it is NOT ready for implementation) · area-System.Runtime.Intrinsics · blocked (Issue/PR is blocked on something - see comments)

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions