Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISC-V] Fused multiply-add #113961

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/coreclr/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,17 @@ function(create_standalone_jit)
target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_SIMD)
target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_HW_INTRINSICS)
target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_HW_INTRINSICS)
elseif (TARGETDETAILS_ARCH STREQUAL "riscv64")
target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_HW_INTRINSICS)
endif ()
Comment on lines 69 to 74
Copy link
Contributor Author

@tomeksowi tomeksowi Mar 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still hesitating whether separating FEATURE_HW_INTRINSICS from SIMD and MASKED_HW_INTRINSICS is the right approach, there's been little code separation between them. Until we bring up the "V" extension the intrinsics RISC-V needs are scalar (e.g. FusedMultiplyAdd, LeadingZeroCount, PopCount, MultiplyHigh) and usually covered in common CoreLib parts like System.Math.

An alternative would be to handle them like #113689 and fix the value numbering for 3-operand GT_INTRINSIC to get FMA:

// TODO-CQ-XArch: Ideally we would create a GT_INTRINSIC node for fma, however, that currently
// requires more extensive changes to valuenum to support methods with 3 operands
// We want to generate a GT_INTRINSIC node in the case the call can't be treated as
// a target intrinsic so that we can still benefit from CSE and constant folding.

Not sure if changes would be less extensive than maintaining the above mentioned separation.

@dotnet/jit-contrib @jakobbotsch if you have an opinion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LeadingZeroCount and friends look doable via GT_INTRINSIC (WiP). So that leaves us with FMA as the odd one out.

endfunction()

if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_HOST_UNIX))
add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_SIMD>)
add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_HW_INTRINSICS>)
add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_MASKED_HW_INTRINSICS>)
elseif (CLR_CMAKE_TARGET_ARCH_RISCV64)
add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_HW_INTRINSICS>)
endif ()

# JIT_BUILD disables certain PAL_TRY debugging features
Expand Down Expand Up @@ -274,6 +278,8 @@ set( JIT_RISCV64_SOURCES
lsrariscv64.cpp
targetriscv64.cpp
unwindriscv64.cpp
hwintrinsicriscv64.cpp
hwintrinsiccodegenriscv64.cpp
)

# We include the headers here for better experience in IDEs.
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,8 @@ bool IntegralRange::Contains(int64_t value) const
// Note: No advantage in using a precise range for IntegralRange.
// Example: IntCns = 42 gives [0..127] with a non -precise range, [42,42] with a precise range.
return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ByteMax};
#elif defined(TARGET_RISCV64)
// No integral ranges so far
#else
#error Unsupported platform
#endif
Expand Down Expand Up @@ -3121,12 +3123,13 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,
{
return false;
}

#ifdef FEATURE_SIMD
// For several of the scenarios we may skip the costing logic
// since we know that the operand is always containable and therefore
// is always cost effective to propagate.

return parent->ShouldConstantProp(dest, value->AsVecCon());
#endif // FEATURE_SIMD
}
#endif // FEATURE_HW_INTRINSICS
}
Expand Down
9 changes: 4 additions & 5 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3077,9 +3077,7 @@ class Compiler

#ifdef FEATURE_SIMD
void SetOpLclRelatedToSIMDIntrinsic(GenTree* op);
#endif

#ifdef FEATURE_HW_INTRINSICS
GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type,
NamedIntrinsic hwIntrinsicID,
CorInfoType simdBaseJitType,
Expand Down Expand Up @@ -3418,6 +3416,7 @@ class Compiler
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize);
#endif // FEATURE_SIMD

GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID);
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID);
Expand All @@ -3435,7 +3434,6 @@ class Compiler
GenTreeFieldList* gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount);
GenTreeFieldList* gtConvertParamOpToFieldList(GenTree* op, unsigned fieldCount, CORINFO_CLASS_HANDLE clsHnd);
#endif
#endif // FEATURE_HW_INTRINSICS

GenTree* gtNewMemoryBarrier(BarrierKind barrierKind);

Expand Down Expand Up @@ -4677,12 +4675,13 @@ class Compiler
bool mustExpand);

#ifdef FEATURE_HW_INTRINSICS
#ifdef FEATURE_SIMD
bool IsValidForShuffle(GenTree* indices,
unsigned simdSize,
var_types simdBaseType,
bool* canBecomeValid,
bool isShuffleNative) const;

#endif
GenTree* impHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
Expand Down Expand Up @@ -9639,7 +9638,7 @@ class Compiler
// support/nonsupport for an instruction set
bool compIsaSupportedDebugOnly(CORINFO_InstructionSet isa) const
{
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_RISCV64)
return opts.compSupportsISA.HasInstructionSet(isa);
#else
return false;
Expand Down
55 changes: 53 additions & 2 deletions src/coreclr/jit/emitriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,30 @@ void emitter::emitIns_R_R_I_I(
void emitter::emitIns_R_R_R_R(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4)
{
NYI_RISCV64("emitIns_R_R_R_R-----unimplemented/unused on RISCV64 yet----");
NYI_IF(!(INS_fmadd_s <= ins && ins <= INS_fnmadd_s) && !(INS_fmadd_d <= ins && ins <= INS_fnmadd_d),
"illegal ins within emitIns_R_R_R_R!");

assert(isFloatReg(reg1));
assert(isFloatReg(reg2));
assert(isFloatReg(reg3));
assert(isFloatReg(reg4));

code_t code = emitInsCode(ins);
code |= (reg1 & 0x1f) << 7;
code |= (reg2 & 0x1f) << 15;
code |= (reg3 & 0x1f) << 20;
code |= (reg4 & 0x1f) << 27;
code |= 0b111 << 12;

instrDesc* id = emitNewInstr(attr);
id->idIns(ins);
id->idReg1(reg1);
id->idReg2(reg2);
id->idReg3(reg3);
id->idReg4(reg4);
id->idAddr()->iiaSetInstrEncode(code);
id->idCodeSize(4);
appendToCurIG(id);
}

/*****************************************************************************
Expand Down Expand Up @@ -3512,7 +3535,8 @@ void emitter::emitDispInsName(

printf(" ");

switch (GetMajorOpcode(code))
MajorOpcode opcode = GetMajorOpcode(code);
switch (opcode)
{
case MajorOpcode::Lui:
{
Expand Down Expand Up @@ -4351,6 +4375,33 @@ void emitter::emitDispInsName(
}
return;
}
case MajorOpcode::MAdd:
case MajorOpcode::MSub:
case MajorOpcode::NmSub:
case MajorOpcode::NmAdd:
{
unsigned int opcode2 = (code >> 25) & 0b11;
if (opcode2 > 1)
return emitDispIllegalInstruction(code);
char width = "sdhq"[opcode2];

unsigned int opcode4 = (code >> 12) & 0b111;
if (opcode4 != 0b111)
return emitDispIllegalInstruction(code);

static const char* names[] = {"fmadd", "fmsub", "fnmsub", "fnmadd"};

unsigned idx = (unsigned)opcode & 0b11;
const char* name = names[idx];
const char* pad = (idx < 2) ? " " : "";

const char* fd = RegNames[((code >> 7) & 0x1f) | 0x20];
const char* fs1 = RegNames[((code >> 15) & 0x1f) | 0x20];
const char* fs2 = RegNames[((code >> 20) & 0x1f) | 0x20];
const char* fs3 = RegNames[((code >> 27) & 0x1f) | 0x20];
printf("%s.%c%s %s, %s, %s, %s\n", name, width, pad, fd, fs1, fs2, fs3);
return;
}
case MajorOpcode::StoreFp:
{
unsigned int opcode2 = (code >> 12) & 0x7;
Expand Down
8 changes: 6 additions & 2 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1211,9 +1211,11 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector64_CreateScalar:
case NI_Vector64_CreateScalarUnsafe:
#endif // TARGET_ARM64
#if defined(FEATURE_SIMD)
case NI_Vector128_Create:
case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
#endif // FEATURE_SIMD
#if defined(TARGET_XARCH)
case NI_BMI1_TrailingZeroCount:
case NI_BMI1_X64_TrailingZeroCount:
Expand Down Expand Up @@ -1453,6 +1455,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector64_AsUInt64:
case NI_Vector64_op_UnaryPlus:
#endif // TARGET_XARCH
#if defined(FEATURE_SIMD)
case NI_Vector128_As:
case NI_Vector128_AsByte:
case NI_Vector128_AsDouble:
Expand All @@ -1468,6 +1471,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector128_AsUInt64:
case NI_Vector128_AsVector4:
case NI_Vector128_op_UnaryPlus:
#endif // FEATURE_SIMD
#if defined(TARGET_XARCH)
case NI_Vector256_As:
case NI_Vector256_AsByte:
Expand Down Expand Up @@ -1513,7 +1517,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
break;
}

#if defined(FEATURE_HW_INTRINSICS)
#if defined(FEATURE_SIMD)
#if defined(TARGET_ARM64)
case NI_Vector64_get_AllBitsSet:
case NI_Vector64_get_One:
Expand All @@ -1530,7 +1534,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector512_get_One:
case NI_Vector512_get_Zero:
#endif // TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS
{
// These always produce a vector constant

Expand All @@ -1543,6 +1546,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed

break;
}
#endif // FEATURE_SIMD

case NI_SRCS_UNSAFE_NullRef:
case NI_SRCS_UNSAFE_SizeOf:
Expand Down
Loading
Loading